1 /* 2 * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 36 */ 37 38 import java.util.function.Function; 39 import java.util.regex.*; 40 import java.util.Random; 41 import java.io.*; 42 import java.util.*; 43 import java.nio.CharBuffer; 44 import java.util.function.Predicate; 45 46 /** 47 * This is a test class created to check the operation of 48 * the Pattern and Matcher classes. 49 */ 50 public class RegExTest { 51 52 private static Random generator = new Random(); 53 private static boolean failure = false; 54 private static int failCount = 0; 55 private static String firstFailure = null; 56 57 /** 58 * Main to interpret arguments and run several tests. 
59 * 60 */ 61 public static void main(String[] args) throws Exception { 62 // Most of the tests are in a file 63 processFile("TestCases.txt"); 64 //processFile("PerlCases.txt"); 65 processFile("BMPTestCases.txt"); 66 processFile("SupplementaryTestCases.txt"); 67 68 // These test many randomly generated char patterns 69 bm(); 70 slice(); 71 72 // These are hard to put into the file 73 escapes(); 74 blankInput(); 75 76 // Substitition tests on randomly generated sequences 77 globalSubstitute(); 78 stringbufferSubstitute(); 79 stringbuilderSubstitute(); 80 81 substitutionBasher(); 82 substitutionBasher2(); 83 84 // Canonical Equivalence 85 ceTest(); 86 87 // Anchors 88 anchorTest(); 89 90 // boolean match calls 91 matchesTest(); 92 lookingAtTest(); 93 94 // Pattern API 95 patternMatchesTest(); 96 97 // Misc 98 lookbehindTest(); 99 nullArgumentTest(); 100 backRefTest(); 101 groupCaptureTest(); 102 caretTest(); 103 charClassTest(); 104 emptyPatternTest(); 105 findIntTest(); 106 group0Test(); 107 longPatternTest(); 108 octalTest(); 109 ampersandTest(); 110 negationTest(); 111 splitTest(); 112 appendTest(); 113 caseFoldingTest(); 114 commentsTest(); 115 unixLinesTest(); 116 replaceFirstTest(); 117 gTest(); 118 zTest(); 119 serializeTest(); 120 reluctantRepetitionTest(); 121 multilineDollarTest(); 122 dollarAtEndTest(); 123 caretBetweenTerminatorsTest(); 124 // This RFE rejected in Tiger numOccurrencesTest(); 125 javaCharClassTest(); 126 nonCaptureRepetitionTest(); 127 notCapturedGroupCurlyMatchTest(); 128 escapedSegmentTest(); 129 literalPatternTest(); 130 literalReplacementTest(); 131 regionTest(); 132 toStringTest(); 133 negatedCharClassTest(); 134 findFromTest(); 135 boundsTest(); 136 unicodeWordBoundsTest(); 137 caretAtEndTest(); 138 wordSearchTest(); 139 hitEndTest(); 140 toMatchResultTest(); 141 toMatchResultTest2(); 142 surrogatesInClassTest(); 143 removeQEQuotingTest(); 144 namedGroupCaptureTest(); 145 nonBmpClassComplementTest(); 146 unicodePropertiesTest(); 147 
unicodeHexNotationTest(); 148 unicodeClassesTest(); 149 horizontalAndVerticalWSTest(); 150 linebreakTest(); 151 branchTest(); 152 groupCurlyNotFoundSuppTest(); 153 groupCurlyBackoffTest(); 154 patternAsPredicate(); 155 invalidFlags(); 156 157 if (failure) { 158 throw new 159 RuntimeException("RegExTest failed, 1st failure: " + 160 firstFailure); 161 } else { 162 System.err.println("OKAY: All tests passed."); 163 } 164 } 165 166 // Utility functions 167 168 private static String getRandomAlphaString(int length) { 169 StringBuffer buf = new StringBuffer(length); 170 for (int i=0; i<length; i++) { 171 char randChar = (char)(97 + generator.nextInt(26)); 172 buf.append(randChar); 173 } 174 return buf.toString(); 175 } 176 177 private static void check(Matcher m, String expected) { 178 m.find(); 179 if (!m.group().equals(expected)) 180 failCount++; 181 } 182 183 private static void check(Matcher m, String result, boolean expected) { 184 m.find(); 185 if (m.group().equals(result) != expected) 186 failCount++; 187 } 188 189 private static void check(Pattern p, String s, boolean expected) { 190 if (p.matcher(s).find() != expected) 191 failCount++; 192 } 193 194 private static void check(String p, String s, boolean expected) { 195 Matcher matcher = Pattern.compile(p).matcher(s); 196 if (matcher.find() != expected) 197 failCount++; 198 } 199 200 private static void check(String p, char c, boolean expected) { 201 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 202 Pattern pattern = Pattern.compile(propertyPattern); 203 char[] ca = new char[1]; ca[0] = c; 204 Matcher matcher = pattern.matcher(new String(ca)); 205 if (!matcher.find()) 206 failCount++; 207 } 208 209 private static void check(String p, int codePoint, boolean expected) { 210 String propertyPattern = expected ? 
"\\p" + p : "\\P" + p; 211 Pattern pattern = Pattern.compile(propertyPattern); 212 char[] ca = Character.toChars(codePoint); 213 Matcher matcher = pattern.matcher(new String(ca)); 214 if (!matcher.find()) 215 failCount++; 216 } 217 218 private static void check(String p, int flag, String input, String s, 219 boolean expected) 220 { 221 Pattern pattern = Pattern.compile(p, flag); 222 Matcher matcher = pattern.matcher(input); 223 if (expected) 224 check(matcher, s, expected); 225 else 226 check(pattern, input, false); 227 } 228 229 private static void report(String testName) { 230 int spacesToAdd = 30 - testName.length(); 231 StringBuffer paddedNameBuffer = new StringBuffer(testName); 232 for (int i=0; i<spacesToAdd; i++) 233 paddedNameBuffer.append(" "); 234 String paddedName = paddedNameBuffer.toString(); 235 System.err.println(paddedName + ": " + 236 (failCount==0 ? "Passed":"Failed("+failCount+")")); 237 if (failCount > 0) { 238 failure = true; 239 240 if (firstFailure == null) { 241 firstFailure = testName; 242 } 243 } 244 245 failCount = 0; 246 } 247 248 /** 249 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 250 * supplementary characters. This method does NOT fully take care 251 * of the regex syntax. 
252 */ 253 private static String toSupplementaries(String s) { 254 int length = s.length(); 255 StringBuffer sb = new StringBuffer(length * 2); 256 257 for (int i = 0; i < length; ) { 258 char c = s.charAt(i++); 259 if (c == '\\') { 260 sb.append(c); 261 if (i < length) { 262 c = s.charAt(i++); 263 sb.append(c); 264 if (c == 'u') { 265 // assume no syntax error 266 sb.append(s.charAt(i++)); 267 sb.append(s.charAt(i++)); 268 sb.append(s.charAt(i++)); 269 sb.append(s.charAt(i++)); 270 } 271 } 272 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 273 sb.append('\ud800').append((char)('\udc00'+c)); 274 } else { 275 sb.append(c); 276 } 277 } 278 return sb.toString(); 279 } 280 281 // Regular expression tests 282 283 // This is for bug 6178785 284 // Test if an expected NPE gets thrown when passing in a null argument 285 private static boolean check(Runnable test) { 286 try { 287 test.run(); 288 failCount++; 289 return false; 290 } catch (NullPointerException npe) { 291 return true; 292 } 293 } 294 295 private static void nullArgumentTest() { 296 check(() -> Pattern.compile(null)); 297 check(() -> Pattern.matches(null, null)); 298 check(() -> Pattern.matches("xyz", null)); 299 check(() -> Pattern.quote(null)); 300 check(() -> Pattern.compile("xyz").split(null)); 301 check(() -> Pattern.compile("xyz").matcher(null)); 302 303 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 304 m.matches(); 305 check(() -> m.appendTail((StringBuffer) null)); 306 check(() -> m.appendTail((StringBuilder)null)); 307 check(() -> m.replaceAll((String) null)); 308 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 309 check(() -> m.replaceFirst((String)null)); 310 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 311 check(() -> m.appendReplacement((StringBuffer)null, null)); 312 check(() -> m.appendReplacement((StringBuilder)null, null)); 313 check(() -> m.reset(null)); 314 check(() -> Matcher.quoteReplacement(null)); 315 //check(() -> 
m.usePattern(null)); 316 317 report("Null Argument"); 318 } 319 320 // This is for bug6635133 321 // Test if surrogate pair in Unicode escapes can be handled correctly. 322 private static void surrogatesInClassTest() throws Exception { 323 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 324 Matcher matcher = pattern.matcher("\ud834\udd22"); 325 if (!matcher.find()) 326 failCount++; 327 328 report("Surrogate pair in Unicode escape"); 329 } 330 331 // This is for bug6990617 332 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 333 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 334 // char is an octal digit. 335 private static void removeQEQuotingTest() throws Exception { 336 Pattern pattern = 337 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 338 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 339 if (!matcher.find()) 340 failCount++; 341 342 report("Remove Q/E Quoting"); 343 } 344 345 // This is for bug 4988891 346 // Test toMatchResult to see that it is a copy of the Matcher 347 // that is not affected by subsequent operations on the original 348 private static void toMatchResultTest() throws Exception { 349 Pattern pattern = Pattern.compile("squid"); 350 Matcher matcher = pattern.matcher( 351 "agiantsquidofdestinyasmallsquidoffate"); 352 matcher.find(); 353 int matcherStart1 = matcher.start(); 354 MatchResult mr = matcher.toMatchResult(); 355 if (mr == matcher) 356 failCount++; 357 int resultStart1 = mr.start(); 358 if (matcherStart1 != resultStart1) 359 failCount++; 360 matcher.find(); 361 int matcherStart2 = matcher.start(); 362 int resultStart2 = mr.start(); 363 if (matcherStart2 == resultStart2) 364 failCount++; 365 if (resultStart1 != resultStart2) 366 failCount++; 367 MatchResult mr2 = matcher.toMatchResult(); 368 if (mr == mr2) 369 failCount++; 370 if (mr2.start() != matcherStart2) 371 failCount++; 372 report("toMatchResult is a copy"); 373 } 374 375 private 
static void checkExpectedISE(Runnable test) { 376 try { 377 test.run(); 378 failCount++; 379 } catch (IllegalStateException x) { 380 } catch (IndexOutOfBoundsException xx) { 381 failCount++; 382 } 383 } 384 385 private static void checkExpectedIOOE(Runnable test) { 386 try { 387 test.run(); 388 failCount++; 389 } catch (IndexOutOfBoundsException x) {} 390 } 391 392 // This is for bug 8074678 393 // Test the result of toMatchResult throws ISE if no match is availble 394 private static void toMatchResultTest2() throws Exception { 395 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 396 matcher.find(); 397 MatchResult mr = matcher.toMatchResult(); 398 399 checkExpectedISE(() -> mr.start()); 400 checkExpectedISE(() -> mr.start(2)); 401 checkExpectedISE(() -> mr.end()); 402 checkExpectedISE(() -> mr.end(2)); 403 checkExpectedISE(() -> mr.group()); 404 checkExpectedISE(() -> mr.group(2)); 405 406 matcher = Pattern.compile("(match)").matcher("there is a match"); 407 matcher.find(); 408 MatchResult mr2 = matcher.toMatchResult(); 409 checkExpectedIOOE(() -> mr2.start(2)); 410 checkExpectedIOOE(() -> mr2.end(2)); 411 checkExpectedIOOE(() -> mr2.group(2)); 412 413 report("toMatchResult2 appropriate exceptions"); 414 } 415 416 // This is for bug 5013885 417 // Must test a slice to see if it reports hitEnd correctly 418 private static void hitEndTest() throws Exception { 419 // Basic test of Slice node 420 Pattern p = Pattern.compile("^squidattack"); 421 Matcher m = p.matcher("squack"); 422 m.find(); 423 if (m.hitEnd()) 424 failCount++; 425 m.reset("squid"); 426 m.find(); 427 if (!m.hitEnd()) 428 failCount++; 429 430 // Test Slice, SliceA and SliceU nodes 431 for (int i=0; i<3; i++) { 432 int flags = 0; 433 if (i==1) flags = Pattern.CASE_INSENSITIVE; 434 if (i==2) flags = Pattern.UNICODE_CASE; 435 p = Pattern.compile("^abc", flags); 436 m = p.matcher("ad"); 437 m.find(); 438 if (m.hitEnd()) 439 failCount++; 440 m.reset("ab"); 441 m.find(); 442 if 
(!m.hitEnd()) 443 failCount++; 444 } 445 446 // Test Boyer-Moore node 447 p = Pattern.compile("catattack"); 448 m = p.matcher("attack"); 449 m.find(); 450 if (!m.hitEnd()) 451 failCount++; 452 453 p = Pattern.compile("catattack"); 454 m = p.matcher("attackattackattackcatatta"); 455 m.find(); 456 if (!m.hitEnd()) 457 failCount++; 458 report("hitEnd from a Slice"); 459 } 460 461 // This is for bug 4997476 462 // It is weird code submitted by customer demonstrating a regression 463 private static void wordSearchTest() throws Exception { 464 String testString = new String("word1 word2 word3"); 465 Pattern p = Pattern.compile("\\b"); 466 Matcher m = p.matcher(testString); 467 int position = 0; 468 int start = 0; 469 while (m.find(position)) { 470 start = m.start(); 471 if (start == testString.length()) 472 break; 473 if (m.find(start+1)) { 474 position = m.start(); 475 } else { 476 position = testString.length(); 477 } 478 if (testString.substring(start, position).equals(" ")) 479 continue; 480 if (!testString.substring(start, position-1).startsWith("word")) 481 failCount++; 482 } 483 report("Customer word search"); 484 } 485 486 // This is for bug 4994840 487 private static void caretAtEndTest() throws Exception { 488 // Problem only occurs with multiline patterns 489 // containing a beginning-of-line caret "^" followed 490 // by an expression that also matches the empty string. 
491 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 492 Matcher matcher = pattern.matcher("\r"); 493 matcher.find(); 494 matcher.find(); 495 report("Caret at end"); 496 } 497 498 // This test is for 4979006 499 // Check to see if word boundary construct properly handles unicode 500 // non spacing marks 501 private static void unicodeWordBoundsTest() throws Exception { 502 String spaces = " "; 503 String wordChar = "a"; 504 String nsm = "\u030a"; 505 506 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 507 508 Pattern pattern = Pattern.compile("\\b"); 509 Matcher matcher = pattern.matcher(""); 510 // S=other B=word character N=non spacing mark .=word boundary 511 // SS.BB.SS 512 String input = spaces + wordChar + wordChar + spaces; 513 twoFindIndexes(input, matcher, 2, 4); 514 // SS.BBN.SS 515 input = spaces + wordChar +wordChar + nsm + spaces; 516 twoFindIndexes(input, matcher, 2, 5); 517 // SS.BN.SS 518 input = spaces + wordChar + nsm + spaces; 519 twoFindIndexes(input, matcher, 2, 4); 520 // SS.BNN.SS 521 input = spaces + wordChar + nsm + nsm + spaces; 522 twoFindIndexes(input, matcher, 2, 5); 523 // SSN.BB.SS 524 input = spaces + nsm + wordChar + wordChar + spaces; 525 twoFindIndexes(input, matcher, 3, 5); 526 // SS.BNB.SS 527 input = spaces + wordChar + nsm + wordChar + spaces; 528 twoFindIndexes(input, matcher, 2, 5); 529 // SSNNSS 530 input = spaces + nsm + nsm + spaces; 531 matcher.reset(input); 532 if (matcher.find()) 533 failCount++; 534 // SSN.BBN.SS 535 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 536 twoFindIndexes(input, matcher, 3, 6); 537 538 report("Unicode word boundary"); 539 } 540 541 private static void twoFindIndexes(String input, Matcher matcher, int a, 542 int b) throws Exception 543 { 544 matcher.reset(input); 545 matcher.find(); 546 if (matcher.start() != a) 547 failCount++; 548 matcher.find(); 549 if (matcher.start() != b) 550 failCount++; 551 } 552 553 // This test is for 6284152 554 static 
void check(String regex, String input, String[] expected) { 555 List<String> result = new ArrayList<String>(); 556 Pattern p = Pattern.compile(regex); 557 Matcher m = p.matcher(input); 558 while (m.find()) { 559 result.add(m.group()); 560 } 561 if (!Arrays.asList(expected).equals(result)) 562 failCount++; 563 } 564 565 private static void lookbehindTest() throws Exception { 566 //Positive 567 check("(?<=%.{0,5})foo\\d", 568 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 569 new String[]{"foo1", "foo2", "foo3"}); 570 571 //boundary at end of the lookbehind sub-regex should work consistently 572 //with the boundary just after the lookbehind sub-regex 573 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 574 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 575 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 576 check("(?<!abc \\b)foo", "abc foo", new String[0]); 577 578 //Negative 579 check("(?<!%.{0,5})foo\\d", 580 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 581 new String[] {"foo4", "foo5"}); 582 583 //Positive greedy 584 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 585 586 //Positive reluctant 587 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 588 589 //supplementary 590 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 591 new String[] {"fo\ud800\udc00o"}); 592 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 593 new String[] {"fo\ud800\udc00o"}); 594 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 595 new String[] {"fo\ud800\udc00o"}); 596 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 597 new String[] {"fo\ud800\udc00o"}); 598 report("Lookbehind"); 599 } 600 601 // This test is for 4938995 602 // Check to see if weak region boundaries are transparent to 603 // lookahead and lookbehind constructs 604 private static void boundsTest() throws Exception { 605 String fullMessage = "catdogcat"; 606 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 607 
Matcher matcher = pattern.matcher("catdogca"); 608 matcher.useTransparentBounds(true); 609 if (matcher.find()) 610 failCount++; 611 matcher.reset("atdogcat"); 612 if (matcher.find()) 613 failCount++; 614 matcher.reset(fullMessage); 615 if (!matcher.find()) 616 failCount++; 617 matcher.reset(fullMessage); 618 matcher.region(0,9); 619 if (!matcher.find()) 620 failCount++; 621 matcher.reset(fullMessage); 622 matcher.region(0,6); 623 if (!matcher.find()) 624 failCount++; 625 matcher.reset(fullMessage); 626 matcher.region(3,6); 627 if (!matcher.find()) 628 failCount++; 629 matcher.useTransparentBounds(false); 630 if (matcher.find()) 631 failCount++; 632 633 // Negative lookahead/lookbehind 634 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 635 matcher = pattern.matcher("dogcat"); 636 matcher.useTransparentBounds(true); 637 matcher.region(0,3); 638 if (matcher.find()) 639 failCount++; 640 matcher.reset("catdog"); 641 matcher.region(3,6); 642 if (matcher.find()) 643 failCount++; 644 matcher.useTransparentBounds(false); 645 matcher.reset("dogcat"); 646 matcher.region(0,3); 647 if (!matcher.find()) 648 failCount++; 649 matcher.reset("catdog"); 650 matcher.region(3,6); 651 if (!matcher.find()) 652 failCount++; 653 654 report("Region bounds transparency"); 655 } 656 657 // This test is for 4945394 658 private static void findFromTest() throws Exception { 659 String message = "This is 40 $0 message."; 660 Pattern pat = Pattern.compile("\\$0"); 661 Matcher match = pat.matcher(message); 662 if (!match.find()) 663 failCount++; 664 if (match.find()) 665 failCount++; 666 if (match.find()) 667 failCount++; 668 report("Check for alternating find"); 669 } 670 671 // This test is for 4872664 and 4892980 672 private static void negatedCharClassTest() throws Exception { 673 Pattern pattern = Pattern.compile("[^>]"); 674 Matcher matcher = pattern.matcher("\u203A"); 675 if (!matcher.matches()) 676 failCount++; 677 pattern = Pattern.compile("[^fr]"); 678 matcher = pattern.matcher("a"); 
679 if (!matcher.find()) 680 failCount++; 681 matcher.reset("\u203A"); 682 if (!matcher.find()) 683 failCount++; 684 String s = "for"; 685 String result[] = s.split("[^fr]"); 686 if (!result[0].equals("f")) 687 failCount++; 688 if (!result[1].equals("r")) 689 failCount++; 690 s = "f\u203Ar"; 691 result = s.split("[^fr]"); 692 if (!result[0].equals("f")) 693 failCount++; 694 if (!result[1].equals("r")) 695 failCount++; 696 697 // Test adding to bits, subtracting a node, then adding to bits again 698 pattern = Pattern.compile("[^f\u203Ar]"); 699 matcher = pattern.matcher("a"); 700 if (!matcher.find()) 701 failCount++; 702 matcher.reset("f"); 703 if (matcher.find()) 704 failCount++; 705 matcher.reset("\u203A"); 706 if (matcher.find()) 707 failCount++; 708 matcher.reset("r"); 709 if (matcher.find()) 710 failCount++; 711 matcher.reset("\u203B"); 712 if (!matcher.find()) 713 failCount++; 714 715 // Test subtracting a node, adding to bits, subtracting again 716 pattern = Pattern.compile("[^\u203Ar\u203B]"); 717 matcher = pattern.matcher("a"); 718 if (!matcher.find()) 719 failCount++; 720 matcher.reset("\u203A"); 721 if (matcher.find()) 722 failCount++; 723 matcher.reset("r"); 724 if (matcher.find()) 725 failCount++; 726 matcher.reset("\u203B"); 727 if (matcher.find()) 728 failCount++; 729 matcher.reset("\u203C"); 730 if (!matcher.find()) 731 failCount++; 732 733 report("Negated Character Class"); 734 } 735 736 // This test is for 4628291 737 private static void toStringTest() throws Exception { 738 Pattern pattern = Pattern.compile("b+"); 739 if (pattern.toString() != "b+") 740 failCount++; 741 Matcher matcher = pattern.matcher("aaabbbccc"); 742 String matcherString = matcher.toString(); // unspecified 743 matcher.find(); 744 matcherString = matcher.toString(); // unspecified 745 matcher.region(0,3); 746 matcherString = matcher.toString(); // unspecified 747 matcher.reset(); 748 matcherString = matcher.toString(); // unspecified 749 report("toString"); 750 } 751 752 // 
This test is for 4808962 753 private static void literalPatternTest() throws Exception { 754 int flags = Pattern.LITERAL; 755 756 Pattern pattern = Pattern.compile("abc\\t$^", flags); 757 check(pattern, "abc\\t$^", true); 758 759 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 760 check(pattern, "abc\\t$^", true); 761 762 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 763 check(pattern, "\\Qa^$bcabc\\E", true); 764 check(pattern, "a^$bcabc", false); 765 766 pattern = Pattern.compile("\\\\Q\\\\E"); 767 check(pattern, "\\Q\\E", true); 768 769 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 770 check(pattern, "abcefg\\Q\\Ehij", true); 771 772 pattern = Pattern.compile("\\\\\\Q\\\\E"); 773 check(pattern, "\\\\\\\\", true); 774 775 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 776 check(pattern, "\\Qa^$bcabc\\E", true); 777 check(pattern, "a^$bcabc", false); 778 779 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 780 check(pattern, "\\Qabc\\Edef", true); 781 check(pattern, "abcdef", false); 782 783 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 784 check(pattern, "abc\\Edef", true); 785 check(pattern, "abcdef", false); 786 787 pattern = Pattern.compile(Pattern.quote("\\E")); 788 check(pattern, "\\E", true); 789 790 pattern = Pattern.compile("((((abc.+?:)", flags); 791 check(pattern, "((((abc.+?:)", true); 792 793 flags |= Pattern.MULTILINE; 794 795 pattern = Pattern.compile("^cat$", flags); 796 check(pattern, "abc^cat$def", true); 797 check(pattern, "cat", false); 798 799 flags |= Pattern.CASE_INSENSITIVE; 800 801 pattern = Pattern.compile("abcdef", flags); 802 check(pattern, "ABCDEF", true); 803 check(pattern, "AbCdEf", true); 804 805 flags |= Pattern.DOTALL; 806 807 pattern = Pattern.compile("a...b", flags); 808 check(pattern, "A...b", true); 809 check(pattern, "Axxxb", false); 810 811 flags |= Pattern.CANON_EQ; 812 813 Pattern p = Pattern.compile("testa\u030a", flags); 814 check(pattern, "testa\u030a", false); 815 
check(pattern, "test\u00e5", false); 816 817 // Supplementary character test 818 flags = Pattern.LITERAL; 819 820 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 821 check(pattern, toSupplementaries("abc\\t$^"), true); 822 823 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 824 check(pattern, toSupplementaries("abc\\t$^"), true); 825 826 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 827 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 828 check(pattern, toSupplementaries("a^$bcabc"), false); 829 830 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 831 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 832 check(pattern, toSupplementaries("a^$bcabc"), false); 833 834 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 835 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 836 check(pattern, toSupplementaries("abcdef"), false); 837 838 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 839 check(pattern, toSupplementaries("abc\\Edef"), true); 840 check(pattern, toSupplementaries("abcdef"), false); 841 842 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 843 check(pattern, toSupplementaries("((((abc.+?:)"), true); 844 845 flags |= Pattern.MULTILINE; 846 847 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 848 check(pattern, toSupplementaries("abc^cat$def"), true); 849 check(pattern, toSupplementaries("cat"), false); 850 851 flags |= Pattern.DOTALL; 852 853 // note: this is case-sensitive. 
854 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 855 check(pattern, toSupplementaries("a...b"), true); 856 check(pattern, toSupplementaries("axxxb"), false); 857 858 flags |= Pattern.CANON_EQ; 859 860 String t = toSupplementaries("test"); 861 p = Pattern.compile(t + "a\u030a", flags); 862 check(pattern, t + "a\u030a", false); 863 check(pattern, t + "\u00e5", false); 864 865 report("Literal pattern"); 866 } 867 868 // This test is for 4803179 869 // This test is also for 4808962, replacement parts 870 private static void literalReplacementTest() throws Exception { 871 int flags = Pattern.LITERAL; 872 873 Pattern pattern = Pattern.compile("abc", flags); 874 Matcher matcher = pattern.matcher("zzzabczzz"); 875 String replaceTest = "$0"; 876 String result = matcher.replaceAll(replaceTest); 877 if (!result.equals("zzzabczzz")) 878 failCount++; 879 880 matcher.reset(); 881 String literalReplacement = matcher.quoteReplacement(replaceTest); 882 result = matcher.replaceAll(literalReplacement); 883 if (!result.equals("zzz$0zzz")) 884 failCount++; 885 886 matcher.reset(); 887 replaceTest = "\\t$\\$"; 888 literalReplacement = matcher.quoteReplacement(replaceTest); 889 result = matcher.replaceAll(literalReplacement); 890 if (!result.equals("zzz\\t$\\$zzz")) 891 failCount++; 892 893 // Supplementary character test 894 pattern = Pattern.compile(toSupplementaries("abc"), flags); 895 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 896 replaceTest = "$0"; 897 result = matcher.replaceAll(replaceTest); 898 if (!result.equals(toSupplementaries("zzzabczzz"))) 899 failCount++; 900 901 matcher.reset(); 902 literalReplacement = matcher.quoteReplacement(replaceTest); 903 result = matcher.replaceAll(literalReplacement); 904 if (!result.equals(toSupplementaries("zzz$0zzz"))) 905 failCount++; 906 907 matcher.reset(); 908 replaceTest = "\\t$\\$"; 909 literalReplacement = matcher.quoteReplacement(replaceTest); 910 result = matcher.replaceAll(literalReplacement); 911 if 
(!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 912 failCount++; 913 914 // IAE should be thrown if backslash or '$' is the last character 915 // in replacement string 916 try { 917 "\uac00".replaceAll("\uac00", "$"); 918 failCount++; 919 } catch (IllegalArgumentException iie) { 920 } catch (Exception e) { 921 failCount++; 922 } 923 try { 924 "\uac00".replaceAll("\uac00", "\\"); 925 failCount++; 926 } catch (IllegalArgumentException iie) { 927 } catch (Exception e) { 928 failCount++; 929 } 930 report("Literal replacement"); 931 } 932 933 // This test is for 4757029 934 private static void regionTest() throws Exception { 935 Pattern pattern = Pattern.compile("abc"); 936 Matcher matcher = pattern.matcher("abcdefabc"); 937 938 matcher.region(0,9); 939 if (!matcher.find()) 940 failCount++; 941 if (!matcher.find()) 942 failCount++; 943 matcher.region(0,3); 944 if (!matcher.find()) 945 failCount++; 946 matcher.region(3,6); 947 if (matcher.find()) 948 failCount++; 949 matcher.region(0,2); 950 if (matcher.find()) 951 failCount++; 952 953 expectRegionFail(matcher, 1, -1); 954 expectRegionFail(matcher, -1, -1); 955 expectRegionFail(matcher, -1, 1); 956 expectRegionFail(matcher, 5, 3); 957 expectRegionFail(matcher, 5, 12); 958 expectRegionFail(matcher, 12, 12); 959 960 pattern = Pattern.compile("^abc$"); 961 matcher = pattern.matcher("zzzabczzz"); 962 matcher.region(0,9); 963 if (matcher.find()) 964 failCount++; 965 matcher.region(3,6); 966 if (!matcher.find()) 967 failCount++; 968 matcher.region(3,6); 969 matcher.useAnchoringBounds(false); 970 if (matcher.find()) 971 failCount++; 972 973 // Supplementary character test 974 pattern = Pattern.compile(toSupplementaries("abc")); 975 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 976 matcher.region(0,9*2); 977 if (!matcher.find()) 978 failCount++; 979 if (!matcher.find()) 980 failCount++; 981 matcher.region(0,3*2); 982 if (!matcher.find()) 983 failCount++; 984 matcher.region(1,3*2); 985 if (matcher.find()) 986 
failCount++; 987 matcher.region(3*2,6*2); 988 if (matcher.find()) 989 failCount++; 990 matcher.region(0,2*2); 991 if (matcher.find()) 992 failCount++; 993 matcher.region(0,2*2+1); 994 if (matcher.find()) 995 failCount++; 996 997 expectRegionFail(matcher, 1*2, -1); 998 expectRegionFail(matcher, -1, -1); 999 expectRegionFail(matcher, -1, 1*2); 1000 expectRegionFail(matcher, 5*2, 3*2); 1001 expectRegionFail(matcher, 5*2, 12*2); 1002 expectRegionFail(matcher, 12*2, 12*2); 1003 1004 pattern = Pattern.compile(toSupplementaries("^abc$")); 1005 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1006 matcher.region(0,9*2); 1007 if (matcher.find()) 1008 failCount++; 1009 matcher.region(3*2,6*2); 1010 if (!matcher.find()) 1011 failCount++; 1012 matcher.region(3*2+1,6*2); 1013 if (matcher.find()) 1014 failCount++; 1015 matcher.region(3*2,6*2-1); 1016 if (matcher.find()) 1017 failCount++; 1018 matcher.region(3*2,6*2); 1019 matcher.useAnchoringBounds(false); 1020 if (matcher.find()) 1021 failCount++; 1022 report("Regions"); 1023 } 1024 1025 private static void expectRegionFail(Matcher matcher, int index1, 1026 int index2) 1027 { 1028 try { 1029 matcher.region(index1, index2); 1030 failCount++; 1031 } catch (IndexOutOfBoundsException ioobe) { 1032 // Correct result 1033 } catch (IllegalStateException ise) { 1034 // Correct result 1035 } 1036 } 1037 1038 // This test is for 4803197 1039 private static void escapedSegmentTest() throws Exception { 1040 1041 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1042 check(pattern, "dir1\\dir2", true); 1043 1044 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1045 check(pattern, "dir1\\dir2\\", true); 1046 1047 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1048 check(pattern, "dir1\\dir2\\", true); 1049 1050 // Supplementary character test 1051 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1052 check(pattern, toSupplementaries("dir1\\dir2"), true); 1053 1054 pattern = 
Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1055 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1056 1057 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1058 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1059 1060 report("Escaped segment"); 1061 } 1062 1063 // This test is for 4792284 1064 private static void nonCaptureRepetitionTest() throws Exception { 1065 String input = "abcdefgh;"; 1066 1067 String[] patterns = new String[] { 1068 "(?:\\w{4})+;", 1069 "(?:\\w{8})*;", 1070 "(?:\\w{2}){2,4};", 1071 "(?:\\w{4}){2,};", // only matches the 1072 ".*?(?:\\w{5})+;", // specified minimum 1073 ".*?(?:\\w{9})*;", // number of reps - OK 1074 "(?:\\w{4})+?;", // lazy repetition - OK 1075 "(?:\\w{4})++;", // possessive repetition - OK 1076 "(?:\\w{2,}?)+;", // non-deterministic - OK 1077 "(\\w{4})+;", // capturing group - OK 1078 }; 1079 1080 for (int i = 0; i < patterns.length; i++) { 1081 // Check find() 1082 check(patterns[i], 0, input, input, true); 1083 // Check matches() 1084 Pattern p = Pattern.compile(patterns[i]); 1085 Matcher m = p.matcher(input); 1086 1087 if (m.matches()) { 1088 if (!m.group(0).equals(input)) 1089 failCount++; 1090 } else { 1091 failCount++; 1092 } 1093 } 1094 1095 report("Non capturing repetition"); 1096 } 1097 1098 // This test is for 6358731 1099 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1100 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1101 Matcher matcher = pattern.matcher("abcd"); 1102 if (!matcher.matches() || 1103 matcher.group(1) != null || 1104 !matcher.group(2).equals("abcd")) { 1105 failCount++; 1106 } 1107 report("Not captured GroupCurly"); 1108 } 1109 1110 // This test is for 4706545 1111 private static void javaCharClassTest() throws Exception { 1112 for (int i=0; i<1000; i++) { 1113 char c = (char)generator.nextInt(); 1114 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1115 check("{javaUpperCase}", c, 
Character.isUpperCase(c)); 1116 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1117 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1118 check("{javaDigit}", c, Character.isDigit(c)); 1119 check("{javaDefined}", c, Character.isDefined(c)); 1120 check("{javaLetter}", c, Character.isLetter(c)); 1121 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1122 check("{javaJavaIdentifierStart}", c, 1123 Character.isJavaIdentifierStart(c)); 1124 check("{javaJavaIdentifierPart}", c, 1125 Character.isJavaIdentifierPart(c)); 1126 check("{javaUnicodeIdentifierStart}", c, 1127 Character.isUnicodeIdentifierStart(c)); 1128 check("{javaUnicodeIdentifierPart}", c, 1129 Character.isUnicodeIdentifierPart(c)); 1130 check("{javaIdentifierIgnorable}", c, 1131 Character.isIdentifierIgnorable(c)); 1132 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1133 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1134 check("{javaISOControl}", c, Character.isISOControl(c)); 1135 check("{javaMirrored}", c, Character.isMirrored(c)); 1136 1137 } 1138 1139 // Supplementary character test 1140 for (int i=0; i<1000; i++) { 1141 int c = generator.nextInt(Character.MAX_CODE_POINT 1142 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1143 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1144 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1145 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1146 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1147 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1148 check("{javaDigit}", c, Character.isDigit(c)); 1149 check("{javaDefined}", c, Character.isDefined(c)); 1150 check("{javaLetter}", c, Character.isLetter(c)); 1151 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1152 check("{javaJavaIdentifierStart}", c, 1153 Character.isJavaIdentifierStart(c)); 1154 check("{javaJavaIdentifierPart}", c, 1155 Character.isJavaIdentifierPart(c)); 1156 check("{javaUnicodeIdentifierStart}", c, 1157 
Character.isUnicodeIdentifierStart(c)); 1158 check("{javaUnicodeIdentifierPart}", c, 1159 Character.isUnicodeIdentifierPart(c)); 1160 check("{javaIdentifierIgnorable}", c, 1161 Character.isIdentifierIgnorable(c)); 1162 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1163 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1164 check("{javaISOControl}", c, Character.isISOControl(c)); 1165 check("{javaMirrored}", c, Character.isMirrored(c)); 1166 } 1167 1168 report("Java character classes"); 1169 } 1170 1171 // This test is for 4523620 1172 /* 1173 private static void numOccurrencesTest() throws Exception { 1174 Pattern pattern = Pattern.compile("aaa"); 1175 1176 if (pattern.numOccurrences("aaaaaa", false) != 2) 1177 failCount++; 1178 if (pattern.numOccurrences("aaaaaa", true) != 4) 1179 failCount++; 1180 1181 pattern = Pattern.compile("^"); 1182 if (pattern.numOccurrences("aaaaaa", false) != 1) 1183 failCount++; 1184 if (pattern.numOccurrences("aaaaaa", true) != 1) 1185 failCount++; 1186 1187 report("Number of Occurrences"); 1188 } 1189 */ 1190 1191 // This test is for 4776374 1192 private static void caretBetweenTerminatorsTest() throws Exception { 1193 int flags1 = Pattern.DOTALL; 1194 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1195 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1196 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1197 1198 check("^....", flags1, "test\ntest", "test", true); 1199 check(".....^", flags1, "test\ntest", "test", false); 1200 check(".....^", flags1, "test\n", "test", false); 1201 check("....^", flags1, "test\r\n", "test", false); 1202 1203 check("^....", flags2, "test\ntest", "test", true); 1204 check("....^", flags2, "test\ntest", "test", false); 1205 check(".....^", flags2, "test\n", "test", false); 1206 check("....^", flags2, "test\r\n", "test", false); 1207 1208 check("^....", flags3, "test\ntest", "test", true); 1209 check(".....^", flags3, "test\ntest", "test\n", true); 1210 
check(".....^", flags3, "test\u0085test", "test\u0085", false); 1211 check(".....^", flags3, "test\n", "test", false); 1212 check(".....^", flags3, "test\r\n", "test", false); 1213 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1214 1215 check("^....", flags4, "test\ntest", "test", true); 1216 check(".....^", flags3, "test\ntest", "test\n", true); 1217 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1218 check(".....^", flags4, "test\n", "test\n", false); 1219 check(".....^", flags4, "test\r\n", "test\r", false); 1220 1221 // Supplementary character test 1222 String t = toSupplementaries("test"); 1223 check("^....", flags1, t+"\n"+t, t, true); 1224 check(".....^", flags1, t+"\n"+t, t, false); 1225 check(".....^", flags1, t+"\n", t, false); 1226 check("....^", flags1, t+"\r\n", t, false); 1227 1228 check("^....", flags2, t+"\n"+t, t, true); 1229 check("....^", flags2, t+"\n"+t, t, false); 1230 check(".....^", flags2, t+"\n", t, false); 1231 check("....^", flags2, t+"\r\n", t, false); 1232 1233 check("^....", flags3, t+"\n"+t, t, true); 1234 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1235 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1236 check(".....^", flags3, t+"\n", t, false); 1237 check(".....^", flags3, t+"\r\n", t, false); 1238 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1239 1240 check("^....", flags4, t+"\n"+t, t, true); 1241 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1242 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1243 check(".....^", flags4, t+"\n", t+"\n", false); 1244 check(".....^", flags4, t+"\r\n", t+"\r", false); 1245 1246 report("Caret between terminators"); 1247 } 1248 1249 // This test is for 4727935 1250 private static void dollarAtEndTest() throws Exception { 1251 int flags1 = Pattern.DOTALL; 1252 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1253 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1254 1255 check("....$", flags1, "test\n", "test", true); 1256 
check("....$", flags1, "test\r\n", "test", true); 1257 check(".....$", flags1, "test\n", "test\n", true); 1258 check(".....$", flags1, "test\u0085", "test\u0085", true); 1259 check("....$", flags1, "test\u0085", "test", true); 1260 1261 check("....$", flags2, "test\n", "test", true); 1262 check(".....$", flags2, "test\n", "test\n", true); 1263 check(".....$", flags2, "test\u0085", "test\u0085", true); 1264 check("....$", flags2, "test\u0085", "est\u0085", true); 1265 1266 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1267 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1268 check("....$blah", flags3, "test\nblah", "!!!!", false); 1269 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1270 1271 // Supplementary character test 1272 String t = toSupplementaries("test"); 1273 String b = toSupplementaries("blah"); 1274 check("....$", flags1, t+"\n", t, true); 1275 check("....$", flags1, t+"\r\n", t, true); 1276 check(".....$", flags1, t+"\n", t+"\n", true); 1277 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1278 check("....$", flags1, t+"\u0085", t, true); 1279 1280 check("....$", flags2, t+"\n", t, true); 1281 check(".....$", flags2, t+"\n", t+"\n", true); 1282 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1283 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1284 1285 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1286 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1287 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1288 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1289 1290 report("Dollar at End"); 1291 } 1292 1293 // This test is for 4711773 1294 private static void multilineDollarTest() throws Exception { 1295 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1296 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1297 matcher.find(); 1298 if (matcher.start(0) != 9) 1299 failCount++; 1300 matcher.find(); 1301 if (matcher.start(0) != 
20) 1302 failCount++; 1303 1304 // Supplementary character test 1305 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1306 matcher.find(); 1307 if (matcher.start(0) != 9*2) 1308 failCount++; 1309 matcher.find(); 1310 if (matcher.start(0) != 20*2) 1311 failCount++; 1312 1313 report("Multiline Dollar"); 1314 } 1315 1316 private static void reluctantRepetitionTest() throws Exception { 1317 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1318 check(p, "1 word word word 2", true); 1319 check(p, "1 wor wo w 2", true); 1320 check(p, "1 word word 2", true); 1321 check(p, "1 word 2", true); 1322 check(p, "1 wo w w 2", true); 1323 check(p, "1 wo w 2", true); 1324 check(p, "1 wor w 2", true); 1325 1326 p = Pattern.compile("([a-z])+?c"); 1327 Matcher m = p.matcher("ababcdefdec"); 1328 check(m, "ababc"); 1329 1330 // Supplementary character test 1331 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1332 m = p.matcher(toSupplementaries("ababcdefdec")); 1333 check(m, toSupplementaries("ababc")); 1334 1335 report("Reluctant Repetition"); 1336 } 1337 1338 private static void serializeTest() throws Exception { 1339 String patternStr = "(b)"; 1340 String matchStr = "b"; 1341 Pattern pattern = Pattern.compile(patternStr); 1342 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1343 ObjectOutputStream oos = new ObjectOutputStream(baos); 1344 oos.writeObject(pattern); 1345 oos.close(); 1346 ObjectInputStream ois = new ObjectInputStream( 1347 new ByteArrayInputStream(baos.toByteArray())); 1348 Pattern serializedPattern = (Pattern)ois.readObject(); 1349 ois.close(); 1350 Matcher matcher = serializedPattern.matcher(matchStr); 1351 if (!matcher.matches()) 1352 failCount++; 1353 if (matcher.groupCount() != 1) 1354 failCount++; 1355 1356 report("Serialization"); 1357 } 1358 1359 private static void gTest() { 1360 Pattern pattern = Pattern.compile("\\G\\w"); 1361 Matcher matcher = pattern.matcher("abc#x#x"); 1362 matcher.find(); 
1363 matcher.find(); 1364 matcher.find(); 1365 if (matcher.find()) 1366 failCount++; 1367 1368 pattern = Pattern.compile("\\GA*"); 1369 matcher = pattern.matcher("1A2AA3"); 1370 matcher.find(); 1371 if (matcher.find()) 1372 failCount++; 1373 1374 pattern = Pattern.compile("\\GA*"); 1375 matcher = pattern.matcher("1A2AA3"); 1376 if (!matcher.find(1)) 1377 failCount++; 1378 matcher.find(); 1379 if (matcher.find()) 1380 failCount++; 1381 1382 report("\\G"); 1383 } 1384 1385 private static void zTest() { 1386 Pattern pattern = Pattern.compile("foo\\Z"); 1387 // Positives 1388 check(pattern, "foo\u0085", true); 1389 check(pattern, "foo\u2028", true); 1390 check(pattern, "foo\u2029", true); 1391 check(pattern, "foo\n", true); 1392 check(pattern, "foo\r", true); 1393 check(pattern, "foo\r\n", true); 1394 // Negatives 1395 check(pattern, "fooo", false); 1396 check(pattern, "foo\n\r", false); 1397 1398 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1399 // Positives 1400 check(pattern, "foo", true); 1401 check(pattern, "foo\n", true); 1402 // Negatives 1403 check(pattern, "foo\r", false); 1404 check(pattern, "foo\u0085", false); 1405 check(pattern, "foo\u2028", false); 1406 check(pattern, "foo\u2029", false); 1407 1408 report("\\Z"); 1409 } 1410 1411 private static void replaceFirstTest() { 1412 Pattern pattern = Pattern.compile("(ab)(c*)"); 1413 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1414 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1415 failCount++; 1416 1417 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1418 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1419 failCount++; 1420 1421 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1422 String result = matcher.replaceFirst("$1"); 1423 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1424 failCount++; 1425 1426 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1427 result = matcher.replaceFirst("$2"); 1428 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1429 
failCount++; 1430 1431 pattern = Pattern.compile("a*"); 1432 matcher = pattern.matcher("aaaaaaaaaa"); 1433 if (!matcher.replaceFirst("test").equals("test")) 1434 failCount++; 1435 1436 pattern = Pattern.compile("a+"); 1437 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1438 if (!matcher.replaceFirst("test").equals("zzztest")) 1439 failCount++; 1440 1441 // Supplementary character test 1442 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1443 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1444 if (!matcher.replaceFirst(toSupplementaries("test")) 1445 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1446 failCount++; 1447 1448 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1449 if (!matcher.replaceFirst(toSupplementaries("test")). 1450 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1451 failCount++; 1452 1453 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1454 result = matcher.replaceFirst("$1"); 1455 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1456 failCount++; 1457 1458 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1459 result = matcher.replaceFirst("$2"); 1460 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1461 failCount++; 1462 1463 pattern = Pattern.compile(toSupplementaries("a*")); 1464 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1465 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1466 failCount++; 1467 1468 pattern = Pattern.compile(toSupplementaries("a+")); 1469 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1470 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1471 failCount++; 1472 1473 report("Replace First"); 1474 } 1475 1476 private static void unixLinesTest() { 1477 Pattern pattern = Pattern.compile(".*"); 1478 Matcher matcher = pattern.matcher("aa\u2028blah"); 1479 matcher.find(); 1480 if 
(!matcher.group(0).equals("aa")) 1481 failCount++; 1482 1483 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1484 matcher = pattern.matcher("aa\u2028blah"); 1485 matcher.find(); 1486 if (!matcher.group(0).equals("aa\u2028blah")) 1487 failCount++; 1488 1489 pattern = Pattern.compile("[az]$", 1490 Pattern.MULTILINE | Pattern.UNIX_LINES); 1491 matcher = pattern.matcher("aa\u2028zz"); 1492 check(matcher, "a\u2028", false); 1493 1494 // Supplementary character test 1495 pattern = Pattern.compile(".*"); 1496 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1497 matcher.find(); 1498 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1499 failCount++; 1500 1501 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1502 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1503 matcher.find(); 1504 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1505 failCount++; 1506 1507 pattern = Pattern.compile(toSupplementaries("[az]$"), 1508 Pattern.MULTILINE | Pattern.UNIX_LINES); 1509 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1510 check(matcher, toSupplementaries("a\u2028"), false); 1511 1512 report("Unix Lines"); 1513 } 1514 1515 private static void commentsTest() { 1516 int flags = Pattern.COMMENTS; 1517 1518 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1519 Matcher matcher = pattern.matcher("aa#aa"); 1520 if (!matcher.matches()) 1521 failCount++; 1522 1523 pattern = Pattern.compile("aa # blah", flags); 1524 matcher = pattern.matcher("aa"); 1525 if (!matcher.matches()) 1526 failCount++; 1527 1528 pattern = Pattern.compile("aa blah", flags); 1529 matcher = pattern.matcher("aablah"); 1530 if (!matcher.matches()) 1531 failCount++; 1532 1533 pattern = Pattern.compile("aa # blah blech ", flags); 1534 matcher = pattern.matcher("aa"); 1535 if (!matcher.matches()) 1536 failCount++; 1537 1538 pattern = Pattern.compile("aa # blah\n ", flags); 1539 matcher = pattern.matcher("aa"); 1540 if (!matcher.matches()) 1541 
failCount++; 1542 1543 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1544 matcher = pattern.matcher("aabc"); 1545 if (!matcher.matches()) 1546 failCount++; 1547 1548 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1549 matcher = pattern.matcher("aabc"); 1550 if (!matcher.matches()) 1551 failCount++; 1552 1553 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1554 matcher = pattern.matcher("aabc#blech"); 1555 if (!matcher.matches()) 1556 failCount++; 1557 1558 // Supplementary character test 1559 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1560 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1561 if (!matcher.matches()) 1562 failCount++; 1563 1564 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1565 matcher = pattern.matcher(toSupplementaries("aa")); 1566 if (!matcher.matches()) 1567 failCount++; 1568 1569 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1570 matcher = pattern.matcher(toSupplementaries("aablah")); 1571 if (!matcher.matches()) 1572 failCount++; 1573 1574 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1575 matcher = pattern.matcher(toSupplementaries("aa")); 1576 if (!matcher.matches()) 1577 failCount++; 1578 1579 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1580 matcher = pattern.matcher(toSupplementaries("aa")); 1581 if (!matcher.matches()) 1582 failCount++; 1583 1584 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1585 matcher = pattern.matcher(toSupplementaries("aabc")); 1586 if (!matcher.matches()) 1587 failCount++; 1588 1589 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1590 matcher = pattern.matcher(toSupplementaries("aabc")); 1591 if (!matcher.matches()) 1592 failCount++; 1593 1594 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1595 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1596 if 
(!matcher.matches()) 1597 failCount++; 1598 1599 report("Comments"); 1600 } 1601 1602 private static void caseFoldingTest() { // bug 4504687 1603 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1604 Pattern pattern = Pattern.compile("aa", flags); 1605 Matcher matcher = pattern.matcher("ab"); 1606 if (matcher.matches()) 1607 failCount++; 1608 1609 pattern = Pattern.compile("aA", flags); 1610 matcher = pattern.matcher("ab"); 1611 if (matcher.matches()) 1612 failCount++; 1613 1614 pattern = Pattern.compile("aa", flags); 1615 matcher = pattern.matcher("aB"); 1616 if (matcher.matches()) 1617 failCount++; 1618 matcher = pattern.matcher("Ab"); 1619 if (matcher.matches()) 1620 failCount++; 1621 1622 // ASCII "a" 1623 // Latin-1 Supplement "a" + grave 1624 // Cyrillic "a" 1625 String[] patterns = new String[] { 1626 //single 1627 "a", "\u00e0", "\u0430", 1628 //slice 1629 "ab", "\u00e0\u00e1", "\u0430\u0431", 1630 //class single 1631 "[a]", "[\u00e0]", "[\u0430]", 1632 //class range 1633 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1634 //back reference 1635 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1636 }; 1637 1638 String[] texts = new String[] { 1639 "A", "\u00c0", "\u0410", 1640 "AB", "\u00c0\u00c1", "\u0410\u0411", 1641 "A", "\u00c0", "\u0410", 1642 "B", "\u00c2", "\u0411", 1643 "aA", "\u00e0\u00c0", "\u0430\u0410" 1644 }; 1645 1646 boolean[] expected = new boolean[] { 1647 true, false, false, 1648 true, false, false, 1649 true, false, false, 1650 true, false, false, 1651 true, false, false 1652 }; 1653 1654 flags = Pattern.CASE_INSENSITIVE; 1655 for (int i = 0; i < patterns.length; i++) { 1656 pattern = Pattern.compile(patterns[i], flags); 1657 matcher = pattern.matcher(texts[i]); 1658 if (matcher.matches() != expected[i]) { 1659 System.out.println("<1> Failed at " + i); 1660 failCount++; 1661 } 1662 } 1663 1664 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1665 for (int i = 0; i < patterns.length; i++) { 1666 pattern = 
Pattern.compile(patterns[i], flags); 1667 matcher = pattern.matcher(texts[i]); 1668 if (!matcher.matches()) { 1669 System.out.println("<2> Failed at " + i); 1670 failCount++; 1671 } 1672 } 1673 // flag unicode_case alone should do nothing 1674 flags = Pattern.UNICODE_CASE; 1675 for (int i = 0; i < patterns.length; i++) { 1676 pattern = Pattern.compile(patterns[i], flags); 1677 matcher = pattern.matcher(texts[i]); 1678 if (matcher.matches()) { 1679 System.out.println("<3> Failed at " + i); 1680 failCount++; 1681 } 1682 } 1683 1684 // Special cases: i, I, u+0131 and u+0130 1685 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1686 pattern = Pattern.compile("[h-j]+", flags); 1687 if (!pattern.matcher("\u0131\u0130").matches()) 1688 failCount++; 1689 report("Case Folding"); 1690 } 1691 1692 private static void appendTest() { 1693 Pattern pattern = Pattern.compile("(ab)(cd)"); 1694 Matcher matcher = pattern.matcher("abcd"); 1695 String result = matcher.replaceAll("$2$1"); 1696 if (!result.equals("cdab")) 1697 failCount++; 1698 1699 String s1 = "Swap all: first = 123, second = 456"; 1700 String s2 = "Swap one: first = 123, second = 456"; 1701 String r = "$3$2$1"; 1702 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1703 matcher = pattern.matcher(s1); 1704 1705 result = matcher.replaceAll(r); 1706 if (!result.equals("Swap all: 123 = first, 456 = second")) 1707 failCount++; 1708 1709 matcher = pattern.matcher(s2); 1710 1711 if (matcher.find()) { 1712 StringBuffer sb = new StringBuffer(); 1713 matcher.appendReplacement(sb, r); 1714 matcher.appendTail(sb); 1715 result = sb.toString(); 1716 if (!result.equals("Swap one: 123 = first, second = 456")) 1717 failCount++; 1718 } 1719 1720 // Supplementary character test 1721 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1722 matcher = pattern.matcher(toSupplementaries("abcd")); 1723 result = matcher.replaceAll("$2$1"); 1724 if (!result.equals(toSupplementaries("cdab"))) 1725 failCount++; 1726 1727 s1 = 
toSupplementaries("Swap all: first = 123, second = 456"); 1728 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1729 r = toSupplementaries("$3$2$1"); 1730 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1731 matcher = pattern.matcher(s1); 1732 1733 result = matcher.replaceAll(r); 1734 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1735 failCount++; 1736 1737 matcher = pattern.matcher(s2); 1738 1739 if (matcher.find()) { 1740 StringBuffer sb = new StringBuffer(); 1741 matcher.appendReplacement(sb, r); 1742 matcher.appendTail(sb); 1743 result = sb.toString(); 1744 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1745 failCount++; 1746 } 1747 report("Append"); 1748 } 1749 1750 private static void splitTest() { 1751 Pattern pattern = Pattern.compile(":"); 1752 String[] result = pattern.split("foo:and:boo", 2); 1753 if (!result[0].equals("foo")) 1754 failCount++; 1755 if (!result[1].equals("and:boo")) 1756 failCount++; 1757 // Supplementary character test 1758 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1759 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1760 if (!result[0].equals(toSupplementaries("foo"))) 1761 failCount++; 1762 if (!result[1].equals(toSupplementaries("andXboo"))) 1763 failCount++; 1764 1765 CharBuffer cb = CharBuffer.allocate(100); 1766 cb.put("foo:and:boo"); 1767 cb.flip(); 1768 result = pattern.split(cb); 1769 if (!result[0].equals("foo")) 1770 failCount++; 1771 if (!result[1].equals("and")) 1772 failCount++; 1773 if (!result[2].equals("boo")) 1774 failCount++; 1775 1776 // Supplementary character test 1777 CharBuffer cbs = CharBuffer.allocate(100); 1778 cbs.put(toSupplementaries("fooXandXboo")); 1779 cbs.flip(); 1780 result = patternX.split(cbs); 1781 if (!result[0].equals(toSupplementaries("foo"))) 1782 failCount++; 1783 if (!result[1].equals(toSupplementaries("and"))) 1784 failCount++; 1785 if 
(!result[2].equals(toSupplementaries("boo"))) 1786 failCount++; 1787 1788 String source = "0123456789"; 1789 for (int limit=-2; limit<3; limit++) { 1790 for (int x=0; x<10; x++) { 1791 result = source.split(Integer.toString(x), limit); 1792 int expectedLength = limit < 1 ? 2 : limit; 1793 1794 if ((limit == 0) && (x == 9)) { 1795 // expected dropping of "" 1796 if (result.length != 1) 1797 failCount++; 1798 if (!result[0].equals("012345678")) { 1799 failCount++; 1800 } 1801 } else { 1802 if (result.length != expectedLength) { 1803 failCount++; 1804 } 1805 if (!result[0].equals(source.substring(0,x))) { 1806 if (limit != 1) { 1807 failCount++; 1808 } else { 1809 if (!result[0].equals(source.substring(0,10))) { 1810 failCount++; 1811 } 1812 } 1813 } 1814 if (expectedLength > 1) { // Check segment 2 1815 if (!result[1].equals(source.substring(x+1,10))) 1816 failCount++; 1817 } 1818 } 1819 } 1820 } 1821 // Check the case for no match found 1822 for (int limit=-2; limit<3; limit++) { 1823 result = source.split("e", limit); 1824 if (result.length != 1) 1825 failCount++; 1826 if (!result[0].equals(source)) 1827 failCount++; 1828 } 1829 // Check the case for limit == 0, source = ""; 1830 // split() now returns 0-length for empty source "" see #6559590 1831 source = ""; 1832 result = source.split("e", 0); 1833 if (result.length != 1) 1834 failCount++; 1835 if (!result[0].equals(source)) 1836 failCount++; 1837 1838 // Check both split() and splitAsStraem(), especially for zero-lenth 1839 // input and zero-lenth match cases 1840 String[][] input = new String[][] { 1841 { " ", "Abc Efg Hij" }, // normal non-zero-match 1842 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1843 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1844 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1845 { "(?=\\p{Lu})", "AbcEfg" }, 1846 { "(?=\\p{Lu})", "Abc" }, 1847 { " ", "" }, // zero-length input 1848 { ".*", "" }, 1849 1850 // some tests from 
PatternStreamTest.java 1851 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1852 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1853 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1854 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1855 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1856 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1857 { "\u56da", "" }, 1858 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1859 { "o", "boo:and:foo" }, 1860 { "o", "booooo:and:fooooo" }, 1861 { "o", "fooooo:" }, 1862 }; 1863 1864 String[][] expected = new String[][] { 1865 { "Abc", "Efg", "Hij" }, 1866 { "", "Abc", "Efg", "Hij" }, 1867 { "Abc", "", "Efg", "Hij" }, 1868 { "Abc", "Efg", "Hij" }, 1869 { "Abc", "Efg" }, 1870 { "Abc" }, 1871 { "" }, 1872 { "" }, 1873 1874 { "awgqwefg1fefw", "vssv1vvv1" }, 1875 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1876 { "awgqwefg", "fefw4vssv", "vvv" }, 1877 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1878 { "1", "23", "456", "7890" }, 1879 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1880 { "" }, 1881 { "This", "is", "testing", "", "with", "different", "separators" }, 1882 { "b", "", ":and:f" }, 1883 { "b", "", "", "", "", ":and:f" }, 1884 { "f", "", "", "", "", ":" }, 1885 }; 1886 for (int i = 0; i < input.length; i++) { 1887 pattern = Pattern.compile(input[i][0]); 1888 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1889 failCount++; 1890 } 1891 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1892 // array for zero-length input for now 1893 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1894 expected[i])) { 1895 failCount++; 1896 } 1897 } 1898 report("Split"); 1899 } 1900 1901 private static void negationTest() { 1902 Pattern pattern = Pattern.compile("[\\[@^]+"); 1903 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1904 if (!matcher.find()) 1905 failCount++; 
1906 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1907 failCount++; 1908 pattern = Pattern.compile("[@\\[^]+"); 1909 matcher = pattern.matcher("@@@@[[[[^^^^"); 1910 if (!matcher.find()) 1911 failCount++; 1912 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1913 failCount++; 1914 pattern = Pattern.compile("[@\\[^@]+"); 1915 matcher = pattern.matcher("@@@@[[[[^^^^"); 1916 if (!matcher.find()) 1917 failCount++; 1918 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1919 failCount++; 1920 1921 pattern = Pattern.compile("\\)"); 1922 matcher = pattern.matcher("xxx)xxx"); 1923 if (!matcher.find()) 1924 failCount++; 1925 1926 report("Negation"); 1927 } 1928 1929 private static void ampersandTest() { 1930 Pattern pattern = Pattern.compile("[&@]+"); 1931 check(pattern, "@@@@&&&&", true); 1932 1933 pattern = Pattern.compile("[@&]+"); 1934 check(pattern, "@@@@&&&&", true); 1935 1936 pattern = Pattern.compile("[@\\&]+"); 1937 check(pattern, "@@@@&&&&", true); 1938 1939 report("Ampersand"); 1940 } 1941 1942 private static void octalTest() throws Exception { 1943 Pattern pattern = Pattern.compile("\\u0007"); 1944 Matcher matcher = pattern.matcher("\u0007"); 1945 if (!matcher.matches()) 1946 failCount++; 1947 pattern = Pattern.compile("\\07"); 1948 matcher = pattern.matcher("\u0007"); 1949 if (!matcher.matches()) 1950 failCount++; 1951 pattern = Pattern.compile("\\007"); 1952 matcher = pattern.matcher("\u0007"); 1953 if (!matcher.matches()) 1954 failCount++; 1955 pattern = Pattern.compile("\\0007"); 1956 matcher = pattern.matcher("\u0007"); 1957 if (!matcher.matches()) 1958 failCount++; 1959 pattern = Pattern.compile("\\040"); 1960 matcher = pattern.matcher("\u0020"); 1961 if (!matcher.matches()) 1962 failCount++; 1963 pattern = Pattern.compile("\\0403"); 1964 matcher = pattern.matcher("\u00203"); 1965 if (!matcher.matches()) 1966 failCount++; 1967 pattern = Pattern.compile("\\0103"); 1968 matcher = pattern.matcher("\u0043"); 1969 if (!matcher.matches()) 1970 failCount++; 1971 1972 
report("Octal"); 1973 } 1974 1975 private static void longPatternTest() throws Exception { 1976 try { 1977 Pattern pattern = Pattern.compile( 1978 "a 32-character-long pattern xxxx"); 1979 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 1980 pattern = Pattern.compile("a thirty four character long regex"); 1981 StringBuffer patternToBe = new StringBuffer(101); 1982 for (int i=0; i<100; i++) 1983 patternToBe.append((char)(97 + i%26)); 1984 pattern = Pattern.compile(patternToBe.toString()); 1985 } catch (PatternSyntaxException e) { 1986 failCount++; 1987 } 1988 1989 // Supplementary character test 1990 try { 1991 Pattern pattern = Pattern.compile( 1992 toSupplementaries("a 32-character-long pattern xxxx")); 1993 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 1994 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 1995 StringBuffer patternToBe = new StringBuffer(101*2); 1996 for (int i=0; i<100; i++) 1997 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 1998 + 97 + i%26)); 1999 pattern = Pattern.compile(patternToBe.toString()); 2000 } catch (PatternSyntaxException e) { 2001 failCount++; 2002 } 2003 report("LongPattern"); 2004 } 2005 2006 private static void group0Test() throws Exception { 2007 Pattern pattern = Pattern.compile("(tes)ting"); 2008 Matcher matcher = pattern.matcher("testing"); 2009 check(matcher, "testing"); 2010 2011 matcher.reset("testing"); 2012 if (matcher.lookingAt()) { 2013 if (!matcher.group(0).equals("testing")) 2014 failCount++; 2015 } else { 2016 failCount++; 2017 } 2018 2019 matcher.reset("testing"); 2020 if (matcher.matches()) { 2021 if (!matcher.group(0).equals("testing")) 2022 failCount++; 2023 } else { 2024 failCount++; 2025 } 2026 2027 pattern = Pattern.compile("(tes)ting"); 2028 matcher = pattern.matcher("testing"); 2029 if (matcher.lookingAt()) { 2030 if (!matcher.group(0).equals("testing")) 2031 failCount++; 2032 } else { 2033 
failCount++; 2034 } 2035 2036 pattern = Pattern.compile("^(tes)ting"); 2037 matcher = pattern.matcher("testing"); 2038 if (matcher.matches()) { 2039 if (!matcher.group(0).equals("testing")) 2040 failCount++; 2041 } else { 2042 failCount++; 2043 } 2044 2045 // Supplementary character test 2046 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2047 matcher = pattern.matcher(toSupplementaries("testing")); 2048 check(matcher, toSupplementaries("testing")); 2049 2050 matcher.reset(toSupplementaries("testing")); 2051 if (matcher.lookingAt()) { 2052 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2053 failCount++; 2054 } else { 2055 failCount++; 2056 } 2057 2058 matcher.reset(toSupplementaries("testing")); 2059 if (matcher.matches()) { 2060 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2061 failCount++; 2062 } else { 2063 failCount++; 2064 } 2065 2066 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2067 matcher = pattern.matcher(toSupplementaries("testing")); 2068 if (matcher.lookingAt()) { 2069 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2070 failCount++; 2071 } else { 2072 failCount++; 2073 } 2074 2075 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2076 matcher = pattern.matcher(toSupplementaries("testing")); 2077 if (matcher.matches()) { 2078 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2079 failCount++; 2080 } else { 2081 failCount++; 2082 } 2083 2084 report("Group0"); 2085 } 2086 2087 private static void findIntTest() throws Exception { 2088 Pattern p = Pattern.compile("blah"); 2089 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2090 boolean result = m.find(2); 2091 if (!result) 2092 failCount++; 2093 2094 p = Pattern.compile("$"); 2095 m = p.matcher("1234567890"); 2096 result = m.find(10); 2097 if (!result) 2098 failCount++; 2099 try { 2100 result = m.find(11); 2101 failCount++; 2102 } catch (IndexOutOfBoundsException e) { 2103 // correct result 2104 } 2105 2106 // Supplementary 
character test 2107 p = Pattern.compile(toSupplementaries("blah")); 2108 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2109 result = m.find(2); 2110 if (!result) 2111 failCount++; 2112 2113 report("FindInt"); 2114 } 2115 2116 private static void emptyPatternTest() throws Exception { 2117 Pattern p = Pattern.compile(""); 2118 Matcher m = p.matcher("foo"); 2119 2120 // Should find empty pattern at beginning of input 2121 boolean result = m.find(); 2122 if (result != true) 2123 failCount++; 2124 if (m.start() != 0) 2125 failCount++; 2126 2127 // Should not match entire input if input is not empty 2128 m.reset(); 2129 result = m.matches(); 2130 if (result == true) 2131 failCount++; 2132 2133 try { 2134 m.start(0); 2135 failCount++; 2136 } catch (IllegalStateException e) { 2137 // Correct result 2138 } 2139 2140 // Should match entire input if input is empty 2141 m.reset(""); 2142 result = m.matches(); 2143 if (result != true) 2144 failCount++; 2145 2146 result = Pattern.matches("", ""); 2147 if (result != true) 2148 failCount++; 2149 2150 result = Pattern.matches("", "foo"); 2151 if (result == true) 2152 failCount++; 2153 report("EmptyPattern"); 2154 } 2155 2156 private static void charClassTest() throws Exception { 2157 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2158 check(pattern, "blahb]blech", true); 2159 2160 pattern = Pattern.compile("[abc[def]]"); 2161 check(pattern, "b", true); 2162 2163 // Supplementary character tests 2164 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2165 check(pattern, toSupplementaries("blahb]blech"), true); 2166 2167 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2168 check(pattern, toSupplementaries("b"), true); 2169 2170 try { 2171 // u00ff when UNICODE_CASE 2172 pattern = Pattern.compile("[ab\u00ffcd]", 2173 Pattern.CASE_INSENSITIVE| 2174 Pattern.UNICODE_CASE); 2175 check(pattern, "ab\u00ffcd", true); 2176 check(pattern, "Ab\u0178Cd", true); 2177 2178 // u00b5 when UNICODE_CASE 2179 
pattern = Pattern.compile("[ab\u00b5cd]", 2180 Pattern.CASE_INSENSITIVE| 2181 Pattern.UNICODE_CASE); 2182 check(pattern, "ab\u00b5cd", true); 2183 check(pattern, "Ab\u039cCd", true); 2184 } catch (Exception e) { failCount++; } 2185 2186 /* Special cases 2187 (1)LatinSmallLetterLongS u+017f 2188 (2)LatinSmallLetterDotlessI u+0131 2189 (3)LatineCapitalLetterIWithDotAbove u+0130 2190 (4)KelvinSign u+212a 2191 (5)AngstromSign u+212b 2192 */ 2193 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2194 pattern = Pattern.compile("[sik\u00c5]+", flags); 2195 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2196 failCount++; 2197 2198 report("CharClass"); 2199 } 2200 2201 private static void caretTest() throws Exception { 2202 Pattern pattern = Pattern.compile("\\w*"); 2203 Matcher matcher = pattern.matcher("a#bc#def##g"); 2204 check(matcher, "a"); 2205 check(matcher, ""); 2206 check(matcher, "bc"); 2207 check(matcher, ""); 2208 check(matcher, "def"); 2209 check(matcher, ""); 2210 check(matcher, ""); 2211 check(matcher, "g"); 2212 check(matcher, ""); 2213 if (matcher.find()) 2214 failCount++; 2215 2216 pattern = Pattern.compile("^\\w*"); 2217 matcher = pattern.matcher("a#bc#def##g"); 2218 check(matcher, "a"); 2219 if (matcher.find()) 2220 failCount++; 2221 2222 pattern = Pattern.compile("\\w"); 2223 matcher = pattern.matcher("abc##x"); 2224 check(matcher, "a"); 2225 check(matcher, "b"); 2226 check(matcher, "c"); 2227 check(matcher, "x"); 2228 if (matcher.find()) 2229 failCount++; 2230 2231 pattern = Pattern.compile("^\\w"); 2232 matcher = pattern.matcher("abc##x"); 2233 check(matcher, "a"); 2234 if (matcher.find()) 2235 failCount++; 2236 2237 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2238 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2239 check(matcher, "abc"); 2240 if (matcher.find()) 2241 failCount++; 2242 2243 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2244 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2245 
check(matcher, "abc"); 2246 check(matcher, "jkl"); 2247 if (matcher.find()) 2248 failCount++; 2249 2250 pattern = Pattern.compile("^", Pattern.MULTILINE); 2251 matcher = pattern.matcher("this is some text"); 2252 String result = matcher.replaceAll("X"); 2253 if (!result.equals("Xthis is some text")) 2254 failCount++; 2255 2256 pattern = Pattern.compile("^"); 2257 matcher = pattern.matcher("this is some text"); 2258 result = matcher.replaceAll("X"); 2259 if (!result.equals("Xthis is some text")) 2260 failCount++; 2261 2262 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2263 matcher = pattern.matcher("this is some text\n"); 2264 result = matcher.replaceAll("X"); 2265 if (!result.equals("Xthis is some text\n")) 2266 failCount++; 2267 2268 report("Caret"); 2269 } 2270 2271 private static void groupCaptureTest() throws Exception { 2272 // Independent group 2273 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2274 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2275 matcher.find(); 2276 try { 2277 String blah = matcher.group(1); 2278 failCount++; 2279 } catch (IndexOutOfBoundsException ioobe) { 2280 // Good result 2281 } 2282 // Pure group 2283 pattern = Pattern.compile("x+(?:y+)z+"); 2284 matcher = pattern.matcher("xxxyyyzzz"); 2285 matcher.find(); 2286 try { 2287 String blah = matcher.group(1); 2288 failCount++; 2289 } catch (IndexOutOfBoundsException ioobe) { 2290 // Good result 2291 } 2292 2293 // Supplementary character tests 2294 // Independent group 2295 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2296 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2297 matcher.find(); 2298 try { 2299 String blah = matcher.group(1); 2300 failCount++; 2301 } catch (IndexOutOfBoundsException ioobe) { 2302 // Good result 2303 } 2304 // Pure group 2305 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2306 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2307 matcher.find(); 2308 try { 2309 String blah = 
matcher.group(1); 2310 failCount++; 2311 } catch (IndexOutOfBoundsException ioobe) { 2312 // Good result 2313 } 2314 2315 report("GroupCapture"); 2316 } 2317 2318 private static void backRefTest() throws Exception { 2319 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2320 check(pattern, "zzzaabcazzz", true); 2321 2322 pattern = Pattern.compile("(a*)bc\\1"); 2323 check(pattern, "zzzaabcaazzz", true); 2324 2325 pattern = Pattern.compile("(abc)(def)\\1"); 2326 check(pattern, "abcdefabc", true); 2327 2328 pattern = Pattern.compile("(abc)(def)\\3"); 2329 check(pattern, "abcdefabc", false); 2330 2331 try { 2332 for (int i = 1; i < 10; i++) { 2333 // Make sure backref 1-9 are always accepted 2334 pattern = Pattern.compile("abcdef\\" + i); 2335 // and fail to match if the target group does not exit 2336 check(pattern, "abcdef", false); 2337 } 2338 } catch(PatternSyntaxException e) { 2339 failCount++; 2340 } 2341 2342 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2343 check(pattern, "abcdefghija", false); 2344 check(pattern, "abcdefghija1", true); 2345 2346 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2347 check(pattern, "abcdefghijkk", true); 2348 2349 pattern = Pattern.compile("(a)bcdefghij\\11"); 2350 check(pattern, "abcdefghija1", true); 2351 2352 // Supplementary character tests 2353 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2354 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2355 2356 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2357 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2358 2359 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2360 check(pattern, toSupplementaries("abcdefabc"), true); 2361 2362 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2363 check(pattern, toSupplementaries("abcdefabc"), false); 2364 2365 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2366 check(pattern, 
toSupplementaries("abcdefghija"), false); 2367 check(pattern, toSupplementaries("abcdefghija1"), true); 2368 2369 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2370 check(pattern, toSupplementaries("abcdefghijkk"), true); 2371 2372 report("BackRef"); 2373 } 2374 2375 /** 2376 * Unicode Technical Report #18, section 2.6 End of Line 2377 * There is no empty line to be matched in the sequence \u000D\u000A 2378 * but there is an empty line in the sequence \u000A\u000D. 2379 */ 2380 private static void anchorTest() throws Exception { 2381 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2382 Matcher m = p.matcher("blah1\r\nblah2"); 2383 m.find(); 2384 m.find(); 2385 if (!m.group().equals("blah2")) 2386 failCount++; 2387 2388 m.reset("blah1\n\rblah2"); 2389 m.find(); 2390 m.find(); 2391 m.find(); 2392 if (!m.group().equals("blah2")) 2393 failCount++; 2394 2395 // Test behavior of $ with \r\n at end of input 2396 p = Pattern.compile(".+$"); 2397 m = p.matcher("blah1\r\n"); 2398 if (!m.find()) 2399 failCount++; 2400 if (!m.group().equals("blah1")) 2401 failCount++; 2402 if (m.find()) 2403 failCount++; 2404 2405 // Test behavior of $ with \r\n at end of input in multiline 2406 p = Pattern.compile(".+$", Pattern.MULTILINE); 2407 m = p.matcher("blah1\r\n"); 2408 if (!m.find()) 2409 failCount++; 2410 if (m.find()) 2411 failCount++; 2412 2413 // Test for $ recognition of \u0085 for bug 4527731 2414 p = Pattern.compile(".+$", Pattern.MULTILINE); 2415 m = p.matcher("blah1\u0085"); 2416 if (!m.find()) 2417 failCount++; 2418 2419 // Supplementary character test 2420 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2421 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2422 m.find(); 2423 m.find(); 2424 if (!m.group().equals(toSupplementaries("blah2"))) 2425 failCount++; 2426 2427 m.reset(toSupplementaries("blah1\n\rblah2")); 2428 m.find(); 2429 m.find(); 2430 m.find(); 2431 if (!m.group().equals(toSupplementaries("blah2"))) 2432 
failCount++; 2433 2434 // Test behavior of $ with \r\n at end of input 2435 p = Pattern.compile(".+$"); 2436 m = p.matcher(toSupplementaries("blah1\r\n")); 2437 if (!m.find()) 2438 failCount++; 2439 if (!m.group().equals(toSupplementaries("blah1"))) 2440 failCount++; 2441 if (m.find()) 2442 failCount++; 2443 2444 // Test behavior of $ with \r\n at end of input in multiline 2445 p = Pattern.compile(".+$", Pattern.MULTILINE); 2446 m = p.matcher(toSupplementaries("blah1\r\n")); 2447 if (!m.find()) 2448 failCount++; 2449 if (m.find()) 2450 failCount++; 2451 2452 // Test for $ recognition of \u0085 for bug 4527731 2453 p = Pattern.compile(".+$", Pattern.MULTILINE); 2454 m = p.matcher(toSupplementaries("blah1\u0085")); 2455 if (!m.find()) 2456 failCount++; 2457 2458 report("Anchors"); 2459 } 2460 2461 /** 2462 * A basic sanity test of Matcher.lookingAt(). 2463 */ 2464 private static void lookingAtTest() throws Exception { 2465 Pattern p = Pattern.compile("(ab)(c*)"); 2466 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2467 2468 if (!m.lookingAt()) 2469 failCount++; 2470 2471 if (!m.group().equals(m.group(0))) 2472 failCount++; 2473 2474 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2475 if (m.lookingAt()) 2476 failCount++; 2477 2478 // Supplementary character test 2479 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2480 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2481 2482 if (!m.lookingAt()) 2483 failCount++; 2484 2485 if (!m.group().equals(m.group(0))) 2486 failCount++; 2487 2488 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2489 if (m.lookingAt()) 2490 failCount++; 2491 2492 report("Looking At"); 2493 } 2494 2495 /** 2496 * A basic sanity test of Matcher.matches(). 
2497 */ 2498 private static void matchesTest() throws Exception { 2499 // matches() 2500 Pattern p = Pattern.compile("ulb(c*)"); 2501 Matcher m = p.matcher("ulbcccccc"); 2502 if (!m.matches()) 2503 failCount++; 2504 2505 // find() but not matches() 2506 m.reset("zzzulbcccccc"); 2507 if (m.matches()) 2508 failCount++; 2509 2510 // lookingAt() but not matches() 2511 m.reset("ulbccccccdef"); 2512 if (m.matches()) 2513 failCount++; 2514 2515 // matches() 2516 p = Pattern.compile("a|ad"); 2517 m = p.matcher("ad"); 2518 if (!m.matches()) 2519 failCount++; 2520 2521 // Supplementary character test 2522 // matches() 2523 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2524 m = p.matcher(toSupplementaries("ulbcccccc")); 2525 if (!m.matches()) 2526 failCount++; 2527 2528 // find() but not matches() 2529 m.reset(toSupplementaries("zzzulbcccccc")); 2530 if (m.matches()) 2531 failCount++; 2532 2533 // lookingAt() but not matches() 2534 m.reset(toSupplementaries("ulbccccccdef")); 2535 if (m.matches()) 2536 failCount++; 2537 2538 // matches() 2539 p = Pattern.compile(toSupplementaries("a|ad")); 2540 m = p.matcher(toSupplementaries("ad")); 2541 if (!m.matches()) 2542 failCount++; 2543 2544 report("Matches"); 2545 } 2546 2547 /** 2548 * A basic sanity test of Pattern.matches(). 
2549 */ 2550 private static void patternMatchesTest() throws Exception { 2551 // matches() 2552 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2553 toSupplementaries("ulbcccccc"))) 2554 failCount++; 2555 2556 // find() but not matches() 2557 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2558 toSupplementaries("zzzulbcccccc"))) 2559 failCount++; 2560 2561 // lookingAt() but not matches() 2562 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2563 toSupplementaries("ulbccccccdef"))) 2564 failCount++; 2565 2566 // Supplementary character test 2567 // matches() 2568 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2569 toSupplementaries("ulbcccccc"))) 2570 failCount++; 2571 2572 // find() but not matches() 2573 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2574 toSupplementaries("zzzulbcccccc"))) 2575 failCount++; 2576 2577 // lookingAt() but not matches() 2578 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2579 toSupplementaries("ulbccccccdef"))) 2580 failCount++; 2581 2582 report("Pattern Matches"); 2583 } 2584 2585 /** 2586 * Canonical equivalence testing. Tests the ability of the engine 2587 * to match sequences that are not explicitly specified in the 2588 * pattern when they are considered equivalent by the Unicode Standard. 
2589 */ 2590 private static void ceTest() throws Exception { 2591 // Decomposed char outside char classes 2592 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2593 Matcher m = p.matcher("test\u00e5"); 2594 if (!m.matches()) 2595 failCount++; 2596 2597 m.reset("testa\u030a"); 2598 if (!m.matches()) 2599 failCount++; 2600 2601 // Composed char outside char classes 2602 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2603 m = p.matcher("test\u00e5"); 2604 if (!m.matches()) 2605 failCount++; 2606 2607 m.reset("testa\u030a"); 2608 if (!m.find()) 2609 failCount++; 2610 2611 // Decomposed char inside a char class 2612 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2613 m = p.matcher("test\u00e5"); 2614 if (!m.find()) 2615 failCount++; 2616 2617 m.reset("testa\u030a"); 2618 if (!m.find()) 2619 failCount++; 2620 2621 // Composed char inside a char class 2622 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2623 m = p.matcher("test\u00e5"); 2624 if (!m.find()) 2625 failCount++; 2626 2627 m.reset("testa\u0300"); 2628 if (!m.find()) 2629 failCount++; 2630 2631 m.reset("testa\u030a"); 2632 if (!m.find()) 2633 failCount++; 2634 2635 // Marks that cannot legally change order and be equivalent 2636 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2637 check(p, "testa\u0308\u0300", true); 2638 check(p, "testa\u0300\u0308", false); 2639 2640 // Marks that can legally change order and be equivalent 2641 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2642 check(p, "testa\u0308\u0323", true); 2643 check(p, "testa\u0323\u0308", true); 2644 2645 // Test all equivalences of the sequence a\u0308\u0323\u0300 2646 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2647 check(p, "testa\u0308\u0323\u0300", true); 2648 check(p, "testa\u0323\u0308\u0300", true); 2649 check(p, "testa\u0308\u0300\u0323", true); 2650 check(p, "test\u00e4\u0323\u0300", true); 2651 check(p, "test\u00e4\u0300\u0323", true); 2652 
2653 /* 2654 * The following canonical equivalence tests don't work. Bug id: 4916384. 2655 * 2656 // Decomposed hangul (jamos) 2657 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ); 2658 m = p.matcher("\u1100\u1161"); 2659 if (!m.matches()) 2660 failCount++; 2661 2662 m.reset("\uac00"); 2663 if (!m.matches()) 2664 failCount++; 2665 2666 // Composed hangul 2667 p = Pattern.compile("\uac00", Pattern.CANON_EQ); 2668 m = p.matcher("\u1100\u1161"); 2669 if (!m.matches()) 2670 failCount++; 2671 2672 m.reset("\uac00"); 2673 if (!m.matches()) 2674 failCount++; 2675 2676 // Decomposed supplementary outside char classes 2677 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ); 2678 m = p.matcher("test\ud834\uddc0"); 2679 if (!m.matches()) 2680 failCount++; 2681 2682 m.reset("test\ud834\uddbc\ud834\udd6f"); 2683 if (!m.matches()) 2684 failCount++; 2685 2686 // Composed supplementary outside char classes 2687 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ); 2688 m.reset("test\ud834\uddbc\ud834\udd6f"); 2689 if (!m.matches()) 2690 failCount++; 2691 2692 m = p.matcher("test\ud834\uddc0"); 2693 if (!m.matches()) 2694 failCount++; 2695 2696 */ 2697 2698 report("Canonical Equivalence"); 2699 } 2700 2701 /** 2702 * A basic sanity test of Matcher.replaceAll(). 
2703 */ 2704 private static void globalSubstitute() throws Exception { 2705 // Global substitution with a literal 2706 Pattern p = Pattern.compile("(ab)(c*)"); 2707 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2708 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2709 failCount++; 2710 2711 m.reset("zzzabccczzzabcczzzabccczzz"); 2712 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2713 failCount++; 2714 2715 // Global substitution with groups 2716 m.reset("zzzabccczzzabcczzzabccczzz"); 2717 String result = m.replaceAll("$1"); 2718 if (!result.equals("zzzabzzzabzzzabzzz")) 2719 failCount++; 2720 2721 // Supplementary character test 2722 // Global substitution with a literal 2723 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2724 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2725 if (!m.replaceAll(toSupplementaries("test")). 2726 equals(toSupplementaries("testzzztestzzztest"))) 2727 failCount++; 2728 2729 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2730 if (!m.replaceAll(toSupplementaries("test")). 2731 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2732 failCount++; 2733 2734 // Global substitution with groups 2735 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2736 result = m.replaceAll("$1"); 2737 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2738 failCount++; 2739 2740 report("Global Substitution"); 2741 } 2742 2743 /** 2744 * Tests the usage of Matcher.appendReplacement() with literal 2745 * and group substitutions. 
/**
 * Tests the usage of Matcher.appendReplacement() with literal
 * and group substitutions, writing into a StringBuffer.
 *
 * Each scenario builds a fresh matcher and a fresh buffer, then checks
 * the buffer contents after appendReplacement() and after appendTail().
 * Several scenarios first call appendReplacement() before any find() and
 * expect an IllegalStateException. Every scenario is then repeated with
 * supplementary characters. Mismatches are counted in failCount and the
 * outcome is reported under the name "SB Substitution".
 */
private static void stringbufferSubstitute() throws Exception {
    // SB substitution with literal
    String blah = "zzzblahzzz";
    Pattern p = Pattern.compile("blah");
    Matcher m = p.matcher(blah);
    StringBuffer result = new StringBuffer();
    try {
        // No find() has been performed yet, so this call must be rejected
        m.appendReplacement(result, "blech");
        failCount++;
    } catch (IllegalStateException e) {
        // Expected
    }
    m.find();
    m.appendReplacement(result, "blech");
    if (!result.toString().equals("zzzblech"))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals("zzzblechzzz"))
        failCount++;

    // SB substitution with groups
    blah = "zzzabcdzzz";
    p = Pattern.compile("(ab)(cd)*");
    m = p.matcher(blah);
    result = new StringBuffer();
    try {
        // No find() has been performed yet, so this call must be rejected
        m.appendReplacement(result, "$1");
        failCount++;
    } catch (IllegalStateException e) {
        // Expected
    }
    m.find();
    m.appendReplacement(result, "$1");
    if (!result.toString().equals("zzzab"))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals("zzzabzzz"))
        failCount++;

    // SB substitution with 3 groups
    blah = "zzzabcdcdefzzz";
    p = Pattern.compile("(ab)(cd)*(ef)");
    m = p.matcher(blah);
    result = new StringBuffer();
    try {
        // No find() has been performed yet, so this call must be rejected
        m.appendReplacement(result, "$1w$2w$3");
        failCount++;
    } catch (IllegalStateException e) {
        // Expected
    }
    m.find();
    m.appendReplacement(result, "$1w$2w$3");
    if (!result.toString().equals("zzzabwcdwef"))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals("zzzabwcdwefzzz"))
        failCount++;

    // SB substitution with groups and three matches
    // skipping middle match
    blah = "zzzabcdzzzabcddzzzabcdzzz";
    p = Pattern.compile("(ab)(cd*)");
    m = p.matcher(blah);
    result = new StringBuffer();
    try {
        // No find() has been performed yet, so this call must be rejected
        m.appendReplacement(result, "$1");
        failCount++;
    } catch (IllegalStateException e) {
        // Expected
    }
    m.find();
    m.appendReplacement(result, "$1");
    if (!result.toString().equals("zzzab"))
        failCount++;

    // Two finds with no appendReplacement in between: the expected buffer
    // below shows the skipped second match copied through unchanged.
    m.find();
    m.find();
    m.appendReplacement(result, "$2");
    if (!result.toString().equals("zzzabzzzabcddzzzcd"))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
        failCount++;

    // Check to make sure escaped $ is ignored
    blah = "zzzabcdcdefzzz";
    p = Pattern.compile("(ab)(cd)*(ef)");
    m = p.matcher(blah);
    result = new StringBuffer();
    m.find();
    m.appendReplacement(result, "$1w\\$2w$3");
    if (!result.toString().equals("zzzabw$2wef"))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals("zzzabw$2wefzzz"))
        failCount++;

    // Check to make sure a reference to nonexistent group causes error
    blah = "zzzabcdcdefzzz";
    p = Pattern.compile("(ab)(cd)*(ef)");
    m = p.matcher(blah);
    result = new StringBuffer();
    m.find();
    try {
        m.appendReplacement(result, "$1w$5w$3");
        failCount++;
    } catch (IndexOutOfBoundsException ioobe) {
        // Correct result
    }

    // Check double digit group references
    blah = "zzz123456789101112zzz";
    p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
    m = p.matcher(blah);
    result = new StringBuffer();
    m.find();
    m.appendReplacement(result, "$1w$11w$3");
    if (!result.toString().equals("zzz1w11w3"))
        failCount++;

    // Check to make sure it backs off $15 to $1 if only three groups
    // (the expected text shows group 1 followed by a literal '5')
    blah = "zzzabcdcdefzzz";
    p = Pattern.compile("(ab)(cd)*(ef)");
    m = p.matcher(blah);
    result = new StringBuffer();
    m.find();
    m.appendReplacement(result, "$1w$15w$3");
    if (!result.toString().equals("zzzabwab5wef"))
        failCount++;


    // Supplementary character test
    // SB substitution with literal
    blah = toSupplementaries("zzzblahzzz");
    p = Pattern.compile(toSupplementaries("blah"));
    m = p.matcher(blah);
    result = new StringBuffer();
    try {
        // No find() has been performed yet, so this call must be rejected
        m.appendReplacement(result, toSupplementaries("blech"));
        failCount++;
    } catch (IllegalStateException e) {
        // Expected
    }
    m.find();
    m.appendReplacement(result, toSupplementaries("blech"));
    if (!result.toString().equals(toSupplementaries("zzzblech")))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
        failCount++;

    // SB substitution with groups
    blah = toSupplementaries("zzzabcdzzz");
    p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
    m = p.matcher(blah);
    result = new StringBuffer();
    try {
        // No find() has been performed yet, so this call must be rejected
        m.appendReplacement(result, "$1");
        failCount++;
    } catch (IllegalStateException e) {
        // Expected
    }
    m.find();
    m.appendReplacement(result, "$1");
    if (!result.toString().equals(toSupplementaries("zzzab")))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals(toSupplementaries("zzzabzzz")))
        failCount++;

    // SB substitution with 3 groups
    blah = toSupplementaries("zzzabcdcdefzzz");
    p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
    m = p.matcher(blah);
    result = new StringBuffer();
    try {
        // No find() has been performed yet, so this call must be rejected
        m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
        failCount++;
    } catch (IllegalStateException e) {
        // Expected
    }
    m.find();
    m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
    if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
        failCount++;

    // SB substitution with groups and three matches
    // skipping middle match
    blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
    p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
    m = p.matcher(blah);
    result = new StringBuffer();
    try {
        // No find() has been performed yet, so this call must be rejected
        m.appendReplacement(result, "$1");
        failCount++;
    } catch (IllegalStateException e) {
        // Expected
    }
    m.find();
    m.appendReplacement(result, "$1");
    if (!result.toString().equals(toSupplementaries("zzzab")))
        failCount++;

    // Skip the middle match, as in the plain-character scenario above
    m.find();
    m.find();
    m.appendReplacement(result, "$2");
    if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
        failCount++;

    // Check to make sure escaped $ is ignored
    blah = toSupplementaries("zzzabcdcdefzzz");
    p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
    m = p.matcher(blah);
    result = new StringBuffer();
    m.find();
    m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
    if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
        failCount++;

    // Check to make sure a reference to nonexistent group causes error
    blah = toSupplementaries("zzzabcdcdefzzz");
    p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
    m = p.matcher(blah);
    result = new StringBuffer();
    m.find();
    try {
        m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
        failCount++;
    } catch (IndexOutOfBoundsException ioobe) {
        // Correct result
    }

    // Check double digit group references
    // NOTE(review): the pattern is not passed through toSupplementaries()
    // here although the input is; presumably the helper leaves digits
    // untouched, which would make the two equivalent - confirm.
    blah = toSupplementaries("zzz123456789101112zzz");
    p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
    m = p.matcher(blah);
    result = new StringBuffer();
    m.find();
    m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
    if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
        failCount++;

    // Check to make sure it backs off $15 to $1 if only three groups
    blah = toSupplementaries("zzzabcdcdefzzz");
    p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
    m = p.matcher(blah);
    result = new StringBuffer();
    m.find();
    m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
    if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
        failCount++;

    // Check nothing has been appended into the output buffer if
    // the replacement string triggers IllegalArgumentException.
    p = Pattern.compile("(abc)");
    m = p.matcher("abcd");
    result = new StringBuffer();
    m.find();
    try {
        m.appendReplacement(result, ("xyz$g"));
        failCount++;
    } catch (IllegalArgumentException iae) {
        // The buffer must still be empty after the rejected replacement
        if (result.length() != 0)
            failCount++;
    }

    report("SB Substitution");
}
(!result.toString().equals("zzzabzzz")) 3068 failCount++; 3069 3070 // SB substitution with 3 groups 3071 blah = "zzzabcdcdefzzz"; 3072 p = Pattern.compile("(ab)(cd)*(ef)"); 3073 m = p.matcher(blah); 3074 result = new StringBuilder(); 3075 try { 3076 m.appendReplacement(result, "$1w$2w$3"); 3077 failCount++; 3078 } catch (IllegalStateException e) { 3079 } 3080 m.find(); 3081 m.appendReplacement(result, "$1w$2w$3"); 3082 if (!result.toString().equals("zzzabwcdwef")) 3083 failCount++; 3084 3085 m.appendTail(result); 3086 if (!result.toString().equals("zzzabwcdwefzzz")) 3087 failCount++; 3088 3089 // SB substitution with groups and three matches 3090 // skipping middle match 3091 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3092 p = Pattern.compile("(ab)(cd*)"); 3093 m = p.matcher(blah); 3094 result = new StringBuilder(); 3095 try { 3096 m.appendReplacement(result, "$1"); 3097 failCount++; 3098 } catch (IllegalStateException e) { 3099 } 3100 m.find(); 3101 m.appendReplacement(result, "$1"); 3102 if (!result.toString().equals("zzzab")) 3103 failCount++; 3104 3105 m.find(); 3106 m.find(); 3107 m.appendReplacement(result, "$2"); 3108 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3109 failCount++; 3110 3111 m.appendTail(result); 3112 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3113 failCount++; 3114 3115 // Check to make sure escaped $ is ignored 3116 blah = "zzzabcdcdefzzz"; 3117 p = Pattern.compile("(ab)(cd)*(ef)"); 3118 m = p.matcher(blah); 3119 result = new StringBuilder(); 3120 m.find(); 3121 m.appendReplacement(result, "$1w\\$2w$3"); 3122 if (!result.toString().equals("zzzabw$2wef")) 3123 failCount++; 3124 3125 m.appendTail(result); 3126 if (!result.toString().equals("zzzabw$2wefzzz")) 3127 failCount++; 3128 3129 // Check to make sure a reference to nonexistent group causes error 3130 blah = "zzzabcdcdefzzz"; 3131 p = Pattern.compile("(ab)(cd)*(ef)"); 3132 m = p.matcher(blah); 3133 result = new StringBuilder(); 3134 m.find(); 3135 try { 3136 
m.appendReplacement(result, "$1w$5w$3"); 3137 failCount++; 3138 } catch (IndexOutOfBoundsException ioobe) { 3139 // Correct result 3140 } 3141 3142 // Check double digit group references 3143 blah = "zzz123456789101112zzz"; 3144 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3145 m = p.matcher(blah); 3146 result = new StringBuilder(); 3147 m.find(); 3148 m.appendReplacement(result, "$1w$11w$3"); 3149 if (!result.toString().equals("zzz1w11w3")) 3150 failCount++; 3151 3152 // Check to make sure it backs off $15 to $1 if only three groups 3153 blah = "zzzabcdcdefzzz"; 3154 p = Pattern.compile("(ab)(cd)*(ef)"); 3155 m = p.matcher(blah); 3156 result = new StringBuilder(); 3157 m.find(); 3158 m.appendReplacement(result, "$1w$15w$3"); 3159 if (!result.toString().equals("zzzabwab5wef")) 3160 failCount++; 3161 3162 3163 // Supplementary character test 3164 // SB substitution with literal 3165 blah = toSupplementaries("zzzblahzzz"); 3166 p = Pattern.compile(toSupplementaries("blah")); 3167 m = p.matcher(blah); 3168 result = new StringBuilder(); 3169 try { 3170 m.appendReplacement(result, toSupplementaries("blech")); 3171 failCount++; 3172 } catch (IllegalStateException e) { 3173 } 3174 m.find(); 3175 m.appendReplacement(result, toSupplementaries("blech")); 3176 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3177 failCount++; 3178 m.appendTail(result); 3179 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3180 failCount++; 3181 3182 // SB substitution with groups 3183 blah = toSupplementaries("zzzabcdzzz"); 3184 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3185 m = p.matcher(blah); 3186 result = new StringBuilder(); 3187 try { 3188 m.appendReplacement(result, "$1"); 3189 failCount++; 3190 } catch (IllegalStateException e) { 3191 } 3192 m.find(); 3193 m.appendReplacement(result, "$1"); 3194 if (!result.toString().equals(toSupplementaries("zzzab"))) 3195 failCount++; 3196 3197 m.appendTail(result); 3198 if 
(!result.toString().equals(toSupplementaries("zzzabzzz"))) 3199 failCount++; 3200 3201 // SB substitution with 3 groups 3202 blah = toSupplementaries("zzzabcdcdefzzz"); 3203 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3204 m = p.matcher(blah); 3205 result = new StringBuilder(); 3206 try { 3207 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3208 failCount++; 3209 } catch (IllegalStateException e) { 3210 } 3211 m.find(); 3212 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3213 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3214 failCount++; 3215 3216 m.appendTail(result); 3217 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3218 failCount++; 3219 3220 // SB substitution with groups and three matches 3221 // skipping middle match 3222 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3223 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3224 m = p.matcher(blah); 3225 result = new StringBuilder(); 3226 try { 3227 m.appendReplacement(result, "$1"); 3228 failCount++; 3229 } catch (IllegalStateException e) { 3230 } 3231 m.find(); 3232 m.appendReplacement(result, "$1"); 3233 if (!result.toString().equals(toSupplementaries("zzzab"))) 3234 failCount++; 3235 3236 m.find(); 3237 m.find(); 3238 m.appendReplacement(result, "$2"); 3239 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3240 failCount++; 3241 3242 m.appendTail(result); 3243 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3244 failCount++; 3245 3246 // Check to make sure escaped $ is ignored 3247 blah = toSupplementaries("zzzabcdcdefzzz"); 3248 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3249 m = p.matcher(blah); 3250 result = new StringBuilder(); 3251 m.find(); 3252 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3253 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3254 failCount++; 3255 3256 m.appendTail(result); 3257 if 
(!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3258 failCount++; 3259 3260 // Check to make sure a reference to nonexistent group causes error 3261 blah = toSupplementaries("zzzabcdcdefzzz"); 3262 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3263 m = p.matcher(blah); 3264 result = new StringBuilder(); 3265 m.find(); 3266 try { 3267 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3268 failCount++; 3269 } catch (IndexOutOfBoundsException ioobe) { 3270 // Correct result 3271 } 3272 // Check double digit group references 3273 blah = toSupplementaries("zzz123456789101112zzz"); 3274 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3275 m = p.matcher(blah); 3276 result = new StringBuilder(); 3277 m.find(); 3278 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3279 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3280 failCount++; 3281 3282 // Check to make sure it backs off $15 to $1 if only three groups 3283 blah = toSupplementaries("zzzabcdcdefzzz"); 3284 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3285 m = p.matcher(blah); 3286 result = new StringBuilder(); 3287 m.find(); 3288 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3289 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3290 failCount++; 3291 // Check nothing has been appended into the output buffer if 3292 // the replacement string triggers IllegalArgumentException. 3293 p = Pattern.compile("(abc)"); 3294 m = p.matcher("abcd"); 3295 result = new StringBuilder(); 3296 m.find(); 3297 try { 3298 m.appendReplacement(result, ("xyz$g")); 3299 failCount++; 3300 } catch (IllegalArgumentException iae) { 3301 if (result.length() != 0) 3302 failCount++; 3303 } 3304 report("SB Substitution 2"); 3305 } 3306 3307 /* 3308 * 5 groups of characters are created to make a substitution string. 
3309 * A base string will be created including random lead chars, the 3310 * substitution string, and random trailing chars. 3311 * A pattern containing the 5 groups is searched for and replaced with: 3312 * random group + random string + random group. 3313 * The results are checked for correctness. 3314 */ 3315 private static void substitutionBasher() { 3316 for (int runs = 0; runs<1000; runs++) { 3317 // Create a base string to work in 3318 int leadingChars = generator.nextInt(10); 3319 StringBuffer baseBuffer = new StringBuffer(100); 3320 String leadingString = getRandomAlphaString(leadingChars); 3321 baseBuffer.append(leadingString); 3322 3323 // Create 5 groups of random number of random chars 3324 // Create the string to substitute 3325 // Create the pattern string to search for 3326 StringBuffer bufferToSub = new StringBuffer(25); 3327 StringBuffer bufferToPat = new StringBuffer(50); 3328 String[] groups = new String[5]; 3329 for(int i=0; i<5; i++) { 3330 int aGroupSize = generator.nextInt(5)+1; 3331 groups[i] = getRandomAlphaString(aGroupSize); 3332 bufferToSub.append(groups[i]); 3333 bufferToPat.append('('); 3334 bufferToPat.append(groups[i]); 3335 bufferToPat.append(')'); 3336 } 3337 String stringToSub = bufferToSub.toString(); 3338 String pattern = bufferToPat.toString(); 3339 3340 // Place sub string into working string at random index 3341 baseBuffer.append(stringToSub); 3342 3343 // Append random chars to end 3344 int trailingChars = generator.nextInt(10); 3345 String trailingString = getRandomAlphaString(trailingChars); 3346 baseBuffer.append(trailingString); 3347 String baseString = baseBuffer.toString(); 3348 3349 // Create test pattern and matcher 3350 Pattern p = Pattern.compile(pattern); 3351 Matcher m = p.matcher(baseString); 3352 3353 // Reject candidate if pattern happens to start early 3354 m.find(); 3355 if (m.start() < leadingChars) 3356 continue; 3357 3358 // Reject candidate if more than one match 3359 if (m.find()) 3360 continue; 3361 
3362 // Construct a replacement string with : 3363 // random group + random string + random group 3364 StringBuffer bufferToRep = new StringBuffer(); 3365 int groupIndex1 = generator.nextInt(5); 3366 bufferToRep.append("$" + (groupIndex1 + 1)); 3367 String randomMidString = getRandomAlphaString(5); 3368 bufferToRep.append(randomMidString); 3369 int groupIndex2 = generator.nextInt(5); 3370 bufferToRep.append("$" + (groupIndex2 + 1)); 3371 String replacement = bufferToRep.toString(); 3372 3373 // Do the replacement 3374 String result = m.replaceAll(replacement); 3375 3376 // Construct expected result 3377 StringBuffer bufferToRes = new StringBuffer(); 3378 bufferToRes.append(leadingString); 3379 bufferToRes.append(groups[groupIndex1]); 3380 bufferToRes.append(randomMidString); 3381 bufferToRes.append(groups[groupIndex2]); 3382 bufferToRes.append(trailingString); 3383 String expectedResult = bufferToRes.toString(); 3384 3385 // Check results 3386 if (!result.equals(expectedResult)) 3387 failCount++; 3388 } 3389 3390 report("Substitution Basher"); 3391 } 3392 3393 /* 3394 * 5 groups of characters are created to make a substitution string. 3395 * A base string will be created including random lead chars, the 3396 * substitution string, and random trailing chars. 3397 * A pattern containing the 5 groups is searched for and replaced with: 3398 * random group + random string + random group. 3399 * The results are checked for correctness. 
3400 */ 3401 private static void substitutionBasher2() { 3402 for (int runs = 0; runs<1000; runs++) { 3403 // Create a base string to work in 3404 int leadingChars = generator.nextInt(10); 3405 StringBuilder baseBuffer = new StringBuilder(100); 3406 String leadingString = getRandomAlphaString(leadingChars); 3407 baseBuffer.append(leadingString); 3408 3409 // Create 5 groups of random number of random chars 3410 // Create the string to substitute 3411 // Create the pattern string to search for 3412 StringBuilder bufferToSub = new StringBuilder(25); 3413 StringBuilder bufferToPat = new StringBuilder(50); 3414 String[] groups = new String[5]; 3415 for(int i=0; i<5; i++) { 3416 int aGroupSize = generator.nextInt(5)+1; 3417 groups[i] = getRandomAlphaString(aGroupSize); 3418 bufferToSub.append(groups[i]); 3419 bufferToPat.append('('); 3420 bufferToPat.append(groups[i]); 3421 bufferToPat.append(')'); 3422 } 3423 String stringToSub = bufferToSub.toString(); 3424 String pattern = bufferToPat.toString(); 3425 3426 // Place sub string into working string at random index 3427 baseBuffer.append(stringToSub); 3428 3429 // Append random chars to end 3430 int trailingChars = generator.nextInt(10); 3431 String trailingString = getRandomAlphaString(trailingChars); 3432 baseBuffer.append(trailingString); 3433 String baseString = baseBuffer.toString(); 3434 3435 // Create test pattern and matcher 3436 Pattern p = Pattern.compile(pattern); 3437 Matcher m = p.matcher(baseString); 3438 3439 // Reject candidate if pattern happens to start early 3440 m.find(); 3441 if (m.start() < leadingChars) 3442 continue; 3443 3444 // Reject candidate if more than one match 3445 if (m.find()) 3446 continue; 3447 3448 // Construct a replacement string with : 3449 // random group + random string + random group 3450 StringBuilder bufferToRep = new StringBuilder(); 3451 int groupIndex1 = generator.nextInt(5); 3452 bufferToRep.append("$" + (groupIndex1 + 1)); 3453 String randomMidString = 
getRandomAlphaString(5); 3454 bufferToRep.append(randomMidString); 3455 int groupIndex2 = generator.nextInt(5); 3456 bufferToRep.append("$" + (groupIndex2 + 1)); 3457 String replacement = bufferToRep.toString(); 3458 3459 // Do the replacement 3460 String result = m.replaceAll(replacement); 3461 3462 // Construct expected result 3463 StringBuilder bufferToRes = new StringBuilder(); 3464 bufferToRes.append(leadingString); 3465 bufferToRes.append(groups[groupIndex1]); 3466 bufferToRes.append(randomMidString); 3467 bufferToRes.append(groups[groupIndex2]); 3468 bufferToRes.append(trailingString); 3469 String expectedResult = bufferToRes.toString(); 3470 3471 // Check results 3472 if (!result.equals(expectedResult)) { 3473 failCount++; 3474 } 3475 } 3476 3477 report("Substitution Basher 2"); 3478 } 3479 3480 /** 3481 * Checks the handling of some escape sequences that the Pattern 3482 * class should process instead of the java compiler. These are 3483 * not in the file because the escapes should be be processed 3484 * by the Pattern class when the regex is compiled. 3485 */ 3486 private static void escapes() throws Exception { 3487 Pattern p = Pattern.compile("\\043"); 3488 Matcher m = p.matcher("#"); 3489 if (!m.find()) 3490 failCount++; 3491 3492 p = Pattern.compile("\\x23"); 3493 m = p.matcher("#"); 3494 if (!m.find()) 3495 failCount++; 3496 3497 p = Pattern.compile("\\u0023"); 3498 m = p.matcher("#"); 3499 if (!m.find()) 3500 failCount++; 3501 3502 report("Escape sequences"); 3503 } 3504 3505 /** 3506 * Checks the handling of blank input situations. These 3507 * tests are incompatible with my test file format. 
3508 */ 3509 private static void blankInput() throws Exception { 3510 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3511 Matcher m = p.matcher(""); 3512 if (m.find()) 3513 failCount++; 3514 3515 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3516 m = p.matcher(""); 3517 if (!m.find()) 3518 failCount++; 3519 3520 p = Pattern.compile("abc"); 3521 m = p.matcher(""); 3522 if (m.find()) 3523 failCount++; 3524 3525 p = Pattern.compile("a*"); 3526 m = p.matcher(""); 3527 if (!m.find()) 3528 failCount++; 3529 3530 report("Blank input"); 3531 } 3532 3533 /** 3534 * Tests the Boyer-Moore pattern matching of a character sequence 3535 * on randomly generated patterns. 3536 */ 3537 private static void bm() throws Exception { 3538 doBnM('a'); 3539 report("Boyer Moore (ASCII)"); 3540 3541 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3542 report("Boyer Moore (Supplementary)"); 3543 } 3544 3545 private static void doBnM(int baseCharacter) throws Exception { 3546 int achar=0; 3547 3548 for (int i=0; i<100; i++) { 3549 // Create a short pattern to search for 3550 int patternLength = generator.nextInt(7) + 4; 3551 StringBuffer patternBuffer = new StringBuffer(patternLength); 3552 for (int x=0; x<patternLength; x++) { 3553 int ch = baseCharacter + generator.nextInt(26); 3554 if (Character.isSupplementaryCodePoint(ch)) { 3555 patternBuffer.append(Character.toChars(ch)); 3556 } else { 3557 patternBuffer.append((char)ch); 3558 } 3559 } 3560 String pattern = patternBuffer.toString(); 3561 Pattern p = Pattern.compile(pattern); 3562 3563 // Create a buffer with random ASCII chars that does 3564 // not match the sample 3565 String toSearch = null; 3566 StringBuffer s = null; 3567 Matcher m = p.matcher(""); 3568 do { 3569 s = new StringBuffer(100); 3570 for (int x=0; x<100; x++) { 3571 int ch = baseCharacter + generator.nextInt(26); 3572 if (Character.isSupplementaryCodePoint(ch)) { 3573 s.append(Character.toChars(ch)); 3574 } else { 3575 s.append((char)ch); 3576 } 
3577 } 3578 toSearch = s.toString(); 3579 m.reset(toSearch); 3580 } while (m.find()); 3581 3582 // Insert the pattern at a random spot 3583 int insertIndex = generator.nextInt(99); 3584 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3585 insertIndex++; 3586 s = s.insert(insertIndex, pattern); 3587 toSearch = s.toString(); 3588 3589 // Make sure that the pattern is found 3590 m.reset(toSearch); 3591 if (!m.find()) 3592 failCount++; 3593 3594 // Make sure that the match text is the pattern 3595 if (!m.group().equals(pattern)) 3596 failCount++; 3597 3598 // Make sure match occured at insertion point 3599 if (m.start() != insertIndex) 3600 failCount++; 3601 } 3602 } 3603 3604 /** 3605 * Tests the matching of slices on randomly generated patterns. 3606 * The Boyer-Moore optimization is not done on these patterns 3607 * because it uses unicode case folding. 3608 */ 3609 private static void slice() throws Exception { 3610 doSlice(Character.MAX_VALUE); 3611 report("Slice"); 3612 3613 doSlice(Character.MAX_CODE_POINT); 3614 report("Slice (Supplementary)"); 3615 } 3616 3617 private static void doSlice(int maxCharacter) throws Exception { 3618 Random generator = new Random(); 3619 int achar=0; 3620 3621 for (int i=0; i<100; i++) { 3622 // Create a short pattern to search for 3623 int patternLength = generator.nextInt(7) + 4; 3624 StringBuffer patternBuffer = new StringBuffer(patternLength); 3625 for (int x=0; x<patternLength; x++) { 3626 int randomChar = 0; 3627 while (!Character.isLetterOrDigit(randomChar)) 3628 randomChar = generator.nextInt(maxCharacter); 3629 if (Character.isSupplementaryCodePoint(randomChar)) { 3630 patternBuffer.append(Character.toChars(randomChar)); 3631 } else { 3632 patternBuffer.append((char) randomChar); 3633 } 3634 } 3635 String pattern = patternBuffer.toString(); 3636 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3637 3638 // Create a buffer with random chars that does not match the sample 3639 String toSearch = null; 3640 
StringBuffer s = null; 3641 Matcher m = p.matcher(""); 3642 do { 3643 s = new StringBuffer(100); 3644 for (int x=0; x<100; x++) { 3645 int randomChar = 0; 3646 while (!Character.isLetterOrDigit(randomChar)) 3647 randomChar = generator.nextInt(maxCharacter); 3648 if (Character.isSupplementaryCodePoint(randomChar)) { 3649 s.append(Character.toChars(randomChar)); 3650 } else { 3651 s.append((char) randomChar); 3652 } 3653 } 3654 toSearch = s.toString(); 3655 m.reset(toSearch); 3656 } while (m.find()); 3657 3658 // Insert the pattern at a random spot 3659 int insertIndex = generator.nextInt(99); 3660 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3661 insertIndex++; 3662 s = s.insert(insertIndex, pattern); 3663 toSearch = s.toString(); 3664 3665 // Make sure that the pattern is found 3666 m.reset(toSearch); 3667 if (!m.find()) 3668 failCount++; 3669 3670 // Make sure that the match text is the pattern 3671 if (!m.group().equals(pattern)) 3672 failCount++; 3673 3674 // Make sure match occured at insertion point 3675 if (m.start() != insertIndex) 3676 failCount++; 3677 } 3678 } 3679 3680 private static void explainFailure(String pattern, String data, 3681 String expected, String actual) { 3682 System.err.println("----------------------------------------"); 3683 System.err.println("Pattern = "+pattern); 3684 System.err.println("Data = "+data); 3685 System.err.println("Expected = " + expected); 3686 System.err.println("Actual = " + actual); 3687 } 3688 3689 private static void explainFailure(String pattern, String data, 3690 Throwable t) { 3691 System.err.println("----------------------------------------"); 3692 System.err.println("Pattern = "+pattern); 3693 System.err.println("Data = "+data); 3694 t.printStackTrace(System.err); 3695 } 3696 3697 // Testing examples from a file 3698 3699 /** 3700 * Goes through the file "TestCases.txt" and creates many patterns 3701 * described in the file, matching the patterns against input lines in 3702 * the file, and comparing 
the results against the correct results 3703 * also found in the file. The file format is described in comments 3704 * at the head of the file. 3705 */ 3706 private static void processFile(String fileName) throws Exception { 3707 File testCases = new File(System.getProperty("test.src", "."), 3708 fileName); 3709 FileInputStream in = new FileInputStream(testCases); 3710 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3711 3712 // Process next test case. 3713 String aLine; 3714 while((aLine = r.readLine()) != null) { 3715 // Read a line for pattern 3716 String patternString = grabLine(r); 3717 Pattern p = null; 3718 try { 3719 p = compileTestPattern(patternString); 3720 } catch (PatternSyntaxException e) { 3721 String dataString = grabLine(r); 3722 String expectedResult = grabLine(r); 3723 if (expectedResult.startsWith("error")) 3724 continue; 3725 explainFailure(patternString, dataString, e); 3726 failCount++; 3727 continue; 3728 } 3729 3730 // Read a line for input string 3731 String dataString = grabLine(r); 3732 Matcher m = p.matcher(dataString); 3733 StringBuffer result = new StringBuffer(); 3734 3735 // Check for IllegalStateExceptions before a match 3736 failCount += preMatchInvariants(m); 3737 3738 boolean found = m.find(); 3739 3740 if (found) 3741 failCount += postTrueMatchInvariants(m); 3742 else 3743 failCount += postFalseMatchInvariants(m); 3744 3745 if (found) { 3746 result.append("true "); 3747 result.append(m.group(0) + " "); 3748 } else { 3749 result.append("false "); 3750 } 3751 3752 result.append(m.groupCount()); 3753 3754 if (found) { 3755 for (int i=1; i<m.groupCount()+1; i++) 3756 if (m.group(i) != null) 3757 result.append(" " +m.group(i)); 3758 } 3759 3760 // Read a line for the expected result 3761 String expectedResult = grabLine(r); 3762 3763 if (!result.toString().equals(expectedResult)) { 3764 explainFailure(patternString, dataString, expectedResult, result.toString()); 3765 failCount++; 3766 } 3767 } 3768 3769 
report(fileName); 3770 } 3771 3772 private static int preMatchInvariants(Matcher m) { 3773 int failCount = 0; 3774 try { 3775 m.start(); 3776 failCount++; 3777 } catch (IllegalStateException ise) {} 3778 try { 3779 m.end(); 3780 failCount++; 3781 } catch (IllegalStateException ise) {} 3782 try { 3783 m.group(); 3784 failCount++; 3785 } catch (IllegalStateException ise) {} 3786 return failCount; 3787 } 3788 3789 private static int postFalseMatchInvariants(Matcher m) { 3790 int failCount = 0; 3791 try { 3792 m.group(); 3793 failCount++; 3794 } catch (IllegalStateException ise) {} 3795 try { 3796 m.start(); 3797 failCount++; 3798 } catch (IllegalStateException ise) {} 3799 try { 3800 m.end(); 3801 failCount++; 3802 } catch (IllegalStateException ise) {} 3803 return failCount; 3804 } 3805 3806 private static int postTrueMatchInvariants(Matcher m) { 3807 int failCount = 0; 3808 //assert(m.start() = m.start(0); 3809 if (m.start() != m.start(0)) 3810 failCount++; 3811 //assert(m.end() = m.end(0); 3812 if (m.start() != m.start(0)) 3813 failCount++; 3814 //assert(m.group() = m.group(0); 3815 if (!m.group().equals(m.group(0))) 3816 failCount++; 3817 try { 3818 m.group(50); 3819 failCount++; 3820 } catch (IndexOutOfBoundsException ise) {} 3821 3822 return failCount; 3823 } 3824 3825 private static Pattern compileTestPattern(String patternString) { 3826 if (!patternString.startsWith("'")) { 3827 return Pattern.compile(patternString); 3828 } 3829 3830 int break1 = patternString.lastIndexOf("'"); 3831 String flagString = patternString.substring( 3832 break1+1, patternString.length()); 3833 patternString = patternString.substring(1, break1); 3834 3835 if (flagString.equals("i")) 3836 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3837 3838 if (flagString.equals("m")) 3839 return Pattern.compile(patternString, Pattern.MULTILINE); 3840 3841 return Pattern.compile(patternString); 3842 } 3843 3844 /** 3845 * Reads a line from the input file. 
Keeps reading lines until a non 3846 * empty non comment line is read. If the line contains a \n then 3847 * these two characters are replaced by a newline char. If a \\uxxxx 3848 * sequence is read then the sequence is replaced by the unicode char. 3849 */ 3850 private static String grabLine(BufferedReader r) throws Exception { 3851 int index = 0; 3852 String line = r.readLine(); 3853 while (line.startsWith("//") || line.length() < 1) 3854 line = r.readLine(); 3855 while ((index = line.indexOf("\\n")) != -1) { 3856 StringBuffer temp = new StringBuffer(line); 3857 temp.replace(index, index+2, "\n"); 3858 line = temp.toString(); 3859 } 3860 while ((index = line.indexOf("\\u")) != -1) { 3861 StringBuffer temp = new StringBuffer(line); 3862 String value = temp.substring(index+2, index+6); 3863 char aChar = (char)Integer.parseInt(value, 16); 3864 String unicodeChar = "" + aChar; 3865 temp.replace(index, index+6, unicodeChar); 3866 line = temp.toString(); 3867 } 3868 3869 return line; 3870 } 3871 3872 private static void check(Pattern p, String s, String g, String expected) { 3873 Matcher m = p.matcher(s); 3874 m.find(); 3875 if (!m.group(g).equals(expected) || 3876 s.charAt(m.start(g)) != expected.charAt(0) || 3877 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 3878 failCount++; 3879 } 3880 3881 private static void checkReplaceFirst(String p, String s, String r, String expected) 3882 { 3883 if (!expected.equals(Pattern.compile(p) 3884 .matcher(s) 3885 .replaceFirst(r))) 3886 failCount++; 3887 } 3888 3889 private static void checkReplaceAll(String p, String s, String r, String expected) 3890 { 3891 if (!expected.equals(Pattern.compile(p) 3892 .matcher(s) 3893 .replaceAll(r))) 3894 failCount++; 3895 } 3896 3897 private static void checkExpectedFail(String p) { 3898 try { 3899 Pattern.compile(p); 3900 } catch (PatternSyntaxException pse) { 3901 //pse.printStackTrace(); 3902 return; 3903 } 3904 failCount++; 3905 } 3906 3907 private static void 
checkExpectedIAE(Matcher m, String g) { 3908 m.find(); 3909 try { 3910 m.group(g); 3911 } catch (IllegalArgumentException x) { 3912 //iae.printStackTrace(); 3913 try { 3914 m.start(g); 3915 } catch (IllegalArgumentException xx) { 3916 try { 3917 m.start(g); 3918 } catch (IllegalArgumentException xxx) { 3919 return; 3920 } 3921 } 3922 } 3923 failCount++; 3924 } 3925 3926 private static void checkExpectedNPE(Matcher m) { 3927 m.find(); 3928 try { 3929 m.group(null); 3930 } catch (NullPointerException x) { 3931 try { 3932 m.start(null); 3933 } catch (NullPointerException xx) { 3934 try { 3935 m.end(null); 3936 } catch (NullPointerException xxx) { 3937 return; 3938 } 3939 } 3940 } 3941 failCount++; 3942 } 3943 3944 private static void namedGroupCaptureTest() throws Exception { 3945 check(Pattern.compile("x+(?<gname>y+)z+"), 3946 "xxxyyyzzz", 3947 "gname", 3948 "yyy"); 3949 3950 check(Pattern.compile("x+(?<gname8>y+)z+"), 3951 "xxxyyyzzz", 3952 "gname8", 3953 "yyy"); 3954 3955 //backref 3956 Pattern pattern = Pattern.compile("(a*)bc\\1"); 3957 check(pattern, "zzzaabcazzz", true); // found "abca" 3958 3959 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 3960 "zzzaabcaazzz", true); 3961 3962 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 3963 "abcdefabc", true); 3964 3965 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 3966 "abcdefghijkk", true); 3967 3968 // Supplementary character tests 3969 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3970 toSupplementaries("zzzaabcazzz"), true); 3971 3972 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3973 toSupplementaries("zzzaabcaazzz"), true); 3974 3975 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 3976 toSupplementaries("abcdefabc"), true); 3977 3978 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 3979 "(?<gname>" + 3980 toSupplementaries("k)") + 
"\\k<gname>"), 3981 toSupplementaries("abcdefghijkk"), true); 3982 3983 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 3984 "xxxyyyzzzyyy", 3985 "gname", 3986 "yyy"); 3987 3988 //replaceFirst/All 3989 checkReplaceFirst("(?<gn>ab)(c*)", 3990 "abccczzzabcczzzabccc", 3991 "${gn}", 3992 "abzzzabcczzzabccc"); 3993 3994 checkReplaceAll("(?<gn>ab)(c*)", 3995 "abccczzzabcczzzabccc", 3996 "${gn}", 3997 "abzzzabzzzab"); 3998 3999 4000 checkReplaceFirst("(?<gn>ab)(c*)", 4001 "zzzabccczzzabcczzzabccczzz", 4002 "${gn}", 4003 "zzzabzzzabcczzzabccczzz"); 4004 4005 checkReplaceAll("(?<gn>ab)(c*)", 4006 "zzzabccczzzabcczzzabccczzz", 4007 "${gn}", 4008 "zzzabzzzabzzzabzzz"); 4009 4010 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4011 "zzzabccczzzabcczzzabccczzz", 4012 "${gn2}", 4013 "zzzccczzzabcczzzabccczzz"); 4014 4015 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4016 "zzzabccczzzabcczzzabccczzz", 4017 "${gn2}", 4018 "zzzccczzzcczzzccczzz"); 4019 4020 //toSupplementaries("(ab)(c*)")); 4021 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4022 ")(?<gn2>" + toSupplementaries("c") + "*)", 4023 toSupplementaries("abccczzzabcczzzabccc"), 4024 "${gn1}", 4025 toSupplementaries("abzzzabcczzzabccc")); 4026 4027 4028 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4029 ")(?<gn2>" + toSupplementaries("c") + "*)", 4030 toSupplementaries("abccczzzabcczzzabccc"), 4031 "${gn1}", 4032 toSupplementaries("abzzzabzzzab")); 4033 4034 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4035 ")(?<gn2>" + toSupplementaries("c") + "*)", 4036 toSupplementaries("abccczzzabcczzzabccc"), 4037 "${gn2}", 4038 toSupplementaries("ccczzzabcczzzabccc")); 4039 4040 4041 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4042 ")(?<gn2>" + toSupplementaries("c") + "*)", 4043 toSupplementaries("abccczzzabcczzzabccc"), 4044 "${gn2}", 4045 toSupplementaries("ccczzzcczzzccc")); 4046 4047 checkReplaceFirst("(?<dog>Dog)AndCat", 4048 "zzzDogAndCatzzzDogAndCatzzz", 4049 "${dog}", 4050 
"zzzDogzzzDogAndCatzzz"); 4051 4052 4053 checkReplaceAll("(?<dog>Dog)AndCat", 4054 "zzzDogAndCatzzzDogAndCatzzz", 4055 "${dog}", 4056 "zzzDogzzzDogzzz"); 4057 4058 // backref in Matcher & String 4059 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") || 4060 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4061 failCount++; 4062 4063 // negative 4064 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4065 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4066 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4067 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4068 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4069 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4070 "gnameX"); 4071 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4072 report("NamedGroupCapture"); 4073 } 4074 4075 // This is for bug 6969132 4076 private static void nonBmpClassComplementTest() throws Exception { 4077 Pattern p = Pattern.compile("\\P{Lu}"); 4078 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4079 if (m.find() && m.start() == 1) 4080 failCount++; 4081 4082 // from a unicode category 4083 p = Pattern.compile("\\P{Lu}"); 4084 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4085 if (m.find()) 4086 failCount++; 4087 if (!m.hitEnd()) 4088 failCount++; 4089 4090 // block 4091 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4092 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4093 if (m.find() && m.start() == 1) 4094 failCount++; 4095 4096 report("NonBmpClassComplement"); 4097 } 4098 4099 private static void unicodePropertiesTest() throws Exception { 4100 // different forms 4101 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4102 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4103 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4104 
!Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4105 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4106 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4107 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4108 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4109 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4110 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4111 failCount++; 4112 4113 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4114 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4115 Matcher lastSM = common; 4116 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4117 4118 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4119 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4120 Matcher lastBM = latin; 4121 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4122 4123 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4124 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4125 continue; // only pick couple code points, they are the same 4126 } 4127 4128 // Unicode Script 4129 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4130 Matcher m; 4131 String str = new String(Character.toChars(cp)); 4132 if (script == lastScript) { 4133 m = lastSM; 4134 m.reset(str); 4135 } else { 4136 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4137 } 4138 if (!m.matches()) { 4139 failCount++; 4140 } 4141 Matcher other = (script == Character.UnicodeScript.COMMON)? 
unknown : common; 4142 other.reset(str); 4143 if (other.matches()) { 4144 failCount++; 4145 } 4146 lastSM = m; 4147 lastScript = script; 4148 4149 // Unicode Block 4150 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4151 if (block == null) { 4152 //System.out.printf("Not a Block: cp=%x%n", cp); 4153 continue; 4154 } 4155 if (block == lastBlock) { 4156 m = lastBM; 4157 m.reset(str); 4158 } else { 4159 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4160 } 4161 if (!m.matches()) { 4162 failCount++; 4163 } 4164 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4165 other.reset(str); 4166 if (other.matches()) { 4167 failCount++; 4168 } 4169 lastBM = m; 4170 lastBlock = block; 4171 } 4172 report("unicodeProperties"); 4173 } 4174 4175 private static void unicodeHexNotationTest() throws Exception { 4176 4177 // negative 4178 checkExpectedFail("\\x{-23}"); 4179 checkExpectedFail("\\x{110000}"); 4180 checkExpectedFail("\\x{}"); 4181 checkExpectedFail("\\x{AB[ef]"); 4182 4183 // codepoint 4184 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4185 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4186 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4187 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4188 4189 // in class 4190 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4191 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4192 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4193 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4194 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4195 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4196 4197 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4198 String s = "A" + new String(Character.toChars(cp)) + "B"; 4199 String hexUTF16 = (cp <= 0xFFFF)? 
String.format("\\u%04x", cp) 4200 : String.format("\\u%04x\\u%04x", 4201 (int) Character.toChars(cp)[0], 4202 (int) Character.toChars(cp)[1]); 4203 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4204 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4205 failCount++; 4206 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4207 failCount++; 4208 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4209 failCount++; 4210 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4211 failCount++; 4212 } 4213 report("unicodeHexNotation"); 4214 } 4215 4216 private static void unicodeClassesTest() throws Exception { 4217 4218 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4219 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4220 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4221 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4222 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4223 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4224 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4225 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4226 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4227 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4228 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4229 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4230 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4231 Matcher bound = Pattern.compile("\\b").matcher(""); 4232 Matcher word = Pattern.compile("\\w++").matcher(""); 4233 // UNICODE_CHARACTER_CLASS 4234 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4235 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4236 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4237 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 
4238 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4239 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4240 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4241 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4242 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4243 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4244 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4245 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4246 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4247 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4248 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4249 // embedded flag (?U) 4250 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4251 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4252 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4253 4254 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4255 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4256 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4257 // properties 4258 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4259 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4260 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4261 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4262 Matcher alphaP = 
Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4263 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4264 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4265 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4266 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4267 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4268 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4269 4270 // javaMethod 4271 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4272 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4273 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4274 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4275 4276 for (int cp = 1; cp < 0x30000; cp++) { 4277 String str = new String(Character.toChars(cp)); 4278 int type = Character.getType(cp); 4279 if (// lower 4280 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4281 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4282 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4283 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4284 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4285 // upper 4286 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4287 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4288 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4289 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4290 // alpha 4291 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4292 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4293 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4294 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4295 // digit 4296 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4297 Character.isDigit(cp) != digitU.reset(str).matches() || 4298 // alnum 4299 
POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4300 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4301 // punct 4302 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4303 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4304 // graph 4305 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4306 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4307 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4308 // blank 4309 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4310 != blank.reset(str).matches() || 4311 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4312 // print 4313 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4314 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4315 // cntrl 4316 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4317 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4318 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4319 // hexdigit 4320 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4321 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4322 // space 4323 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4324 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4325 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4326 // word 4327 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4328 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4329 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4330 // bwordb 4331 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4332 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4333 // properties 4334 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4335 Character.isLetter(cp) != letterP.reset(str).matches()|| 4336 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4337 Character.isIdeographic(cp) != ideogJ.reset(str).matches() || 4338 (Character.UNASSIGNED == type) == 
definedP.reset(str).matches() || 4339 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4340 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches()) 4341 failCount++; 4342 } 4343 4344 // bounds/word align 4345 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4346 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4347 failCount++; 4348 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4349 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4350 failCount++; 4351 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4352 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4353 failCount++; 4354 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4355 failCount++; 4356 report("unicodePredefinedClasses"); 4357 } 4358 4359 private static void horizontalAndVerticalWSTest() throws Exception { 4360 String hws = new String (new char[] { 4361 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4362 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4363 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4364 0x202f, 0x205f, 0x3000 }); 4365 String vws = new String (new char[] { 4366 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4367 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4368 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4369 failCount++; 4370 if (Pattern.compile("\\H").matcher(hws).find() || 4371 Pattern.compile("[\\H]").matcher(hws).find()) 4372 failCount++; 4373 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4374 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4375 failCount++; 4376 if (Pattern.compile("\\V").matcher(vws).find() || 4377 Pattern.compile("[\\V]").matcher(vws).find()) 4378 failCount++; 4379 String prefix = "abcd"; 4380 String suffix = "efgh"; 4381 String ng = "A"; 4382 for (int i = 0; i < hws.length(); i++) { 4383 String c = String.valueOf(hws.charAt(i)); 4384 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4385 if (!m.find() || !c.equals(m.group())) 4386 failCount++; 4387 m = 
Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4388 if (!m.find() || !c.equals(m.group())) 4389 failCount++; 4390 4391 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4392 if (!m.find() || !ng.equals(m.group())) 4393 failCount++; 4394 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4395 if (!m.find() || !ng.equals(m.group())) 4396 failCount++; 4397 } 4398 for (int i = 0; i < vws.length(); i++) { 4399 String c = String.valueOf(vws.charAt(i)); 4400 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4401 if (!m.find() || !c.equals(m.group())) 4402 failCount++; 4403 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4404 if (!m.find() || !c.equals(m.group())) 4405 failCount++; 4406 4407 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4408 if (!m.find() || !ng.equals(m.group())) 4409 failCount++; 4410 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4411 if (!m.find() || !ng.equals(m.group())) 4412 failCount++; 4413 } 4414 // \v in range is interpreted as 0x0B. 
This is the undocumented behavior 4415 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4416 failCount++; 4417 report("horizontalAndVerticalWSTest"); 4418 } 4419 4420 private static void linebreakTest() throws Exception { 4421 String linebreaks = new String (new char[] { 4422 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4423 String crnl = "\r\n"; 4424 if (!Pattern.compile("\\R+").matcher(linebreaks).matches() || 4425 !Pattern.compile("\\R").matcher(crnl).matches() || 4426 Pattern.compile("\\R\\R").matcher(crnl).matches()) 4427 failCount++; 4428 report("linebreakTest"); 4429 } 4430 4431 // #7189363 4432 private static void branchTest() throws Exception { 4433 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4434 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4435 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4436 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4437 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4438 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4439 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4440 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4441 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4442 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4443 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4444 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4445 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4446 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4447 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4448 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4449 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4450 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4451 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4452 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4453 
!Pattern.compile("(a)?bc|de").matcher("de").matches() || 4454 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4455 failCount++; 4456 report("branchTest"); 4457 } 4458 4459 // This test is for 8007395 4460 private static void groupCurlyNotFoundSuppTest() throws Exception { 4461 String input = "test this as \ud83d\ude0d"; 4462 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4463 "test(.)*(@[a-zA-Z.]+)", 4464 "test([^B])+(@[a-zA-Z.]+)", 4465 "test([^B])*(@[a-zA-Z.]+)", 4466 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4467 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4468 }) { 4469 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4470 .matcher(input); 4471 try { 4472 if (m.find()) { 4473 failCount++; 4474 } 4475 } catch (Exception x) { 4476 failCount++; 4477 } 4478 } 4479 report("GroupCurly NotFoundSupp"); 4480 } 4481 4482 // This test is for 8023647 4483 private static void groupCurlyBackoffTest() throws Exception { 4484 if (!"abc1c".matches("(\\w)+1\\1") || 4485 "abc11".matches("(\\w)+1\\1")) { 4486 failCount++; 4487 } 4488 report("GroupCurly backoff"); 4489 } 4490 4491 // This test is for 8012646 4492 private static void patternAsPredicate() throws Exception { 4493 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4494 4495 if (p.test("")) { 4496 failCount++; 4497 } 4498 if (!p.test("word")) { 4499 failCount++; 4500 } 4501 if (p.test("1234")) { 4502 failCount++; 4503 } 4504 report("Pattern.asPredicate"); 4505 } 4506 4507 // This test is for 8035975 4508 private static void invalidFlags() throws Exception { 4509 for (int flag = 1; flag != 0; flag <<= 1) { 4510 switch (flag) { 4511 case Pattern.CASE_INSENSITIVE: 4512 case Pattern.MULTILINE: 4513 case Pattern.DOTALL: 4514 case Pattern.UNICODE_CASE: 4515 case Pattern.CANON_EQ: 4516 case Pattern.UNIX_LINES: 4517 case Pattern.LITERAL: 4518 case Pattern.UNICODE_CHARACTER_CLASS: 4519 case Pattern.COMMENTS: 4520 // valid flag, continue 4521 break; 4522 default: 4523 try { 4524 
Pattern.compile(".", flag); 4525 failCount++; 4526 } catch (IllegalArgumentException expected) { 4527 } 4528 } 4529 } 4530 report("Invalid compile flags"); 4531 } 4532 }