1 /* 2 * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 36 * @key randomness 37 */ 38 39 import java.util.function.Function; 40 import java.util.regex.*; 41 import java.util.Random; 42 import java.io.*; 43 import java.util.*; 44 import java.nio.CharBuffer; 45 import java.util.function.Predicate; 46 47 /** 48 * This is a test class created to check the operation of 49 * the Pattern and Matcher classes. 50 */ 51 public class RegExTest { 52 53 private static Random generator = new Random(); 54 private static boolean failure = false; 55 private static int failCount = 0; 56 private static String firstFailure = null; 57 58 /** 59 * Main to interpret arguments and run several tests. 60 * 61 */ 62 public static void main(String[] args) throws Exception { 63 // Most of the tests are in a file 64 processFile("TestCases.txt"); 65 //processFile("PerlCases.txt"); 66 processFile("BMPTestCases.txt"); 67 processFile("SupplementaryTestCases.txt"); 68 69 // These test many randomly generated char patterns 70 bm(); 71 slice(); 72 73 // These are hard to put into the file 74 escapes(); 75 blankInput(); 76 77 // Substitition tests on randomly generated sequences 78 globalSubstitute(); 79 stringbufferSubstitute(); 80 stringbuilderSubstitute(); 81 82 substitutionBasher(); 83 substitutionBasher2(); 84 85 // Canonical Equivalence 86 ceTest(); 87 88 // Anchors 89 anchorTest(); 90 91 // boolean match calls 92 matchesTest(); 93 lookingAtTest(); 94 95 // Pattern API 96 patternMatchesTest(); 97 98 // Misc 99 lookbehindTest(); 100 nullArgumentTest(); 101 backRefTest(); 102 groupCaptureTest(); 103 caretTest(); 104 charClassTest(); 105 emptyPatternTest(); 106 findIntTest(); 107 group0Test(); 108 longPatternTest(); 109 octalTest(); 110 ampersandTest(); 111 negationTest(); 112 splitTest(); 113 appendTest(); 114 caseFoldingTest(); 115 commentsTest(); 116 unixLinesTest(); 117 replaceFirstTest(); 118 gTest(); 119 zTest(); 120 serializeTest(); 121 reluctantRepetitionTest(); 122 multilineDollarTest(); 123 dollarAtEndTest(); 124 caretBetweenTerminatorsTest(); 125 // This RFE rejected in Tiger numOccurrencesTest(); 126 javaCharClassTest(); 127 nonCaptureRepetitionTest(); 128 notCapturedGroupCurlyMatchTest(); 129 escapedSegmentTest(); 130 literalPatternTest(); 131 literalReplacementTest(); 132 regionTest(); 133 toStringTest(); 134 negatedCharClassTest(); 135 findFromTest(); 136 boundsTest(); 137 unicodeWordBoundsTest(); 138 caretAtEndTest(); 139 wordSearchTest(); 140 hitEndTest(); 141 toMatchResultTest(); 142 toMatchResultTest2(); 143 surrogatesInClassTest(); 144 removeQEQuotingTest(); 145 namedGroupCaptureTest(); 146 nonBmpClassComplementTest(); 147 unicodePropertiesTest(); 148 unicodeHexNotationTest(); 149 unicodeClassesTest(); 150 horizontalAndVerticalWSTest(); 151 linebreakTest(); 152 branchTest(); 153 groupCurlyNotFoundSuppTest(); 154 groupCurlyBackoffTest(); 155 patternAsPredicate(); 156 invalidFlags(); 157 158 if (failure) { 159 throw new 160 RuntimeException("RegExTest failed, 1st failure: " + 161 firstFailure); 162 } else { 163 System.err.println("OKAY: All tests passed."); 164 } 165 } 166 167 // Utility functions 168 169 private static String getRandomAlphaString(int length) { 170 StringBuffer buf = new StringBuffer(length); 171 for (int i=0; i<length; i++) { 172 char randChar = (char)(97 + generator.nextInt(26)); 173 buf.append(randChar); 174 } 175 return buf.toString(); 176 } 177 178 private static void check(Matcher m, String expected) { 179 m.find(); 180 if (!m.group().equals(expected)) 181 failCount++; 182 } 183 184 private static void check(Matcher m, String result, boolean expected) { 185 m.find(); 186 if (m.group().equals(result) != expected) 187 failCount++; 188 } 189 190 private static void check(Pattern p, String s, boolean expected) { 191 if (p.matcher(s).find() != expected) 192 failCount++; 193 } 194 195 private static void check(String p, String s, boolean expected) { 196 Matcher matcher = Pattern.compile(p).matcher(s); 197 if (matcher.find() != expected) 198 failCount++; 199 } 200 201 private static void check(String p, char c, boolean expected) { 202 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 203 Pattern pattern = Pattern.compile(propertyPattern); 204 char[] ca = new char[1]; ca[0] = c; 205 Matcher matcher = pattern.matcher(new String(ca)); 206 if (!matcher.find()) 207 failCount++; 208 } 209 210 private static void check(String p, int codePoint, boolean expected) { 211 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 212 Pattern pattern = Pattern.compile(propertyPattern); 213 char[] ca = Character.toChars(codePoint); 214 Matcher matcher = pattern.matcher(new String(ca)); 215 if (!matcher.find()) 216 failCount++; 217 } 218 219 private static void check(String p, int flag, String input, String s, 220 boolean expected) 221 { 222 Pattern pattern = Pattern.compile(p, flag); 223 Matcher matcher = pattern.matcher(input); 224 if (expected) 225 check(matcher, s, expected); 226 else 227 check(pattern, input, false); 228 } 229 230 private static void report(String testName) { 231 int spacesToAdd = 30 - testName.length(); 232 StringBuffer paddedNameBuffer = new StringBuffer(testName); 233 for (int i=0; i<spacesToAdd; i++) 234 paddedNameBuffer.append(" "); 235 String paddedName = paddedNameBuffer.toString(); 236 System.err.println(paddedName + ": " + 237 (failCount==0 ? "Passed":"Failed("+failCount+")")); 238 if (failCount > 0) { 239 failure = true; 240 241 if (firstFailure == null) { 242 firstFailure = testName; 243 } 244 } 245 246 failCount = 0; 247 } 248 249 /** 250 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 251 * supplementary characters. This method does NOT fully take care 252 * of the regex syntax. 253 */ 254 private static String toSupplementaries(String s) { 255 int length = s.length(); 256 StringBuffer sb = new StringBuffer(length * 2); 257 258 for (int i = 0; i < length; ) { 259 char c = s.charAt(i++); 260 if (c == '\\') { 261 sb.append(c); 262 if (i < length) { 263 c = s.charAt(i++); 264 sb.append(c); 265 if (c == 'u') { 266 // assume no syntax error 267 sb.append(s.charAt(i++)); 268 sb.append(s.charAt(i++)); 269 sb.append(s.charAt(i++)); 270 sb.append(s.charAt(i++)); 271 } 272 } 273 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 274 sb.append('\ud800').append((char)('\udc00'+c)); 275 } else { 276 sb.append(c); 277 } 278 } 279 return sb.toString(); 280 } 281 282 // Regular expression tests 283 284 // This is for bug 6178785 285 // Test if an expected NPE gets thrown when passing in a null argument 286 private static boolean check(Runnable test) { 287 try { 288 test.run(); 289 failCount++; 290 return false; 291 } catch (NullPointerException npe) { 292 return true; 293 } 294 } 295 296 private static void nullArgumentTest() { 297 check(() -> Pattern.compile(null)); 298 check(() -> Pattern.matches(null, null)); 299 check(() -> Pattern.matches("xyz", null)); 300 check(() -> Pattern.quote(null)); 301 check(() -> Pattern.compile("xyz").split(null)); 302 check(() -> Pattern.compile("xyz").matcher(null)); 303 304 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 305 m.matches(); 306 check(() -> m.appendTail((StringBuffer) null)); 307 check(() -> m.appendTail((StringBuilder)null)); 308 check(() -> m.replaceAll((String) null)); 309 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 310 check(() -> m.replaceFirst((String)null)); 311 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 312 check(() -> m.appendReplacement((StringBuffer)null, null)); 313 check(() -> m.appendReplacement((StringBuilder)null, null)); 314 check(() -> m.reset(null)); 315 check(() -> Matcher.quoteReplacement(null)); 316 //check(() -> m.usePattern(null)); 317 318 report("Null Argument"); 319 } 320 321 // This is for bug6635133 322 // Test if surrogate pair in Unicode escapes can be handled correctly. 323 private static void surrogatesInClassTest() throws Exception { 324 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 325 Matcher matcher = pattern.matcher("\ud834\udd22"); 326 if (!matcher.find()) 327 failCount++; 328 329 report("Surrogate pair in Unicode escape"); 330 } 331 332 // This is for bug6990617 333 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 334 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 335 // char is an octal digit. 336 private static void removeQEQuotingTest() throws Exception { 337 Pattern pattern = 338 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 339 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 340 if (!matcher.find()) 341 failCount++; 342 343 report("Remove Q/E Quoting"); 344 } 345 346 // This is for bug 4988891 347 // Test toMatchResult to see that it is a copy of the Matcher 348 // that is not affected by subsequent operations on the original 349 private static void toMatchResultTest() throws Exception { 350 Pattern pattern = Pattern.compile("squid"); 351 Matcher matcher = pattern.matcher( 352 "agiantsquidofdestinyasmallsquidoffate"); 353 matcher.find(); 354 int matcherStart1 = matcher.start(); 355 MatchResult mr = matcher.toMatchResult(); 356 if (mr == matcher) 357 failCount++; 358 int resultStart1 = mr.start(); 359 if (matcherStart1 != resultStart1) 360 failCount++; 361 matcher.find(); 362 int matcherStart2 = matcher.start(); 363 int resultStart2 = mr.start(); 364 if (matcherStart2 == resultStart2) 365 failCount++; 366 if (resultStart1 != resultStart2) 367 failCount++; 368 MatchResult mr2 = matcher.toMatchResult(); 369 if (mr == mr2) 370 failCount++; 371 if (mr2.start() != matcherStart2) 372 failCount++; 373 report("toMatchResult is a copy"); 374 } 375 376 private static void checkExpectedISE(Runnable test) { 377 try { 378 test.run(); 379 failCount++; 380 } catch (IllegalStateException x) { 381 } catch (IndexOutOfBoundsException xx) { 382 failCount++; 383 } 384 } 385 386 private static void checkExpectedIOOE(Runnable test) { 387 try { 388 test.run(); 389 failCount++; 390 } catch (IndexOutOfBoundsException x) {} 391 } 392 393 // This is for bug 8074678 394 // Test the result of toMatchResult throws ISE if no match is availble 395 private static void toMatchResultTest2() throws Exception { 396 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 397 matcher.find(); 398 MatchResult mr = matcher.toMatchResult(); 399 400 checkExpectedISE(() -> mr.start()); 401 checkExpectedISE(() -> mr.start(2)); 402 checkExpectedISE(() -> mr.end()); 403 checkExpectedISE(() -> mr.end(2)); 404 checkExpectedISE(() -> mr.group()); 405 checkExpectedISE(() -> mr.group(2)); 406 407 matcher = Pattern.compile("(match)").matcher("there is a match"); 408 matcher.find(); 409 MatchResult mr2 = matcher.toMatchResult(); 410 checkExpectedIOOE(() -> mr2.start(2)); 411 checkExpectedIOOE(() -> mr2.end(2)); 412 checkExpectedIOOE(() -> mr2.group(2)); 413 414 report("toMatchResult2 appropriate exceptions"); 415 } 416 417 // This is for bug 5013885 418 // Must test a slice to see if it reports hitEnd correctly 419 private static void hitEndTest() throws Exception { 420 // Basic test of Slice node 421 Pattern p = Pattern.compile("^squidattack"); 422 Matcher m = p.matcher("squack"); 423 m.find(); 424 if (m.hitEnd()) 425 failCount++; 426 m.reset("squid"); 427 m.find(); 428 if (!m.hitEnd()) 429 failCount++; 430 431 // Test Slice, SliceA and SliceU nodes 432 for (int i=0; i<3; i++) { 433 int flags = 0; 434 if (i==1) flags = Pattern.CASE_INSENSITIVE; 435 if (i==2) flags = Pattern.UNICODE_CASE; 436 p = Pattern.compile("^abc", flags); 437 m = p.matcher("ad"); 438 m.find(); 439 if (m.hitEnd()) 440 failCount++; 441 m.reset("ab"); 442 m.find(); 443 if (!m.hitEnd()) 444 failCount++; 445 } 446 447 // Test Boyer-Moore node 448 p = Pattern.compile("catattack"); 449 m = p.matcher("attack"); 450 m.find(); 451 if (!m.hitEnd()) 452 failCount++; 453 454 p = Pattern.compile("catattack"); 455 m = p.matcher("attackattackattackcatatta"); 456 m.find(); 457 if (!m.hitEnd()) 458 failCount++; 459 report("hitEnd from a Slice"); 460 } 461 462 // This is for bug 4997476 463 // It is weird code submitted by customer demonstrating a regression 464 private static void wordSearchTest() throws Exception { 465 String testString = new String("word1 word2 word3"); 466 Pattern p = Pattern.compile("\\b"); 467 Matcher m = p.matcher(testString); 468 int position = 0; 469 int start = 0; 470 while (m.find(position)) { 471 start = m.start(); 472 if (start == testString.length()) 473 break; 474 if (m.find(start+1)) { 475 position = m.start(); 476 } else { 477 position = testString.length(); 478 } 479 if (testString.substring(start, position).equals(" ")) 480 continue; 481 if (!testString.substring(start, position-1).startsWith("word")) 482 failCount++; 483 } 484 report("Customer word search"); 485 } 486 487 // This is for bug 4994840 488 private static void caretAtEndTest() throws Exception { 489 // Problem only occurs with multiline patterns 490 // containing a beginning-of-line caret "^" followed 491 // by an expression that also matches the empty string. 492 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 493 Matcher matcher = pattern.matcher("\r"); 494 matcher.find(); 495 matcher.find(); 496 report("Caret at end"); 497 } 498 499 // This test is for 4979006 500 // Check to see if word boundary construct properly handles unicode 501 // non spacing marks 502 private static void unicodeWordBoundsTest() throws Exception { 503 String spaces = " "; 504 String wordChar = "a"; 505 String nsm = "\u030a"; 506 507 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 508 509 Pattern pattern = Pattern.compile("\\b"); 510 Matcher matcher = pattern.matcher(""); 511 // S=other B=word character N=non spacing mark .=word boundary 512 // SS.BB.SS 513 String input = spaces + wordChar + wordChar + spaces; 514 twoFindIndexes(input, matcher, 2, 4); 515 // SS.BBN.SS 516 input = spaces + wordChar +wordChar + nsm + spaces; 517 twoFindIndexes(input, matcher, 2, 5); 518 // SS.BN.SS 519 input = spaces + wordChar + nsm + spaces; 520 twoFindIndexes(input, matcher, 2, 4); 521 // SS.BNN.SS 522 input = spaces + wordChar + nsm + nsm + spaces; 523 twoFindIndexes(input, matcher, 2, 5); 524 // SSN.BB.SS 525 input = spaces + nsm + wordChar + wordChar + spaces; 526 twoFindIndexes(input, matcher, 3, 5); 527 // SS.BNB.SS 528 input = spaces + wordChar + nsm + wordChar + spaces; 529 twoFindIndexes(input, matcher, 2, 5); 530 // SSNNSS 531 input = spaces + nsm + nsm + spaces; 532 matcher.reset(input); 533 if (matcher.find()) 534 failCount++; 535 // SSN.BBN.SS 536 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 537 twoFindIndexes(input, matcher, 3, 6); 538 539 report("Unicode word boundary"); 540 } 541 542 private static void twoFindIndexes(String input, Matcher matcher, int a, 543 int b) throws Exception 544 { 545 matcher.reset(input); 546 matcher.find(); 547 if (matcher.start() != a) 548 failCount++; 549 matcher.find(); 550 if (matcher.start() != b) 551 failCount++; 552 } 553 554 // This test is for 6284152 555 static void check(String regex, String input, String[] expected) { 556 List<String> result = new ArrayList<String>(); 557 Pattern p = Pattern.compile(regex); 558 Matcher m = p.matcher(input); 559 while (m.find()) { 560 result.add(m.group()); 561 } 562 if (!Arrays.asList(expected).equals(result)) 563 failCount++; 564 } 565 566 private static void lookbehindTest() throws Exception { 567 //Positive 568 check("(?<=%.{0,5})foo\\d", 569 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 570 new String[]{"foo1", "foo2", "foo3"}); 571 572 //boundary at end of the lookbehind sub-regex should work consistently 573 //with the boundary just after the lookbehind sub-regex 574 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 575 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 576 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 577 check("(?<!abc \\b)foo", "abc foo", new String[0]); 578 579 //Negative 580 check("(?<!%.{0,5})foo\\d", 581 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 582 new String[] {"foo4", "foo5"}); 583 584 //Positive greedy 585 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 586 587 //Positive reluctant 588 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 589 590 //supplementary 591 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 592 new String[] {"fo\ud800\udc00o"}); 593 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 594 new String[] {"fo\ud800\udc00o"}); 595 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 596 new String[] {"fo\ud800\udc00o"}); 597 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 598 new String[] {"fo\ud800\udc00o"}); 599 report("Lookbehind"); 600 } 601 602 // This test is for 4938995 603 // Check to see if weak region boundaries are transparent to 604 // lookahead and lookbehind constructs 605 private static void boundsTest() throws Exception { 606 String fullMessage = "catdogcat"; 607 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 608 Matcher matcher = pattern.matcher("catdogca"); 609 matcher.useTransparentBounds(true); 610 if (matcher.find()) 611 failCount++; 612 matcher.reset("atdogcat"); 613 if (matcher.find()) 614 failCount++; 615 matcher.reset(fullMessage); 616 if (!matcher.find()) 617 failCount++; 618 matcher.reset(fullMessage); 619 matcher.region(0,9); 620 if (!matcher.find()) 621 failCount++; 622 matcher.reset(fullMessage); 623 matcher.region(0,6); 624 if (!matcher.find()) 625 failCount++; 626 matcher.reset(fullMessage); 627 matcher.region(3,6); 628 if (!matcher.find()) 629 failCount++; 630 matcher.useTransparentBounds(false); 631 if (matcher.find()) 632 failCount++; 633 634 // Negative lookahead/lookbehind 635 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 636 matcher = pattern.matcher("dogcat"); 637 matcher.useTransparentBounds(true); 638 matcher.region(0,3); 639 if (matcher.find()) 640 failCount++; 641 matcher.reset("catdog"); 642 matcher.region(3,6); 643 if (matcher.find()) 644 failCount++; 645 matcher.useTransparentBounds(false); 646 matcher.reset("dogcat"); 647 matcher.region(0,3); 648 if (!matcher.find()) 649 failCount++; 650 matcher.reset("catdog"); 651 matcher.region(3,6); 652 if (!matcher.find()) 653 failCount++; 654 655 report("Region bounds transparency"); 656 } 657 658 // This test is for 4945394 659 private static void findFromTest() throws Exception { 660 String message = "This is 40 $0 message."; 661 Pattern pat = Pattern.compile("\\$0"); 662 Matcher match = pat.matcher(message); 663 if (!match.find()) 664 failCount++; 665 if (match.find()) 666 failCount++; 667 if (match.find()) 668 failCount++; 669 report("Check for alternating find"); 670 } 671 672 // This test is for 4872664 and 4892980 673 private static void negatedCharClassTest() throws Exception { 674 Pattern pattern = Pattern.compile("[^>]"); 675 Matcher matcher = pattern.matcher("\u203A"); 676 if (!matcher.matches()) 677 failCount++; 678 pattern = Pattern.compile("[^fr]"); 679 matcher = pattern.matcher("a"); 680 if (!matcher.find()) 681 failCount++; 682 matcher.reset("\u203A"); 683 if (!matcher.find()) 684 failCount++; 685 String s = "for"; 686 String result[] = s.split("[^fr]"); 687 if (!result[0].equals("f")) 688 failCount++; 689 if (!result[1].equals("r")) 690 failCount++; 691 s = "f\u203Ar"; 692 result = s.split("[^fr]"); 693 if (!result[0].equals("f")) 694 failCount++; 695 if (!result[1].equals("r")) 696 failCount++; 697 698 // Test adding to bits, subtracting a node, then adding to bits again 699 pattern = Pattern.compile("[^f\u203Ar]"); 700 matcher = pattern.matcher("a"); 701 if (!matcher.find()) 702 failCount++; 703 matcher.reset("f"); 704 if (matcher.find()) 705 failCount++; 706 matcher.reset("\u203A"); 707 if (matcher.find()) 708 failCount++; 709 matcher.reset("r"); 710 if (matcher.find()) 711 failCount++; 712 matcher.reset("\u203B"); 713 if (!matcher.find()) 714 failCount++; 715 716 // Test subtracting a node, adding to bits, subtracting again 717 pattern = Pattern.compile("[^\u203Ar\u203B]"); 718 matcher = pattern.matcher("a"); 719 if (!matcher.find()) 720 failCount++; 721 matcher.reset("\u203A"); 722 if (matcher.find()) 723 failCount++; 724 matcher.reset("r"); 725 if (matcher.find()) 726 failCount++; 727 matcher.reset("\u203B"); 728 if (matcher.find()) 729 failCount++; 730 matcher.reset("\u203C"); 731 if (!matcher.find()) 732 failCount++; 733 734 report("Negated Character Class"); 735 } 736 737 // This test is for 4628291 738 private static void toStringTest() throws Exception { 739 Pattern pattern = Pattern.compile("b+"); 740 if (pattern.toString() != "b+") 741 failCount++; 742 Matcher matcher = pattern.matcher("aaabbbccc"); 743 String matcherString = matcher.toString(); // unspecified 744 matcher.find(); 745 matcherString = matcher.toString(); // unspecified 746 matcher.region(0,3); 747 matcherString = matcher.toString(); // unspecified 748 matcher.reset(); 749 matcherString = matcher.toString(); // unspecified 750 report("toString"); 751 } 752 753 // This test is for 4808962 754 private static void literalPatternTest() throws Exception { 755 int flags = Pattern.LITERAL; 756 757 Pattern pattern = Pattern.compile("abc\\t$^", flags); 758 check(pattern, "abc\\t$^", true); 759 760 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 761 check(pattern, "abc\\t$^", true); 762 763 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 764 check(pattern, "\\Qa^$bcabc\\E", true); 765 check(pattern, "a^$bcabc", false); 766 767 pattern = Pattern.compile("\\\\Q\\\\E"); 768 check(pattern, "\\Q\\E", true); 769 770 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 771 check(pattern, "abcefg\\Q\\Ehij", true); 772 773 pattern = Pattern.compile("\\\\\\Q\\\\E"); 774 check(pattern, "\\\\\\\\", true); 775 776 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 777 check(pattern, "\\Qa^$bcabc\\E", true); 778 check(pattern, "a^$bcabc", false); 779 780 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 781 check(pattern, "\\Qabc\\Edef", true); 782 check(pattern, "abcdef", false); 783 784 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 785 check(pattern, "abc\\Edef", true); 786 check(pattern, "abcdef", false); 787 788 pattern = Pattern.compile(Pattern.quote("\\E")); 789 check(pattern, "\\E", true); 790 791 pattern = Pattern.compile("((((abc.+?:)", flags); 792 check(pattern, "((((abc.+?:)", true); 793 794 flags |= Pattern.MULTILINE; 795 796 pattern = Pattern.compile("^cat$", flags); 797 check(pattern, "abc^cat$def", true); 798 check(pattern, "cat", false); 799 800 flags |= Pattern.CASE_INSENSITIVE; 801 802 pattern = Pattern.compile("abcdef", flags); 803 check(pattern, "ABCDEF", true); 804 check(pattern, "AbCdEf", true); 805 806 flags |= Pattern.DOTALL; 807 808 pattern = Pattern.compile("a...b", flags); 809 check(pattern, "A...b", true); 810 check(pattern, "Axxxb", false); 811 812 flags |= Pattern.CANON_EQ; 813 814 Pattern p = Pattern.compile("testa\u030a", flags); 815 check(pattern, "testa\u030a", false); 816 check(pattern, "test\u00e5", false); 817 818 // Supplementary character test 819 flags = Pattern.LITERAL; 820 821 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 822 check(pattern, toSupplementaries("abc\\t$^"), true); 823 824 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 825 check(pattern, toSupplementaries("abc\\t$^"), true); 826 827 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 828 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 829 check(pattern, toSupplementaries("a^$bcabc"), false); 830 831 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 832 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 833 check(pattern, toSupplementaries("a^$bcabc"), false); 834 835 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 836 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 837 check(pattern, toSupplementaries("abcdef"), false); 838 839 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 840 check(pattern, toSupplementaries("abc\\Edef"), true); 841 check(pattern, toSupplementaries("abcdef"), false); 842 843 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 844 check(pattern, toSupplementaries("((((abc.+?:)"), true); 845 846 flags |= Pattern.MULTILINE; 847 848 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 849 check(pattern, toSupplementaries("abc^cat$def"), true); 850 check(pattern, toSupplementaries("cat"), false); 851 852 flags |= Pattern.DOTALL; 853 854 // note: this is case-sensitive. 855 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 856 check(pattern, toSupplementaries("a...b"), true); 857 check(pattern, toSupplementaries("axxxb"), false); 858 859 flags |= Pattern.CANON_EQ; 860 861 String t = toSupplementaries("test"); 862 p = Pattern.compile(t + "a\u030a", flags); 863 check(pattern, t + "a\u030a", false); 864 check(pattern, t + "\u00e5", false); 865 866 report("Literal pattern"); 867 } 868 869 // This test is for 4803179 870 // This test is also for 4808962, replacement parts 871 private static void literalReplacementTest() throws Exception { 872 int flags = Pattern.LITERAL; 873 874 Pattern pattern = Pattern.compile("abc", flags); 875 Matcher matcher = pattern.matcher("zzzabczzz"); 876 String replaceTest = "$0"; 877 String result = matcher.replaceAll(replaceTest); 878 if (!result.equals("zzzabczzz")) 879 failCount++; 880 881 matcher.reset(); 882 String literalReplacement = matcher.quoteReplacement(replaceTest); 883 result = matcher.replaceAll(literalReplacement); 884 if (!result.equals("zzz$0zzz")) 885 failCount++; 886 887 matcher.reset(); 888 replaceTest = "\\t$\\$"; 889 literalReplacement = matcher.quoteReplacement(replaceTest); 890 result = matcher.replaceAll(literalReplacement); 891 if (!result.equals("zzz\\t$\\$zzz")) 892 failCount++; 893 894 // Supplementary character test 895 pattern = Pattern.compile(toSupplementaries("abc"), flags); 896 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 897 replaceTest = "$0"; 898 result = matcher.replaceAll(replaceTest); 899 if (!result.equals(toSupplementaries("zzzabczzz"))) 900 failCount++; 901 902 matcher.reset(); 903 literalReplacement = matcher.quoteReplacement(replaceTest); 904 result = matcher.replaceAll(literalReplacement); 905 if (!result.equals(toSupplementaries("zzz$0zzz"))) 906 failCount++; 907 908 matcher.reset(); 909 replaceTest = "\\t$\\$"; 910 literalReplacement = matcher.quoteReplacement(replaceTest); 911 result = matcher.replaceAll(literalReplacement); 912 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 913 failCount++; 914 915 // IAE should be thrown if backslash or '$' is the last character 916 // in replacement string 917 try { 918 "\uac00".replaceAll("\uac00", "$"); 919 failCount++; 920 } catch (IllegalArgumentException iie) { 921 } catch (Exception e) { 922 failCount++; 923 } 924 try { 925 "\uac00".replaceAll("\uac00", "\\"); 926 failCount++; 927 } catch (IllegalArgumentException iie) { 928 } catch (Exception e) { 929 failCount++; 930 } 931 report("Literal replacement"); 932 } 933 934 // This test is for 4757029 935 private static void regionTest() throws Exception { 936 Pattern pattern = Pattern.compile("abc"); 937 Matcher matcher = pattern.matcher("abcdefabc"); 938 939 matcher.region(0,9); 940 if (!matcher.find()) 941 failCount++; 942 if (!matcher.find()) 943 failCount++; 944 matcher.region(0,3); 945 if (!matcher.find()) 946 failCount++; 947 matcher.region(3,6); 948 if (matcher.find()) 949 failCount++; 950 matcher.region(0,2); 951 if (matcher.find()) 952 failCount++; 953 954 expectRegionFail(matcher, 1, -1); 955 expectRegionFail(matcher, -1, -1); 956 expectRegionFail(matcher, -1, 1); 957 expectRegionFail(matcher, 5, 3); 958 expectRegionFail(matcher, 5, 12); 959 expectRegionFail(matcher, 12, 12); 960 961 pattern = Pattern.compile("^abc$"); 962 matcher = pattern.matcher("zzzabczzz"); 963 matcher.region(0,9); 964 if (matcher.find()) 965 failCount++; 966 matcher.region(3,6); 967 if (!matcher.find()) 968 failCount++; 969 matcher.region(3,6); 970 matcher.useAnchoringBounds(false); 971 if (matcher.find()) 972 failCount++; 973 974 // Supplementary character test 975 pattern = Pattern.compile(toSupplementaries("abc")); 976 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 977 matcher.region(0,9*2); 978 if (!matcher.find()) 979 failCount++; 980 if (!matcher.find()) 981 failCount++; 982 matcher.region(0,3*2); 983 if (!matcher.find()) 984 failCount++; 985 matcher.region(1,3*2); 986 if (matcher.find()) 987 failCount++; 988 matcher.region(3*2,6*2); 989 if (matcher.find()) 990 failCount++; 991 matcher.region(0,2*2); 992 if (matcher.find()) 993 failCount++; 994 matcher.region(0,2*2+1); 995 if (matcher.find()) 996 failCount++; 997 998 expectRegionFail(matcher, 1*2, -1); 999 expectRegionFail(matcher, -1, -1); 1000 expectRegionFail(matcher, -1, 1*2); 1001 expectRegionFail(matcher, 5*2, 3*2); 1002 expectRegionFail(matcher, 5*2, 12*2); 1003 expectRegionFail(matcher, 12*2, 12*2); 1004 1005 pattern = Pattern.compile(toSupplementaries("^abc$")); 1006 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1007 matcher.region(0,9*2); 1008 if (matcher.find()) 1009 failCount++; 1010 matcher.region(3*2,6*2); 1011 if (!matcher.find()) 1012 failCount++; 1013 matcher.region(3*2+1,6*2); 1014 if (matcher.find()) 1015 failCount++; 1016 matcher.region(3*2,6*2-1); 1017 if (matcher.find()) 1018 failCount++; 1019 matcher.region(3*2,6*2); 1020 matcher.useAnchoringBounds(false); 1021 if (matcher.find()) 1022 failCount++; 1023 report("Regions"); 1024 } 1025 1026 private static void expectRegionFail(Matcher matcher, int index1, 1027 int index2) 1028 { 1029 try { 1030 matcher.region(index1, index2); 1031 failCount++; 1032 } catch (IndexOutOfBoundsException ioobe) { 1033 // Correct result 1034 } catch (IllegalStateException ise) { 1035 // Correct result 1036 } 1037 } 1038 1039 // This test is for 4803197 1040 private static void escapedSegmentTest() throws Exception { 1041 1042 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1043 check(pattern, "dir1\\dir2", true); 1044 1045 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1046 check(pattern, "dir1\\dir2\\", true); 1047 1048 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1049 check(pattern, "dir1\\dir2\\", true); 1050 1051 // Supplementary character test 1052 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1053 check(pattern, toSupplementaries("dir1\\dir2"), true); 1054 1055 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1056 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1057 1058 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1059 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1060 1061 report("Escaped segment"); 1062 } 1063 1064 // This test is for 4792284 1065 private static void nonCaptureRepetitionTest() throws Exception { 1066 String input = "abcdefgh;"; 1067 1068 String[] patterns = new String[] { 1069 "(?:\\w{4})+;", 1070 "(?:\\w{8})*;", 1071 "(?:\\w{2}){2,4};", 1072 "(?:\\w{4}){2,};", // only matches the 1073 ".*?(?:\\w{5})+;", // specified minimum 1074 ".*?(?:\\w{9})*;", // number of reps - OK 1075 "(?:\\w{4})+?;", // lazy repetition - OK 1076 "(?:\\w{4})++;", // possessive repetition - OK 1077 "(?:\\w{2,}?)+;", // non-deterministic - OK 1078 "(\\w{4})+;", // capturing group - OK 1079 }; 1080 1081 for (int i = 0; i < patterns.length; i++) { 1082 // Check find() 1083 check(patterns[i], 0, input, input, true); 1084 // Check matches() 1085 Pattern p = Pattern.compile(patterns[i]); 1086 Matcher m = p.matcher(input); 1087 1088 if (m.matches()) { 1089 if (!m.group(0).equals(input)) 1090 failCount++; 1091 } else { 1092 failCount++; 1093 } 1094 } 1095 1096 report("Non capturing repetition"); 1097 } 1098 1099 // This test is for 6358731 1100 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1101 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1102 Matcher matcher = pattern.matcher("abcd"); 1103 if (!matcher.matches() || 1104 matcher.group(1) != null || 1105 !matcher.group(2).equals("abcd")) { 1106 failCount++; 1107 } 1108 report("Not captured GroupCurly"); 1109 } 1110 1111 // This test is for 4706545 1112 private static void javaCharClassTest() throws Exception { 1113 for (int i=0; i<1000; i++) { 1114 char c = (char)generator.nextInt(); 1115 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1116 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1117 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1118 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1119 check("{javaDigit}", c, Character.isDigit(c)); 1120 check("{javaDefined}", c, Character.isDefined(c)); 1121 check("{javaLetter}", c, Character.isLetter(c)); 1122 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1123 check("{javaJavaIdentifierStart}", c, 1124 Character.isJavaIdentifierStart(c)); 1125 check("{javaJavaIdentifierPart}", c, 1126 Character.isJavaIdentifierPart(c)); 1127 check("{javaUnicodeIdentifierStart}", c, 1128 Character.isUnicodeIdentifierStart(c)); 1129 check("{javaUnicodeIdentifierPart}", c, 1130 Character.isUnicodeIdentifierPart(c)); 1131 check("{javaIdentifierIgnorable}", c, 1132 Character.isIdentifierIgnorable(c)); 1133 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1134 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1135 check("{javaISOControl}", c, Character.isISOControl(c)); 1136 check("{javaMirrored}", c, Character.isMirrored(c)); 1137 1138 } 1139 1140 // Supplementary character test 1141 for (int i=0; i<1000; i++) { 1142 int c = generator.nextInt(Character.MAX_CODE_POINT 1143 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1144 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1145 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1146 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1147 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1148 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1149 check("{javaDigit}", c, Character.isDigit(c)); 1150 check("{javaDefined}", c, Character.isDefined(c)); 1151 check("{javaLetter}", c, Character.isLetter(c)); 1152 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1153 check("{javaJavaIdentifierStart}", c, 1154 Character.isJavaIdentifierStart(c)); 1155 check("{javaJavaIdentifierPart}", c, 1156 Character.isJavaIdentifierPart(c)); 1157 check("{javaUnicodeIdentifierStart}", c, 1158 Character.isUnicodeIdentifierStart(c)); 1159 check("{javaUnicodeIdentifierPart}", c, 1160 Character.isUnicodeIdentifierPart(c)); 1161 check("{javaIdentifierIgnorable}", c, 1162 Character.isIdentifierIgnorable(c)); 1163 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1164 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1165 check("{javaISOControl}", c, Character.isISOControl(c)); 1166 check("{javaMirrored}", c, Character.isMirrored(c)); 1167 } 1168 1169 report("Java character classes"); 1170 } 1171 1172 // This test is for 4523620 1173 /* 1174 private static void numOccurrencesTest() throws Exception { 1175 Pattern pattern = Pattern.compile("aaa"); 1176 1177 if (pattern.numOccurrences("aaaaaa", false) != 2) 1178 failCount++; 1179 if (pattern.numOccurrences("aaaaaa", true) != 4) 1180 failCount++; 1181 1182 pattern = Pattern.compile("^"); 1183 if (pattern.numOccurrences("aaaaaa", false) != 1) 1184 failCount++; 1185 if (pattern.numOccurrences("aaaaaa", true) != 1) 1186 failCount++; 1187 1188 report("Number of Occurrences"); 1189 } 1190 */ 1191 1192 // This test is for 4776374 1193 private static void caretBetweenTerminatorsTest() throws Exception { 1194 int flags1 = Pattern.DOTALL; 1195 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1196 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1197 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1198 1199 check("^....", flags1, "test\ntest", "test", true); 1200 check(".....^", flags1, "test\ntest", "test", false); 1201 check(".....^", flags1, "test\n", "test", false); 1202 check("....^", flags1, "test\r\n", "test", false); 1203 1204 check("^....", flags2, "test\ntest", "test", true); 1205 check("....^", flags2, "test\ntest", "test", false); 1206 check(".....^", flags2, "test\n", "test", false); 1207 check("....^", flags2, "test\r\n", "test", false); 1208 1209 check("^....", flags3, "test\ntest", "test", true); 1210 check(".....^", flags3, "test\ntest", "test\n", true); 1211 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1212 check(".....^", flags3, "test\n", "test", false); 1213 check(".....^", flags3, "test\r\n", "test", false); 1214 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1215 1216 check("^....", flags4, "test\ntest", "test", true); 1217 check(".....^", flags3, "test\ntest", "test\n", true); 1218 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1219 check(".....^", flags4, "test\n", "test\n", false); 1220 check(".....^", flags4, "test\r\n", "test\r", false); 1221 1222 // Supplementary character test 1223 String t = toSupplementaries("test"); 1224 check("^....", flags1, t+"\n"+t, t, true); 1225 check(".....^", flags1, t+"\n"+t, t, false); 1226 check(".....^", flags1, t+"\n", t, false); 1227 check("....^", flags1, t+"\r\n", t, false); 1228 1229 check("^....", flags2, t+"\n"+t, t, true); 1230 check("....^", flags2, t+"\n"+t, t, false); 1231 check(".....^", flags2, t+"\n", t, false); 1232 check("....^", flags2, t+"\r\n", t, false); 1233 1234 check("^....", flags3, t+"\n"+t, t, true); 1235 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1236 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1237 check(".....^", flags3, t+"\n", t, false); 1238 check(".....^", flags3, t+"\r\n", t, false); 1239 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1240 1241 check("^....", flags4, t+"\n"+t, t, true); 1242 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1243 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1244 check(".....^", flags4, t+"\n", t+"\n", false); 1245 check(".....^", flags4, t+"\r\n", t+"\r", false); 1246 1247 report("Caret between terminators"); 1248 } 1249 1250 // This test is for 4727935 1251 private static void dollarAtEndTest() throws Exception { 1252 int flags1 = Pattern.DOTALL; 1253 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1254 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1255 1256 check("....$", flags1, "test\n", "test", true); 1257 check("....$", flags1, "test\r\n", "test", true); 1258 check(".....$", flags1, "test\n", "test\n", true); 1259 check(".....$", flags1, "test\u0085", "test\u0085", true); 1260 check("....$", flags1, "test\u0085", "test", true); 1261 1262 check("....$", flags2, "test\n", "test", true); 1263 check(".....$", flags2, "test\n", "test\n", true); 1264 check(".....$", flags2, "test\u0085", "test\u0085", true); 1265 check("....$", flags2, "test\u0085", "est\u0085", true); 1266 1267 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1268 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1269 check("....$blah", flags3, "test\nblah", "!!!!", false); 1270 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1271 1272 // Supplementary character test 1273 String t = toSupplementaries("test"); 1274 String b = toSupplementaries("blah"); 1275 check("....$", flags1, t+"\n", t, true); 1276 check("....$", flags1, t+"\r\n", t, true); 1277 check(".....$", flags1, t+"\n", t+"\n", true); 1278 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1279 check("....$", flags1, t+"\u0085", t, true); 1280 1281 check("....$", flags2, t+"\n", t, true); 1282 check(".....$", flags2, t+"\n", t+"\n", true); 1283 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1284 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1285 1286 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1287 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1288 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1289 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1290 1291 report("Dollar at End"); 1292 } 1293 1294 // This test is for 4711773 1295 private static void multilineDollarTest() throws Exception { 1296 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1297 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1298 matcher.find(); 1299 if (matcher.start(0) != 9) 1300 failCount++; 1301 matcher.find(); 1302 if (matcher.start(0) != 20) 1303 failCount++; 1304 1305 // Supplementary character test 1306 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1307 matcher.find(); 1308 if (matcher.start(0) != 9*2) 1309 failCount++; 1310 matcher.find(); 1311 if (matcher.start(0) != 20*2) 1312 failCount++; 1313 1314 report("Multiline Dollar"); 1315 } 1316 1317 private static void reluctantRepetitionTest() throws Exception { 1318 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1319 check(p, "1 word word word 2", true); 1320 check(p, "1 wor wo w 2", true); 1321 check(p, "1 word word 2", true); 1322 check(p, "1 word 2", true); 1323 check(p, "1 wo w w 2", true); 1324 check(p, "1 wo w 2", true); 1325 check(p, "1 wor w 2", true); 1326 1327 p = Pattern.compile("([a-z])+?c"); 1328 Matcher m = p.matcher("ababcdefdec"); 1329 check(m, "ababc"); 1330 1331 // Supplementary character test 1332 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1333 m = p.matcher(toSupplementaries("ababcdefdec")); 1334 check(m, toSupplementaries("ababc")); 1335 1336 report("Reluctant Repetition"); 1337 } 1338 1339 private static void serializeTest() throws Exception { 1340 String patternStr = "(b)"; 1341 String matchStr = "b"; 1342 Pattern pattern = Pattern.compile(patternStr); 1343 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1344 ObjectOutputStream oos = new ObjectOutputStream(baos); 1345 oos.writeObject(pattern); 1346 oos.close(); 1347 ObjectInputStream ois = new ObjectInputStream( 1348 new ByteArrayInputStream(baos.toByteArray())); 1349 Pattern serializedPattern = (Pattern)ois.readObject(); 1350 ois.close(); 1351 Matcher matcher = serializedPattern.matcher(matchStr); 1352 if (!matcher.matches()) 1353 failCount++; 1354 if (matcher.groupCount() != 1) 1355 failCount++; 1356 1357 report("Serialization"); 1358 } 1359 1360 private static void gTest() { 1361 Pattern pattern = Pattern.compile("\\G\\w"); 1362 Matcher matcher = pattern.matcher("abc#x#x"); 1363 matcher.find(); 1364 matcher.find(); 1365 matcher.find(); 1366 if (matcher.find()) 1367 failCount++; 1368 1369 pattern = Pattern.compile("\\GA*"); 1370 matcher = pattern.matcher("1A2AA3"); 1371 matcher.find(); 1372 if (matcher.find()) 1373 failCount++; 1374 1375 pattern = Pattern.compile("\\GA*"); 1376 matcher = pattern.matcher("1A2AA3"); 1377 if (!matcher.find(1)) 1378 failCount++; 1379 matcher.find(); 1380 if (matcher.find()) 1381 failCount++; 1382 1383 report("\\G"); 1384 } 1385 1386 private static void zTest() { 1387 Pattern pattern = Pattern.compile("foo\\Z"); 1388 // Positives 1389 check(pattern, "foo\u0085", true); 1390 check(pattern, "foo\u2028", true); 1391 check(pattern, "foo\u2029", true); 1392 check(pattern, "foo\n", true); 1393 check(pattern, "foo\r", true); 1394 check(pattern, "foo\r\n", true); 1395 // Negatives 1396 check(pattern, "fooo", false); 1397 check(pattern, "foo\n\r", false); 1398 1399 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1400 // Positives 1401 check(pattern, "foo", true); 1402 check(pattern, "foo\n", true); 1403 // Negatives 1404 check(pattern, "foo\r", false); 1405 check(pattern, "foo\u0085", false); 1406 check(pattern, "foo\u2028", false); 1407 check(pattern, "foo\u2029", false); 1408 1409 report("\\Z"); 1410 } 1411 1412 private static void replaceFirstTest() { 1413 Pattern pattern = Pattern.compile("(ab)(c*)"); 1414 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1415 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1416 failCount++; 1417 1418 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1419 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1420 failCount++; 1421 1422 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1423 String result = matcher.replaceFirst("$1"); 1424 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1425 failCount++; 1426 1427 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1428 result = matcher.replaceFirst("$2"); 1429 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1430 failCount++; 1431 1432 pattern = Pattern.compile("a*"); 1433 matcher = pattern.matcher("aaaaaaaaaa"); 1434 if (!matcher.replaceFirst("test").equals("test")) 1435 failCount++; 1436 1437 pattern = Pattern.compile("a+"); 1438 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1439 if (!matcher.replaceFirst("test").equals("zzztest")) 1440 failCount++; 1441 1442 // Supplementary character test 1443 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1444 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1445 if (!matcher.replaceFirst(toSupplementaries("test")) 1446 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1447 failCount++; 1448 1449 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1450 if (!matcher.replaceFirst(toSupplementaries("test")). 1451 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1452 failCount++; 1453 1454 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1455 result = matcher.replaceFirst("$1"); 1456 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1457 failCount++; 1458 1459 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1460 result = matcher.replaceFirst("$2"); 1461 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1462 failCount++; 1463 1464 pattern = Pattern.compile(toSupplementaries("a*")); 1465 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1466 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1467 failCount++; 1468 1469 pattern = Pattern.compile(toSupplementaries("a+")); 1470 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1471 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1472 failCount++; 1473 1474 report("Replace First"); 1475 } 1476 1477 private static void unixLinesTest() { 1478 Pattern pattern = Pattern.compile(".*"); 1479 Matcher matcher = pattern.matcher("aa\u2028blah"); 1480 matcher.find(); 1481 if (!matcher.group(0).equals("aa")) 1482 failCount++; 1483 1484 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1485 matcher = pattern.matcher("aa\u2028blah"); 1486 matcher.find(); 1487 if (!matcher.group(0).equals("aa\u2028blah")) 1488 failCount++; 1489 1490 pattern = Pattern.compile("[az]$", 1491 Pattern.MULTILINE | Pattern.UNIX_LINES); 1492 matcher = pattern.matcher("aa\u2028zz"); 1493 check(matcher, "a\u2028", false); 1494 1495 // Supplementary character test 1496 pattern = Pattern.compile(".*"); 1497 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1498 matcher.find(); 1499 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1500 failCount++; 1501 1502 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1503 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1504 matcher.find(); 1505 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1506 failCount++; 1507 1508 pattern = Pattern.compile(toSupplementaries("[az]$"), 1509 Pattern.MULTILINE | Pattern.UNIX_LINES); 1510 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1511 check(matcher, toSupplementaries("a\u2028"), false); 1512 1513 report("Unix Lines"); 1514 } 1515 1516 private static void commentsTest() { 1517 int flags = Pattern.COMMENTS; 1518 1519 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1520 Matcher matcher = pattern.matcher("aa#aa"); 1521 if (!matcher.matches()) 1522 failCount++; 1523 1524 pattern = Pattern.compile("aa # blah", flags); 1525 matcher = pattern.matcher("aa"); 1526 if (!matcher.matches()) 1527 failCount++; 1528 1529 pattern = Pattern.compile("aa blah", flags); 1530 matcher = pattern.matcher("aablah"); 1531 if (!matcher.matches()) 1532 failCount++; 1533 1534 pattern = Pattern.compile("aa # blah blech ", flags); 1535 matcher = pattern.matcher("aa"); 1536 if (!matcher.matches()) 1537 failCount++; 1538 1539 pattern = Pattern.compile("aa # blah\n ", flags); 1540 matcher = pattern.matcher("aa"); 1541 if (!matcher.matches()) 1542 failCount++; 1543 1544 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1545 matcher = pattern.matcher("aabc"); 1546 if (!matcher.matches()) 1547 failCount++; 1548 1549 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1550 matcher = pattern.matcher("aabc"); 1551 if (!matcher.matches()) 1552 failCount++; 1553 1554 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1555 matcher = pattern.matcher("aabc#blech"); 1556 if (!matcher.matches()) 1557 failCount++; 1558 1559 // Supplementary character test 1560 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1561 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1562 if (!matcher.matches()) 1563 failCount++; 1564 1565 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1566 matcher = pattern.matcher(toSupplementaries("aa")); 1567 if (!matcher.matches()) 1568 failCount++; 1569 1570 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1571 matcher = pattern.matcher(toSupplementaries("aablah")); 1572 if (!matcher.matches()) 1573 failCount++; 1574 1575 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1576 matcher = pattern.matcher(toSupplementaries("aa")); 1577 if (!matcher.matches()) 1578 failCount++; 1579 1580 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1581 matcher = pattern.matcher(toSupplementaries("aa")); 1582 if (!matcher.matches()) 1583 failCount++; 1584 1585 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1586 matcher = pattern.matcher(toSupplementaries("aabc")); 1587 if (!matcher.matches()) 1588 failCount++; 1589 1590 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1591 matcher = pattern.matcher(toSupplementaries("aabc")); 1592 if (!matcher.matches()) 1593 failCount++; 1594 1595 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1596 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1597 if (!matcher.matches()) 1598 failCount++; 1599 1600 report("Comments"); 1601 } 1602 1603 private static void caseFoldingTest() { // bug 4504687 1604 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1605 Pattern pattern = Pattern.compile("aa", flags); 1606 Matcher matcher = pattern.matcher("ab"); 1607 if (matcher.matches()) 1608 failCount++; 1609 1610 pattern = Pattern.compile("aA", flags); 1611 matcher = pattern.matcher("ab"); 1612 if (matcher.matches()) 1613 failCount++; 1614 1615 pattern = Pattern.compile("aa", flags); 1616 matcher = pattern.matcher("aB"); 1617 if (matcher.matches()) 1618 failCount++; 1619 matcher = pattern.matcher("Ab"); 1620 if (matcher.matches()) 1621 failCount++; 1622 1623 // ASCII "a" 1624 // Latin-1 Supplement "a" + grave 1625 // Cyrillic "a" 1626 String[] patterns = new String[] { 1627 //single 1628 "a", "\u00e0", "\u0430", 1629 //slice 1630 "ab", "\u00e0\u00e1", "\u0430\u0431", 1631 //class single 1632 "[a]", "[\u00e0]", "[\u0430]", 1633 //class range 1634 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1635 //back reference 1636 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1637 }; 1638 1639 String[] texts = new String[] { 1640 "A", "\u00c0", "\u0410", 1641 "AB", "\u00c0\u00c1", "\u0410\u0411", 1642 "A", "\u00c0", "\u0410", 1643 "B", "\u00c2", "\u0411", 1644 "aA", "\u00e0\u00c0", "\u0430\u0410" 1645 }; 1646 1647 boolean[] expected = new boolean[] { 1648 true, false, false, 1649 true, false, false, 1650 true, false, false, 1651 true, false, false, 1652 true, false, false 1653 }; 1654 1655 flags = Pattern.CASE_INSENSITIVE; 1656 for (int i = 0; i < patterns.length; i++) { 1657 pattern = Pattern.compile(patterns[i], flags); 1658 matcher = pattern.matcher(texts[i]); 1659 if (matcher.matches() != expected[i]) { 1660 System.out.println("<1> Failed at " + i); 1661 failCount++; 1662 } 1663 } 1664 1665 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1666 for (int i = 0; i < patterns.length; i++) { 1667 pattern = Pattern.compile(patterns[i], flags); 1668 matcher = pattern.matcher(texts[i]); 1669 if (!matcher.matches()) { 1670 System.out.println("<2> Failed at " + i); 1671 failCount++; 1672 } 1673 } 1674 // flag unicode_case alone should do nothing 1675 flags = Pattern.UNICODE_CASE; 1676 for (int i = 0; i < patterns.length; i++) { 1677 pattern = Pattern.compile(patterns[i], flags); 1678 matcher = pattern.matcher(texts[i]); 1679 if (matcher.matches()) { 1680 System.out.println("<3> Failed at " + i); 1681 failCount++; 1682 } 1683 } 1684 1685 // Special cases: i, I, u+0131 and u+0130 1686 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1687 pattern = Pattern.compile("[h-j]+", flags); 1688 if (!pattern.matcher("\u0131\u0130").matches()) 1689 failCount++; 1690 report("Case Folding"); 1691 } 1692 1693 private static void appendTest() { 1694 Pattern pattern = Pattern.compile("(ab)(cd)"); 1695 Matcher matcher = pattern.matcher("abcd"); 1696 String result = matcher.replaceAll("$2$1"); 1697 if (!result.equals("cdab")) 1698 failCount++; 1699 1700 String s1 = "Swap all: first = 123, second = 456"; 1701 String s2 = "Swap one: first = 123, second = 456"; 1702 String r = "$3$2$1"; 1703 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1704 matcher = pattern.matcher(s1); 1705 1706 result = matcher.replaceAll(r); 1707 if (!result.equals("Swap all: 123 = first, 456 = second")) 1708 failCount++; 1709 1710 matcher = pattern.matcher(s2); 1711 1712 if (matcher.find()) { 1713 StringBuffer sb = new StringBuffer(); 1714 matcher.appendReplacement(sb, r); 1715 matcher.appendTail(sb); 1716 result = sb.toString(); 1717 if (!result.equals("Swap one: 123 = first, second = 456")) 1718 failCount++; 1719 } 1720 1721 // Supplementary character test 1722 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1723 matcher = pattern.matcher(toSupplementaries("abcd")); 1724 result = matcher.replaceAll("$2$1"); 1725 if (!result.equals(toSupplementaries("cdab"))) 1726 failCount++; 1727 1728 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1729 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1730 r = toSupplementaries("$3$2$1"); 1731 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1732 matcher = pattern.matcher(s1); 1733 1734 result = matcher.replaceAll(r); 1735 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1736 failCount++; 1737 1738 matcher = pattern.matcher(s2); 1739 1740 if (matcher.find()) { 1741 StringBuffer sb = new StringBuffer(); 1742 matcher.appendReplacement(sb, r); 1743 matcher.appendTail(sb); 1744 result = sb.toString(); 1745 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1746 failCount++; 1747 } 1748 report("Append"); 1749 } 1750 1751 private static void splitTest() { 1752 Pattern pattern = Pattern.compile(":"); 1753 String[] result = pattern.split("foo:and:boo", 2); 1754 if (!result[0].equals("foo")) 1755 failCount++; 1756 if (!result[1].equals("and:boo")) 1757 failCount++; 1758 // Supplementary character test 1759 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1760 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1761 if (!result[0].equals(toSupplementaries("foo"))) 1762 failCount++; 1763 if (!result[1].equals(toSupplementaries("andXboo"))) 1764 failCount++; 1765 1766 CharBuffer cb = CharBuffer.allocate(100); 1767 cb.put("foo:and:boo"); 1768 cb.flip(); 1769 result = pattern.split(cb); 1770 if (!result[0].equals("foo")) 1771 failCount++; 1772 if (!result[1].equals("and")) 1773 failCount++; 1774 if (!result[2].equals("boo")) 1775 failCount++; 1776 1777 // Supplementary character test 1778 CharBuffer cbs = CharBuffer.allocate(100); 1779 cbs.put(toSupplementaries("fooXandXboo")); 1780 cbs.flip(); 1781 result = patternX.split(cbs); 1782 if (!result[0].equals(toSupplementaries("foo"))) 1783 failCount++; 1784 if (!result[1].equals(toSupplementaries("and"))) 1785 failCount++; 1786 if (!result[2].equals(toSupplementaries("boo"))) 1787 failCount++; 1788 1789 String source = "0123456789"; 1790 for (int limit=-2; limit<3; limit++) { 1791 for (int x=0; x<10; x++) { 1792 result = source.split(Integer.toString(x), limit); 1793 int expectedLength = limit < 1 ? 2 : limit; 1794 1795 if ((limit == 0) && (x == 9)) { 1796 // expected dropping of "" 1797 if (result.length != 1) 1798 failCount++; 1799 if (!result[0].equals("012345678")) { 1800 failCount++; 1801 } 1802 } else { 1803 if (result.length != expectedLength) { 1804 failCount++; 1805 } 1806 if (!result[0].equals(source.substring(0,x))) { 1807 if (limit != 1) { 1808 failCount++; 1809 } else { 1810 if (!result[0].equals(source.substring(0,10))) { 1811 failCount++; 1812 } 1813 } 1814 } 1815 if (expectedLength > 1) { // Check segment 2 1816 if (!result[1].equals(source.substring(x+1,10))) 1817 failCount++; 1818 } 1819 } 1820 } 1821 } 1822 // Check the case for no match found 1823 for (int limit=-2; limit<3; limit++) { 1824 result = source.split("e", limit); 1825 if (result.length != 1) 1826 failCount++; 1827 if (!result[0].equals(source)) 1828 failCount++; 1829 } 1830 // Check the case for limit == 0, source = ""; 1831 // split() now returns 0-length for empty source "" see #6559590 1832 source = ""; 1833 result = source.split("e", 0); 1834 if (result.length != 1) 1835 failCount++; 1836 if (!result[0].equals(source)) 1837 failCount++; 1838 1839 // Check both split() and splitAsStraem(), especially for zero-lenth 1840 // input and zero-lenth match cases 1841 String[][] input = new String[][] { 1842 { " ", "Abc Efg Hij" }, // normal non-zero-match 1843 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1844 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1845 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1846 { "(?=\\p{Lu})", "AbcEfg" }, 1847 { "(?=\\p{Lu})", "Abc" }, 1848 { " ", "" }, // zero-length input 1849 { ".*", "" }, 1850 1851 // some tests from PatternStreamTest.java 1852 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1853 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1854 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1855 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1856 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1857 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1858 { "\u56da", "" }, 1859 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1860 { "o", "boo:and:foo" }, 1861 { "o", "booooo:and:fooooo" }, 1862 { "o", "fooooo:" }, 1863 }; 1864 1865 String[][] expected = new String[][] { 1866 { "Abc", "Efg", "Hij" }, 1867 { "", "Abc", "Efg", "Hij" }, 1868 { "Abc", "", "Efg", "Hij" }, 1869 { "Abc", "Efg", "Hij" }, 1870 { "Abc", "Efg" }, 1871 { "Abc" }, 1872 { "" }, 1873 { "" }, 1874 1875 { "awgqwefg1fefw", "vssv1vvv1" }, 1876 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1877 { "awgqwefg", "fefw4vssv", "vvv" }, 1878 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1879 { "1", "23", "456", "7890" }, 1880 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1881 { "" }, 1882 { "This", "is", "testing", "", "with", "different", "separators" }, 1883 { "b", "", ":and:f" }, 1884 { "b", "", "", "", "", ":and:f" }, 1885 { "f", "", "", "", "", ":" }, 1886 }; 1887 for (int i = 0; i < input.length; i++) { 1888 pattern = Pattern.compile(input[i][0]); 1889 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1890 failCount++; 1891 } 1892 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1893 // array for zero-length input for now 1894 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1895 expected[i])) { 1896 failCount++; 1897 } 1898 } 1899 report("Split"); 1900 } 1901 1902 private static void negationTest() { 1903 Pattern pattern = Pattern.compile("[\\[@^]+"); 1904 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1905 if (!matcher.find()) 1906 failCount++; 1907 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1908 failCount++; 1909 pattern = Pattern.compile("[@\\[^]+"); 1910 matcher = pattern.matcher("@@@@[[[[^^^^"); 1911 if (!matcher.find()) 1912 failCount++; 1913 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1914 failCount++; 1915 pattern = Pattern.compile("[@\\[^@]+"); 1916 matcher = pattern.matcher("@@@@[[[[^^^^"); 1917 if (!matcher.find()) 1918 failCount++; 1919 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1920 failCount++; 1921 1922 pattern = Pattern.compile("\\)"); 1923 matcher = pattern.matcher("xxx)xxx"); 1924 if (!matcher.find()) 1925 failCount++; 1926 1927 report("Negation"); 1928 } 1929 1930 private static void ampersandTest() { 1931 Pattern pattern = Pattern.compile("[&@]+"); 1932 check(pattern, "@@@@&&&&", true); 1933 1934 pattern = Pattern.compile("[@&]+"); 1935 check(pattern, "@@@@&&&&", true); 1936 1937 pattern = Pattern.compile("[@\\&]+"); 1938 check(pattern, "@@@@&&&&", true); 1939 1940 report("Ampersand"); 1941 } 1942 1943 private static void octalTest() throws Exception { 1944 Pattern pattern = Pattern.compile("\\u0007"); 1945 Matcher matcher = pattern.matcher("\u0007"); 1946 if (!matcher.matches()) 1947 failCount++; 1948 pattern = Pattern.compile("\\07"); 1949 matcher = pattern.matcher("\u0007"); 1950 if (!matcher.matches()) 1951 failCount++; 1952 pattern = Pattern.compile("\\007"); 1953 matcher = pattern.matcher("\u0007"); 1954 if (!matcher.matches()) 1955 failCount++; 1956 pattern = Pattern.compile("\\0007"); 1957 matcher = pattern.matcher("\u0007"); 1958 if (!matcher.matches()) 1959 failCount++; 1960 pattern = Pattern.compile("\\040"); 1961 matcher = pattern.matcher("\u0020"); 1962 if (!matcher.matches()) 1963 failCount++; 1964 pattern = Pattern.compile("\\0403"); 1965 matcher = pattern.matcher("\u00203"); 1966 if (!matcher.matches()) 1967 failCount++; 1968 pattern = Pattern.compile("\\0103"); 1969 matcher = pattern.matcher("\u0043"); 1970 if (!matcher.matches()) 1971 failCount++; 1972 1973 report("Octal"); 1974 } 1975 1976 private static void longPatternTest() throws Exception { 1977 try { 1978 Pattern pattern = Pattern.compile( 1979 "a 32-character-long pattern xxxx"); 1980 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 1981 pattern = Pattern.compile("a thirty four character long regex"); 1982 StringBuffer patternToBe = new StringBuffer(101); 1983 for (int i=0; i<100; i++) 1984 patternToBe.append((char)(97 + i%26)); 1985 pattern = Pattern.compile(patternToBe.toString()); 1986 } catch (PatternSyntaxException e) { 1987 failCount++; 1988 } 1989 1990 // Supplementary character test 1991 try { 1992 Pattern pattern = Pattern.compile( 1993 toSupplementaries("a 32-character-long pattern xxxx")); 1994 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 1995 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 1996 StringBuffer patternToBe = new StringBuffer(101*2); 1997 for (int i=0; i<100; i++) 1998 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 1999 + 97 + i%26)); 2000 pattern = Pattern.compile(patternToBe.toString()); 2001 } catch (PatternSyntaxException e) { 2002 failCount++; 2003 } 2004 report("LongPattern"); 2005 } 2006 2007 private static void group0Test() throws Exception { 2008 Pattern pattern = Pattern.compile("(tes)ting"); 2009 Matcher matcher = pattern.matcher("testing"); 2010 check(matcher, "testing"); 2011 2012 matcher.reset("testing"); 2013 if (matcher.lookingAt()) { 2014 if (!matcher.group(0).equals("testing")) 2015 failCount++; 2016 } else { 2017 failCount++; 2018 } 2019 2020 matcher.reset("testing"); 2021 if (matcher.matches()) { 2022 if (!matcher.group(0).equals("testing")) 2023 failCount++; 2024 } else { 2025 failCount++; 2026 } 2027 2028 pattern = Pattern.compile("(tes)ting"); 2029 matcher = pattern.matcher("testing"); 2030 if (matcher.lookingAt()) { 2031 if (!matcher.group(0).equals("testing")) 2032 failCount++; 2033 } else { 2034 failCount++; 2035 } 2036 2037 pattern = Pattern.compile("^(tes)ting"); 2038 matcher = pattern.matcher("testing"); 2039 if (matcher.matches()) { 2040 if (!matcher.group(0).equals("testing")) 2041 failCount++; 2042 } else { 2043 failCount++; 2044 } 2045 2046 // Supplementary character test 2047 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2048 matcher = pattern.matcher(toSupplementaries("testing")); 2049 check(matcher, toSupplementaries("testing")); 2050 2051 matcher.reset(toSupplementaries("testing")); 2052 if (matcher.lookingAt()) { 2053 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2054 failCount++; 2055 } else { 2056 failCount++; 2057 } 2058 2059 matcher.reset(toSupplementaries("testing")); 2060 if (matcher.matches()) { 2061 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2062 failCount++; 2063 } else { 2064 failCount++; 2065 } 2066 2067 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2068 matcher = pattern.matcher(toSupplementaries("testing")); 2069 if (matcher.lookingAt()) { 2070 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2071 failCount++; 2072 } else { 2073 failCount++; 2074 } 2075 2076 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2077 matcher = pattern.matcher(toSupplementaries("testing")); 2078 if (matcher.matches()) { 2079 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2080 failCount++; 2081 } else { 2082 failCount++; 2083 } 2084 2085 report("Group0"); 2086 } 2087 2088 private static void findIntTest() throws Exception { 2089 Pattern p = Pattern.compile("blah"); 2090 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2091 boolean result = m.find(2); 2092 if (!result) 2093 failCount++; 2094 2095 p = Pattern.compile("$"); 2096 m = p.matcher("1234567890"); 2097 result = m.find(10); 2098 if (!result) 2099 failCount++; 2100 try { 2101 result = m.find(11); 2102 failCount++; 2103 } catch (IndexOutOfBoundsException e) { 2104 // correct result 2105 } 2106 2107 // Supplementary character test 2108 p = Pattern.compile(toSupplementaries("blah")); 2109 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2110 result = m.find(2); 2111 if (!result) 2112 failCount++; 2113 2114 report("FindInt"); 2115 } 2116 2117 private static void emptyPatternTest() throws Exception { 2118 Pattern p = Pattern.compile(""); 2119 Matcher m = p.matcher("foo"); 2120 2121 // Should find empty pattern at beginning of input 2122 boolean result = m.find(); 2123 if (result != true) 2124 failCount++; 2125 if (m.start() != 0) 2126 failCount++; 2127 2128 // Should not match entire input if input is not empty 2129 m.reset(); 2130 result = m.matches(); 2131 if (result == true) 2132 failCount++; 2133 2134 try { 2135 m.start(0); 2136 failCount++; 2137 } catch (IllegalStateException e) { 2138 // Correct result 2139 } 2140 2141 // Should match entire input if input is empty 2142 m.reset(""); 2143 result = m.matches(); 2144 if (result != true) 2145 failCount++; 2146 2147 result = Pattern.matches("", ""); 2148 if (result != true) 2149 failCount++; 2150 2151 result = Pattern.matches("", "foo"); 2152 if (result == true) 2153 failCount++; 2154 report("EmptyPattern"); 2155 } 2156 2157 private static void charClassTest() throws Exception { 2158 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2159 check(pattern, "blahb]blech", true); 2160 2161 pattern = Pattern.compile("[abc[def]]"); 2162 check(pattern, "b", true); 2163 2164 // Supplementary character tests 2165 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2166 check(pattern, toSupplementaries("blahb]blech"), true); 2167 2168 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2169 check(pattern, toSupplementaries("b"), true); 2170 2171 try { 2172 // u00ff when UNICODE_CASE 2173 pattern = Pattern.compile("[ab\u00ffcd]", 2174 Pattern.CASE_INSENSITIVE| 2175 Pattern.UNICODE_CASE); 2176 check(pattern, "ab\u00ffcd", true); 2177 check(pattern, "Ab\u0178Cd", true); 2178 2179 // u00b5 when UNICODE_CASE 2180 pattern = Pattern.compile("[ab\u00b5cd]", 2181 Pattern.CASE_INSENSITIVE| 2182 Pattern.UNICODE_CASE); 2183 check(pattern, "ab\u00b5cd", true); 2184 check(pattern, "Ab\u039cCd", true); 2185 } catch (Exception e) { failCount++; } 2186 2187 /* Special cases 2188 (1)LatinSmallLetterLongS u+017f 2189 (2)LatinSmallLetterDotlessI u+0131 2190 (3)LatineCapitalLetterIWithDotAbove u+0130 2191 (4)KelvinSign u+212a 2192 (5)AngstromSign u+212b 2193 */ 2194 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2195 pattern = Pattern.compile("[sik\u00c5]+", flags); 2196 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2197 failCount++; 2198 2199 report("CharClass"); 2200 } 2201 2202 private static void caretTest() throws Exception { 2203 Pattern pattern = Pattern.compile("\\w*"); 2204 Matcher matcher = pattern.matcher("a#bc#def##g"); 2205 check(matcher, "a"); 2206 check(matcher, ""); 2207 check(matcher, "bc"); 2208 check(matcher, ""); 2209 check(matcher, "def"); 2210 check(matcher, ""); 2211 check(matcher, ""); 2212 check(matcher, "g"); 2213 check(matcher, ""); 2214 if (matcher.find()) 2215 failCount++; 2216 2217 pattern = Pattern.compile("^\\w*"); 2218 matcher = pattern.matcher("a#bc#def##g"); 2219 check(matcher, "a"); 2220 if (matcher.find()) 2221 failCount++; 2222 2223 pattern = Pattern.compile("\\w"); 2224 matcher = pattern.matcher("abc##x"); 2225 check(matcher, "a"); 2226 check(matcher, "b"); 2227 check(matcher, "c"); 2228 check(matcher, "x"); 2229 if (matcher.find()) 2230 failCount++; 2231 2232 pattern = Pattern.compile("^\\w"); 2233 matcher = pattern.matcher("abc##x"); 2234 check(matcher, "a"); 2235 if (matcher.find()) 2236 failCount++; 2237 2238 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2239 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2240 check(matcher, "abc"); 2241 if (matcher.find()) 2242 failCount++; 2243 2244 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2245 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2246 check(matcher, "abc"); 2247 check(matcher, "jkl"); 2248 if (matcher.find()) 2249 failCount++; 2250 2251 pattern = Pattern.compile("^", Pattern.MULTILINE); 2252 matcher = pattern.matcher("this is some text"); 2253 String result = matcher.replaceAll("X"); 2254 if (!result.equals("Xthis is some text")) 2255 failCount++; 2256 2257 pattern = Pattern.compile("^"); 2258 matcher = pattern.matcher("this is some text"); 2259 result = matcher.replaceAll("X"); 2260 if (!result.equals("Xthis is some text")) 2261 failCount++; 2262 2263 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2264 matcher = pattern.matcher("this is some text\n"); 2265 result = matcher.replaceAll("X"); 2266 if (!result.equals("Xthis is some text\n")) 2267 failCount++; 2268 2269 report("Caret"); 2270 } 2271 2272 private static void groupCaptureTest() throws Exception { 2273 // Independent group 2274 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2275 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2276 matcher.find(); 2277 try { 2278 String blah = matcher.group(1); 2279 failCount++; 2280 } catch (IndexOutOfBoundsException ioobe) { 2281 // Good result 2282 } 2283 // Pure group 2284 pattern = Pattern.compile("x+(?:y+)z+"); 2285 matcher = pattern.matcher("xxxyyyzzz"); 2286 matcher.find(); 2287 try { 2288 String blah = matcher.group(1); 2289 failCount++; 2290 } catch (IndexOutOfBoundsException ioobe) { 2291 // Good result 2292 } 2293 2294 // Supplementary character tests 2295 // Independent group 2296 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2297 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2298 matcher.find(); 2299 try { 2300 String blah = matcher.group(1); 2301 failCount++; 2302 } catch (IndexOutOfBoundsException ioobe) { 2303 // Good result 2304 } 2305 // Pure group 2306 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2307 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2308 matcher.find(); 2309 try { 2310 String blah = matcher.group(1); 2311 failCount++; 2312 } catch (IndexOutOfBoundsException ioobe) { 2313 // Good result 2314 } 2315 2316 report("GroupCapture"); 2317 } 2318 2319 private static void backRefTest() throws Exception { 2320 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2321 check(pattern, "zzzaabcazzz", true); 2322 2323 pattern = Pattern.compile("(a*)bc\\1"); 2324 check(pattern, "zzzaabcaazzz", true); 2325 2326 pattern = Pattern.compile("(abc)(def)\\1"); 2327 check(pattern, "abcdefabc", true); 2328 2329 pattern = Pattern.compile("(abc)(def)\\3"); 2330 check(pattern, "abcdefabc", false); 2331 2332 try { 2333 for (int i = 1; i < 10; i++) { 2334 // Make sure backref 1-9 are always accepted 2335 pattern = Pattern.compile("abcdef\\" + i); 2336 // and fail to match if the target group does not exit 2337 check(pattern, "abcdef", false); 2338 } 2339 } catch(PatternSyntaxException e) { 2340 failCount++; 2341 } 2342 2343 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2344 check(pattern, "abcdefghija", false); 2345 check(pattern, "abcdefghija1", true); 2346 2347 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2348 check(pattern, "abcdefghijkk", true); 2349 2350 pattern = Pattern.compile("(a)bcdefghij\\11"); 2351 check(pattern, "abcdefghija1", true); 2352 2353 // Supplementary character tests 2354 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2355 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2356 2357 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2358 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2359 2360 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2361 check(pattern, toSupplementaries("abcdefabc"), true); 2362 2363 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2364 check(pattern, toSupplementaries("abcdefabc"), false); 2365 2366 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2367 check(pattern, toSupplementaries("abcdefghija"), false); 2368 check(pattern, toSupplementaries("abcdefghija1"), true); 2369 2370 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2371 check(pattern, toSupplementaries("abcdefghijkk"), true); 2372 2373 report("BackRef"); 2374 } 2375 2376 /** 2377 * Unicode Technical Report #18, section 2.6 End of Line 2378 * There is no empty line to be matched in the sequence \u000D\u000A 2379 * but there is an empty line in the sequence \u000A\u000D. 2380 */ 2381 private static void anchorTest() throws Exception { 2382 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2383 Matcher m = p.matcher("blah1\r\nblah2"); 2384 m.find(); 2385 m.find(); 2386 if (!m.group().equals("blah2")) 2387 failCount++; 2388 2389 m.reset("blah1\n\rblah2"); 2390 m.find(); 2391 m.find(); 2392 m.find(); 2393 if (!m.group().equals("blah2")) 2394 failCount++; 2395 2396 // Test behavior of $ with \r\n at end of input 2397 p = Pattern.compile(".+$"); 2398 m = p.matcher("blah1\r\n"); 2399 if (!m.find()) 2400 failCount++; 2401 if (!m.group().equals("blah1")) 2402 failCount++; 2403 if (m.find()) 2404 failCount++; 2405 2406 // Test behavior of $ with \r\n at end of input in multiline 2407 p = Pattern.compile(".+$", Pattern.MULTILINE); 2408 m = p.matcher("blah1\r\n"); 2409 if (!m.find()) 2410 failCount++; 2411 if (m.find()) 2412 failCount++; 2413 2414 // Test for $ recognition of \u0085 for bug 4527731 2415 p = Pattern.compile(".+$", Pattern.MULTILINE); 2416 m = p.matcher("blah1\u0085"); 2417 if (!m.find()) 2418 failCount++; 2419 2420 // Supplementary character test 2421 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2422 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2423 m.find(); 2424 m.find(); 2425 if (!m.group().equals(toSupplementaries("blah2"))) 2426 failCount++; 2427 2428 m.reset(toSupplementaries("blah1\n\rblah2")); 2429 m.find(); 2430 m.find(); 2431 m.find(); 2432 if (!m.group().equals(toSupplementaries("blah2"))) 2433 failCount++; 2434 2435 // Test behavior of $ with \r\n at end of input 2436 p = Pattern.compile(".+$"); 2437 m = p.matcher(toSupplementaries("blah1\r\n")); 2438 if (!m.find()) 2439 failCount++; 2440 if (!m.group().equals(toSupplementaries("blah1"))) 2441 failCount++; 2442 if (m.find()) 2443 failCount++; 2444 2445 // Test behavior of $ with \r\n at end of input in multiline 2446 p = Pattern.compile(".+$", Pattern.MULTILINE); 2447 m = p.matcher(toSupplementaries("blah1\r\n")); 2448 if (!m.find()) 2449 failCount++; 2450 if (m.find()) 2451 failCount++; 2452 2453 // Test for $ recognition of \u0085 for bug 4527731 2454 p = Pattern.compile(".+$", Pattern.MULTILINE); 2455 m = p.matcher(toSupplementaries("blah1\u0085")); 2456 if (!m.find()) 2457 failCount++; 2458 2459 report("Anchors"); 2460 } 2461 2462 /** 2463 * A basic sanity test of Matcher.lookingAt(). 2464 */ 2465 private static void lookingAtTest() throws Exception { 2466 Pattern p = Pattern.compile("(ab)(c*)"); 2467 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2468 2469 if (!m.lookingAt()) 2470 failCount++; 2471 2472 if (!m.group().equals(m.group(0))) 2473 failCount++; 2474 2475 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2476 if (m.lookingAt()) 2477 failCount++; 2478 2479 // Supplementary character test 2480 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2481 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2482 2483 if (!m.lookingAt()) 2484 failCount++; 2485 2486 if (!m.group().equals(m.group(0))) 2487 failCount++; 2488 2489 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2490 if (m.lookingAt()) 2491 failCount++; 2492 2493 report("Looking At"); 2494 } 2495 2496 /** 2497 * A basic sanity test of Matcher.matches(). 2498 */ 2499 private static void matchesTest() throws Exception { 2500 // matches() 2501 Pattern p = Pattern.compile("ulb(c*)"); 2502 Matcher m = p.matcher("ulbcccccc"); 2503 if (!m.matches()) 2504 failCount++; 2505 2506 // find() but not matches() 2507 m.reset("zzzulbcccccc"); 2508 if (m.matches()) 2509 failCount++; 2510 2511 // lookingAt() but not matches() 2512 m.reset("ulbccccccdef"); 2513 if (m.matches()) 2514 failCount++; 2515 2516 // matches() 2517 p = Pattern.compile("a|ad"); 2518 m = p.matcher("ad"); 2519 if (!m.matches()) 2520 failCount++; 2521 2522 // Supplementary character test 2523 // matches() 2524 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2525 m = p.matcher(toSupplementaries("ulbcccccc")); 2526 if (!m.matches()) 2527 failCount++; 2528 2529 // find() but not matches() 2530 m.reset(toSupplementaries("zzzulbcccccc")); 2531 if (m.matches()) 2532 failCount++; 2533 2534 // lookingAt() but not matches() 2535 m.reset(toSupplementaries("ulbccccccdef")); 2536 if (m.matches()) 2537 failCount++; 2538 2539 // matches() 2540 p = Pattern.compile(toSupplementaries("a|ad")); 2541 m = p.matcher(toSupplementaries("ad")); 2542 if (!m.matches()) 2543 failCount++; 2544 2545 report("Matches"); 2546 } 2547 2548 /** 2549 * A basic sanity test of Pattern.matches(). 2550 */ 2551 private static void patternMatchesTest() throws Exception { 2552 // matches() 2553 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2554 toSupplementaries("ulbcccccc"))) 2555 failCount++; 2556 2557 // find() but not matches() 2558 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2559 toSupplementaries("zzzulbcccccc"))) 2560 failCount++; 2561 2562 // lookingAt() but not matches() 2563 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2564 toSupplementaries("ulbccccccdef"))) 2565 failCount++; 2566 2567 // Supplementary character test 2568 // matches() 2569 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2570 toSupplementaries("ulbcccccc"))) 2571 failCount++; 2572 2573 // find() but not matches() 2574 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2575 toSupplementaries("zzzulbcccccc"))) 2576 failCount++; 2577 2578 // lookingAt() but not matches() 2579 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2580 toSupplementaries("ulbccccccdef"))) 2581 failCount++; 2582 2583 report("Pattern Matches"); 2584 } 2585 2586 /** 2587 * Canonical equivalence testing. Tests the ability of the engine 2588 * to match sequences that are not explicitly specified in the 2589 * pattern when they are considered equivalent by the Unicode Standard. 2590 */ 2591 private static void ceTest() throws Exception { 2592 // Decomposed char outside char classes 2593 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2594 Matcher m = p.matcher("test\u00e5"); 2595 if (!m.matches()) 2596 failCount++; 2597 2598 m.reset("testa\u030a"); 2599 if (!m.matches()) 2600 failCount++; 2601 2602 // Composed char outside char classes 2603 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2604 m = p.matcher("test\u00e5"); 2605 if (!m.matches()) 2606 failCount++; 2607 2608 m.reset("testa\u030a"); 2609 if (!m.find()) 2610 failCount++; 2611 2612 // Decomposed char inside a char class 2613 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2614 m = p.matcher("test\u00e5"); 2615 if (!m.find()) 2616 failCount++; 2617 2618 m.reset("testa\u030a"); 2619 if (!m.find()) 2620 failCount++; 2621 2622 // Composed char inside a char class 2623 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2624 m = p.matcher("test\u00e5"); 2625 if (!m.find()) 2626 failCount++; 2627 2628 m.reset("testa\u0300"); 2629 if (!m.find()) 2630 failCount++; 2631 2632 m.reset("testa\u030a"); 2633 if (!m.find()) 2634 failCount++; 2635 2636 // Marks that cannot legally change order and be equivalent 2637 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2638 check(p, "testa\u0308\u0300", true); 2639 check(p, "testa\u0300\u0308", false); 2640 2641 // Marks that can legally change order and be equivalent 2642 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2643 check(p, "testa\u0308\u0323", true); 2644 check(p, "testa\u0323\u0308", true); 2645 2646 // Test all equivalences of the sequence a\u0308\u0323\u0300 2647 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2648 check(p, "testa\u0308\u0323\u0300", true); 2649 check(p, "testa\u0323\u0308\u0300", true); 2650 check(p, "testa\u0308\u0300\u0323", true); 2651 check(p, "test\u00e4\u0323\u0300", true); 2652 check(p, "test\u00e4\u0300\u0323", true); 2653 2654 /* 2655 * The following canonical equivalence tests don't work. Bug id: 4916384. 2656 * 2657 // Decomposed hangul (jamos) 2658 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ); 2659 m = p.matcher("\u1100\u1161"); 2660 if (!m.matches()) 2661 failCount++; 2662 2663 m.reset("\uac00"); 2664 if (!m.matches()) 2665 failCount++; 2666 2667 // Composed hangul 2668 p = Pattern.compile("\uac00", Pattern.CANON_EQ); 2669 m = p.matcher("\u1100\u1161"); 2670 if (!m.matches()) 2671 failCount++; 2672 2673 m.reset("\uac00"); 2674 if (!m.matches()) 2675 failCount++; 2676 2677 // Decomposed supplementary outside char classes 2678 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ); 2679 m = p.matcher("test\ud834\uddc0"); 2680 if (!m.matches()) 2681 failCount++; 2682 2683 m.reset("test\ud834\uddbc\ud834\udd6f"); 2684 if (!m.matches()) 2685 failCount++; 2686 2687 // Composed supplementary outside char classes 2688 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ); 2689 m.reset("test\ud834\uddbc\ud834\udd6f"); 2690 if (!m.matches()) 2691 failCount++; 2692 2693 m = p.matcher("test\ud834\uddc0"); 2694 if (!m.matches()) 2695 failCount++; 2696 2697 */ 2698 2699 report("Canonical Equivalence"); 2700 } 2701 2702 /** 2703 * A basic sanity test of Matcher.replaceAll(). 2704 */ 2705 private static void globalSubstitute() throws Exception { 2706 // Global substitution with a literal 2707 Pattern p = Pattern.compile("(ab)(c*)"); 2708 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2709 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2710 failCount++; 2711 2712 m.reset("zzzabccczzzabcczzzabccczzz"); 2713 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2714 failCount++; 2715 2716 // Global substitution with groups 2717 m.reset("zzzabccczzzabcczzzabccczzz"); 2718 String result = m.replaceAll("$1"); 2719 if (!result.equals("zzzabzzzabzzzabzzz")) 2720 failCount++; 2721 2722 // Supplementary character test 2723 // Global substitution with a literal 2724 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2725 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2726 if (!m.replaceAll(toSupplementaries("test")). 2727 equals(toSupplementaries("testzzztestzzztest"))) 2728 failCount++; 2729 2730 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2731 if (!m.replaceAll(toSupplementaries("test")). 2732 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2733 failCount++; 2734 2735 // Global substitution with groups 2736 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2737 result = m.replaceAll("$1"); 2738 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2739 failCount++; 2740 2741 report("Global Substitution"); 2742 } 2743 2744 /** 2745 * Tests the usage of Matcher.appendReplacement() with literal 2746 * and group substitutions. 2747 */ 2748 private static void stringbufferSubstitute() throws Exception { 2749 // SB substitution with literal 2750 String blah = "zzzblahzzz"; 2751 Pattern p = Pattern.compile("blah"); 2752 Matcher m = p.matcher(blah); 2753 StringBuffer result = new StringBuffer(); 2754 try { 2755 m.appendReplacement(result, "blech"); 2756 failCount++; 2757 } catch (IllegalStateException e) { 2758 } 2759 m.find(); 2760 m.appendReplacement(result, "blech"); 2761 if (!result.toString().equals("zzzblech")) 2762 failCount++; 2763 2764 m.appendTail(result); 2765 if (!result.toString().equals("zzzblechzzz")) 2766 failCount++; 2767 2768 // SB substitution with groups 2769 blah = "zzzabcdzzz"; 2770 p = Pattern.compile("(ab)(cd)*"); 2771 m = p.matcher(blah); 2772 result = new StringBuffer(); 2773 try { 2774 m.appendReplacement(result, "$1"); 2775 failCount++; 2776 } catch (IllegalStateException e) { 2777 } 2778 m.find(); 2779 m.appendReplacement(result, "$1"); 2780 if (!result.toString().equals("zzzab")) 2781 failCount++; 2782 2783 m.appendTail(result); 2784 if (!result.toString().equals("zzzabzzz")) 2785 failCount++; 2786 2787 // SB substitution with 3 groups 2788 blah = "zzzabcdcdefzzz"; 2789 p = Pattern.compile("(ab)(cd)*(ef)"); 2790 m = p.matcher(blah); 2791 result = new StringBuffer(); 2792 try { 2793 m.appendReplacement(result, "$1w$2w$3"); 2794 failCount++; 2795 } catch (IllegalStateException e) { 2796 } 2797 m.find(); 2798 m.appendReplacement(result, "$1w$2w$3"); 2799 if (!result.toString().equals("zzzabwcdwef")) 2800 failCount++; 2801 2802 m.appendTail(result); 2803 if (!result.toString().equals("zzzabwcdwefzzz")) 2804 failCount++; 2805 2806 // SB substitution with groups and three matches 2807 // skipping middle match 2808 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2809 p = Pattern.compile("(ab)(cd*)"); 2810 m = p.matcher(blah); 2811 result = new StringBuffer(); 2812 try { 2813 m.appendReplacement(result, "$1"); 2814 failCount++; 2815 } catch (IllegalStateException e) { 2816 } 2817 m.find(); 2818 m.appendReplacement(result, "$1"); 2819 if (!result.toString().equals("zzzab")) 2820 failCount++; 2821 2822 m.find(); 2823 m.find(); 2824 m.appendReplacement(result, "$2"); 2825 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2826 failCount++; 2827 2828 m.appendTail(result); 2829 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2830 failCount++; 2831 2832 // Check to make sure escaped $ is ignored 2833 blah = "zzzabcdcdefzzz"; 2834 p = Pattern.compile("(ab)(cd)*(ef)"); 2835 m = p.matcher(blah); 2836 result = new StringBuffer(); 2837 m.find(); 2838 m.appendReplacement(result, "$1w\\$2w$3"); 2839 if (!result.toString().equals("zzzabw$2wef")) 2840 failCount++; 2841 2842 m.appendTail(result); 2843 if (!result.toString().equals("zzzabw$2wefzzz")) 2844 failCount++; 2845 2846 // Check to make sure a reference to nonexistent group causes error 2847 blah = "zzzabcdcdefzzz"; 2848 p = Pattern.compile("(ab)(cd)*(ef)"); 2849 m = p.matcher(blah); 2850 result = new StringBuffer(); 2851 m.find(); 2852 try { 2853 m.appendReplacement(result, "$1w$5w$3"); 2854 failCount++; 2855 } catch (IndexOutOfBoundsException ioobe) { 2856 // Correct result 2857 } 2858 2859 // Check double digit group references 2860 blah = "zzz123456789101112zzz"; 2861 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2862 m = p.matcher(blah); 2863 result = new StringBuffer(); 2864 m.find(); 2865 m.appendReplacement(result, "$1w$11w$3"); 2866 if (!result.toString().equals("zzz1w11w3")) 2867 failCount++; 2868 2869 // Check to make sure it backs off $15 to $1 if only three groups 2870 blah = "zzzabcdcdefzzz"; 2871 p = Pattern.compile("(ab)(cd)*(ef)"); 2872 m = p.matcher(blah); 2873 result = new StringBuffer(); 2874 m.find(); 2875 m.appendReplacement(result, "$1w$15w$3"); 2876 if (!result.toString().equals("zzzabwab5wef")) 2877 failCount++; 2878 2879 2880 // Supplementary character test 2881 // SB substitution with literal 2882 blah = toSupplementaries("zzzblahzzz"); 2883 p = Pattern.compile(toSupplementaries("blah")); 2884 m = p.matcher(blah); 2885 result = new StringBuffer(); 2886 try { 2887 m.appendReplacement(result, toSupplementaries("blech")); 2888 failCount++; 2889 } catch (IllegalStateException e) { 2890 } 2891 m.find(); 2892 m.appendReplacement(result, toSupplementaries("blech")); 2893 if (!result.toString().equals(toSupplementaries("zzzblech"))) 2894 failCount++; 2895 2896 m.appendTail(result); 2897 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 2898 failCount++; 2899 2900 // SB substitution with groups 2901 blah = toSupplementaries("zzzabcdzzz"); 2902 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 2903 m = p.matcher(blah); 2904 result = new StringBuffer(); 2905 try { 2906 m.appendReplacement(result, "$1"); 2907 failCount++; 2908 } catch (IllegalStateException e) { 2909 } 2910 m.find(); 2911 m.appendReplacement(result, "$1"); 2912 if (!result.toString().equals(toSupplementaries("zzzab"))) 2913 failCount++; 2914 2915 m.appendTail(result); 2916 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 2917 failCount++; 2918 2919 // SB substitution with 3 groups 2920 blah = toSupplementaries("zzzabcdcdefzzz"); 2921 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2922 m = p.matcher(blah); 2923 result = new StringBuffer(); 2924 try { 2925 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2926 failCount++; 2927 } catch (IllegalStateException e) { 2928 } 2929 m.find(); 2930 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2931 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 2932 failCount++; 2933 2934 m.appendTail(result); 2935 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 2936 failCount++; 2937 2938 // SB substitution with groups and three matches 2939 // skipping middle match 2940 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 2941 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 2942 m = p.matcher(blah); 2943 result = new StringBuffer(); 2944 try { 2945 m.appendReplacement(result, "$1"); 2946 failCount++; 2947 } catch (IllegalStateException e) { 2948 } 2949 m.find(); 2950 m.appendReplacement(result, "$1"); 2951 if (!result.toString().equals(toSupplementaries("zzzab"))) 2952 failCount++; 2953 2954 m.find(); 2955 m.find(); 2956 m.appendReplacement(result, "$2"); 2957 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 2958 failCount++; 2959 2960 m.appendTail(result); 2961 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 2962 failCount++; 2963 2964 // Check to make sure escaped $ is ignored 2965 blah = toSupplementaries("zzzabcdcdefzzz"); 2966 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2967 m = p.matcher(blah); 2968 result = new StringBuffer(); 2969 m.find(); 2970 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 2971 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 2972 failCount++; 2973 2974 m.appendTail(result); 2975 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 2976 failCount++; 2977 2978 // Check to make sure a reference to nonexistent group causes error 2979 blah = toSupplementaries("zzzabcdcdefzzz"); 2980 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2981 m = p.matcher(blah); 2982 result = new StringBuffer(); 2983 m.find(); 2984 try { 2985 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 2986 failCount++; 2987 } catch (IndexOutOfBoundsException ioobe) { 2988 // Correct result 2989 } 2990 2991 // Check double digit group references 2992 blah = toSupplementaries("zzz123456789101112zzz"); 2993 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2994 m = p.matcher(blah); 2995 result = new StringBuffer(); 2996 m.find(); 2997 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 2998 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 2999 failCount++; 3000 3001 // Check to make sure it backs off $15 to $1 if only three groups 3002 blah = toSupplementaries("zzzabcdcdefzzz"); 3003 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3004 m = p.matcher(blah); 3005 result = new StringBuffer(); 3006 m.find(); 3007 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3008 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3009 failCount++; 3010 3011 // Check nothing has been appended into the output buffer if 3012 // the replacement string triggers IllegalArgumentException. 3013 p = Pattern.compile("(abc)"); 3014 m = p.matcher("abcd"); 3015 result = new StringBuffer(); 3016 m.find(); 3017 try { 3018 m.appendReplacement(result, ("xyz$g")); 3019 failCount++; 3020 } catch (IllegalArgumentException iae) { 3021 if (result.length() != 0) 3022 failCount++; 3023 } 3024 3025 report("SB Substitution"); 3026 } 3027 3028 /** 3029 * Tests the usage of Matcher.appendReplacement() with literal 3030 * and group substitutions. 3031 */ 3032 private static void stringbuilderSubstitute() throws Exception { 3033 // SB substitution with literal 3034 String blah = "zzzblahzzz"; 3035 Pattern p = Pattern.compile("blah"); 3036 Matcher m = p.matcher(blah); 3037 StringBuilder result = new StringBuilder(); 3038 try { 3039 m.appendReplacement(result, "blech"); 3040 failCount++; 3041 } catch (IllegalStateException e) { 3042 } 3043 m.find(); 3044 m.appendReplacement(result, "blech"); 3045 if (!result.toString().equals("zzzblech")) 3046 failCount++; 3047 3048 m.appendTail(result); 3049 if (!result.toString().equals("zzzblechzzz")) 3050 failCount++; 3051 3052 // SB substitution with groups 3053 blah = "zzzabcdzzz"; 3054 p = Pattern.compile("(ab)(cd)*"); 3055 m = p.matcher(blah); 3056 result = new StringBuilder(); 3057 try { 3058 m.appendReplacement(result, "$1"); 3059 failCount++; 3060 } catch (IllegalStateException e) { 3061 } 3062 m.find(); 3063 m.appendReplacement(result, "$1"); 3064 if (!result.toString().equals("zzzab")) 3065 failCount++; 3066 3067 m.appendTail(result); 3068 if (!result.toString().equals("zzzabzzz")) 3069 failCount++; 3070 3071 // SB substitution with 3 groups 3072 blah = "zzzabcdcdefzzz"; 3073 p = Pattern.compile("(ab)(cd)*(ef)"); 3074 m = p.matcher(blah); 3075 result = new StringBuilder(); 3076 try { 3077 m.appendReplacement(result, "$1w$2w$3"); 3078 failCount++; 3079 } catch (IllegalStateException e) { 3080 } 3081 m.find(); 3082 m.appendReplacement(result, "$1w$2w$3"); 3083 if (!result.toString().equals("zzzabwcdwef")) 3084 failCount++; 3085 3086 m.appendTail(result); 3087 if (!result.toString().equals("zzzabwcdwefzzz")) 3088 failCount++; 3089 3090 // SB substitution with groups and three matches 3091 // skipping middle match 3092 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3093 p = Pattern.compile("(ab)(cd*)"); 3094 m = p.matcher(blah); 3095 result = new StringBuilder(); 3096 try { 3097 m.appendReplacement(result, "$1"); 3098 failCount++; 3099 } catch (IllegalStateException e) { 3100 } 3101 m.find(); 3102 m.appendReplacement(result, "$1"); 3103 if (!result.toString().equals("zzzab")) 3104 failCount++; 3105 3106 m.find(); 3107 m.find(); 3108 m.appendReplacement(result, "$2"); 3109 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3110 failCount++; 3111 3112 m.appendTail(result); 3113 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3114 failCount++; 3115 3116 // Check to make sure escaped $ is ignored 3117 blah = "zzzabcdcdefzzz"; 3118 p = Pattern.compile("(ab)(cd)*(ef)"); 3119 m = p.matcher(blah); 3120 result = new StringBuilder(); 3121 m.find(); 3122 m.appendReplacement(result, "$1w\\$2w$3"); 3123 if (!result.toString().equals("zzzabw$2wef")) 3124 failCount++; 3125 3126 m.appendTail(result); 3127 if (!result.toString().equals("zzzabw$2wefzzz")) 3128 failCount++; 3129 3130 // Check to make sure a reference to nonexistent group causes error 3131 blah = "zzzabcdcdefzzz"; 3132 p = Pattern.compile("(ab)(cd)*(ef)"); 3133 m = p.matcher(blah); 3134 result = new StringBuilder(); 3135 m.find(); 3136 try { 3137 m.appendReplacement(result, "$1w$5w$3"); 3138 failCount++; 3139 } catch (IndexOutOfBoundsException ioobe) { 3140 // Correct result 3141 } 3142 3143 // Check double digit group references 3144 blah = "zzz123456789101112zzz"; 3145 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3146 m = p.matcher(blah); 3147 result = new StringBuilder(); 3148 m.find(); 3149 m.appendReplacement(result, "$1w$11w$3"); 3150 if (!result.toString().equals("zzz1w11w3")) 3151 failCount++; 3152 3153 // Check to make sure it backs off $15 to $1 if only three groups 3154 blah = "zzzabcdcdefzzz"; 3155 p = Pattern.compile("(ab)(cd)*(ef)"); 3156 m = p.matcher(blah); 3157 result = new StringBuilder(); 3158 m.find(); 3159 m.appendReplacement(result, "$1w$15w$3"); 3160 if (!result.toString().equals("zzzabwab5wef")) 3161 failCount++; 3162 3163 3164 // Supplementary character test 3165 // SB substitution with literal 3166 blah = toSupplementaries("zzzblahzzz"); 3167 p = Pattern.compile(toSupplementaries("blah")); 3168 m = p.matcher(blah); 3169 result = new StringBuilder(); 3170 try { 3171 m.appendReplacement(result, toSupplementaries("blech")); 3172 failCount++; 3173 } catch (IllegalStateException e) { 3174 } 3175 m.find(); 3176 m.appendReplacement(result, toSupplementaries("blech")); 3177 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3178 failCount++; 3179 m.appendTail(result); 3180 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3181 failCount++; 3182 3183 // SB substitution with groups 3184 blah = toSupplementaries("zzzabcdzzz"); 3185 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3186 m = p.matcher(blah); 3187 result = new StringBuilder(); 3188 try { 3189 m.appendReplacement(result, "$1"); 3190 failCount++; 3191 } catch (IllegalStateException e) { 3192 } 3193 m.find(); 3194 m.appendReplacement(result, "$1"); 3195 if (!result.toString().equals(toSupplementaries("zzzab"))) 3196 failCount++; 3197 3198 m.appendTail(result); 3199 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3200 failCount++; 3201 3202 // SB substitution with 3 groups 3203 blah = toSupplementaries("zzzabcdcdefzzz"); 3204 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3205 m = p.matcher(blah); 3206 result = new StringBuilder(); 3207 try { 3208 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3209 failCount++; 3210 } catch (IllegalStateException e) { 3211 } 3212 m.find(); 3213 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3214 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3215 failCount++; 3216 3217 m.appendTail(result); 3218 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3219 failCount++; 3220 3221 // SB substitution with groups and three matches 3222 // skipping middle match 3223 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3224 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3225 m = p.matcher(blah); 3226 result = new StringBuilder(); 3227 try { 3228 m.appendReplacement(result, "$1"); 3229 failCount++; 3230 } catch (IllegalStateException e) { 3231 } 3232 m.find(); 3233 m.appendReplacement(result, "$1"); 3234 if (!result.toString().equals(toSupplementaries("zzzab"))) 3235 failCount++; 3236 3237 m.find(); 3238 m.find(); 3239 m.appendReplacement(result, "$2"); 3240 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3241 failCount++; 3242 3243 m.appendTail(result); 3244 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3245 failCount++; 3246 3247 // Check to make sure escaped $ is ignored 3248 blah = toSupplementaries("zzzabcdcdefzzz"); 3249 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3250 m = p.matcher(blah); 3251 result = new StringBuilder(); 3252 m.find(); 3253 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3254 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3255 failCount++; 3256 3257 m.appendTail(result); 3258 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3259 failCount++; 3260 3261 // Check to make sure a reference to nonexistent group causes error 3262 blah = toSupplementaries("zzzabcdcdefzzz"); 3263 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3264 m = p.matcher(blah); 3265 result = new StringBuilder(); 3266 m.find(); 3267 try { 3268 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3269 failCount++; 3270 } catch (IndexOutOfBoundsException ioobe) { 3271 // Correct result 3272 } 3273 // Check double digit group references 3274 blah = toSupplementaries("zzz123456789101112zzz"); 3275 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3276 m = p.matcher(blah); 3277 result = new StringBuilder(); 3278 m.find(); 3279 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3280 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3281 failCount++; 3282 3283 // Check to make sure it backs off $15 to $1 if only three groups 3284 blah = toSupplementaries("zzzabcdcdefzzz"); 3285 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3286 m = p.matcher(blah); 3287 result = new StringBuilder(); 3288 m.find(); 3289 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3290 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3291 failCount++; 3292 // Check nothing has been appended into the output buffer if 3293 // the replacement string triggers IllegalArgumentException. 3294 p = Pattern.compile("(abc)"); 3295 m = p.matcher("abcd"); 3296 result = new StringBuilder(); 3297 m.find(); 3298 try { 3299 m.appendReplacement(result, ("xyz$g")); 3300 failCount++; 3301 } catch (IllegalArgumentException iae) { 3302 if (result.length() != 0) 3303 failCount++; 3304 } 3305 report("SB Substitution 2"); 3306 } 3307 3308 /* 3309 * 5 groups of characters are created to make a substitution string. 3310 * A base string will be created including random lead chars, the 3311 * substitution string, and random trailing chars. 3312 * A pattern containing the 5 groups is searched for and replaced with: 3313 * random group + random string + random group. 3314 * The results are checked for correctness. 3315 */ 3316 private static void substitutionBasher() { 3317 for (int runs = 0; runs<1000; runs++) { 3318 // Create a base string to work in 3319 int leadingChars = generator.nextInt(10); 3320 StringBuffer baseBuffer = new StringBuffer(100); 3321 String leadingString = getRandomAlphaString(leadingChars); 3322 baseBuffer.append(leadingString); 3323 3324 // Create 5 groups of random number of random chars 3325 // Create the string to substitute 3326 // Create the pattern string to search for 3327 StringBuffer bufferToSub = new StringBuffer(25); 3328 StringBuffer bufferToPat = new StringBuffer(50); 3329 String[] groups = new String[5]; 3330 for(int i=0; i<5; i++) { 3331 int aGroupSize = generator.nextInt(5)+1; 3332 groups[i] = getRandomAlphaString(aGroupSize); 3333 bufferToSub.append(groups[i]); 3334 bufferToPat.append('('); 3335 bufferToPat.append(groups[i]); 3336 bufferToPat.append(')'); 3337 } 3338 String stringToSub = bufferToSub.toString(); 3339 String pattern = bufferToPat.toString(); 3340 3341 // Place sub string into working string at random index 3342 baseBuffer.append(stringToSub); 3343 3344 // Append random chars to end 3345 int trailingChars = generator.nextInt(10); 3346 String trailingString = getRandomAlphaString(trailingChars); 3347 baseBuffer.append(trailingString); 3348 String baseString = baseBuffer.toString(); 3349 3350 // Create test pattern and matcher 3351 Pattern p = Pattern.compile(pattern); 3352 Matcher m = p.matcher(baseString); 3353 3354 // Reject candidate if pattern happens to start early 3355 m.find(); 3356 if (m.start() < leadingChars) 3357 continue; 3358 3359 // Reject candidate if more than one match 3360 if (m.find()) 3361 continue; 3362 3363 // Construct a replacement string with : 3364 // random group + random string + random group 3365 StringBuffer bufferToRep = new StringBuffer(); 3366 int groupIndex1 = generator.nextInt(5); 3367 bufferToRep.append("$" + (groupIndex1 + 1)); 3368 String randomMidString = getRandomAlphaString(5); 3369 bufferToRep.append(randomMidString); 3370 int groupIndex2 = generator.nextInt(5); 3371 bufferToRep.append("$" + (groupIndex2 + 1)); 3372 String replacement = bufferToRep.toString(); 3373 3374 // Do the replacement 3375 String result = m.replaceAll(replacement); 3376 3377 // Construct expected result 3378 StringBuffer bufferToRes = new StringBuffer(); 3379 bufferToRes.append(leadingString); 3380 bufferToRes.append(groups[groupIndex1]); 3381 bufferToRes.append(randomMidString); 3382 bufferToRes.append(groups[groupIndex2]); 3383 bufferToRes.append(trailingString); 3384 String expectedResult = bufferToRes.toString(); 3385 3386 // Check results 3387 if (!result.equals(expectedResult)) 3388 failCount++; 3389 } 3390 3391 report("Substitution Basher"); 3392 } 3393 3394 /* 3395 * 5 groups of characters are created to make a substitution string. 3396 * A base string will be created including random lead chars, the 3397 * substitution string, and random trailing chars. 3398 * A pattern containing the 5 groups is searched for and replaced with: 3399 * random group + random string + random group. 3400 * The results are checked for correctness. 3401 */ 3402 private static void substitutionBasher2() { 3403 for (int runs = 0; runs<1000; runs++) { 3404 // Create a base string to work in 3405 int leadingChars = generator.nextInt(10); 3406 StringBuilder baseBuffer = new StringBuilder(100); 3407 String leadingString = getRandomAlphaString(leadingChars); 3408 baseBuffer.append(leadingString); 3409 3410 // Create 5 groups of random number of random chars 3411 // Create the string to substitute 3412 // Create the pattern string to search for 3413 StringBuilder bufferToSub = new StringBuilder(25); 3414 StringBuilder bufferToPat = new StringBuilder(50); 3415 String[] groups = new String[5]; 3416 for(int i=0; i<5; i++) { 3417 int aGroupSize = generator.nextInt(5)+1; 3418 groups[i] = getRandomAlphaString(aGroupSize); 3419 bufferToSub.append(groups[i]); 3420 bufferToPat.append('('); 3421 bufferToPat.append(groups[i]); 3422 bufferToPat.append(')'); 3423 } 3424 String stringToSub = bufferToSub.toString(); 3425 String pattern = bufferToPat.toString(); 3426 3427 // Place sub string into working string at random index 3428 baseBuffer.append(stringToSub); 3429 3430 // Append random chars to end 3431 int trailingChars = generator.nextInt(10); 3432 String trailingString = getRandomAlphaString(trailingChars); 3433 baseBuffer.append(trailingString); 3434 String baseString = baseBuffer.toString(); 3435 3436 // Create test pattern and matcher 3437 Pattern p = Pattern.compile(pattern); 3438 Matcher m = p.matcher(baseString); 3439 3440 // Reject candidate if pattern happens to start early 3441 m.find(); 3442 if (m.start() < leadingChars) 3443 continue; 3444 3445 // Reject candidate if more than one match 3446 if (m.find()) 3447 continue; 3448 3449 // Construct a replacement string with : 3450 // random group + random string + random group 3451 StringBuilder bufferToRep = new StringBuilder(); 3452 int groupIndex1 = generator.nextInt(5); 3453 bufferToRep.append("$" + (groupIndex1 + 1)); 3454 String randomMidString = getRandomAlphaString(5); 3455 bufferToRep.append(randomMidString); 3456 int groupIndex2 = generator.nextInt(5); 3457 bufferToRep.append("$" + (groupIndex2 + 1)); 3458 String replacement = bufferToRep.toString(); 3459 3460 // Do the replacement 3461 String result = m.replaceAll(replacement); 3462 3463 // Construct expected result 3464 StringBuilder bufferToRes = new StringBuilder(); 3465 bufferToRes.append(leadingString); 3466 bufferToRes.append(groups[groupIndex1]); 3467 bufferToRes.append(randomMidString); 3468 bufferToRes.append(groups[groupIndex2]); 3469 bufferToRes.append(trailingString); 3470 String expectedResult = bufferToRes.toString(); 3471 3472 // Check results 3473 if (!result.equals(expectedResult)) { 3474 failCount++; 3475 } 3476 } 3477 3478 report("Substitution Basher 2"); 3479 } 3480 3481 /** 3482 * Checks the handling of some escape sequences that the Pattern 3483 * class should process instead of the java compiler. These are 3484 * not in the file because the escapes should be be processed 3485 * by the Pattern class when the regex is compiled. 3486 */ 3487 private static void escapes() throws Exception { 3488 Pattern p = Pattern.compile("\\043"); 3489 Matcher m = p.matcher("#"); 3490 if (!m.find()) 3491 failCount++; 3492 3493 p = Pattern.compile("\\x23"); 3494 m = p.matcher("#"); 3495 if (!m.find()) 3496 failCount++; 3497 3498 p = Pattern.compile("\\u0023"); 3499 m = p.matcher("#"); 3500 if (!m.find()) 3501 failCount++; 3502 3503 report("Escape sequences"); 3504 } 3505 3506 /** 3507 * Checks the handling of blank input situations. These 3508 * tests are incompatible with my test file format. 3509 */ 3510 private static void blankInput() throws Exception { 3511 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3512 Matcher m = p.matcher(""); 3513 if (m.find()) 3514 failCount++; 3515 3516 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3517 m = p.matcher(""); 3518 if (!m.find()) 3519 failCount++; 3520 3521 p = Pattern.compile("abc"); 3522 m = p.matcher(""); 3523 if (m.find()) 3524 failCount++; 3525 3526 p = Pattern.compile("a*"); 3527 m = p.matcher(""); 3528 if (!m.find()) 3529 failCount++; 3530 3531 report("Blank input"); 3532 } 3533 3534 /** 3535 * Tests the Boyer-Moore pattern matching of a character sequence 3536 * on randomly generated patterns. 3537 */ 3538 private static void bm() throws Exception { 3539 doBnM('a'); 3540 report("Boyer Moore (ASCII)"); 3541 3542 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3543 report("Boyer Moore (Supplementary)"); 3544 } 3545 3546 private static void doBnM(int baseCharacter) throws Exception { 3547 int achar=0; 3548 3549 for (int i=0; i<100; i++) { 3550 // Create a short pattern to search for 3551 int patternLength = generator.nextInt(7) + 4; 3552 StringBuffer patternBuffer = new StringBuffer(patternLength); 3553 for (int x=0; x<patternLength; x++) { 3554 int ch = baseCharacter + generator.nextInt(26); 3555 if (Character.isSupplementaryCodePoint(ch)) { 3556 patternBuffer.append(Character.toChars(ch)); 3557 } else { 3558 patternBuffer.append((char)ch); 3559 } 3560 } 3561 String pattern = patternBuffer.toString(); 3562 Pattern p = Pattern.compile(pattern); 3563 3564 // Create a buffer with random ASCII chars that does 3565 // not match the sample 3566 String toSearch = null; 3567 StringBuffer s = null; 3568 Matcher m = p.matcher(""); 3569 do { 3570 s = new StringBuffer(100); 3571 for (int x=0; x<100; x++) { 3572 int ch = baseCharacter + generator.nextInt(26); 3573 if (Character.isSupplementaryCodePoint(ch)) { 3574 s.append(Character.toChars(ch)); 3575 } else { 3576 s.append((char)ch); 3577 } 3578 } 3579 toSearch = s.toString(); 3580 m.reset(toSearch); 3581 } while (m.find()); 3582 3583 // Insert the pattern at a random spot 3584 int insertIndex = generator.nextInt(99); 3585 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3586 insertIndex++; 3587 s = s.insert(insertIndex, pattern); 3588 toSearch = s.toString(); 3589 3590 // Make sure that the pattern is found 3591 m.reset(toSearch); 3592 if (!m.find()) 3593 failCount++; 3594 3595 // Make sure that the match text is the pattern 3596 if (!m.group().equals(pattern)) 3597 failCount++; 3598 3599 // Make sure match occured at insertion point 3600 if (m.start() != insertIndex) 3601 failCount++; 3602 } 3603 } 3604 3605 /** 3606 * Tests the matching of slices on randomly generated patterns. 3607 * The Boyer-Moore optimization is not done on these patterns 3608 * because it uses unicode case folding. 3609 */ 3610 private static void slice() throws Exception { 3611 doSlice(Character.MAX_VALUE); 3612 report("Slice"); 3613 3614 doSlice(Character.MAX_CODE_POINT); 3615 report("Slice (Supplementary)"); 3616 } 3617 3618 private static void doSlice(int maxCharacter) throws Exception { 3619 Random generator = new Random(); 3620 int achar=0; 3621 3622 for (int i=0; i<100; i++) { 3623 // Create a short pattern to search for 3624 int patternLength = generator.nextInt(7) + 4; 3625 StringBuffer patternBuffer = new StringBuffer(patternLength); 3626 for (int x=0; x<patternLength; x++) { 3627 int randomChar = 0; 3628 while (!Character.isLetterOrDigit(randomChar)) 3629 randomChar = generator.nextInt(maxCharacter); 3630 if (Character.isSupplementaryCodePoint(randomChar)) { 3631 patternBuffer.append(Character.toChars(randomChar)); 3632 } else { 3633 patternBuffer.append((char) randomChar); 3634 } 3635 } 3636 String pattern = patternBuffer.toString(); 3637 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3638 3639 // Create a buffer with random chars that does not match the sample 3640 String toSearch = null; 3641 StringBuffer s = null; 3642 Matcher m = p.matcher(""); 3643 do { 3644 s = new StringBuffer(100); 3645 for (int x=0; x<100; x++) { 3646 int randomChar = 0; 3647 while (!Character.isLetterOrDigit(randomChar)) 3648 randomChar = generator.nextInt(maxCharacter); 3649 if (Character.isSupplementaryCodePoint(randomChar)) { 3650 s.append(Character.toChars(randomChar)); 3651 } else { 3652 s.append((char) randomChar); 3653 } 3654 } 3655 toSearch = s.toString(); 3656 m.reset(toSearch); 3657 } while (m.find()); 3658 3659 // Insert the pattern at a random spot 3660 int insertIndex = generator.nextInt(99); 3661 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3662 insertIndex++; 3663 s = s.insert(insertIndex, pattern); 3664 toSearch = s.toString(); 3665 3666 // Make sure that the pattern is found 3667 m.reset(toSearch); 3668 if (!m.find()) 3669 failCount++; 3670 3671 // Make sure that the match text is the pattern 3672 if (!m.group().equals(pattern)) 3673 failCount++; 3674 3675 // Make sure match occured at insertion point 3676 if (m.start() != insertIndex) 3677 failCount++; 3678 } 3679 } 3680 3681 private static void explainFailure(String pattern, String data, 3682 String expected, String actual) { 3683 System.err.println("----------------------------------------"); 3684 System.err.println("Pattern = "+pattern); 3685 System.err.println("Data = "+data); 3686 System.err.println("Expected = " + expected); 3687 System.err.println("Actual = " + actual); 3688 } 3689 3690 private static void explainFailure(String pattern, String data, 3691 Throwable t) { 3692 System.err.println("----------------------------------------"); 3693 System.err.println("Pattern = "+pattern); 3694 System.err.println("Data = "+data); 3695 t.printStackTrace(System.err); 3696 } 3697 3698 // Testing examples from a file 3699 3700 /** 3701 * Goes through the file "TestCases.txt" and creates many patterns 3702 * described in the file, matching the patterns against input lines in 3703 * the file, and comparing the results against the correct results 3704 * also found in the file. The file format is described in comments 3705 * at the head of the file. 3706 */ 3707 private static void processFile(String fileName) throws Exception { 3708 File testCases = new File(System.getProperty("test.src", "."), 3709 fileName); 3710 FileInputStream in = new FileInputStream(testCases); 3711 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3712 3713 // Process next test case. 3714 String aLine; 3715 while((aLine = r.readLine()) != null) { 3716 // Read a line for pattern 3717 String patternString = grabLine(r); 3718 Pattern p = null; 3719 try { 3720 p = compileTestPattern(patternString); 3721 } catch (PatternSyntaxException e) { 3722 String dataString = grabLine(r); 3723 String expectedResult = grabLine(r); 3724 if (expectedResult.startsWith("error")) 3725 continue; 3726 explainFailure(patternString, dataString, e); 3727 failCount++; 3728 continue; 3729 } 3730 3731 // Read a line for input string 3732 String dataString = grabLine(r); 3733 Matcher m = p.matcher(dataString); 3734 StringBuffer result = new StringBuffer(); 3735 3736 // Check for IllegalStateExceptions before a match 3737 failCount += preMatchInvariants(m); 3738 3739 boolean found = m.find(); 3740 3741 if (found) 3742 failCount += postTrueMatchInvariants(m); 3743 else 3744 failCount += postFalseMatchInvariants(m); 3745 3746 if (found) { 3747 result.append("true "); 3748 result.append(m.group(0) + " "); 3749 } else { 3750 result.append("false "); 3751 } 3752 3753 result.append(m.groupCount()); 3754 3755 if (found) { 3756 for (int i=1; i<m.groupCount()+1; i++) 3757 if (m.group(i) != null) 3758 result.append(" " +m.group(i)); 3759 } 3760 3761 // Read a line for the expected result 3762 String expectedResult = grabLine(r); 3763 3764 if (!result.toString().equals(expectedResult)) { 3765 explainFailure(patternString, dataString, expectedResult, result.toString()); 3766 failCount++; 3767 } 3768 } 3769 3770 report(fileName); 3771 } 3772 3773 private static int preMatchInvariants(Matcher m) { 3774 int failCount = 0; 3775 try { 3776 m.start(); 3777 failCount++; 3778 } catch (IllegalStateException ise) {} 3779 try { 3780 m.end(); 3781 failCount++; 3782 } catch (IllegalStateException ise) {} 3783 try { 3784 m.group(); 3785 failCount++; 3786 } catch (IllegalStateException ise) {} 3787 return failCount; 3788 } 3789 3790 private static int postFalseMatchInvariants(Matcher m) { 3791 int failCount = 0; 3792 try { 3793 m.group(); 3794 failCount++; 3795 } catch (IllegalStateException ise) {} 3796 try { 3797 m.start(); 3798 failCount++; 3799 } catch (IllegalStateException ise) {} 3800 try { 3801 m.end(); 3802 failCount++; 3803 } catch (IllegalStateException ise) {} 3804 return failCount; 3805 } 3806 3807 private static int postTrueMatchInvariants(Matcher m) { 3808 int failCount = 0; 3809 //assert(m.start() = m.start(0); 3810 if (m.start() != m.start(0)) 3811 failCount++; 3812 //assert(m.end() = m.end(0); 3813 if (m.start() != m.start(0)) 3814 failCount++; 3815 //assert(m.group() = m.group(0); 3816 if (!m.group().equals(m.group(0))) 3817 failCount++; 3818 try { 3819 m.group(50); 3820 failCount++; 3821 } catch (IndexOutOfBoundsException ise) {} 3822 3823 return failCount; 3824 } 3825 3826 private static Pattern compileTestPattern(String patternString) { 3827 if (!patternString.startsWith("'")) { 3828 return Pattern.compile(patternString); 3829 } 3830 3831 int break1 = patternString.lastIndexOf("'"); 3832 String flagString = patternString.substring( 3833 break1+1, patternString.length()); 3834 patternString = patternString.substring(1, break1); 3835 3836 if (flagString.equals("i")) 3837 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3838 3839 if (flagString.equals("m")) 3840 return Pattern.compile(patternString, Pattern.MULTILINE); 3841 3842 return Pattern.compile(patternString); 3843 } 3844 3845 /** 3846 * Reads a line from the input file. Keeps reading lines until a non 3847 * empty non comment line is read. If the line contains a \n then 3848 * these two characters are replaced by a newline char. If a \\uxxxx 3849 * sequence is read then the sequence is replaced by the unicode char. 3850 */ 3851 private static String grabLine(BufferedReader r) throws Exception { 3852 int index = 0; 3853 String line = r.readLine(); 3854 while (line.startsWith("//") || line.length() < 1) 3855 line = r.readLine(); 3856 while ((index = line.indexOf("\\n")) != -1) { 3857 StringBuffer temp = new StringBuffer(line); 3858 temp.replace(index, index+2, "\n"); 3859 line = temp.toString(); 3860 } 3861 while ((index = line.indexOf("\\u")) != -1) { 3862 StringBuffer temp = new StringBuffer(line); 3863 String value = temp.substring(index+2, index+6); 3864 char aChar = (char)Integer.parseInt(value, 16); 3865 String unicodeChar = "" + aChar; 3866 temp.replace(index, index+6, unicodeChar); 3867 line = temp.toString(); 3868 } 3869 3870 return line; 3871 } 3872 3873 private static void check(Pattern p, String s, String g, String expected) { 3874 Matcher m = p.matcher(s); 3875 m.find(); 3876 if (!m.group(g).equals(expected) || 3877 s.charAt(m.start(g)) != expected.charAt(0) || 3878 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 3879 failCount++; 3880 } 3881 3882 private static void checkReplaceFirst(String p, String s, String r, String expected) 3883 { 3884 if (!expected.equals(Pattern.compile(p) 3885 .matcher(s) 3886 .replaceFirst(r))) 3887 failCount++; 3888 } 3889 3890 private static void checkReplaceAll(String p, String s, String r, String expected) 3891 { 3892 if (!expected.equals(Pattern.compile(p) 3893 .matcher(s) 3894 .replaceAll(r))) 3895 failCount++; 3896 } 3897 3898 private static void checkExpectedFail(String p) { 3899 try { 3900 Pattern.compile(p); 3901 } catch (PatternSyntaxException pse) { 3902 //pse.printStackTrace(); 3903 return; 3904 } 3905 failCount++; 3906 } 3907 3908 private static void checkExpectedIAE(Matcher m, String g) { 3909 m.find(); 3910 try { 3911 m.group(g); 3912 } catch (IllegalArgumentException x) { 3913 //iae.printStackTrace(); 3914 try { 3915 m.start(g); 3916 } catch (IllegalArgumentException xx) { 3917 try { 3918 m.start(g); 3919 } catch (IllegalArgumentException xxx) { 3920 return; 3921 } 3922 } 3923 } 3924 failCount++; 3925 } 3926 3927 private static void checkExpectedNPE(Matcher m) { 3928 m.find(); 3929 try { 3930 m.group(null); 3931 } catch (NullPointerException x) { 3932 try { 3933 m.start(null); 3934 } catch (NullPointerException xx) { 3935 try { 3936 m.end(null); 3937 } catch (NullPointerException xxx) { 3938 return; 3939 } 3940 } 3941 } 3942 failCount++; 3943 } 3944 3945 private static void namedGroupCaptureTest() throws Exception { 3946 check(Pattern.compile("x+(?<gname>y+)z+"), 3947 "xxxyyyzzz", 3948 "gname", 3949 "yyy"); 3950 3951 check(Pattern.compile("x+(?<gname8>y+)z+"), 3952 "xxxyyyzzz", 3953 "gname8", 3954 "yyy"); 3955 3956 //backref 3957 Pattern pattern = Pattern.compile("(a*)bc\\1"); 3958 check(pattern, "zzzaabcazzz", true); // found "abca" 3959 3960 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 3961 "zzzaabcaazzz", true); 3962 3963 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 3964 "abcdefabc", true); 3965 3966 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 3967 "abcdefghijkk", true); 3968 3969 // Supplementary character tests 3970 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3971 toSupplementaries("zzzaabcazzz"), true); 3972 3973 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3974 toSupplementaries("zzzaabcaazzz"), true); 3975 3976 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 3977 toSupplementaries("abcdefabc"), true); 3978 3979 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 3980 "(?<gname>" + 3981 toSupplementaries("k)") + "\\k<gname>"), 3982 toSupplementaries("abcdefghijkk"), true); 3983 3984 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 3985 "xxxyyyzzzyyy", 3986 "gname", 3987 "yyy"); 3988 3989 //replaceFirst/All 3990 checkReplaceFirst("(?<gn>ab)(c*)", 3991 "abccczzzabcczzzabccc", 3992 "${gn}", 3993 "abzzzabcczzzabccc"); 3994 3995 checkReplaceAll("(?<gn>ab)(c*)", 3996 "abccczzzabcczzzabccc", 3997 "${gn}", 3998 "abzzzabzzzab"); 3999 4000 4001 checkReplaceFirst("(?<gn>ab)(c*)", 4002 "zzzabccczzzabcczzzabccczzz", 4003 "${gn}", 4004 "zzzabzzzabcczzzabccczzz"); 4005 4006 checkReplaceAll("(?<gn>ab)(c*)", 4007 "zzzabccczzzabcczzzabccczzz", 4008 "${gn}", 4009 "zzzabzzzabzzzabzzz"); 4010 4011 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4012 "zzzabccczzzabcczzzabccczzz", 4013 "${gn2}", 4014 "zzzccczzzabcczzzabccczzz"); 4015 4016 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4017 "zzzabccczzzabcczzzabccczzz", 4018 "${gn2}", 4019 "zzzccczzzcczzzccczzz"); 4020 4021 //toSupplementaries("(ab)(c*)")); 4022 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4023 ")(?<gn2>" + toSupplementaries("c") + "*)", 4024 toSupplementaries("abccczzzabcczzzabccc"), 4025 "${gn1}", 4026 toSupplementaries("abzzzabcczzzabccc")); 4027 4028 4029 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4030 ")(?<gn2>" + toSupplementaries("c") + "*)", 4031 toSupplementaries("abccczzzabcczzzabccc"), 4032 "${gn1}", 4033 toSupplementaries("abzzzabzzzab")); 4034 4035 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4036 ")(?<gn2>" + toSupplementaries("c") + "*)", 4037 toSupplementaries("abccczzzabcczzzabccc"), 4038 "${gn2}", 4039 toSupplementaries("ccczzzabcczzzabccc")); 4040 4041 4042 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4043 ")(?<gn2>" + toSupplementaries("c") + "*)", 4044 toSupplementaries("abccczzzabcczzzabccc"), 4045 "${gn2}", 4046 toSupplementaries("ccczzzcczzzccc")); 4047 4048 checkReplaceFirst("(?<dog>Dog)AndCat", 4049 "zzzDogAndCatzzzDogAndCatzzz", 4050 "${dog}", 4051 "zzzDogzzzDogAndCatzzz"); 4052 4053 4054 checkReplaceAll("(?<dog>Dog)AndCat", 4055 "zzzDogAndCatzzzDogAndCatzzz", 4056 "${dog}", 4057 "zzzDogzzzDogzzz"); 4058 4059 // backref in Matcher & String 4060 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") || 4061 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4062 failCount++; 4063 4064 // negative 4065 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4066 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4067 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4068 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4069 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4070 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4071 "gnameX"); 4072 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4073 report("NamedGroupCapture"); 4074 } 4075 4076 // This is for bug 6969132 4077 private static void nonBmpClassComplementTest() throws Exception { 4078 Pattern p = Pattern.compile("\\P{Lu}"); 4079 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4080 if (m.find() && m.start() == 1) 4081 failCount++; 4082 4083 // from a unicode category 4084 p = Pattern.compile("\\P{Lu}"); 4085 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4086 if (m.find()) 4087 failCount++; 4088 if (!m.hitEnd()) 4089 failCount++; 4090 4091 // block 4092 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4093 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4094 if (m.find() && m.start() == 1) 4095 failCount++; 4096 4097 report("NonBmpClassComplement"); 4098 } 4099 4100 private static void unicodePropertiesTest() throws Exception { 4101 // different forms 4102 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4103 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4104 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4105 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4106 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4107 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4108 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4109 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4110 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4111 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4112 failCount++; 4113 4114 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4115 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4116 Matcher lastSM = common; 4117 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4118 4119 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4120 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4121 Matcher lastBM = latin; 4122 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4123 4124 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4125 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4126 continue; // only pick couple code points, they are the same 4127 } 4128 4129 // Unicode Script 4130 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4131 Matcher m; 4132 String str = new String(Character.toChars(cp)); 4133 if (script == lastScript) { 4134 m = lastSM; 4135 m.reset(str); 4136 } else { 4137 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4138 } 4139 if (!m.matches()) { 4140 failCount++; 4141 } 4142 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common; 4143 other.reset(str); 4144 if (other.matches()) { 4145 failCount++; 4146 } 4147 lastSM = m; 4148 lastScript = script; 4149 4150 // Unicode Block 4151 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4152 if (block == null) { 4153 //System.out.printf("Not a Block: cp=%x%n", cp); 4154 continue; 4155 } 4156 if (block == lastBlock) { 4157 m = lastBM; 4158 m.reset(str); 4159 } else { 4160 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4161 } 4162 if (!m.matches()) { 4163 failCount++; 4164 } 4165 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4166 other.reset(str); 4167 if (other.matches()) { 4168 failCount++; 4169 } 4170 lastBM = m; 4171 lastBlock = block; 4172 } 4173 report("unicodeProperties"); 4174 } 4175 4176 private static void unicodeHexNotationTest() throws Exception { 4177 4178 // negative 4179 checkExpectedFail("\\x{-23}"); 4180 checkExpectedFail("\\x{110000}"); 4181 checkExpectedFail("\\x{}"); 4182 checkExpectedFail("\\x{AB[ef]"); 4183 4184 // codepoint 4185 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4186 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4187 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4188 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4189 4190 // in class 4191 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4192 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4193 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4194 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4195 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4196 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4197 4198 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4199 String s = "A" + new String(Character.toChars(cp)) + "B"; 4200 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp) 4201 : String.format("\\u%04x\\u%04x", 4202 (int) Character.toChars(cp)[0], 4203 (int) Character.toChars(cp)[1]); 4204 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4205 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4206 failCount++; 4207 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4208 failCount++; 4209 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4210 failCount++; 4211 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4212 failCount++; 4213 } 4214 report("unicodeHexNotation"); 4215 } 4216 4217 private static void unicodeClassesTest() throws Exception { 4218 4219 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4220 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4221 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4222 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4223 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4224 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4225 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4226 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4227 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4228 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4229 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4230 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4231 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4232 Matcher bound = Pattern.compile("\\b").matcher(""); 4233 Matcher word = Pattern.compile("\\w++").matcher(""); 4234 // UNICODE_CHARACTER_CLASS 4235 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4236 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4237 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4238 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4239 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4240 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4241 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4242 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4243 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4244 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4245 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4246 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4247 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4248 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4249 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4250 // embedded flag (?U) 4251 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4252 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4253 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4254 4255 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4256 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4257 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4258 // properties 4259 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4260 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4261 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4262 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4263 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4264 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4265 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4266 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4267 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4268 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4269 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4270 4271 // javaMethod 4272 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4273 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4274 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4275 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4276 4277 for (int cp = 1; cp < 0x30000; cp++) { 4278 String str = new String(Character.toChars(cp)); 4279 int type = Character.getType(cp); 4280 if (// lower 4281 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4282 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4283 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4284 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4285 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4286 // upper 4287 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4288 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4289 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4290 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4291 // alpha 4292 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4293 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4294 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4295 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4296 // digit 4297 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4298 Character.isDigit(cp) != digitU.reset(str).matches() || 4299 // alnum 4300 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4301 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4302 // punct 4303 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4304 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4305 // graph 4306 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4307 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4308 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4309 // blank 4310 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4311 != blank.reset(str).matches() || 4312 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4313 // print 4314 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4315 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4316 // cntrl 4317 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4318 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4319 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4320 // hexdigit 4321 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4322 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4323 // space 4324 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4325 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4326 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4327 // word 4328 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4329 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4330 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4331 // bwordb 4332 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4333 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4334 // properties 4335 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4336 Character.isLetter(cp) != letterP.reset(str).matches()|| 4337 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4338 Character.isIdeographic(cp) != ideogJ.reset(str).matches() || 4339 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 4340 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4341 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches()) 4342 failCount++; 4343 } 4344 4345 // bounds/word align 4346 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4347 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4348 failCount++; 4349 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4350 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4351 failCount++; 4352 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4353 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4354 failCount++; 4355 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4356 failCount++; 4357 report("unicodePredefinedClasses"); 4358 } 4359 4360 private static void horizontalAndVerticalWSTest() throws Exception { 4361 String hws = new String (new char[] { 4362 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4363 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4364 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4365 0x202f, 0x205f, 0x3000 }); 4366 String vws = new String (new char[] { 4367 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4368 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4369 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4370 failCount++; 4371 if (Pattern.compile("\\H").matcher(hws).find() || 4372 Pattern.compile("[\\H]").matcher(hws).find()) 4373 failCount++; 4374 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4375 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4376 failCount++; 4377 if (Pattern.compile("\\V").matcher(vws).find() || 4378 Pattern.compile("[\\V]").matcher(vws).find()) 4379 failCount++; 4380 String prefix = "abcd"; 4381 String suffix = "efgh"; 4382 String ng = "A"; 4383 for (int i = 0; i < hws.length(); i++) { 4384 String c = String.valueOf(hws.charAt(i)); 4385 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4386 if (!m.find() || !c.equals(m.group())) 4387 failCount++; 4388 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4389 if (!m.find() || !c.equals(m.group())) 4390 failCount++; 4391 4392 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4393 if (!m.find() || !ng.equals(m.group())) 4394 failCount++; 4395 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4396 if (!m.find() || !ng.equals(m.group())) 4397 failCount++; 4398 } 4399 for (int i = 0; i < vws.length(); i++) { 4400 String c = String.valueOf(vws.charAt(i)); 4401 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4402 if (!m.find() || !c.equals(m.group())) 4403 failCount++; 4404 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4405 if (!m.find() || !c.equals(m.group())) 4406 failCount++; 4407 4408 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4409 if (!m.find() || !ng.equals(m.group())) 4410 failCount++; 4411 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4412 if (!m.find() || !ng.equals(m.group())) 4413 failCount++; 4414 } 4415 // \v in range is interpreted as 0x0B. This is the undocumented behavior 4416 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4417 failCount++; 4418 report("horizontalAndVerticalWSTest"); 4419 } 4420 4421 private static void linebreakTest() throws Exception { 4422 String linebreaks = new String (new char[] { 4423 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4424 String crnl = "\r\n"; 4425 if (!Pattern.compile("\\R+").matcher(linebreaks).matches() || 4426 !Pattern.compile("\\R").matcher(crnl).matches() || 4427 Pattern.compile("\\R\\R").matcher(crnl).matches()) 4428 failCount++; 4429 report("linebreakTest"); 4430 } 4431 4432 // #7189363 4433 private static void branchTest() throws Exception { 4434 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4435 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4436 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4437 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4438 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4439 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4440 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4441 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4442 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4443 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4444 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4445 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4446 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4447 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4448 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4449 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4450 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4451 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4452 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4453 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4454 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 4455 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4456 failCount++; 4457 report("branchTest"); 4458 } 4459 4460 // This test is for 8007395 4461 private static void groupCurlyNotFoundSuppTest() throws Exception { 4462 String input = "test this as \ud83d\ude0d"; 4463 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4464 "test(.)*(@[a-zA-Z.]+)", 4465 "test([^B])+(@[a-zA-Z.]+)", 4466 "test([^B])*(@[a-zA-Z.]+)", 4467 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4468 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4469 }) { 4470 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4471 .matcher(input); 4472 try { 4473 if (m.find()) { 4474 failCount++; 4475 } 4476 } catch (Exception x) { 4477 failCount++; 4478 } 4479 } 4480 report("GroupCurly NotFoundSupp"); 4481 } 4482 4483 // This test is for 8023647 4484 private static void groupCurlyBackoffTest() throws Exception { 4485 if (!"abc1c".matches("(\\w)+1\\1") || 4486 "abc11".matches("(\\w)+1\\1")) { 4487 failCount++; 4488 } 4489 report("GroupCurly backoff"); 4490 } 4491 4492 // This test is for 8012646 4493 private static void patternAsPredicate() throws Exception { 4494 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4495 4496 if (p.test("")) { 4497 failCount++; 4498 } 4499 if (!p.test("word")) { 4500 failCount++; 4501 } 4502 if (p.test("1234")) { 4503 failCount++; 4504 } 4505 report("Pattern.asPredicate"); 4506 } 4507 4508 // This test is for 8035975 4509 private static void invalidFlags() throws Exception { 4510 for (int flag = 1; flag != 0; flag <<= 1) { 4511 switch (flag) { 4512 case Pattern.CASE_INSENSITIVE: 4513 case Pattern.MULTILINE: 4514 case Pattern.DOTALL: 4515 case Pattern.UNICODE_CASE: 4516 case Pattern.CANON_EQ: 4517 case Pattern.UNIX_LINES: 4518 case Pattern.LITERAL: 4519 case Pattern.UNICODE_CHARACTER_CLASS: 4520 case Pattern.COMMENTS: 4521 // valid flag, continue 4522 break; 4523 default: 4524 try { 4525 Pattern.compile(".", flag); 4526 failCount++; 4527 } catch (IllegalArgumentException expected) { 4528 } 4529 } 4530 } 4531 report("Invalid compile flags"); 4532 } 4533 }