1 /* 2 * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 36 * @library /lib/testlibrary 37 * @build jdk.testlibrary.* 38 * @run main RegExTest 39 * @key randomness 40 */ 41 42 import java.util.function.Function; 43 import java.util.regex.*; 44 import java.util.Random; 45 import java.io.*; 46 import java.util.*; 47 import java.nio.CharBuffer; 48 import java.util.function.Predicate; 49 import jdk.testlibrary.RandomFactory; 50 51 /** 52 * This is a test class created to check the operation of 53 * the Pattern and Matcher classes. 54 */ 55 public class RegExTest { 56 57 private static Random generator = RandomFactory.getRandom(); 58 private static boolean failure = false; 59 private static int failCount = 0; 60 private static String firstFailure = null; 61 62 /** 63 * Main to interpret arguments and run several tests. 64 * 65 */ 66 public static void main(String[] args) throws Exception { 67 // Most of the tests are in a file 68 processFile("TestCases.txt"); 69 //processFile("PerlCases.txt"); 70 processFile("BMPTestCases.txt"); 71 processFile("SupplementaryTestCases.txt"); 72 73 // These test many randomly generated char patterns 74 bm(); 75 slice(); 76 77 // These are hard to put into the file 78 escapes(); 79 blankInput(); 80 81 // Substitition tests on randomly generated sequences 82 globalSubstitute(); 83 stringbufferSubstitute(); 84 stringbuilderSubstitute(); 85 86 substitutionBasher(); 87 substitutionBasher2(); 88 89 // Canonical Equivalence 90 ceTest(); 91 92 // Anchors 93 anchorTest(); 94 95 // boolean match calls 96 matchesTest(); 97 lookingAtTest(); 98 99 // Pattern API 100 patternMatchesTest(); 101 102 // Misc 103 lookbehindTest(); 104 nullArgumentTest(); 105 backRefTest(); 106 groupCaptureTest(); 107 caretTest(); 108 charClassTest(); 109 emptyPatternTest(); 110 findIntTest(); 111 group0Test(); 112 longPatternTest(); 113 octalTest(); 114 ampersandTest(); 115 negationTest(); 116 splitTest(); 117 appendTest(); 118 caseFoldingTest(); 119 commentsTest(); 120 unixLinesTest(); 121 replaceFirstTest(); 122 gTest(); 123 zTest(); 124 serializeTest(); 125 reluctantRepetitionTest(); 126 multilineDollarTest(); 127 dollarAtEndTest(); 128 caretBetweenTerminatorsTest(); 129 // This RFE rejected in Tiger numOccurrencesTest(); 130 javaCharClassTest(); 131 nonCaptureRepetitionTest(); 132 notCapturedGroupCurlyMatchTest(); 133 escapedSegmentTest(); 134 literalPatternTest(); 135 literalReplacementTest(); 136 regionTest(); 137 toStringTest(); 138 negatedCharClassTest(); 139 findFromTest(); 140 boundsTest(); 141 unicodeWordBoundsTest(); 142 caretAtEndTest(); 143 wordSearchTest(); 144 hitEndTest(); 145 toMatchResultTest(); 146 toMatchResultTest2(); 147 surrogatesInClassTest(); 148 removeQEQuotingTest(); 149 namedGroupCaptureTest(); 150 nonBmpClassComplementTest(); 151 unicodePropertiesTest(); 152 unicodeHexNotationTest(); 153 unicodeClassesTest(); 154 horizontalAndVerticalWSTest(); 155 linebreakTest(); 156 branchTest(); 157 groupCurlyNotFoundSuppTest(); 158 groupCurlyBackoffTest(); 159 patternAsPredicate(); 160 invalidFlags(); 161 162 if (failure) { 163 throw new 164 RuntimeException("RegExTest failed, 1st failure: " + 165 firstFailure); 166 } else { 167 System.err.println("OKAY: All tests passed."); 168 } 169 } 170 171 // Utility functions 172 173 private static String getRandomAlphaString(int length) { 174 StringBuffer buf = new StringBuffer(length); 175 for (int i=0; i<length; i++) { 176 char randChar = (char)(97 + generator.nextInt(26)); 177 buf.append(randChar); 178 } 179 return buf.toString(); 180 } 181 182 private static void check(Matcher m, String expected) { 183 m.find(); 184 if (!m.group().equals(expected)) 185 failCount++; 186 } 187 188 private static void check(Matcher m, String result, boolean expected) { 189 m.find(); 190 if (m.group().equals(result) != expected) 191 failCount++; 192 } 193 194 private static void check(Pattern p, String s, boolean expected) { 195 if (p.matcher(s).find() != expected) 196 failCount++; 197 } 198 199 private static void check(String p, String s, boolean expected) { 200 Matcher matcher = Pattern.compile(p).matcher(s); 201 if (matcher.find() != expected) 202 failCount++; 203 } 204 205 private static void check(String p, char c, boolean expected) { 206 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 207 Pattern pattern = Pattern.compile(propertyPattern); 208 char[] ca = new char[1]; ca[0] = c; 209 Matcher matcher = pattern.matcher(new String(ca)); 210 if (!matcher.find()) 211 failCount++; 212 } 213 214 private static void check(String p, int codePoint, boolean expected) { 215 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 216 Pattern pattern = Pattern.compile(propertyPattern); 217 char[] ca = Character.toChars(codePoint); 218 Matcher matcher = pattern.matcher(new String(ca)); 219 if (!matcher.find()) 220 failCount++; 221 } 222 223 private static void check(String p, int flag, String input, String s, 224 boolean expected) 225 { 226 Pattern pattern = Pattern.compile(p, flag); 227 Matcher matcher = pattern.matcher(input); 228 if (expected) 229 check(matcher, s, expected); 230 else 231 check(pattern, input, false); 232 } 233 234 private static void report(String testName) { 235 int spacesToAdd = 30 - testName.length(); 236 StringBuffer paddedNameBuffer = new StringBuffer(testName); 237 for (int i=0; i<spacesToAdd; i++) 238 paddedNameBuffer.append(" "); 239 String paddedName = paddedNameBuffer.toString(); 240 System.err.println(paddedName + ": " + 241 (failCount==0 ? "Passed":"Failed("+failCount+")")); 242 if (failCount > 0) { 243 failure = true; 244 245 if (firstFailure == null) { 246 firstFailure = testName; 247 } 248 } 249 250 failCount = 0; 251 } 252 253 /** 254 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 255 * supplementary characters. This method does NOT fully take care 256 * of the regex syntax. 257 */ 258 private static String toSupplementaries(String s) { 259 int length = s.length(); 260 StringBuffer sb = new StringBuffer(length * 2); 261 262 for (int i = 0; i < length; ) { 263 char c = s.charAt(i++); 264 if (c == '\\') { 265 sb.append(c); 266 if (i < length) { 267 c = s.charAt(i++); 268 sb.append(c); 269 if (c == 'u') { 270 // assume no syntax error 271 sb.append(s.charAt(i++)); 272 sb.append(s.charAt(i++)); 273 sb.append(s.charAt(i++)); 274 sb.append(s.charAt(i++)); 275 } 276 } 277 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 278 sb.append('\ud800').append((char)('\udc00'+c)); 279 } else { 280 sb.append(c); 281 } 282 } 283 return sb.toString(); 284 } 285 286 // Regular expression tests 287 288 // This is for bug 6178785 289 // Test if an expected NPE gets thrown when passing in a null argument 290 private static boolean check(Runnable test) { 291 try { 292 test.run(); 293 failCount++; 294 return false; 295 } catch (NullPointerException npe) { 296 return true; 297 } 298 } 299 300 private static void nullArgumentTest() { 301 check(() -> Pattern.compile(null)); 302 check(() -> Pattern.matches(null, null)); 303 check(() -> Pattern.matches("xyz", null)); 304 check(() -> Pattern.quote(null)); 305 check(() -> Pattern.compile("xyz").split(null)); 306 check(() -> Pattern.compile("xyz").matcher(null)); 307 308 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 309 m.matches(); 310 check(() -> m.appendTail((StringBuffer) null)); 311 check(() -> m.appendTail((StringBuilder)null)); 312 check(() -> m.replaceAll((String) null)); 313 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 314 check(() -> m.replaceFirst((String)null)); 315 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 316 check(() -> m.appendReplacement((StringBuffer)null, null)); 317 check(() -> m.appendReplacement((StringBuilder)null, null)); 318 check(() -> m.reset(null)); 319 check(() -> Matcher.quoteReplacement(null)); 320 //check(() -> m.usePattern(null)); 321 322 report("Null Argument"); 323 } 324 325 // This is for bug6635133 326 // Test if surrogate pair in Unicode escapes can be handled correctly. 327 private static void surrogatesInClassTest() throws Exception { 328 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 329 Matcher matcher = pattern.matcher("\ud834\udd22"); 330 if (!matcher.find()) 331 failCount++; 332 333 report("Surrogate pair in Unicode escape"); 334 } 335 336 // This is for bug6990617 337 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 338 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 339 // char is an octal digit. 340 private static void removeQEQuotingTest() throws Exception { 341 Pattern pattern = 342 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 343 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 344 if (!matcher.find()) 345 failCount++; 346 347 report("Remove Q/E Quoting"); 348 } 349 350 // This is for bug 4988891 351 // Test toMatchResult to see that it is a copy of the Matcher 352 // that is not affected by subsequent operations on the original 353 private static void toMatchResultTest() throws Exception { 354 Pattern pattern = Pattern.compile("squid"); 355 Matcher matcher = pattern.matcher( 356 "agiantsquidofdestinyasmallsquidoffate"); 357 matcher.find(); 358 int matcherStart1 = matcher.start(); 359 MatchResult mr = matcher.toMatchResult(); 360 if (mr == matcher) 361 failCount++; 362 int resultStart1 = mr.start(); 363 if (matcherStart1 != resultStart1) 364 failCount++; 365 matcher.find(); 366 int matcherStart2 = matcher.start(); 367 int resultStart2 = mr.start(); 368 if (matcherStart2 == resultStart2) 369 failCount++; 370 if (resultStart1 != resultStart2) 371 failCount++; 372 MatchResult mr2 = matcher.toMatchResult(); 373 if (mr == mr2) 374 failCount++; 375 if (mr2.start() != matcherStart2) 376 failCount++; 377 report("toMatchResult is a copy"); 378 } 379 380 private static void checkExpectedISE(Runnable test) { 381 try { 382 test.run(); 383 failCount++; 384 } catch (IllegalStateException x) { 385 } catch (IndexOutOfBoundsException xx) { 386 failCount++; 387 } 388 } 389 390 private static void checkExpectedIOOE(Runnable test) { 391 try { 392 test.run(); 393 failCount++; 394 } catch (IndexOutOfBoundsException x) {} 395 } 396 397 // This is for bug 8074678 398 // Test the result of toMatchResult throws ISE if no match is availble 399 private static void toMatchResultTest2() throws Exception { 400 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 401 matcher.find(); 402 MatchResult mr = matcher.toMatchResult(); 403 404 checkExpectedISE(() -> mr.start()); 405 checkExpectedISE(() -> mr.start(2)); 406 checkExpectedISE(() -> mr.end()); 407 checkExpectedISE(() -> mr.end(2)); 408 checkExpectedISE(() -> mr.group()); 409 checkExpectedISE(() -> mr.group(2)); 410 411 matcher = Pattern.compile("(match)").matcher("there is a match"); 412 matcher.find(); 413 MatchResult mr2 = matcher.toMatchResult(); 414 checkExpectedIOOE(() -> mr2.start(2)); 415 checkExpectedIOOE(() -> mr2.end(2)); 416 checkExpectedIOOE(() -> mr2.group(2)); 417 418 report("toMatchResult2 appropriate exceptions"); 419 } 420 421 // This is for bug 5013885 422 // Must test a slice to see if it reports hitEnd correctly 423 private static void hitEndTest() throws Exception { 424 // Basic test of Slice node 425 Pattern p = Pattern.compile("^squidattack"); 426 Matcher m = p.matcher("squack"); 427 m.find(); 428 if (m.hitEnd()) 429 failCount++; 430 m.reset("squid"); 431 m.find(); 432 if (!m.hitEnd()) 433 failCount++; 434 435 // Test Slice, SliceA and SliceU nodes 436 for (int i=0; i<3; i++) { 437 int flags = 0; 438 if (i==1) flags = Pattern.CASE_INSENSITIVE; 439 if (i==2) flags = Pattern.UNICODE_CASE; 440 p = Pattern.compile("^abc", flags); 441 m = p.matcher("ad"); 442 m.find(); 443 if (m.hitEnd()) 444 failCount++; 445 m.reset("ab"); 446 m.find(); 447 if (!m.hitEnd()) 448 failCount++; 449 } 450 451 // Test Boyer-Moore node 452 p = Pattern.compile("catattack"); 453 m = p.matcher("attack"); 454 m.find(); 455 if (!m.hitEnd()) 456 failCount++; 457 458 p = Pattern.compile("catattack"); 459 m = p.matcher("attackattackattackcatatta"); 460 m.find(); 461 if (!m.hitEnd()) 462 failCount++; 463 report("hitEnd from a Slice"); 464 } 465 466 // This is for bug 4997476 467 // It is weird code submitted by customer demonstrating a regression 468 private static void wordSearchTest() throws Exception { 469 String testString = new String("word1 word2 word3"); 470 Pattern p = Pattern.compile("\\b"); 471 Matcher m = p.matcher(testString); 472 int position = 0; 473 int start = 0; 474 while (m.find(position)) { 475 start = m.start(); 476 if (start == testString.length()) 477 break; 478 if (m.find(start+1)) { 479 position = m.start(); 480 } else { 481 position = testString.length(); 482 } 483 if (testString.substring(start, position).equals(" ")) 484 continue; 485 if (!testString.substring(start, position-1).startsWith("word")) 486 failCount++; 487 } 488 report("Customer word search"); 489 } 490 491 // This is for bug 4994840 492 private static void caretAtEndTest() throws Exception { 493 // Problem only occurs with multiline patterns 494 // containing a beginning-of-line caret "^" followed 495 // by an expression that also matches the empty string. 496 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 497 Matcher matcher = pattern.matcher("\r"); 498 matcher.find(); 499 matcher.find(); 500 report("Caret at end"); 501 } 502 503 // This test is for 4979006 504 // Check to see if word boundary construct properly handles unicode 505 // non spacing marks 506 private static void unicodeWordBoundsTest() throws Exception { 507 String spaces = " "; 508 String wordChar = "a"; 509 String nsm = "\u030a"; 510 511 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 512 513 Pattern pattern = Pattern.compile("\\b"); 514 Matcher matcher = pattern.matcher(""); 515 // S=other B=word character N=non spacing mark .=word boundary 516 // SS.BB.SS 517 String input = spaces + wordChar + wordChar + spaces; 518 twoFindIndexes(input, matcher, 2, 4); 519 // SS.BBN.SS 520 input = spaces + wordChar +wordChar + nsm + spaces; 521 twoFindIndexes(input, matcher, 2, 5); 522 // SS.BN.SS 523 input = spaces + wordChar + nsm + spaces; 524 twoFindIndexes(input, matcher, 2, 4); 525 // SS.BNN.SS 526 input = spaces + wordChar + nsm + nsm + spaces; 527 twoFindIndexes(input, matcher, 2, 5); 528 // SSN.BB.SS 529 input = spaces + nsm + wordChar + wordChar + spaces; 530 twoFindIndexes(input, matcher, 3, 5); 531 // SS.BNB.SS 532 input = spaces + wordChar + nsm + wordChar + spaces; 533 twoFindIndexes(input, matcher, 2, 5); 534 // SSNNSS 535 input = spaces + nsm + nsm + spaces; 536 matcher.reset(input); 537 if (matcher.find()) 538 failCount++; 539 // SSN.BBN.SS 540 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 541 twoFindIndexes(input, matcher, 3, 6); 542 543 report("Unicode word boundary"); 544 } 545 546 private static void twoFindIndexes(String input, Matcher matcher, int a, 547 int b) throws Exception 548 { 549 matcher.reset(input); 550 matcher.find(); 551 if (matcher.start() != a) 552 failCount++; 553 matcher.find(); 554 if (matcher.start() != b) 555 failCount++; 556 } 557 558 // This test is for 6284152 559 static void check(String regex, String input, String[] expected) { 560 List<String> result = new ArrayList<String>(); 561 Pattern p = Pattern.compile(regex); 562 Matcher m = p.matcher(input); 563 while (m.find()) { 564 result.add(m.group()); 565 } 566 if (!Arrays.asList(expected).equals(result)) 567 failCount++; 568 } 569 570 private static void lookbehindTest() throws Exception { 571 //Positive 572 check("(?<=%.{0,5})foo\\d", 573 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 574 new String[]{"foo1", "foo2", "foo3"}); 575 576 //boundary at end of the lookbehind sub-regex should work consistently 577 //with the boundary just after the lookbehind sub-regex 578 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 579 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 580 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 581 check("(?<!abc \\b)foo", "abc foo", new String[0]); 582 583 //Negative 584 check("(?<!%.{0,5})foo\\d", 585 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 586 new String[] {"foo4", "foo5"}); 587 588 //Positive greedy 589 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 590 591 //Positive reluctant 592 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 593 594 //supplementary 595 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 596 new String[] {"fo\ud800\udc00o"}); 597 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 598 new String[] {"fo\ud800\udc00o"}); 599 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 600 new String[] {"fo\ud800\udc00o"}); 601 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 602 new String[] {"fo\ud800\udc00o"}); 603 report("Lookbehind"); 604 } 605 606 // This test is for 4938995 607 // Check to see if weak region boundaries are transparent to 608 // lookahead and lookbehind constructs 609 private static void boundsTest() throws Exception { 610 String fullMessage = "catdogcat"; 611 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 612 Matcher matcher = pattern.matcher("catdogca"); 613 matcher.useTransparentBounds(true); 614 if (matcher.find()) 615 failCount++; 616 matcher.reset("atdogcat"); 617 if (matcher.find()) 618 failCount++; 619 matcher.reset(fullMessage); 620 if (!matcher.find()) 621 failCount++; 622 matcher.reset(fullMessage); 623 matcher.region(0,9); 624 if (!matcher.find()) 625 failCount++; 626 matcher.reset(fullMessage); 627 matcher.region(0,6); 628 if (!matcher.find()) 629 failCount++; 630 matcher.reset(fullMessage); 631 matcher.region(3,6); 632 if (!matcher.find()) 633 failCount++; 634 matcher.useTransparentBounds(false); 635 if (matcher.find()) 636 failCount++; 637 638 // Negative lookahead/lookbehind 639 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 640 matcher = pattern.matcher("dogcat"); 641 matcher.useTransparentBounds(true); 642 matcher.region(0,3); 643 if (matcher.find()) 644 failCount++; 645 matcher.reset("catdog"); 646 matcher.region(3,6); 647 if (matcher.find()) 648 failCount++; 649 matcher.useTransparentBounds(false); 650 matcher.reset("dogcat"); 651 matcher.region(0,3); 652 if (!matcher.find()) 653 failCount++; 654 matcher.reset("catdog"); 655 matcher.region(3,6); 656 if (!matcher.find()) 657 failCount++; 658 659 report("Region bounds transparency"); 660 } 661 662 // This test is for 4945394 663 private static void findFromTest() throws Exception { 664 String message = "This is 40 $0 message."; 665 Pattern pat = Pattern.compile("\\$0"); 666 Matcher match = pat.matcher(message); 667 if (!match.find()) 668 failCount++; 669 if (match.find()) 670 failCount++; 671 if (match.find()) 672 failCount++; 673 report("Check for alternating find"); 674 } 675 676 // This test is for 4872664 and 4892980 677 private static void negatedCharClassTest() throws Exception { 678 Pattern pattern = Pattern.compile("[^>]"); 679 Matcher matcher = pattern.matcher("\u203A"); 680 if (!matcher.matches()) 681 failCount++; 682 pattern = Pattern.compile("[^fr]"); 683 matcher = pattern.matcher("a"); 684 if (!matcher.find()) 685 failCount++; 686 matcher.reset("\u203A"); 687 if (!matcher.find()) 688 failCount++; 689 String s = "for"; 690 String result[] = s.split("[^fr]"); 691 if (!result[0].equals("f")) 692 failCount++; 693 if (!result[1].equals("r")) 694 failCount++; 695 s = "f\u203Ar"; 696 result = s.split("[^fr]"); 697 if (!result[0].equals("f")) 698 failCount++; 699 if (!result[1].equals("r")) 700 failCount++; 701 702 // Test adding to bits, subtracting a node, then adding to bits again 703 pattern = Pattern.compile("[^f\u203Ar]"); 704 matcher = pattern.matcher("a"); 705 if (!matcher.find()) 706 failCount++; 707 matcher.reset("f"); 708 if (matcher.find()) 709 failCount++; 710 matcher.reset("\u203A"); 711 if (matcher.find()) 712 failCount++; 713 matcher.reset("r"); 714 if (matcher.find()) 715 failCount++; 716 matcher.reset("\u203B"); 717 if (!matcher.find()) 718 failCount++; 719 720 // Test subtracting a node, adding to bits, subtracting again 721 pattern = Pattern.compile("[^\u203Ar\u203B]"); 722 matcher = pattern.matcher("a"); 723 if (!matcher.find()) 724 failCount++; 725 matcher.reset("\u203A"); 726 if (matcher.find()) 727 failCount++; 728 matcher.reset("r"); 729 if (matcher.find()) 730 failCount++; 731 matcher.reset("\u203B"); 732 if (matcher.find()) 733 failCount++; 734 matcher.reset("\u203C"); 735 if (!matcher.find()) 736 failCount++; 737 738 report("Negated Character Class"); 739 } 740 741 // This test is for 4628291 742 private static void toStringTest() throws Exception { 743 Pattern pattern = Pattern.compile("b+"); 744 if (pattern.toString() != "b+") 745 failCount++; 746 Matcher matcher = pattern.matcher("aaabbbccc"); 747 String matcherString = matcher.toString(); // unspecified 748 matcher.find(); 749 matcherString = matcher.toString(); // unspecified 750 matcher.region(0,3); 751 matcherString = matcher.toString(); // unspecified 752 matcher.reset(); 753 matcherString = matcher.toString(); // unspecified 754 report("toString"); 755 } 756 757 // This test is for 4808962 758 private static void literalPatternTest() throws Exception { 759 int flags = Pattern.LITERAL; 760 761 Pattern pattern = Pattern.compile("abc\\t$^", flags); 762 check(pattern, "abc\\t$^", true); 763 764 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 765 check(pattern, "abc\\t$^", true); 766 767 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 768 check(pattern, "\\Qa^$bcabc\\E", true); 769 check(pattern, "a^$bcabc", false); 770 771 pattern = Pattern.compile("\\\\Q\\\\E"); 772 check(pattern, "\\Q\\E", true); 773 774 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 775 check(pattern, "abcefg\\Q\\Ehij", true); 776 777 pattern = Pattern.compile("\\\\\\Q\\\\E"); 778 check(pattern, "\\\\\\\\", true); 779 780 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 781 check(pattern, "\\Qa^$bcabc\\E", true); 782 check(pattern, "a^$bcabc", false); 783 784 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 785 check(pattern, "\\Qabc\\Edef", true); 786 check(pattern, "abcdef", false); 787 788 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 789 check(pattern, "abc\\Edef", true); 790 check(pattern, "abcdef", false); 791 792 pattern = Pattern.compile(Pattern.quote("\\E")); 793 check(pattern, "\\E", true); 794 795 pattern = Pattern.compile("((((abc.+?:)", flags); 796 check(pattern, "((((abc.+?:)", true); 797 798 flags |= Pattern.MULTILINE; 799 800 pattern = Pattern.compile("^cat$", flags); 801 check(pattern, "abc^cat$def", true); 802 check(pattern, "cat", false); 803 804 flags |= Pattern.CASE_INSENSITIVE; 805 806 pattern = Pattern.compile("abcdef", flags); 807 check(pattern, "ABCDEF", true); 808 check(pattern, "AbCdEf", true); 809 810 flags |= Pattern.DOTALL; 811 812 pattern = Pattern.compile("a...b", flags); 813 check(pattern, "A...b", true); 814 check(pattern, "Axxxb", false); 815 816 flags |= Pattern.CANON_EQ; 817 818 Pattern p = Pattern.compile("testa\u030a", flags); 819 check(pattern, "testa\u030a", false); 820 check(pattern, "test\u00e5", false); 821 822 // Supplementary character test 823 flags = Pattern.LITERAL; 824 825 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 826 check(pattern, toSupplementaries("abc\\t$^"), true); 827 828 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 829 check(pattern, toSupplementaries("abc\\t$^"), true); 830 831 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 832 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 833 check(pattern, toSupplementaries("a^$bcabc"), false); 834 835 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 836 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 837 check(pattern, toSupplementaries("a^$bcabc"), false); 838 839 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 840 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 841 check(pattern, toSupplementaries("abcdef"), false); 842 843 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 844 check(pattern, toSupplementaries("abc\\Edef"), true); 845 check(pattern, toSupplementaries("abcdef"), false); 846 847 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 848 check(pattern, toSupplementaries("((((abc.+?:)"), true); 849 850 flags |= Pattern.MULTILINE; 851 852 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 853 check(pattern, toSupplementaries("abc^cat$def"), true); 854 check(pattern, toSupplementaries("cat"), false); 855 856 flags |= Pattern.DOTALL; 857 858 // note: this is case-sensitive. 859 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 860 check(pattern, toSupplementaries("a...b"), true); 861 check(pattern, toSupplementaries("axxxb"), false); 862 863 flags |= Pattern.CANON_EQ; 864 865 String t = toSupplementaries("test"); 866 p = Pattern.compile(t + "a\u030a", flags); 867 check(pattern, t + "a\u030a", false); 868 check(pattern, t + "\u00e5", false); 869 870 report("Literal pattern"); 871 } 872 873 // This test is for 4803179 874 // This test is also for 4808962, replacement parts 875 private static void literalReplacementTest() throws Exception { 876 int flags = Pattern.LITERAL; 877 878 Pattern pattern = Pattern.compile("abc", flags); 879 Matcher matcher = pattern.matcher("zzzabczzz"); 880 String replaceTest = "$0"; 881 String result = matcher.replaceAll(replaceTest); 882 if (!result.equals("zzzabczzz")) 883 failCount++; 884 885 matcher.reset(); 886 String literalReplacement = matcher.quoteReplacement(replaceTest); 887 result = matcher.replaceAll(literalReplacement); 888 if (!result.equals("zzz$0zzz")) 889 failCount++; 890 891 matcher.reset(); 892 replaceTest = "\\t$\\$"; 893 literalReplacement = matcher.quoteReplacement(replaceTest); 894 result = matcher.replaceAll(literalReplacement); 895 if (!result.equals("zzz\\t$\\$zzz")) 896 failCount++; 897 898 // Supplementary character test 899 pattern = Pattern.compile(toSupplementaries("abc"), flags); 900 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 901 replaceTest = "$0"; 902 result = matcher.replaceAll(replaceTest); 903 if (!result.equals(toSupplementaries("zzzabczzz"))) 904 failCount++; 905 906 matcher.reset(); 907 literalReplacement = matcher.quoteReplacement(replaceTest); 908 result = matcher.replaceAll(literalReplacement); 909 if (!result.equals(toSupplementaries("zzz$0zzz"))) 910 failCount++; 911 912 matcher.reset(); 913 replaceTest = "\\t$\\$"; 914 literalReplacement = matcher.quoteReplacement(replaceTest); 915 result = matcher.replaceAll(literalReplacement); 916 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 917 failCount++; 918 919 // IAE should be thrown if backslash or '$' is the last character 920 // in replacement string 921 try { 922 "\uac00".replaceAll("\uac00", "$"); 923 failCount++; 924 } catch (IllegalArgumentException iie) { 925 } catch (Exception e) { 926 failCount++; 927 } 928 try { 929 "\uac00".replaceAll("\uac00", "\\"); 930 failCount++; 931 } catch (IllegalArgumentException iie) { 932 } catch (Exception e) { 933 failCount++; 934 } 935 report("Literal replacement"); 936 } 937 938 // This test is for 4757029 939 private static void regionTest() throws Exception { 940 Pattern pattern = Pattern.compile("abc"); 941 Matcher matcher = pattern.matcher("abcdefabc"); 942 943 matcher.region(0,9); 944 if (!matcher.find()) 945 failCount++; 946 if (!matcher.find()) 947 failCount++; 948 matcher.region(0,3); 949 if (!matcher.find()) 950 failCount++; 951 matcher.region(3,6); 952 if (matcher.find()) 953 failCount++; 954 matcher.region(0,2); 955 if (matcher.find()) 956 failCount++; 957 958 expectRegionFail(matcher, 1, -1); 959 expectRegionFail(matcher, -1, -1); 960 expectRegionFail(matcher, -1, 1); 961 expectRegionFail(matcher, 5, 3); 962 expectRegionFail(matcher, 5, 12); 963 expectRegionFail(matcher, 12, 12); 964 965 pattern = Pattern.compile("^abc$"); 966 matcher = pattern.matcher("zzzabczzz"); 967 matcher.region(0,9); 968 if (matcher.find()) 969 failCount++; 970 matcher.region(3,6); 971 if (!matcher.find()) 972 failCount++; 973 matcher.region(3,6); 974 matcher.useAnchoringBounds(false); 975 if (matcher.find()) 976 failCount++; 977 978 // Supplementary character test 979 pattern = Pattern.compile(toSupplementaries("abc")); 980 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 981 matcher.region(0,9*2); 982 if (!matcher.find()) 983 failCount++; 984 if (!matcher.find()) 985 failCount++; 986 matcher.region(0,3*2); 987 if (!matcher.find()) 988 failCount++; 989 matcher.region(1,3*2); 990 if (matcher.find()) 991 failCount++; 992 matcher.region(3*2,6*2); 993 if (matcher.find()) 994 failCount++; 995 matcher.region(0,2*2); 996 if (matcher.find()) 997 failCount++; 998 matcher.region(0,2*2+1); 999 if (matcher.find()) 1000 failCount++; 1001 1002 expectRegionFail(matcher, 1*2, -1); 1003 expectRegionFail(matcher, -1, -1); 1004 expectRegionFail(matcher, -1, 1*2); 1005 expectRegionFail(matcher, 5*2, 3*2); 1006 expectRegionFail(matcher, 5*2, 12*2); 1007 expectRegionFail(matcher, 12*2, 12*2); 1008 1009 pattern = Pattern.compile(toSupplementaries("^abc$")); 1010 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1011 matcher.region(0,9*2); 1012 if (matcher.find()) 1013 failCount++; 1014 matcher.region(3*2,6*2); 1015 if (!matcher.find()) 1016 failCount++; 1017 matcher.region(3*2+1,6*2); 1018 if (matcher.find()) 1019 failCount++; 1020 matcher.region(3*2,6*2-1); 1021 if (matcher.find()) 1022 failCount++; 1023 matcher.region(3*2,6*2); 1024 matcher.useAnchoringBounds(false); 1025 if (matcher.find()) 1026 failCount++; 1027 report("Regions"); 1028 } 1029 1030 private static void expectRegionFail(Matcher matcher, int index1, 1031 int index2) 1032 { 1033 try { 1034 matcher.region(index1, index2); 1035 failCount++; 1036 } catch (IndexOutOfBoundsException ioobe) { 1037 // Correct result 1038 } catch (IllegalStateException ise) { 1039 // Correct result 1040 } 1041 } 1042 1043 // This test is for 4803197 1044 private static void escapedSegmentTest() throws Exception { 1045 1046 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1047 check(pattern, "dir1\\dir2", true); 1048 1049 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1050 check(pattern, "dir1\\dir2\\", true); 1051 1052 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1053 check(pattern, "dir1\\dir2\\", true); 1054 1055 // Supplementary character test 1056 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1057 check(pattern, toSupplementaries("dir1\\dir2"), true); 1058 1059 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1060 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1061 1062 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1063 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1064 1065 report("Escaped segment"); 1066 } 1067 1068 // This test is for 4792284 1069 private static void nonCaptureRepetitionTest() throws Exception { 1070 String input = "abcdefgh;"; 1071 1072 String[] patterns = new String[] { 1073 "(?:\\w{4})+;", 1074 "(?:\\w{8})*;", 1075 "(?:\\w{2}){2,4};", 1076 "(?:\\w{4}){2,};", // only matches the 1077 ".*?(?:\\w{5})+;", // specified minimum 1078 ".*?(?:\\w{9})*;", // number of reps - OK 1079 "(?:\\w{4})+?;", // lazy repetition - OK 1080 "(?:\\w{4})++;", // possessive repetition - OK 1081 "(?:\\w{2,}?)+;", // non-deterministic - OK 1082 "(\\w{4})+;", // capturing group - OK 1083 }; 1084 1085 for (int i = 0; i < patterns.length; i++) { 1086 // Check find() 1087 check(patterns[i], 0, input, input, true); 1088 // Check matches() 1089 Pattern p = Pattern.compile(patterns[i]); 1090 Matcher m = p.matcher(input); 1091 1092 if (m.matches()) { 1093 if (!m.group(0).equals(input)) 1094 failCount++; 1095 } else { 1096 failCount++; 1097 } 1098 } 1099 1100 report("Non capturing repetition"); 1101 } 1102 1103 // This test is for 6358731 1104 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1105 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1106 Matcher matcher = pattern.matcher("abcd"); 1107 if (!matcher.matches() || 1108 matcher.group(1) != null || 1109 !matcher.group(2).equals("abcd")) { 1110 failCount++; 1111 } 1112 report("Not captured GroupCurly"); 1113 } 1114 1115 // This test is for 4706545 1116 private static void javaCharClassTest() throws Exception { 1117 for (int i=0; i<1000; i++) { 1118 char c = (char)generator.nextInt(); 1119 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1120 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1121 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1122 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1123 check("{javaDigit}", c, Character.isDigit(c)); 1124 check("{javaDefined}", c, Character.isDefined(c)); 1125 check("{javaLetter}", c, Character.isLetter(c)); 1126 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1127 check("{javaJavaIdentifierStart}", c, 1128 Character.isJavaIdentifierStart(c)); 1129 check("{javaJavaIdentifierPart}", c, 1130 Character.isJavaIdentifierPart(c)); 1131 check("{javaUnicodeIdentifierStart}", c, 1132 Character.isUnicodeIdentifierStart(c)); 1133 check("{javaUnicodeIdentifierPart}", c, 1134 Character.isUnicodeIdentifierPart(c)); 1135 check("{javaIdentifierIgnorable}", c, 1136 Character.isIdentifierIgnorable(c)); 1137 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1138 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1139 check("{javaISOControl}", c, Character.isISOControl(c)); 1140 check("{javaMirrored}", c, Character.isMirrored(c)); 1141 1142 } 1143 1144 // Supplementary character test 1145 for (int i=0; i<1000; i++) { 1146 int c = generator.nextInt(Character.MAX_CODE_POINT 1147 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1148 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1149 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1150 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1151 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1152 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1153 check("{javaDigit}", c, Character.isDigit(c)); 1154 check("{javaDefined}", c, Character.isDefined(c)); 1155 check("{javaLetter}", c, Character.isLetter(c)); 1156 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1157 check("{javaJavaIdentifierStart}", c, 1158 Character.isJavaIdentifierStart(c)); 1159 check("{javaJavaIdentifierPart}", c, 1160 Character.isJavaIdentifierPart(c)); 1161 check("{javaUnicodeIdentifierStart}", c, 1162 Character.isUnicodeIdentifierStart(c)); 1163 check("{javaUnicodeIdentifierPart}", c, 1164 Character.isUnicodeIdentifierPart(c)); 1165 check("{javaIdentifierIgnorable}", c, 1166 Character.isIdentifierIgnorable(c)); 1167 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1168 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1169 check("{javaISOControl}", c, Character.isISOControl(c)); 1170 check("{javaMirrored}", c, Character.isMirrored(c)); 1171 } 1172 1173 report("Java character classes"); 1174 } 1175 1176 // This test is for 4523620 1177 /* 1178 private static void numOccurrencesTest() throws Exception { 1179 Pattern pattern = Pattern.compile("aaa"); 1180 1181 if (pattern.numOccurrences("aaaaaa", false) != 2) 1182 failCount++; 1183 if (pattern.numOccurrences("aaaaaa", true) != 4) 1184 failCount++; 1185 1186 pattern = Pattern.compile("^"); 1187 if (pattern.numOccurrences("aaaaaa", false) != 1) 1188 failCount++; 1189 if (pattern.numOccurrences("aaaaaa", true) != 1) 1190 failCount++; 1191 1192 report("Number of Occurrences"); 1193 } 1194 */ 1195 1196 // This test is for 4776374 1197 private static void caretBetweenTerminatorsTest() throws Exception { 1198 int flags1 = Pattern.DOTALL; 1199 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1200 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1201 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1202 1203 check("^....", flags1, "test\ntest", "test", true); 1204 check(".....^", flags1, "test\ntest", "test", false); 1205 check(".....^", flags1, "test\n", "test", false); 1206 check("....^", flags1, "test\r\n", "test", false); 1207 1208 check("^....", flags2, "test\ntest", "test", true); 1209 check("....^", flags2, "test\ntest", "test", false); 1210 check(".....^", flags2, "test\n", "test", false); 1211 check("....^", flags2, "test\r\n", "test", false); 1212 1213 check("^....", flags3, "test\ntest", "test", true); 1214 check(".....^", flags3, "test\ntest", "test\n", true); 1215 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1216 check(".....^", flags3, "test\n", "test", false); 1217 check(".....^", flags3, "test\r\n", "test", false); 1218 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1219 1220 check("^....", flags4, "test\ntest", "test", true); 1221 check(".....^", flags3, "test\ntest", "test\n", true); 1222 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1223 check(".....^", flags4, "test\n", "test\n", false); 1224 check(".....^", flags4, "test\r\n", "test\r", false); 1225 1226 // Supplementary character test 1227 String t = toSupplementaries("test"); 1228 check("^....", flags1, t+"\n"+t, t, true); 1229 check(".....^", flags1, t+"\n"+t, t, false); 1230 check(".....^", flags1, t+"\n", t, false); 1231 check("....^", flags1, t+"\r\n", t, false); 1232 1233 check("^....", flags2, t+"\n"+t, t, true); 1234 check("....^", flags2, t+"\n"+t, t, false); 1235 check(".....^", flags2, t+"\n", t, false); 1236 check("....^", flags2, t+"\r\n", t, false); 1237 1238 check("^....", flags3, t+"\n"+t, t, true); 1239 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1240 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1241 check(".....^", flags3, t+"\n", t, false); 1242 check(".....^", flags3, t+"\r\n", t, false); 1243 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1244 1245 check("^....", flags4, t+"\n"+t, t, true); 1246 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1247 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1248 check(".....^", flags4, t+"\n", t+"\n", false); 1249 check(".....^", flags4, t+"\r\n", t+"\r", false); 1250 1251 report("Caret between terminators"); 1252 } 1253 1254 // This test is for 4727935 1255 private static void dollarAtEndTest() throws Exception { 1256 int flags1 = Pattern.DOTALL; 1257 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1258 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1259 1260 check("....$", flags1, "test\n", "test", true); 1261 check("....$", flags1, "test\r\n", "test", true); 1262 check(".....$", flags1, "test\n", "test\n", true); 1263 check(".....$", flags1, "test\u0085", "test\u0085", true); 1264 check("....$", flags1, "test\u0085", "test", true); 1265 1266 check("....$", flags2, "test\n", "test", true); 1267 check(".....$", flags2, "test\n", "test\n", true); 1268 check(".....$", flags2, "test\u0085", "test\u0085", true); 1269 check("....$", flags2, "test\u0085", "est\u0085", true); 1270 1271 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1272 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1273 check("....$blah", flags3, "test\nblah", "!!!!", false); 1274 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1275 1276 // Supplementary character test 1277 String t = toSupplementaries("test"); 1278 String b = toSupplementaries("blah"); 1279 check("....$", flags1, t+"\n", t, true); 1280 check("....$", flags1, t+"\r\n", t, true); 1281 check(".....$", flags1, t+"\n", t+"\n", true); 1282 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1283 check("....$", flags1, t+"\u0085", t, true); 1284 1285 check("....$", flags2, t+"\n", t, true); 1286 check(".....$", flags2, t+"\n", t+"\n", true); 1287 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1288 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1289 1290 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1291 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1292 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1293 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1294 1295 report("Dollar at End"); 1296 } 1297 1298 // This test is for 4711773 1299 private static void multilineDollarTest() throws Exception { 1300 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1301 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1302 matcher.find(); 1303 if (matcher.start(0) != 9) 1304 failCount++; 1305 matcher.find(); 1306 if (matcher.start(0) != 20) 1307 failCount++; 1308 1309 // Supplementary character test 1310 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1311 matcher.find(); 1312 if (matcher.start(0) != 9*2) 1313 failCount++; 1314 matcher.find(); 1315 if (matcher.start(0) != 20*2) 1316 failCount++; 1317 1318 report("Multiline Dollar"); 1319 } 1320 1321 private static void reluctantRepetitionTest() throws Exception { 1322 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1323 check(p, "1 word word word 2", true); 1324 check(p, "1 wor wo w 2", true); 1325 check(p, "1 word word 2", true); 1326 check(p, "1 word 2", true); 1327 check(p, "1 wo w w 2", true); 1328 check(p, "1 wo w 2", true); 1329 check(p, "1 wor w 2", true); 1330 1331 p = Pattern.compile("([a-z])+?c"); 1332 Matcher m = p.matcher("ababcdefdec"); 1333 check(m, "ababc"); 1334 1335 // Supplementary character test 1336 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1337 m = p.matcher(toSupplementaries("ababcdefdec")); 1338 check(m, toSupplementaries("ababc")); 1339 1340 report("Reluctant Repetition"); 1341 } 1342 1343 private static void serializeTest() throws Exception { 1344 String patternStr = "(b)"; 1345 String matchStr = "b"; 1346 Pattern pattern = Pattern.compile(patternStr); 1347 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1348 ObjectOutputStream oos = new ObjectOutputStream(baos); 1349 oos.writeObject(pattern); 1350 oos.close(); 1351 ObjectInputStream ois = new ObjectInputStream( 1352 new ByteArrayInputStream(baos.toByteArray())); 1353 Pattern serializedPattern = (Pattern)ois.readObject(); 1354 ois.close(); 1355 Matcher matcher = serializedPattern.matcher(matchStr); 1356 if (!matcher.matches()) 1357 failCount++; 1358 if (matcher.groupCount() != 1) 1359 failCount++; 1360 1361 report("Serialization"); 1362 } 1363 1364 private static void gTest() { 1365 Pattern pattern = Pattern.compile("\\G\\w"); 1366 Matcher matcher = pattern.matcher("abc#x#x"); 1367 matcher.find(); 1368 matcher.find(); 1369 matcher.find(); 1370 if (matcher.find()) 1371 failCount++; 1372 1373 pattern = Pattern.compile("\\GA*"); 1374 matcher = pattern.matcher("1A2AA3"); 1375 matcher.find(); 1376 if (matcher.find()) 1377 failCount++; 1378 1379 pattern = Pattern.compile("\\GA*"); 1380 matcher = pattern.matcher("1A2AA3"); 1381 if (!matcher.find(1)) 1382 failCount++; 1383 matcher.find(); 1384 if (matcher.find()) 1385 failCount++; 1386 1387 report("\\G"); 1388 } 1389 1390 private static void zTest() { 1391 Pattern pattern = Pattern.compile("foo\\Z"); 1392 // Positives 1393 check(pattern, "foo\u0085", true); 1394 check(pattern, "foo\u2028", true); 1395 check(pattern, "foo\u2029", true); 1396 check(pattern, "foo\n", true); 1397 check(pattern, "foo\r", true); 1398 check(pattern, "foo\r\n", true); 1399 // Negatives 1400 check(pattern, "fooo", false); 1401 check(pattern, "foo\n\r", false); 1402 1403 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1404 // Positives 1405 check(pattern, "foo", true); 1406 check(pattern, "foo\n", true); 1407 // Negatives 1408 check(pattern, "foo\r", false); 1409 check(pattern, "foo\u0085", false); 1410 check(pattern, "foo\u2028", false); 1411 check(pattern, "foo\u2029", false); 1412 1413 report("\\Z"); 1414 } 1415 1416 private static void replaceFirstTest() { 1417 Pattern pattern = Pattern.compile("(ab)(c*)"); 1418 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1419 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1420 failCount++; 1421 1422 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1423 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1424 failCount++; 1425 1426 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1427 String result = matcher.replaceFirst("$1"); 1428 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1429 failCount++; 1430 1431 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1432 result = matcher.replaceFirst("$2"); 1433 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1434 failCount++; 1435 1436 pattern = Pattern.compile("a*"); 1437 matcher = pattern.matcher("aaaaaaaaaa"); 1438 if (!matcher.replaceFirst("test").equals("test")) 1439 failCount++; 1440 1441 pattern = Pattern.compile("a+"); 1442 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1443 if (!matcher.replaceFirst("test").equals("zzztest")) 1444 failCount++; 1445 1446 // Supplementary character test 1447 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1448 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1449 if (!matcher.replaceFirst(toSupplementaries("test")) 1450 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1451 failCount++; 1452 1453 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1454 if (!matcher.replaceFirst(toSupplementaries("test")). 1455 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1456 failCount++; 1457 1458 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1459 result = matcher.replaceFirst("$1"); 1460 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1461 failCount++; 1462 1463 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1464 result = matcher.replaceFirst("$2"); 1465 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1466 failCount++; 1467 1468 pattern = Pattern.compile(toSupplementaries("a*")); 1469 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1470 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1471 failCount++; 1472 1473 pattern = Pattern.compile(toSupplementaries("a+")); 1474 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1475 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1476 failCount++; 1477 1478 report("Replace First"); 1479 } 1480 1481 private static void unixLinesTest() { 1482 Pattern pattern = Pattern.compile(".*"); 1483 Matcher matcher = pattern.matcher("aa\u2028blah"); 1484 matcher.find(); 1485 if (!matcher.group(0).equals("aa")) 1486 failCount++; 1487 1488 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1489 matcher = pattern.matcher("aa\u2028blah"); 1490 matcher.find(); 1491 if (!matcher.group(0).equals("aa\u2028blah")) 1492 failCount++; 1493 1494 pattern = Pattern.compile("[az]$", 1495 Pattern.MULTILINE | Pattern.UNIX_LINES); 1496 matcher = pattern.matcher("aa\u2028zz"); 1497 check(matcher, "a\u2028", false); 1498 1499 // Supplementary character test 1500 pattern = Pattern.compile(".*"); 1501 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1502 matcher.find(); 1503 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1504 failCount++; 1505 1506 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1507 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1508 matcher.find(); 1509 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1510 failCount++; 1511 1512 pattern = Pattern.compile(toSupplementaries("[az]$"), 1513 Pattern.MULTILINE | Pattern.UNIX_LINES); 1514 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1515 check(matcher, toSupplementaries("a\u2028"), false); 1516 1517 report("Unix Lines"); 1518 } 1519 1520 private static void commentsTest() { 1521 int flags = Pattern.COMMENTS; 1522 1523 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1524 Matcher matcher = pattern.matcher("aa#aa"); 1525 if (!matcher.matches()) 1526 failCount++; 1527 1528 pattern = Pattern.compile("aa # blah", flags); 1529 matcher = pattern.matcher("aa"); 1530 if (!matcher.matches()) 1531 failCount++; 1532 1533 pattern = Pattern.compile("aa blah", flags); 1534 matcher = pattern.matcher("aablah"); 1535 if (!matcher.matches()) 1536 failCount++; 1537 1538 pattern = Pattern.compile("aa # blah blech ", flags); 1539 matcher = pattern.matcher("aa"); 1540 if (!matcher.matches()) 1541 failCount++; 1542 1543 pattern = Pattern.compile("aa # blah\n ", flags); 1544 matcher = pattern.matcher("aa"); 1545 if (!matcher.matches()) 1546 failCount++; 1547 1548 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1549 matcher = pattern.matcher("aabc"); 1550 if (!matcher.matches()) 1551 failCount++; 1552 1553 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1554 matcher = pattern.matcher("aabc"); 1555 if (!matcher.matches()) 1556 failCount++; 1557 1558 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1559 matcher = pattern.matcher("aabc#blech"); 1560 if (!matcher.matches()) 1561 failCount++; 1562 1563 // Supplementary character test 1564 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1565 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1566 if (!matcher.matches()) 1567 failCount++; 1568 1569 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1570 matcher = pattern.matcher(toSupplementaries("aa")); 1571 if (!matcher.matches()) 1572 failCount++; 1573 1574 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1575 matcher = pattern.matcher(toSupplementaries("aablah")); 1576 if (!matcher.matches()) 1577 failCount++; 1578 1579 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1580 matcher = pattern.matcher(toSupplementaries("aa")); 1581 if (!matcher.matches()) 1582 failCount++; 1583 1584 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1585 matcher = pattern.matcher(toSupplementaries("aa")); 1586 if (!matcher.matches()) 1587 failCount++; 1588 1589 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1590 matcher = pattern.matcher(toSupplementaries("aabc")); 1591 if (!matcher.matches()) 1592 failCount++; 1593 1594 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1595 matcher = pattern.matcher(toSupplementaries("aabc")); 1596 if (!matcher.matches()) 1597 failCount++; 1598 1599 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1600 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1601 if (!matcher.matches()) 1602 failCount++; 1603 1604 report("Comments"); 1605 } 1606 1607 private static void caseFoldingTest() { // bug 4504687 1608 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1609 Pattern pattern = Pattern.compile("aa", flags); 1610 Matcher matcher = pattern.matcher("ab"); 1611 if (matcher.matches()) 1612 failCount++; 1613 1614 pattern = Pattern.compile("aA", flags); 1615 matcher = pattern.matcher("ab"); 1616 if (matcher.matches()) 1617 failCount++; 1618 1619 pattern = Pattern.compile("aa", flags); 1620 matcher = pattern.matcher("aB"); 1621 if (matcher.matches()) 1622 failCount++; 1623 matcher = pattern.matcher("Ab"); 1624 if (matcher.matches()) 1625 failCount++; 1626 1627 // ASCII "a" 1628 // Latin-1 Supplement "a" + grave 1629 // Cyrillic "a" 1630 String[] patterns = new String[] { 1631 //single 1632 "a", "\u00e0", "\u0430", 1633 //slice 1634 "ab", "\u00e0\u00e1", "\u0430\u0431", 1635 //class single 1636 "[a]", "[\u00e0]", "[\u0430]", 1637 //class range 1638 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1639 //back reference 1640 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1641 }; 1642 1643 String[] texts = new String[] { 1644 "A", "\u00c0", "\u0410", 1645 "AB", "\u00c0\u00c1", "\u0410\u0411", 1646 "A", "\u00c0", "\u0410", 1647 "B", "\u00c2", "\u0411", 1648 "aA", "\u00e0\u00c0", "\u0430\u0410" 1649 }; 1650 1651 boolean[] expected = new boolean[] { 1652 true, false, false, 1653 true, false, false, 1654 true, false, false, 1655 true, false, false, 1656 true, false, false 1657 }; 1658 1659 flags = Pattern.CASE_INSENSITIVE; 1660 for (int i = 0; i < patterns.length; i++) { 1661 pattern = Pattern.compile(patterns[i], flags); 1662 matcher = pattern.matcher(texts[i]); 1663 if (matcher.matches() != expected[i]) { 1664 System.out.println("<1> Failed at " + i); 1665 failCount++; 1666 } 1667 } 1668 1669 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1670 for (int i = 0; i < patterns.length; i++) { 1671 pattern = Pattern.compile(patterns[i], flags); 1672 matcher = pattern.matcher(texts[i]); 1673 if (!matcher.matches()) { 1674 System.out.println("<2> Failed at " + i); 1675 failCount++; 1676 } 1677 } 1678 // flag unicode_case alone should do nothing 1679 flags = Pattern.UNICODE_CASE; 1680 for (int i = 0; i < patterns.length; i++) { 1681 pattern = Pattern.compile(patterns[i], flags); 1682 matcher = pattern.matcher(texts[i]); 1683 if (matcher.matches()) { 1684 System.out.println("<3> Failed at " + i); 1685 failCount++; 1686 } 1687 } 1688 1689 // Special cases: i, I, u+0131 and u+0130 1690 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1691 pattern = Pattern.compile("[h-j]+", flags); 1692 if (!pattern.matcher("\u0131\u0130").matches()) 1693 failCount++; 1694 report("Case Folding"); 1695 } 1696 1697 private static void appendTest() { 1698 Pattern pattern = Pattern.compile("(ab)(cd)"); 1699 Matcher matcher = pattern.matcher("abcd"); 1700 String result = matcher.replaceAll("$2$1"); 1701 if (!result.equals("cdab")) 1702 failCount++; 1703 1704 String s1 = "Swap all: first = 123, second = 456"; 1705 String s2 = "Swap one: first = 123, second = 456"; 1706 String r = "$3$2$1"; 1707 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1708 matcher = pattern.matcher(s1); 1709 1710 result = matcher.replaceAll(r); 1711 if (!result.equals("Swap all: 123 = first, 456 = second")) 1712 failCount++; 1713 1714 matcher = pattern.matcher(s2); 1715 1716 if (matcher.find()) { 1717 StringBuffer sb = new StringBuffer(); 1718 matcher.appendReplacement(sb, r); 1719 matcher.appendTail(sb); 1720 result = sb.toString(); 1721 if (!result.equals("Swap one: 123 = first, second = 456")) 1722 failCount++; 1723 } 1724 1725 // Supplementary character test 1726 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1727 matcher = pattern.matcher(toSupplementaries("abcd")); 1728 result = matcher.replaceAll("$2$1"); 1729 if (!result.equals(toSupplementaries("cdab"))) 1730 failCount++; 1731 1732 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1733 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1734 r = toSupplementaries("$3$2$1"); 1735 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1736 matcher = pattern.matcher(s1); 1737 1738 result = matcher.replaceAll(r); 1739 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1740 failCount++; 1741 1742 matcher = pattern.matcher(s2); 1743 1744 if (matcher.find()) { 1745 StringBuffer sb = new StringBuffer(); 1746 matcher.appendReplacement(sb, r); 1747 matcher.appendTail(sb); 1748 result = sb.toString(); 1749 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1750 failCount++; 1751 } 1752 report("Append"); 1753 } 1754 1755 private static void splitTest() { 1756 Pattern pattern = Pattern.compile(":"); 1757 String[] result = pattern.split("foo:and:boo", 2); 1758 if (!result[0].equals("foo")) 1759 failCount++; 1760 if (!result[1].equals("and:boo")) 1761 failCount++; 1762 // Supplementary character test 1763 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1764 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1765 if (!result[0].equals(toSupplementaries("foo"))) 1766 failCount++; 1767 if (!result[1].equals(toSupplementaries("andXboo"))) 1768 failCount++; 1769 1770 CharBuffer cb = CharBuffer.allocate(100); 1771 cb.put("foo:and:boo"); 1772 cb.flip(); 1773 result = pattern.split(cb); 1774 if (!result[0].equals("foo")) 1775 failCount++; 1776 if (!result[1].equals("and")) 1777 failCount++; 1778 if (!result[2].equals("boo")) 1779 failCount++; 1780 1781 // Supplementary character test 1782 CharBuffer cbs = CharBuffer.allocate(100); 1783 cbs.put(toSupplementaries("fooXandXboo")); 1784 cbs.flip(); 1785 result = patternX.split(cbs); 1786 if (!result[0].equals(toSupplementaries("foo"))) 1787 failCount++; 1788 if (!result[1].equals(toSupplementaries("and"))) 1789 failCount++; 1790 if (!result[2].equals(toSupplementaries("boo"))) 1791 failCount++; 1792 1793 String source = "0123456789"; 1794 for (int limit=-2; limit<3; limit++) { 1795 for (int x=0; x<10; x++) { 1796 result = source.split(Integer.toString(x), limit); 1797 int expectedLength = limit < 1 ? 2 : limit; 1798 1799 if ((limit == 0) && (x == 9)) { 1800 // expected dropping of "" 1801 if (result.length != 1) 1802 failCount++; 1803 if (!result[0].equals("012345678")) { 1804 failCount++; 1805 } 1806 } else { 1807 if (result.length != expectedLength) { 1808 failCount++; 1809 } 1810 if (!result[0].equals(source.substring(0,x))) { 1811 if (limit != 1) { 1812 failCount++; 1813 } else { 1814 if (!result[0].equals(source.substring(0,10))) { 1815 failCount++; 1816 } 1817 } 1818 } 1819 if (expectedLength > 1) { // Check segment 2 1820 if (!result[1].equals(source.substring(x+1,10))) 1821 failCount++; 1822 } 1823 } 1824 } 1825 } 1826 // Check the case for no match found 1827 for (int limit=-2; limit<3; limit++) { 1828 result = source.split("e", limit); 1829 if (result.length != 1) 1830 failCount++; 1831 if (!result[0].equals(source)) 1832 failCount++; 1833 } 1834 // Check the case for limit == 0, source = ""; 1835 // split() now returns 0-length for empty source "" see #6559590 1836 source = ""; 1837 result = source.split("e", 0); 1838 if (result.length != 1) 1839 failCount++; 1840 if (!result[0].equals(source)) 1841 failCount++; 1842 1843 // Check both split() and splitAsStraem(), especially for zero-lenth 1844 // input and zero-lenth match cases 1845 String[][] input = new String[][] { 1846 { " ", "Abc Efg Hij" }, // normal non-zero-match 1847 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1848 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1849 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1850 { "(?=\\p{Lu})", "AbcEfg" }, 1851 { "(?=\\p{Lu})", "Abc" }, 1852 { " ", "" }, // zero-length input 1853 { ".*", "" }, 1854 1855 // some tests from PatternStreamTest.java 1856 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1857 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1858 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1859 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1860 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1861 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1862 { "\u56da", "" }, 1863 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1864 { "o", "boo:and:foo" }, 1865 { "o", "booooo:and:fooooo" }, 1866 { "o", "fooooo:" }, 1867 }; 1868 1869 String[][] expected = new String[][] { 1870 { "Abc", "Efg", "Hij" }, 1871 { "", "Abc", "Efg", "Hij" }, 1872 { "Abc", "", "Efg", "Hij" }, 1873 { "Abc", "Efg", "Hij" }, 1874 { "Abc", "Efg" }, 1875 { "Abc" }, 1876 { "" }, 1877 { "" }, 1878 1879 { "awgqwefg1fefw", "vssv1vvv1" }, 1880 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1881 { "awgqwefg", "fefw4vssv", "vvv" }, 1882 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1883 { "1", "23", "456", "7890" }, 1884 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1885 { "" }, 1886 { "This", "is", "testing", "", "with", "different", "separators" }, 1887 { "b", "", ":and:f" }, 1888 { "b", "", "", "", "", ":and:f" }, 1889 { "f", "", "", "", "", ":" }, 1890 }; 1891 for (int i = 0; i < input.length; i++) { 1892 pattern = Pattern.compile(input[i][0]); 1893 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1894 failCount++; 1895 } 1896 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1897 // array for zero-length input for now 1898 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1899 expected[i])) { 1900 failCount++; 1901 } 1902 } 1903 report("Split"); 1904 } 1905 1906 private static void negationTest() { 1907 Pattern pattern = Pattern.compile("[\\[@^]+"); 1908 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1909 if (!matcher.find()) 1910 failCount++; 1911 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1912 failCount++; 1913 pattern = Pattern.compile("[@\\[^]+"); 1914 matcher = pattern.matcher("@@@@[[[[^^^^"); 1915 if (!matcher.find()) 1916 failCount++; 1917 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1918 failCount++; 1919 pattern = Pattern.compile("[@\\[^@]+"); 1920 matcher = pattern.matcher("@@@@[[[[^^^^"); 1921 if (!matcher.find()) 1922 failCount++; 1923 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1924 failCount++; 1925 1926 pattern = Pattern.compile("\\)"); 1927 matcher = pattern.matcher("xxx)xxx"); 1928 if (!matcher.find()) 1929 failCount++; 1930 1931 report("Negation"); 1932 } 1933 1934 private static void ampersandTest() { 1935 Pattern pattern = Pattern.compile("[&@]+"); 1936 check(pattern, "@@@@&&&&", true); 1937 1938 pattern = Pattern.compile("[@&]+"); 1939 check(pattern, "@@@@&&&&", true); 1940 1941 pattern = Pattern.compile("[@\\&]+"); 1942 check(pattern, "@@@@&&&&", true); 1943 1944 report("Ampersand"); 1945 } 1946 1947 private static void octalTest() throws Exception { 1948 Pattern pattern = Pattern.compile("\\u0007"); 1949 Matcher matcher = pattern.matcher("\u0007"); 1950 if (!matcher.matches()) 1951 failCount++; 1952 pattern = Pattern.compile("\\07"); 1953 matcher = pattern.matcher("\u0007"); 1954 if (!matcher.matches()) 1955 failCount++; 1956 pattern = Pattern.compile("\\007"); 1957 matcher = pattern.matcher("\u0007"); 1958 if (!matcher.matches()) 1959 failCount++; 1960 pattern = Pattern.compile("\\0007"); 1961 matcher = pattern.matcher("\u0007"); 1962 if (!matcher.matches()) 1963 failCount++; 1964 pattern = Pattern.compile("\\040"); 1965 matcher = pattern.matcher("\u0020"); 1966 if (!matcher.matches()) 1967 failCount++; 1968 pattern = Pattern.compile("\\0403"); 1969 matcher = pattern.matcher("\u00203"); 1970 if (!matcher.matches()) 1971 failCount++; 1972 pattern = Pattern.compile("\\0103"); 1973 matcher = pattern.matcher("\u0043"); 1974 if (!matcher.matches()) 1975 failCount++; 1976 1977 report("Octal"); 1978 } 1979 1980 private static void longPatternTest() throws Exception { 1981 try { 1982 Pattern pattern = Pattern.compile( 1983 "a 32-character-long pattern xxxx"); 1984 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 1985 pattern = Pattern.compile("a thirty four character long regex"); 1986 StringBuffer patternToBe = new StringBuffer(101); 1987 for (int i=0; i<100; i++) 1988 patternToBe.append((char)(97 + i%26)); 1989 pattern = Pattern.compile(patternToBe.toString()); 1990 } catch (PatternSyntaxException e) { 1991 failCount++; 1992 } 1993 1994 // Supplementary character test 1995 try { 1996 Pattern pattern = Pattern.compile( 1997 toSupplementaries("a 32-character-long pattern xxxx")); 1998 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 1999 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 2000 StringBuffer patternToBe = new StringBuffer(101*2); 2001 for (int i=0; i<100; i++) 2002 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 2003 + 97 + i%26)); 2004 pattern = Pattern.compile(patternToBe.toString()); 2005 } catch (PatternSyntaxException e) { 2006 failCount++; 2007 } 2008 report("LongPattern"); 2009 } 2010 2011 private static void group0Test() throws Exception { 2012 Pattern pattern = Pattern.compile("(tes)ting"); 2013 Matcher matcher = pattern.matcher("testing"); 2014 check(matcher, "testing"); 2015 2016 matcher.reset("testing"); 2017 if (matcher.lookingAt()) { 2018 if (!matcher.group(0).equals("testing")) 2019 failCount++; 2020 } else { 2021 failCount++; 2022 } 2023 2024 matcher.reset("testing"); 2025 if (matcher.matches()) { 2026 if (!matcher.group(0).equals("testing")) 2027 failCount++; 2028 } else { 2029 failCount++; 2030 } 2031 2032 pattern = Pattern.compile("(tes)ting"); 2033 matcher = pattern.matcher("testing"); 2034 if (matcher.lookingAt()) { 2035 if (!matcher.group(0).equals("testing")) 2036 failCount++; 2037 } else { 2038 failCount++; 2039 } 2040 2041 pattern = Pattern.compile("^(tes)ting"); 2042 matcher = pattern.matcher("testing"); 2043 if (matcher.matches()) { 2044 if (!matcher.group(0).equals("testing")) 2045 failCount++; 2046 } else { 2047 failCount++; 2048 } 2049 2050 // Supplementary character test 2051 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2052 matcher = pattern.matcher(toSupplementaries("testing")); 2053 check(matcher, toSupplementaries("testing")); 2054 2055 matcher.reset(toSupplementaries("testing")); 2056 if (matcher.lookingAt()) { 2057 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2058 failCount++; 2059 } else { 2060 failCount++; 2061 } 2062 2063 matcher.reset(toSupplementaries("testing")); 2064 if (matcher.matches()) { 2065 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2066 failCount++; 2067 } else { 2068 failCount++; 2069 } 2070 2071 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2072 matcher = pattern.matcher(toSupplementaries("testing")); 2073 if (matcher.lookingAt()) { 2074 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2075 failCount++; 2076 } else { 2077 failCount++; 2078 } 2079 2080 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2081 matcher = pattern.matcher(toSupplementaries("testing")); 2082 if (matcher.matches()) { 2083 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2084 failCount++; 2085 } else { 2086 failCount++; 2087 } 2088 2089 report("Group0"); 2090 } 2091 2092 private static void findIntTest() throws Exception { 2093 Pattern p = Pattern.compile("blah"); 2094 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2095 boolean result = m.find(2); 2096 if (!result) 2097 failCount++; 2098 2099 p = Pattern.compile("$"); 2100 m = p.matcher("1234567890"); 2101 result = m.find(10); 2102 if (!result) 2103 failCount++; 2104 try { 2105 result = m.find(11); 2106 failCount++; 2107 } catch (IndexOutOfBoundsException e) { 2108 // correct result 2109 } 2110 2111 // Supplementary character test 2112 p = Pattern.compile(toSupplementaries("blah")); 2113 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2114 result = m.find(2); 2115 if (!result) 2116 failCount++; 2117 2118 report("FindInt"); 2119 } 2120 2121 private static void emptyPatternTest() throws Exception { 2122 Pattern p = Pattern.compile(""); 2123 Matcher m = p.matcher("foo"); 2124 2125 // Should find empty pattern at beginning of input 2126 boolean result = m.find(); 2127 if (result != true) 2128 failCount++; 2129 if (m.start() != 0) 2130 failCount++; 2131 2132 // Should not match entire input if input is not empty 2133 m.reset(); 2134 result = m.matches(); 2135 if (result == true) 2136 failCount++; 2137 2138 try { 2139 m.start(0); 2140 failCount++; 2141 } catch (IllegalStateException e) { 2142 // Correct result 2143 } 2144 2145 // Should match entire input if input is empty 2146 m.reset(""); 2147 result = m.matches(); 2148 if (result != true) 2149 failCount++; 2150 2151 result = Pattern.matches("", ""); 2152 if (result != true) 2153 failCount++; 2154 2155 result = Pattern.matches("", "foo"); 2156 if (result == true) 2157 failCount++; 2158 report("EmptyPattern"); 2159 } 2160 2161 private static void charClassTest() throws Exception { 2162 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2163 check(pattern, "blahb]blech", true); 2164 2165 pattern = Pattern.compile("[abc[def]]"); 2166 check(pattern, "b", true); 2167 2168 // Supplementary character tests 2169 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2170 check(pattern, toSupplementaries("blahb]blech"), true); 2171 2172 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2173 check(pattern, toSupplementaries("b"), true); 2174 2175 try { 2176 // u00ff when UNICODE_CASE 2177 pattern = Pattern.compile("[ab\u00ffcd]", 2178 Pattern.CASE_INSENSITIVE| 2179 Pattern.UNICODE_CASE); 2180 check(pattern, "ab\u00ffcd", true); 2181 check(pattern, "Ab\u0178Cd", true); 2182 2183 // u00b5 when UNICODE_CASE 2184 pattern = Pattern.compile("[ab\u00b5cd]", 2185 Pattern.CASE_INSENSITIVE| 2186 Pattern.UNICODE_CASE); 2187 check(pattern, "ab\u00b5cd", true); 2188 check(pattern, "Ab\u039cCd", true); 2189 } catch (Exception e) { failCount++; } 2190 2191 /* Special cases 2192 (1)LatinSmallLetterLongS u+017f 2193 (2)LatinSmallLetterDotlessI u+0131 2194 (3)LatineCapitalLetterIWithDotAbove u+0130 2195 (4)KelvinSign u+212a 2196 (5)AngstromSign u+212b 2197 */ 2198 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2199 pattern = Pattern.compile("[sik\u00c5]+", flags); 2200 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2201 failCount++; 2202 2203 report("CharClass"); 2204 } 2205 2206 private static void caretTest() throws Exception { 2207 Pattern pattern = Pattern.compile("\\w*"); 2208 Matcher matcher = pattern.matcher("a#bc#def##g"); 2209 check(matcher, "a"); 2210 check(matcher, ""); 2211 check(matcher, "bc"); 2212 check(matcher, ""); 2213 check(matcher, "def"); 2214 check(matcher, ""); 2215 check(matcher, ""); 2216 check(matcher, "g"); 2217 check(matcher, ""); 2218 if (matcher.find()) 2219 failCount++; 2220 2221 pattern = Pattern.compile("^\\w*"); 2222 matcher = pattern.matcher("a#bc#def##g"); 2223 check(matcher, "a"); 2224 if (matcher.find()) 2225 failCount++; 2226 2227 pattern = Pattern.compile("\\w"); 2228 matcher = pattern.matcher("abc##x"); 2229 check(matcher, "a"); 2230 check(matcher, "b"); 2231 check(matcher, "c"); 2232 check(matcher, "x"); 2233 if (matcher.find()) 2234 failCount++; 2235 2236 pattern = Pattern.compile("^\\w"); 2237 matcher = pattern.matcher("abc##x"); 2238 check(matcher, "a"); 2239 if (matcher.find()) 2240 failCount++; 2241 2242 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2243 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2244 check(matcher, "abc"); 2245 if (matcher.find()) 2246 failCount++; 2247 2248 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2249 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2250 check(matcher, "abc"); 2251 check(matcher, "jkl"); 2252 if (matcher.find()) 2253 failCount++; 2254 2255 pattern = Pattern.compile("^", Pattern.MULTILINE); 2256 matcher = pattern.matcher("this is some text"); 2257 String result = matcher.replaceAll("X"); 2258 if (!result.equals("Xthis is some text")) 2259 failCount++; 2260 2261 pattern = Pattern.compile("^"); 2262 matcher = pattern.matcher("this is some text"); 2263 result = matcher.replaceAll("X"); 2264 if (!result.equals("Xthis is some text")) 2265 failCount++; 2266 2267 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2268 matcher = pattern.matcher("this is some text\n"); 2269 result = matcher.replaceAll("X"); 2270 if (!result.equals("Xthis is some text\n")) 2271 failCount++; 2272 2273 report("Caret"); 2274 } 2275 2276 private static void groupCaptureTest() throws Exception { 2277 // Independent group 2278 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2279 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2280 matcher.find(); 2281 try { 2282 String blah = matcher.group(1); 2283 failCount++; 2284 } catch (IndexOutOfBoundsException ioobe) { 2285 // Good result 2286 } 2287 // Pure group 2288 pattern = Pattern.compile("x+(?:y+)z+"); 2289 matcher = pattern.matcher("xxxyyyzzz"); 2290 matcher.find(); 2291 try { 2292 String blah = matcher.group(1); 2293 failCount++; 2294 } catch (IndexOutOfBoundsException ioobe) { 2295 // Good result 2296 } 2297 2298 // Supplementary character tests 2299 // Independent group 2300 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2301 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2302 matcher.find(); 2303 try { 2304 String blah = matcher.group(1); 2305 failCount++; 2306 } catch (IndexOutOfBoundsException ioobe) { 2307 // Good result 2308 } 2309 // Pure group 2310 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2311 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2312 matcher.find(); 2313 try { 2314 String blah = matcher.group(1); 2315 failCount++; 2316 } catch (IndexOutOfBoundsException ioobe) { 2317 // Good result 2318 } 2319 2320 report("GroupCapture"); 2321 } 2322 2323 private static void backRefTest() throws Exception { 2324 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2325 check(pattern, "zzzaabcazzz", true); 2326 2327 pattern = Pattern.compile("(a*)bc\\1"); 2328 check(pattern, "zzzaabcaazzz", true); 2329 2330 pattern = Pattern.compile("(abc)(def)\\1"); 2331 check(pattern, "abcdefabc", true); 2332 2333 pattern = Pattern.compile("(abc)(def)\\3"); 2334 check(pattern, "abcdefabc", false); 2335 2336 try { 2337 for (int i = 1; i < 10; i++) { 2338 // Make sure backref 1-9 are always accepted 2339 pattern = Pattern.compile("abcdef\\" + i); 2340 // and fail to match if the target group does not exit 2341 check(pattern, "abcdef", false); 2342 } 2343 } catch(PatternSyntaxException e) { 2344 failCount++; 2345 } 2346 2347 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2348 check(pattern, "abcdefghija", false); 2349 check(pattern, "abcdefghija1", true); 2350 2351 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2352 check(pattern, "abcdefghijkk", true); 2353 2354 pattern = Pattern.compile("(a)bcdefghij\\11"); 2355 check(pattern, "abcdefghija1", true); 2356 2357 // Supplementary character tests 2358 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2359 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2360 2361 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2362 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2363 2364 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2365 check(pattern, toSupplementaries("abcdefabc"), true); 2366 2367 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2368 check(pattern, toSupplementaries("abcdefabc"), false); 2369 2370 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2371 check(pattern, toSupplementaries("abcdefghija"), false); 2372 check(pattern, toSupplementaries("abcdefghija1"), true); 2373 2374 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2375 check(pattern, toSupplementaries("abcdefghijkk"), true); 2376 2377 report("BackRef"); 2378 } 2379 2380 /** 2381 * Unicode Technical Report #18, section 2.6 End of Line 2382 * There is no empty line to be matched in the sequence \u000D\u000A 2383 * but there is an empty line in the sequence \u000A\u000D. 2384 */ 2385 private static void anchorTest() throws Exception { 2386 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2387 Matcher m = p.matcher("blah1\r\nblah2"); 2388 m.find(); 2389 m.find(); 2390 if (!m.group().equals("blah2")) 2391 failCount++; 2392 2393 m.reset("blah1\n\rblah2"); 2394 m.find(); 2395 m.find(); 2396 m.find(); 2397 if (!m.group().equals("blah2")) 2398 failCount++; 2399 2400 // Test behavior of $ with \r\n at end of input 2401 p = Pattern.compile(".+$"); 2402 m = p.matcher("blah1\r\n"); 2403 if (!m.find()) 2404 failCount++; 2405 if (!m.group().equals("blah1")) 2406 failCount++; 2407 if (m.find()) 2408 failCount++; 2409 2410 // Test behavior of $ with \r\n at end of input in multiline 2411 p = Pattern.compile(".+$", Pattern.MULTILINE); 2412 m = p.matcher("blah1\r\n"); 2413 if (!m.find()) 2414 failCount++; 2415 if (m.find()) 2416 failCount++; 2417 2418 // Test for $ recognition of \u0085 for bug 4527731 2419 p = Pattern.compile(".+$", Pattern.MULTILINE); 2420 m = p.matcher("blah1\u0085"); 2421 if (!m.find()) 2422 failCount++; 2423 2424 // Supplementary character test 2425 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2426 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2427 m.find(); 2428 m.find(); 2429 if (!m.group().equals(toSupplementaries("blah2"))) 2430 failCount++; 2431 2432 m.reset(toSupplementaries("blah1\n\rblah2")); 2433 m.find(); 2434 m.find(); 2435 m.find(); 2436 if (!m.group().equals(toSupplementaries("blah2"))) 2437 failCount++; 2438 2439 // Test behavior of $ with \r\n at end of input 2440 p = Pattern.compile(".+$"); 2441 m = p.matcher(toSupplementaries("blah1\r\n")); 2442 if (!m.find()) 2443 failCount++; 2444 if (!m.group().equals(toSupplementaries("blah1"))) 2445 failCount++; 2446 if (m.find()) 2447 failCount++; 2448 2449 // Test behavior of $ with \r\n at end of input in multiline 2450 p = Pattern.compile(".+$", Pattern.MULTILINE); 2451 m = p.matcher(toSupplementaries("blah1\r\n")); 2452 if (!m.find()) 2453 failCount++; 2454 if (m.find()) 2455 failCount++; 2456 2457 // Test for $ recognition of \u0085 for bug 4527731 2458 p = Pattern.compile(".+$", Pattern.MULTILINE); 2459 m = p.matcher(toSupplementaries("blah1\u0085")); 2460 if (!m.find()) 2461 failCount++; 2462 2463 report("Anchors"); 2464 } 2465 2466 /** 2467 * A basic sanity test of Matcher.lookingAt(). 2468 */ 2469 private static void lookingAtTest() throws Exception { 2470 Pattern p = Pattern.compile("(ab)(c*)"); 2471 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2472 2473 if (!m.lookingAt()) 2474 failCount++; 2475 2476 if (!m.group().equals(m.group(0))) 2477 failCount++; 2478 2479 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2480 if (m.lookingAt()) 2481 failCount++; 2482 2483 // Supplementary character test 2484 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2485 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2486 2487 if (!m.lookingAt()) 2488 failCount++; 2489 2490 if (!m.group().equals(m.group(0))) 2491 failCount++; 2492 2493 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2494 if (m.lookingAt()) 2495 failCount++; 2496 2497 report("Looking At"); 2498 } 2499 2500 /** 2501 * A basic sanity test of Matcher.matches(). 2502 */ 2503 private static void matchesTest() throws Exception { 2504 // matches() 2505 Pattern p = Pattern.compile("ulb(c*)"); 2506 Matcher m = p.matcher("ulbcccccc"); 2507 if (!m.matches()) 2508 failCount++; 2509 2510 // find() but not matches() 2511 m.reset("zzzulbcccccc"); 2512 if (m.matches()) 2513 failCount++; 2514 2515 // lookingAt() but not matches() 2516 m.reset("ulbccccccdef"); 2517 if (m.matches()) 2518 failCount++; 2519 2520 // matches() 2521 p = Pattern.compile("a|ad"); 2522 m = p.matcher("ad"); 2523 if (!m.matches()) 2524 failCount++; 2525 2526 // Supplementary character test 2527 // matches() 2528 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2529 m = p.matcher(toSupplementaries("ulbcccccc")); 2530 if (!m.matches()) 2531 failCount++; 2532 2533 // find() but not matches() 2534 m.reset(toSupplementaries("zzzulbcccccc")); 2535 if (m.matches()) 2536 failCount++; 2537 2538 // lookingAt() but not matches() 2539 m.reset(toSupplementaries("ulbccccccdef")); 2540 if (m.matches()) 2541 failCount++; 2542 2543 // matches() 2544 p = Pattern.compile(toSupplementaries("a|ad")); 2545 m = p.matcher(toSupplementaries("ad")); 2546 if (!m.matches()) 2547 failCount++; 2548 2549 report("Matches"); 2550 } 2551 2552 /** 2553 * A basic sanity test of Pattern.matches(). 2554 */ 2555 private static void patternMatchesTest() throws Exception { 2556 // matches() 2557 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2558 toSupplementaries("ulbcccccc"))) 2559 failCount++; 2560 2561 // find() but not matches() 2562 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2563 toSupplementaries("zzzulbcccccc"))) 2564 failCount++; 2565 2566 // lookingAt() but not matches() 2567 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2568 toSupplementaries("ulbccccccdef"))) 2569 failCount++; 2570 2571 // Supplementary character test 2572 // matches() 2573 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2574 toSupplementaries("ulbcccccc"))) 2575 failCount++; 2576 2577 // find() but not matches() 2578 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2579 toSupplementaries("zzzulbcccccc"))) 2580 failCount++; 2581 2582 // lookingAt() but not matches() 2583 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2584 toSupplementaries("ulbccccccdef"))) 2585 failCount++; 2586 2587 report("Pattern Matches"); 2588 } 2589 2590 /** 2591 * Canonical equivalence testing. Tests the ability of the engine 2592 * to match sequences that are not explicitly specified in the 2593 * pattern when they are considered equivalent by the Unicode Standard. 2594 */ 2595 private static void ceTest() throws Exception { 2596 // Decomposed char outside char classes 2597 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2598 Matcher m = p.matcher("test\u00e5"); 2599 if (!m.matches()) 2600 failCount++; 2601 2602 m.reset("testa\u030a"); 2603 if (!m.matches()) 2604 failCount++; 2605 2606 // Composed char outside char classes 2607 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2608 m = p.matcher("test\u00e5"); 2609 if (!m.matches()) 2610 failCount++; 2611 2612 m.reset("testa\u030a"); 2613 if (!m.find()) 2614 failCount++; 2615 2616 // Decomposed char inside a char class 2617 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2618 m = p.matcher("test\u00e5"); 2619 if (!m.find()) 2620 failCount++; 2621 2622 m.reset("testa\u030a"); 2623 if (!m.find()) 2624 failCount++; 2625 2626 // Composed char inside a char class 2627 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2628 m = p.matcher("test\u00e5"); 2629 if (!m.find()) 2630 failCount++; 2631 2632 m.reset("testa\u0300"); 2633 if (!m.find()) 2634 failCount++; 2635 2636 m.reset("testa\u030a"); 2637 if (!m.find()) 2638 failCount++; 2639 2640 // Marks that cannot legally change order and be equivalent 2641 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2642 check(p, "testa\u0308\u0300", true); 2643 check(p, "testa\u0300\u0308", false); 2644 2645 // Marks that can legally change order and be equivalent 2646 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2647 check(p, "testa\u0308\u0323", true); 2648 check(p, "testa\u0323\u0308", true); 2649 2650 // Test all equivalences of the sequence a\u0308\u0323\u0300 2651 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2652 check(p, "testa\u0308\u0323\u0300", true); 2653 check(p, "testa\u0323\u0308\u0300", true); 2654 check(p, "testa\u0308\u0300\u0323", true); 2655 check(p, "test\u00e4\u0323\u0300", true); 2656 check(p, "test\u00e4\u0300\u0323", true); 2657 2658 /* 2659 * The following canonical equivalence tests don't work. Bug id: 4916384. 2660 * 2661 // Decomposed hangul (jamos) 2662 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ); 2663 m = p.matcher("\u1100\u1161"); 2664 if (!m.matches()) 2665 failCount++; 2666 2667 m.reset("\uac00"); 2668 if (!m.matches()) 2669 failCount++; 2670 2671 // Composed hangul 2672 p = Pattern.compile("\uac00", Pattern.CANON_EQ); 2673 m = p.matcher("\u1100\u1161"); 2674 if (!m.matches()) 2675 failCount++; 2676 2677 m.reset("\uac00"); 2678 if (!m.matches()) 2679 failCount++; 2680 2681 // Decomposed supplementary outside char classes 2682 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ); 2683 m = p.matcher("test\ud834\uddc0"); 2684 if (!m.matches()) 2685 failCount++; 2686 2687 m.reset("test\ud834\uddbc\ud834\udd6f"); 2688 if (!m.matches()) 2689 failCount++; 2690 2691 // Composed supplementary outside char classes 2692 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ); 2693 m.reset("test\ud834\uddbc\ud834\udd6f"); 2694 if (!m.matches()) 2695 failCount++; 2696 2697 m = p.matcher("test\ud834\uddc0"); 2698 if (!m.matches()) 2699 failCount++; 2700 2701 */ 2702 2703 report("Canonical Equivalence"); 2704 } 2705 2706 /** 2707 * A basic sanity test of Matcher.replaceAll(). 2708 */ 2709 private static void globalSubstitute() throws Exception { 2710 // Global substitution with a literal 2711 Pattern p = Pattern.compile("(ab)(c*)"); 2712 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2713 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2714 failCount++; 2715 2716 m.reset("zzzabccczzzabcczzzabccczzz"); 2717 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2718 failCount++; 2719 2720 // Global substitution with groups 2721 m.reset("zzzabccczzzabcczzzabccczzz"); 2722 String result = m.replaceAll("$1"); 2723 if (!result.equals("zzzabzzzabzzzabzzz")) 2724 failCount++; 2725 2726 // Supplementary character test 2727 // Global substitution with a literal 2728 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2729 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2730 if (!m.replaceAll(toSupplementaries("test")). 2731 equals(toSupplementaries("testzzztestzzztest"))) 2732 failCount++; 2733 2734 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2735 if (!m.replaceAll(toSupplementaries("test")). 2736 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2737 failCount++; 2738 2739 // Global substitution with groups 2740 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2741 result = m.replaceAll("$1"); 2742 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2743 failCount++; 2744 2745 report("Global Substitution"); 2746 } 2747 2748 /** 2749 * Tests the usage of Matcher.appendReplacement() with literal 2750 * and group substitutions. 2751 */ 2752 private static void stringbufferSubstitute() throws Exception { 2753 // SB substitution with literal 2754 String blah = "zzzblahzzz"; 2755 Pattern p = Pattern.compile("blah"); 2756 Matcher m = p.matcher(blah); 2757 StringBuffer result = new StringBuffer(); 2758 try { 2759 m.appendReplacement(result, "blech"); 2760 failCount++; 2761 } catch (IllegalStateException e) { 2762 } 2763 m.find(); 2764 m.appendReplacement(result, "blech"); 2765 if (!result.toString().equals("zzzblech")) 2766 failCount++; 2767 2768 m.appendTail(result); 2769 if (!result.toString().equals("zzzblechzzz")) 2770 failCount++; 2771 2772 // SB substitution with groups 2773 blah = "zzzabcdzzz"; 2774 p = Pattern.compile("(ab)(cd)*"); 2775 m = p.matcher(blah); 2776 result = new StringBuffer(); 2777 try { 2778 m.appendReplacement(result, "$1"); 2779 failCount++; 2780 } catch (IllegalStateException e) { 2781 } 2782 m.find(); 2783 m.appendReplacement(result, "$1"); 2784 if (!result.toString().equals("zzzab")) 2785 failCount++; 2786 2787 m.appendTail(result); 2788 if (!result.toString().equals("zzzabzzz")) 2789 failCount++; 2790 2791 // SB substitution with 3 groups 2792 blah = "zzzabcdcdefzzz"; 2793 p = Pattern.compile("(ab)(cd)*(ef)"); 2794 m = p.matcher(blah); 2795 result = new StringBuffer(); 2796 try { 2797 m.appendReplacement(result, "$1w$2w$3"); 2798 failCount++; 2799 } catch (IllegalStateException e) { 2800 } 2801 m.find(); 2802 m.appendReplacement(result, "$1w$2w$3"); 2803 if (!result.toString().equals("zzzabwcdwef")) 2804 failCount++; 2805 2806 m.appendTail(result); 2807 if (!result.toString().equals("zzzabwcdwefzzz")) 2808 failCount++; 2809 2810 // SB substitution with groups and three matches 2811 // skipping middle match 2812 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2813 p = Pattern.compile("(ab)(cd*)"); 2814 m = p.matcher(blah); 2815 result = new StringBuffer(); 2816 try { 2817 m.appendReplacement(result, "$1"); 2818 failCount++; 2819 } catch (IllegalStateException e) { 2820 } 2821 m.find(); 2822 m.appendReplacement(result, "$1"); 2823 if (!result.toString().equals("zzzab")) 2824 failCount++; 2825 2826 m.find(); 2827 m.find(); 2828 m.appendReplacement(result, "$2"); 2829 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2830 failCount++; 2831 2832 m.appendTail(result); 2833 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2834 failCount++; 2835 2836 // Check to make sure escaped $ is ignored 2837 blah = "zzzabcdcdefzzz"; 2838 p = Pattern.compile("(ab)(cd)*(ef)"); 2839 m = p.matcher(blah); 2840 result = new StringBuffer(); 2841 m.find(); 2842 m.appendReplacement(result, "$1w\\$2w$3"); 2843 if (!result.toString().equals("zzzabw$2wef")) 2844 failCount++; 2845 2846 m.appendTail(result); 2847 if (!result.toString().equals("zzzabw$2wefzzz")) 2848 failCount++; 2849 2850 // Check to make sure a reference to nonexistent group causes error 2851 blah = "zzzabcdcdefzzz"; 2852 p = Pattern.compile("(ab)(cd)*(ef)"); 2853 m = p.matcher(blah); 2854 result = new StringBuffer(); 2855 m.find(); 2856 try { 2857 m.appendReplacement(result, "$1w$5w$3"); 2858 failCount++; 2859 } catch (IndexOutOfBoundsException ioobe) { 2860 // Correct result 2861 } 2862 2863 // Check double digit group references 2864 blah = "zzz123456789101112zzz"; 2865 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2866 m = p.matcher(blah); 2867 result = new StringBuffer(); 2868 m.find(); 2869 m.appendReplacement(result, "$1w$11w$3"); 2870 if (!result.toString().equals("zzz1w11w3")) 2871 failCount++; 2872 2873 // Check to make sure it backs off $15 to $1 if only three groups 2874 blah = "zzzabcdcdefzzz"; 2875 p = Pattern.compile("(ab)(cd)*(ef)"); 2876 m = p.matcher(blah); 2877 result = new StringBuffer(); 2878 m.find(); 2879 m.appendReplacement(result, "$1w$15w$3"); 2880 if (!result.toString().equals("zzzabwab5wef")) 2881 failCount++; 2882 2883 2884 // Supplementary character test 2885 // SB substitution with literal 2886 blah = toSupplementaries("zzzblahzzz"); 2887 p = Pattern.compile(toSupplementaries("blah")); 2888 m = p.matcher(blah); 2889 result = new StringBuffer(); 2890 try { 2891 m.appendReplacement(result, toSupplementaries("blech")); 2892 failCount++; 2893 } catch (IllegalStateException e) { 2894 } 2895 m.find(); 2896 m.appendReplacement(result, toSupplementaries("blech")); 2897 if (!result.toString().equals(toSupplementaries("zzzblech"))) 2898 failCount++; 2899 2900 m.appendTail(result); 2901 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 2902 failCount++; 2903 2904 // SB substitution with groups 2905 blah = toSupplementaries("zzzabcdzzz"); 2906 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 2907 m = p.matcher(blah); 2908 result = new StringBuffer(); 2909 try { 2910 m.appendReplacement(result, "$1"); 2911 failCount++; 2912 } catch (IllegalStateException e) { 2913 } 2914 m.find(); 2915 m.appendReplacement(result, "$1"); 2916 if (!result.toString().equals(toSupplementaries("zzzab"))) 2917 failCount++; 2918 2919 m.appendTail(result); 2920 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 2921 failCount++; 2922 2923 // SB substitution with 3 groups 2924 blah = toSupplementaries("zzzabcdcdefzzz"); 2925 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2926 m = p.matcher(blah); 2927 result = new StringBuffer(); 2928 try { 2929 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2930 failCount++; 2931 } catch (IllegalStateException e) { 2932 } 2933 m.find(); 2934 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2935 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 2936 failCount++; 2937 2938 m.appendTail(result); 2939 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 2940 failCount++; 2941 2942 // SB substitution with groups and three matches 2943 // skipping middle match 2944 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 2945 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 2946 m = p.matcher(blah); 2947 result = new StringBuffer(); 2948 try { 2949 m.appendReplacement(result, "$1"); 2950 failCount++; 2951 } catch (IllegalStateException e) { 2952 } 2953 m.find(); 2954 m.appendReplacement(result, "$1"); 2955 if (!result.toString().equals(toSupplementaries("zzzab"))) 2956 failCount++; 2957 2958 m.find(); 2959 m.find(); 2960 m.appendReplacement(result, "$2"); 2961 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 2962 failCount++; 2963 2964 m.appendTail(result); 2965 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 2966 failCount++; 2967 2968 // Check to make sure escaped $ is ignored 2969 blah = toSupplementaries("zzzabcdcdefzzz"); 2970 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2971 m = p.matcher(blah); 2972 result = new StringBuffer(); 2973 m.find(); 2974 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 2975 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 2976 failCount++; 2977 2978 m.appendTail(result); 2979 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 2980 failCount++; 2981 2982 // Check to make sure a reference to nonexistent group causes error 2983 blah = toSupplementaries("zzzabcdcdefzzz"); 2984 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2985 m = p.matcher(blah); 2986 result = new StringBuffer(); 2987 m.find(); 2988 try { 2989 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 2990 failCount++; 2991 } catch (IndexOutOfBoundsException ioobe) { 2992 // Correct result 2993 } 2994 2995 // Check double digit group references 2996 blah = toSupplementaries("zzz123456789101112zzz"); 2997 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2998 m = p.matcher(blah); 2999 result = new StringBuffer(); 3000 m.find(); 3001 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3002 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3003 failCount++; 3004 3005 // Check to make sure it backs off $15 to $1 if only three groups 3006 blah = toSupplementaries("zzzabcdcdefzzz"); 3007 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3008 m = p.matcher(blah); 3009 result = new StringBuffer(); 3010 m.find(); 3011 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3012 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3013 failCount++; 3014 3015 // Check nothing has been appended into the output buffer if 3016 // the replacement string triggers IllegalArgumentException. 3017 p = Pattern.compile("(abc)"); 3018 m = p.matcher("abcd"); 3019 result = new StringBuffer(); 3020 m.find(); 3021 try { 3022 m.appendReplacement(result, ("xyz$g")); 3023 failCount++; 3024 } catch (IllegalArgumentException iae) { 3025 if (result.length() != 0) 3026 failCount++; 3027 } 3028 3029 report("SB Substitution"); 3030 } 3031 3032 /** 3033 * Tests the usage of Matcher.appendReplacement() with literal 3034 * and group substitutions. 3035 */ 3036 private static void stringbuilderSubstitute() throws Exception { 3037 // SB substitution with literal 3038 String blah = "zzzblahzzz"; 3039 Pattern p = Pattern.compile("blah"); 3040 Matcher m = p.matcher(blah); 3041 StringBuilder result = new StringBuilder(); 3042 try { 3043 m.appendReplacement(result, "blech"); 3044 failCount++; 3045 } catch (IllegalStateException e) { 3046 } 3047 m.find(); 3048 m.appendReplacement(result, "blech"); 3049 if (!result.toString().equals("zzzblech")) 3050 failCount++; 3051 3052 m.appendTail(result); 3053 if (!result.toString().equals("zzzblechzzz")) 3054 failCount++; 3055 3056 // SB substitution with groups 3057 blah = "zzzabcdzzz"; 3058 p = Pattern.compile("(ab)(cd)*"); 3059 m = p.matcher(blah); 3060 result = new StringBuilder(); 3061 try { 3062 m.appendReplacement(result, "$1"); 3063 failCount++; 3064 } catch (IllegalStateException e) { 3065 } 3066 m.find(); 3067 m.appendReplacement(result, "$1"); 3068 if (!result.toString().equals("zzzab")) 3069 failCount++; 3070 3071 m.appendTail(result); 3072 if (!result.toString().equals("zzzabzzz")) 3073 failCount++; 3074 3075 // SB substitution with 3 groups 3076 blah = "zzzabcdcdefzzz"; 3077 p = Pattern.compile("(ab)(cd)*(ef)"); 3078 m = p.matcher(blah); 3079 result = new StringBuilder(); 3080 try { 3081 m.appendReplacement(result, "$1w$2w$3"); 3082 failCount++; 3083 } catch (IllegalStateException e) { 3084 } 3085 m.find(); 3086 m.appendReplacement(result, "$1w$2w$3"); 3087 if (!result.toString().equals("zzzabwcdwef")) 3088 failCount++; 3089 3090 m.appendTail(result); 3091 if (!result.toString().equals("zzzabwcdwefzzz")) 3092 failCount++; 3093 3094 // SB substitution with groups and three matches 3095 // skipping middle match 3096 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3097 p = Pattern.compile("(ab)(cd*)"); 3098 m = p.matcher(blah); 3099 result = new StringBuilder(); 3100 try { 3101 m.appendReplacement(result, "$1"); 3102 failCount++; 3103 } catch (IllegalStateException e) { 3104 } 3105 m.find(); 3106 m.appendReplacement(result, "$1"); 3107 if (!result.toString().equals("zzzab")) 3108 failCount++; 3109 3110 m.find(); 3111 m.find(); 3112 m.appendReplacement(result, "$2"); 3113 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3114 failCount++; 3115 3116 m.appendTail(result); 3117 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3118 failCount++; 3119 3120 // Check to make sure escaped $ is ignored 3121 blah = "zzzabcdcdefzzz"; 3122 p = Pattern.compile("(ab)(cd)*(ef)"); 3123 m = p.matcher(blah); 3124 result = new StringBuilder(); 3125 m.find(); 3126 m.appendReplacement(result, "$1w\\$2w$3"); 3127 if (!result.toString().equals("zzzabw$2wef")) 3128 failCount++; 3129 3130 m.appendTail(result); 3131 if (!result.toString().equals("zzzabw$2wefzzz")) 3132 failCount++; 3133 3134 // Check to make sure a reference to nonexistent group causes error 3135 blah = "zzzabcdcdefzzz"; 3136 p = Pattern.compile("(ab)(cd)*(ef)"); 3137 m = p.matcher(blah); 3138 result = new StringBuilder(); 3139 m.find(); 3140 try { 3141 m.appendReplacement(result, "$1w$5w$3"); 3142 failCount++; 3143 } catch (IndexOutOfBoundsException ioobe) { 3144 // Correct result 3145 } 3146 3147 // Check double digit group references 3148 blah = "zzz123456789101112zzz"; 3149 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3150 m = p.matcher(blah); 3151 result = new StringBuilder(); 3152 m.find(); 3153 m.appendReplacement(result, "$1w$11w$3"); 3154 if (!result.toString().equals("zzz1w11w3")) 3155 failCount++; 3156 3157 // Check to make sure it backs off $15 to $1 if only three groups 3158 blah = "zzzabcdcdefzzz"; 3159 p = Pattern.compile("(ab)(cd)*(ef)"); 3160 m = p.matcher(blah); 3161 result = new StringBuilder(); 3162 m.find(); 3163 m.appendReplacement(result, "$1w$15w$3"); 3164 if (!result.toString().equals("zzzabwab5wef")) 3165 failCount++; 3166 3167 3168 // Supplementary character test 3169 // SB substitution with literal 3170 blah = toSupplementaries("zzzblahzzz"); 3171 p = Pattern.compile(toSupplementaries("blah")); 3172 m = p.matcher(blah); 3173 result = new StringBuilder(); 3174 try { 3175 m.appendReplacement(result, toSupplementaries("blech")); 3176 failCount++; 3177 } catch (IllegalStateException e) { 3178 } 3179 m.find(); 3180 m.appendReplacement(result, toSupplementaries("blech")); 3181 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3182 failCount++; 3183 m.appendTail(result); 3184 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3185 failCount++; 3186 3187 // SB substitution with groups 3188 blah = toSupplementaries("zzzabcdzzz"); 3189 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3190 m = p.matcher(blah); 3191 result = new StringBuilder(); 3192 try { 3193 m.appendReplacement(result, "$1"); 3194 failCount++; 3195 } catch (IllegalStateException e) { 3196 } 3197 m.find(); 3198 m.appendReplacement(result, "$1"); 3199 if (!result.toString().equals(toSupplementaries("zzzab"))) 3200 failCount++; 3201 3202 m.appendTail(result); 3203 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3204 failCount++; 3205 3206 // SB substitution with 3 groups 3207 blah = toSupplementaries("zzzabcdcdefzzz"); 3208 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3209 m = p.matcher(blah); 3210 result = new StringBuilder(); 3211 try { 3212 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3213 failCount++; 3214 } catch (IllegalStateException e) { 3215 } 3216 m.find(); 3217 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3218 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3219 failCount++; 3220 3221 m.appendTail(result); 3222 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3223 failCount++; 3224 3225 // SB substitution with groups and three matches 3226 // skipping middle match 3227 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3228 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3229 m = p.matcher(blah); 3230 result = new StringBuilder(); 3231 try { 3232 m.appendReplacement(result, "$1"); 3233 failCount++; 3234 } catch (IllegalStateException e) { 3235 } 3236 m.find(); 3237 m.appendReplacement(result, "$1"); 3238 if (!result.toString().equals(toSupplementaries("zzzab"))) 3239 failCount++; 3240 3241 m.find(); 3242 m.find(); 3243 m.appendReplacement(result, "$2"); 3244 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3245 failCount++; 3246 3247 m.appendTail(result); 3248 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3249 failCount++; 3250 3251 // Check to make sure escaped $ is ignored 3252 blah = toSupplementaries("zzzabcdcdefzzz"); 3253 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3254 m = p.matcher(blah); 3255 result = new StringBuilder(); 3256 m.find(); 3257 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3258 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3259 failCount++; 3260 3261 m.appendTail(result); 3262 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3263 failCount++; 3264 3265 // Check to make sure a reference to nonexistent group causes error 3266 blah = toSupplementaries("zzzabcdcdefzzz"); 3267 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3268 m = p.matcher(blah); 3269 result = new StringBuilder(); 3270 m.find(); 3271 try { 3272 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3273 failCount++; 3274 } catch (IndexOutOfBoundsException ioobe) { 3275 // Correct result 3276 } 3277 // Check double digit group references 3278 blah = toSupplementaries("zzz123456789101112zzz"); 3279 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3280 m = p.matcher(blah); 3281 result = new StringBuilder(); 3282 m.find(); 3283 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3284 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3285 failCount++; 3286 3287 // Check to make sure it backs off $15 to $1 if only three groups 3288 blah = toSupplementaries("zzzabcdcdefzzz"); 3289 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3290 m = p.matcher(blah); 3291 result = new StringBuilder(); 3292 m.find(); 3293 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3294 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3295 failCount++; 3296 // Check nothing has been appended into the output buffer if 3297 // the replacement string triggers IllegalArgumentException. 3298 p = Pattern.compile("(abc)"); 3299 m = p.matcher("abcd"); 3300 result = new StringBuilder(); 3301 m.find(); 3302 try { 3303 m.appendReplacement(result, ("xyz$g")); 3304 failCount++; 3305 } catch (IllegalArgumentException iae) { 3306 if (result.length() != 0) 3307 failCount++; 3308 } 3309 report("SB Substitution 2"); 3310 } 3311 3312 /* 3313 * 5 groups of characters are created to make a substitution string. 3314 * A base string will be created including random lead chars, the 3315 * substitution string, and random trailing chars. 3316 * A pattern containing the 5 groups is searched for and replaced with: 3317 * random group + random string + random group. 3318 * The results are checked for correctness. 3319 */ 3320 private static void substitutionBasher() { 3321 for (int runs = 0; runs<1000; runs++) { 3322 // Create a base string to work in 3323 int leadingChars = generator.nextInt(10); 3324 StringBuffer baseBuffer = new StringBuffer(100); 3325 String leadingString = getRandomAlphaString(leadingChars); 3326 baseBuffer.append(leadingString); 3327 3328 // Create 5 groups of random number of random chars 3329 // Create the string to substitute 3330 // Create the pattern string to search for 3331 StringBuffer bufferToSub = new StringBuffer(25); 3332 StringBuffer bufferToPat = new StringBuffer(50); 3333 String[] groups = new String[5]; 3334 for(int i=0; i<5; i++) { 3335 int aGroupSize = generator.nextInt(5)+1; 3336 groups[i] = getRandomAlphaString(aGroupSize); 3337 bufferToSub.append(groups[i]); 3338 bufferToPat.append('('); 3339 bufferToPat.append(groups[i]); 3340 bufferToPat.append(')'); 3341 } 3342 String stringToSub = bufferToSub.toString(); 3343 String pattern = bufferToPat.toString(); 3344 3345 // Place sub string into working string at random index 3346 baseBuffer.append(stringToSub); 3347 3348 // Append random chars to end 3349 int trailingChars = generator.nextInt(10); 3350 String trailingString = getRandomAlphaString(trailingChars); 3351 baseBuffer.append(trailingString); 3352 String baseString = baseBuffer.toString(); 3353 3354 // Create test pattern and matcher 3355 Pattern p = Pattern.compile(pattern); 3356 Matcher m = p.matcher(baseString); 3357 3358 // Reject candidate if pattern happens to start early 3359 m.find(); 3360 if (m.start() < leadingChars) 3361 continue; 3362 3363 // Reject candidate if more than one match 3364 if (m.find()) 3365 continue; 3366 3367 // Construct a replacement string with : 3368 // random group + random string + random group 3369 StringBuffer bufferToRep = new StringBuffer(); 3370 int groupIndex1 = generator.nextInt(5); 3371 bufferToRep.append("$" + (groupIndex1 + 1)); 3372 String randomMidString = getRandomAlphaString(5); 3373 bufferToRep.append(randomMidString); 3374 int groupIndex2 = generator.nextInt(5); 3375 bufferToRep.append("$" + (groupIndex2 + 1)); 3376 String replacement = bufferToRep.toString(); 3377 3378 // Do the replacement 3379 String result = m.replaceAll(replacement); 3380 3381 // Construct expected result 3382 StringBuffer bufferToRes = new StringBuffer(); 3383 bufferToRes.append(leadingString); 3384 bufferToRes.append(groups[groupIndex1]); 3385 bufferToRes.append(randomMidString); 3386 bufferToRes.append(groups[groupIndex2]); 3387 bufferToRes.append(trailingString); 3388 String expectedResult = bufferToRes.toString(); 3389 3390 // Check results 3391 if (!result.equals(expectedResult)) 3392 failCount++; 3393 } 3394 3395 report("Substitution Basher"); 3396 } 3397 3398 /* 3399 * 5 groups of characters are created to make a substitution string. 3400 * A base string will be created including random lead chars, the 3401 * substitution string, and random trailing chars. 3402 * A pattern containing the 5 groups is searched for and replaced with: 3403 * random group + random string + random group. 3404 * The results are checked for correctness. 3405 */ 3406 private static void substitutionBasher2() { 3407 for (int runs = 0; runs<1000; runs++) { 3408 // Create a base string to work in 3409 int leadingChars = generator.nextInt(10); 3410 StringBuilder baseBuffer = new StringBuilder(100); 3411 String leadingString = getRandomAlphaString(leadingChars); 3412 baseBuffer.append(leadingString); 3413 3414 // Create 5 groups of random number of random chars 3415 // Create the string to substitute 3416 // Create the pattern string to search for 3417 StringBuilder bufferToSub = new StringBuilder(25); 3418 StringBuilder bufferToPat = new StringBuilder(50); 3419 String[] groups = new String[5]; 3420 for(int i=0; i<5; i++) { 3421 int aGroupSize = generator.nextInt(5)+1; 3422 groups[i] = getRandomAlphaString(aGroupSize); 3423 bufferToSub.append(groups[i]); 3424 bufferToPat.append('('); 3425 bufferToPat.append(groups[i]); 3426 bufferToPat.append(')'); 3427 } 3428 String stringToSub = bufferToSub.toString(); 3429 String pattern = bufferToPat.toString(); 3430 3431 // Place sub string into working string at random index 3432 baseBuffer.append(stringToSub); 3433 3434 // Append random chars to end 3435 int trailingChars = generator.nextInt(10); 3436 String trailingString = getRandomAlphaString(trailingChars); 3437 baseBuffer.append(trailingString); 3438 String baseString = baseBuffer.toString(); 3439 3440 // Create test pattern and matcher 3441 Pattern p = Pattern.compile(pattern); 3442 Matcher m = p.matcher(baseString); 3443 3444 // Reject candidate if pattern happens to start early 3445 m.find(); 3446 if (m.start() < leadingChars) 3447 continue; 3448 3449 // Reject candidate if more than one match 3450 if (m.find()) 3451 continue; 3452 3453 // Construct a replacement string with : 3454 // random group + random string + random group 3455 StringBuilder bufferToRep = new StringBuilder(); 3456 int groupIndex1 = generator.nextInt(5); 3457 bufferToRep.append("$" + (groupIndex1 + 1)); 3458 String randomMidString = getRandomAlphaString(5); 3459 bufferToRep.append(randomMidString); 3460 int groupIndex2 = generator.nextInt(5); 3461 bufferToRep.append("$" + (groupIndex2 + 1)); 3462 String replacement = bufferToRep.toString(); 3463 3464 // Do the replacement 3465 String result = m.replaceAll(replacement); 3466 3467 // Construct expected result 3468 StringBuilder bufferToRes = new StringBuilder(); 3469 bufferToRes.append(leadingString); 3470 bufferToRes.append(groups[groupIndex1]); 3471 bufferToRes.append(randomMidString); 3472 bufferToRes.append(groups[groupIndex2]); 3473 bufferToRes.append(trailingString); 3474 String expectedResult = bufferToRes.toString(); 3475 3476 // Check results 3477 if (!result.equals(expectedResult)) { 3478 failCount++; 3479 } 3480 } 3481 3482 report("Substitution Basher 2"); 3483 } 3484 3485 /** 3486 * Checks the handling of some escape sequences that the Pattern 3487 * class should process instead of the java compiler. These are 3488 * not in the file because the escapes should be be processed 3489 * by the Pattern class when the regex is compiled. 3490 */ 3491 private static void escapes() throws Exception { 3492 Pattern p = Pattern.compile("\\043"); 3493 Matcher m = p.matcher("#"); 3494 if (!m.find()) 3495 failCount++; 3496 3497 p = Pattern.compile("\\x23"); 3498 m = p.matcher("#"); 3499 if (!m.find()) 3500 failCount++; 3501 3502 p = Pattern.compile("\\u0023"); 3503 m = p.matcher("#"); 3504 if (!m.find()) 3505 failCount++; 3506 3507 report("Escape sequences"); 3508 } 3509 3510 /** 3511 * Checks the handling of blank input situations. These 3512 * tests are incompatible with my test file format. 3513 */ 3514 private static void blankInput() throws Exception { 3515 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3516 Matcher m = p.matcher(""); 3517 if (m.find()) 3518 failCount++; 3519 3520 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3521 m = p.matcher(""); 3522 if (!m.find()) 3523 failCount++; 3524 3525 p = Pattern.compile("abc"); 3526 m = p.matcher(""); 3527 if (m.find()) 3528 failCount++; 3529 3530 p = Pattern.compile("a*"); 3531 m = p.matcher(""); 3532 if (!m.find()) 3533 failCount++; 3534 3535 report("Blank input"); 3536 } 3537 3538 /** 3539 * Tests the Boyer-Moore pattern matching of a character sequence 3540 * on randomly generated patterns. 3541 */ 3542 private static void bm() throws Exception { 3543 doBnM('a'); 3544 report("Boyer Moore (ASCII)"); 3545 3546 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3547 report("Boyer Moore (Supplementary)"); 3548 } 3549 3550 private static void doBnM(int baseCharacter) throws Exception { 3551 int achar=0; 3552 3553 for (int i=0; i<100; i++) { 3554 // Create a short pattern to search for 3555 int patternLength = generator.nextInt(7) + 4; 3556 StringBuffer patternBuffer = new StringBuffer(patternLength); 3557 String pattern; 3558 retry: for (;;) { 3559 for (int x=0; x<patternLength; x++) { 3560 int ch = baseCharacter + generator.nextInt(26); 3561 if (Character.isSupplementaryCodePoint(ch)) { 3562 patternBuffer.append(Character.toChars(ch)); 3563 } else { 3564 patternBuffer.append((char)ch); 3565 } 3566 } 3567 pattern = patternBuffer.toString(); 3568 3569 // Avoid patterns that start and end with the same substring 3570 // See JDK-6854417 3571 for (int x=1; x < pattern.length(); x++) { 3572 if (pattern.startsWith(pattern.substring(x))) 3573 continue retry; 3574 } 3575 break; 3576 } 3577 Pattern p = Pattern.compile(pattern); 3578 3579 // Create a buffer with random ASCII chars that does 3580 // not match the sample 3581 String toSearch = null; 3582 StringBuffer s = null; 3583 Matcher m = p.matcher(""); 3584 do { 3585 s = new StringBuffer(100); 3586 for (int x=0; x<100; x++) { 3587 int ch = baseCharacter + generator.nextInt(26); 3588 if (Character.isSupplementaryCodePoint(ch)) { 3589 s.append(Character.toChars(ch)); 3590 } else { 3591 s.append((char)ch); 3592 } 3593 } 3594 toSearch = s.toString(); 3595 m.reset(toSearch); 3596 } while (m.find()); 3597 3598 // Insert the pattern at a random spot 3599 int insertIndex = generator.nextInt(99); 3600 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3601 insertIndex++; 3602 s = s.insert(insertIndex, pattern); 3603 toSearch = s.toString(); 3604 3605 // Make sure that the pattern is found 3606 m.reset(toSearch); 3607 if (!m.find()) 3608 failCount++; 3609 3610 // Make sure that the match text is the pattern 3611 if (!m.group().equals(pattern)) 3612 failCount++; 3613 3614 // Make sure match occured at insertion point 3615 if (m.start() != insertIndex) 3616 failCount++; 3617 } 3618 } 3619 3620 /** 3621 * Tests the matching of slices on randomly generated patterns. 3622 * The Boyer-Moore optimization is not done on these patterns 3623 * because it uses unicode case folding. 3624 */ 3625 private static void slice() throws Exception { 3626 doSlice(Character.MAX_VALUE); 3627 report("Slice"); 3628 3629 doSlice(Character.MAX_CODE_POINT); 3630 report("Slice (Supplementary)"); 3631 } 3632 3633 private static void doSlice(int maxCharacter) throws Exception { 3634 Random generator = new Random(); 3635 int achar=0; 3636 3637 for (int i=0; i<100; i++) { 3638 // Create a short pattern to search for 3639 int patternLength = generator.nextInt(7) + 4; 3640 StringBuffer patternBuffer = new StringBuffer(patternLength); 3641 for (int x=0; x<patternLength; x++) { 3642 int randomChar = 0; 3643 while (!Character.isLetterOrDigit(randomChar)) 3644 randomChar = generator.nextInt(maxCharacter); 3645 if (Character.isSupplementaryCodePoint(randomChar)) { 3646 patternBuffer.append(Character.toChars(randomChar)); 3647 } else { 3648 patternBuffer.append((char) randomChar); 3649 } 3650 } 3651 String pattern = patternBuffer.toString(); 3652 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3653 3654 // Create a buffer with random chars that does not match the sample 3655 String toSearch = null; 3656 StringBuffer s = null; 3657 Matcher m = p.matcher(""); 3658 do { 3659 s = new StringBuffer(100); 3660 for (int x=0; x<100; x++) { 3661 int randomChar = 0; 3662 while (!Character.isLetterOrDigit(randomChar)) 3663 randomChar = generator.nextInt(maxCharacter); 3664 if (Character.isSupplementaryCodePoint(randomChar)) { 3665 s.append(Character.toChars(randomChar)); 3666 } else { 3667 s.append((char) randomChar); 3668 } 3669 } 3670 toSearch = s.toString(); 3671 m.reset(toSearch); 3672 } while (m.find()); 3673 3674 // Insert the pattern at a random spot 3675 int insertIndex = generator.nextInt(99); 3676 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3677 insertIndex++; 3678 s = s.insert(insertIndex, pattern); 3679 toSearch = s.toString(); 3680 3681 // Make sure that the pattern is found 3682 m.reset(toSearch); 3683 if (!m.find()) 3684 failCount++; 3685 3686 // Make sure that the match text is the pattern 3687 if (!m.group().equals(pattern)) 3688 failCount++; 3689 3690 // Make sure match occured at insertion point 3691 if (m.start() != insertIndex) 3692 failCount++; 3693 } 3694 } 3695 3696 private static void explainFailure(String pattern, String data, 3697 String expected, String actual) { 3698 System.err.println("----------------------------------------"); 3699 System.err.println("Pattern = "+pattern); 3700 System.err.println("Data = "+data); 3701 System.err.println("Expected = " + expected); 3702 System.err.println("Actual = " + actual); 3703 } 3704 3705 private static void explainFailure(String pattern, String data, 3706 Throwable t) { 3707 System.err.println("----------------------------------------"); 3708 System.err.println("Pattern = "+pattern); 3709 System.err.println("Data = "+data); 3710 t.printStackTrace(System.err); 3711 } 3712 3713 // Testing examples from a file 3714 3715 /** 3716 * Goes through the file "TestCases.txt" and creates many patterns 3717 * described in the file, matching the patterns against input lines in 3718 * the file, and comparing the results against the correct results 3719 * also found in the file. The file format is described in comments 3720 * at the head of the file. 3721 */ 3722 private static void processFile(String fileName) throws Exception { 3723 File testCases = new File(System.getProperty("test.src", "."), 3724 fileName); 3725 FileInputStream in = new FileInputStream(testCases); 3726 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3727 3728 // Process next test case. 3729 String aLine; 3730 while((aLine = r.readLine()) != null) { 3731 // Read a line for pattern 3732 String patternString = grabLine(r); 3733 Pattern p = null; 3734 try { 3735 p = compileTestPattern(patternString); 3736 } catch (PatternSyntaxException e) { 3737 String dataString = grabLine(r); 3738 String expectedResult = grabLine(r); 3739 if (expectedResult.startsWith("error")) 3740 continue; 3741 explainFailure(patternString, dataString, e); 3742 failCount++; 3743 continue; 3744 } 3745 3746 // Read a line for input string 3747 String dataString = grabLine(r); 3748 Matcher m = p.matcher(dataString); 3749 StringBuffer result = new StringBuffer(); 3750 3751 // Check for IllegalStateExceptions before a match 3752 failCount += preMatchInvariants(m); 3753 3754 boolean found = m.find(); 3755 3756 if (found) 3757 failCount += postTrueMatchInvariants(m); 3758 else 3759 failCount += postFalseMatchInvariants(m); 3760 3761 if (found) { 3762 result.append("true "); 3763 result.append(m.group(0) + " "); 3764 } else { 3765 result.append("false "); 3766 } 3767 3768 result.append(m.groupCount()); 3769 3770 if (found) { 3771 for (int i=1; i<m.groupCount()+1; i++) 3772 if (m.group(i) != null) 3773 result.append(" " +m.group(i)); 3774 } 3775 3776 // Read a line for the expected result 3777 String expectedResult = grabLine(r); 3778 3779 if (!result.toString().equals(expectedResult)) { 3780 explainFailure(patternString, dataString, expectedResult, result.toString()); 3781 failCount++; 3782 } 3783 } 3784 3785 report(fileName); 3786 } 3787 3788 private static int preMatchInvariants(Matcher m) { 3789 int failCount = 0; 3790 try { 3791 m.start(); 3792 failCount++; 3793 } catch (IllegalStateException ise) {} 3794 try { 3795 m.end(); 3796 failCount++; 3797 } catch (IllegalStateException ise) {} 3798 try { 3799 m.group(); 3800 failCount++; 3801 } catch (IllegalStateException ise) {} 3802 return failCount; 3803 } 3804 3805 private static int postFalseMatchInvariants(Matcher m) { 3806 int failCount = 0; 3807 try { 3808 m.group(); 3809 failCount++; 3810 } catch (IllegalStateException ise) {} 3811 try { 3812 m.start(); 3813 failCount++; 3814 } catch (IllegalStateException ise) {} 3815 try { 3816 m.end(); 3817 failCount++; 3818 } catch (IllegalStateException ise) {} 3819 return failCount; 3820 } 3821 3822 private static int postTrueMatchInvariants(Matcher m) { 3823 int failCount = 0; 3824 //assert(m.start() = m.start(0); 3825 if (m.start() != m.start(0)) 3826 failCount++; 3827 //assert(m.end() = m.end(0); 3828 if (m.start() != m.start(0)) 3829 failCount++; 3830 //assert(m.group() = m.group(0); 3831 if (!m.group().equals(m.group(0))) 3832 failCount++; 3833 try { 3834 m.group(50); 3835 failCount++; 3836 } catch (IndexOutOfBoundsException ise) {} 3837 3838 return failCount; 3839 } 3840 3841 private static Pattern compileTestPattern(String patternString) { 3842 if (!patternString.startsWith("'")) { 3843 return Pattern.compile(patternString); 3844 } 3845 3846 int break1 = patternString.lastIndexOf("'"); 3847 String flagString = patternString.substring( 3848 break1+1, patternString.length()); 3849 patternString = patternString.substring(1, break1); 3850 3851 if (flagString.equals("i")) 3852 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3853 3854 if (flagString.equals("m")) 3855 return Pattern.compile(patternString, Pattern.MULTILINE); 3856 3857 return Pattern.compile(patternString); 3858 } 3859 3860 /** 3861 * Reads a line from the input file. Keeps reading lines until a non 3862 * empty non comment line is read. If the line contains a \n then 3863 * these two characters are replaced by a newline char. If a \\uxxxx 3864 * sequence is read then the sequence is replaced by the unicode char. 3865 */ 3866 private static String grabLine(BufferedReader r) throws Exception { 3867 int index = 0; 3868 String line = r.readLine(); 3869 while (line.startsWith("//") || line.length() < 1) 3870 line = r.readLine(); 3871 while ((index = line.indexOf("\\n")) != -1) { 3872 StringBuffer temp = new StringBuffer(line); 3873 temp.replace(index, index+2, "\n"); 3874 line = temp.toString(); 3875 } 3876 while ((index = line.indexOf("\\u")) != -1) { 3877 StringBuffer temp = new StringBuffer(line); 3878 String value = temp.substring(index+2, index+6); 3879 char aChar = (char)Integer.parseInt(value, 16); 3880 String unicodeChar = "" + aChar; 3881 temp.replace(index, index+6, unicodeChar); 3882 line = temp.toString(); 3883 } 3884 3885 return line; 3886 } 3887 3888 private static void check(Pattern p, String s, String g, String expected) { 3889 Matcher m = p.matcher(s); 3890 m.find(); 3891 if (!m.group(g).equals(expected) || 3892 s.charAt(m.start(g)) != expected.charAt(0) || 3893 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 3894 failCount++; 3895 } 3896 3897 private static void checkReplaceFirst(String p, String s, String r, String expected) 3898 { 3899 if (!expected.equals(Pattern.compile(p) 3900 .matcher(s) 3901 .replaceFirst(r))) 3902 failCount++; 3903 } 3904 3905 private static void checkReplaceAll(String p, String s, String r, String expected) 3906 { 3907 if (!expected.equals(Pattern.compile(p) 3908 .matcher(s) 3909 .replaceAll(r))) 3910 failCount++; 3911 } 3912 3913 private static void checkExpectedFail(String p) { 3914 try { 3915 Pattern.compile(p); 3916 } catch (PatternSyntaxException pse) { 3917 //pse.printStackTrace(); 3918 return; 3919 } 3920 failCount++; 3921 } 3922 3923 private static void checkExpectedIAE(Matcher m, String g) { 3924 m.find(); 3925 try { 3926 m.group(g); 3927 } catch (IllegalArgumentException x) { 3928 //iae.printStackTrace(); 3929 try { 3930 m.start(g); 3931 } catch (IllegalArgumentException xx) { 3932 try { 3933 m.start(g); 3934 } catch (IllegalArgumentException xxx) { 3935 return; 3936 } 3937 } 3938 } 3939 failCount++; 3940 } 3941 3942 private static void checkExpectedNPE(Matcher m) { 3943 m.find(); 3944 try { 3945 m.group(null); 3946 } catch (NullPointerException x) { 3947 try { 3948 m.start(null); 3949 } catch (NullPointerException xx) { 3950 try { 3951 m.end(null); 3952 } catch (NullPointerException xxx) { 3953 return; 3954 } 3955 } 3956 } 3957 failCount++; 3958 } 3959 3960 private static void namedGroupCaptureTest() throws Exception { 3961 check(Pattern.compile("x+(?<gname>y+)z+"), 3962 "xxxyyyzzz", 3963 "gname", 3964 "yyy"); 3965 3966 check(Pattern.compile("x+(?<gname8>y+)z+"), 3967 "xxxyyyzzz", 3968 "gname8", 3969 "yyy"); 3970 3971 //backref 3972 Pattern pattern = Pattern.compile("(a*)bc\\1"); 3973 check(pattern, "zzzaabcazzz", true); // found "abca" 3974 3975 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 3976 "zzzaabcaazzz", true); 3977 3978 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 3979 "abcdefabc", true); 3980 3981 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 3982 "abcdefghijkk", true); 3983 3984 // Supplementary character tests 3985 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3986 toSupplementaries("zzzaabcazzz"), true); 3987 3988 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3989 toSupplementaries("zzzaabcaazzz"), true); 3990 3991 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 3992 toSupplementaries("abcdefabc"), true); 3993 3994 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 3995 "(?<gname>" + 3996 toSupplementaries("k)") + "\\k<gname>"), 3997 toSupplementaries("abcdefghijkk"), true); 3998 3999 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 4000 "xxxyyyzzzyyy", 4001 "gname", 4002 "yyy"); 4003 4004 //replaceFirst/All 4005 checkReplaceFirst("(?<gn>ab)(c*)", 4006 "abccczzzabcczzzabccc", 4007 "${gn}", 4008 "abzzzabcczzzabccc"); 4009 4010 checkReplaceAll("(?<gn>ab)(c*)", 4011 "abccczzzabcczzzabccc", 4012 "${gn}", 4013 "abzzzabzzzab"); 4014 4015 4016 checkReplaceFirst("(?<gn>ab)(c*)", 4017 "zzzabccczzzabcczzzabccczzz", 4018 "${gn}", 4019 "zzzabzzzabcczzzabccczzz"); 4020 4021 checkReplaceAll("(?<gn>ab)(c*)", 4022 "zzzabccczzzabcczzzabccczzz", 4023 "${gn}", 4024 "zzzabzzzabzzzabzzz"); 4025 4026 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4027 "zzzabccczzzabcczzzabccczzz", 4028 "${gn2}", 4029 "zzzccczzzabcczzzabccczzz"); 4030 4031 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4032 "zzzabccczzzabcczzzabccczzz", 4033 "${gn2}", 4034 "zzzccczzzcczzzccczzz"); 4035 4036 //toSupplementaries("(ab)(c*)")); 4037 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4038 ")(?<gn2>" + toSupplementaries("c") + "*)", 4039 toSupplementaries("abccczzzabcczzzabccc"), 4040 "${gn1}", 4041 toSupplementaries("abzzzabcczzzabccc")); 4042 4043 4044 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4045 ")(?<gn2>" + toSupplementaries("c") + "*)", 4046 toSupplementaries("abccczzzabcczzzabccc"), 4047 "${gn1}", 4048 toSupplementaries("abzzzabzzzab")); 4049 4050 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4051 ")(?<gn2>" + toSupplementaries("c") + "*)", 4052 toSupplementaries("abccczzzabcczzzabccc"), 4053 "${gn2}", 4054 toSupplementaries("ccczzzabcczzzabccc")); 4055 4056 4057 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4058 ")(?<gn2>" + toSupplementaries("c") + "*)", 4059 toSupplementaries("abccczzzabcczzzabccc"), 4060 "${gn2}", 4061 toSupplementaries("ccczzzcczzzccc")); 4062 4063 checkReplaceFirst("(?<dog>Dog)AndCat", 4064 "zzzDogAndCatzzzDogAndCatzzz", 4065 "${dog}", 4066 "zzzDogzzzDogAndCatzzz"); 4067 4068 4069 checkReplaceAll("(?<dog>Dog)AndCat", 4070 "zzzDogAndCatzzzDogAndCatzzz", 4071 "${dog}", 4072 "zzzDogzzzDogzzz"); 4073 4074 // backref in Matcher & String 4075 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") || 4076 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4077 failCount++; 4078 4079 // negative 4080 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4081 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4082 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4083 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4084 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4085 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4086 "gnameX"); 4087 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4088 report("NamedGroupCapture"); 4089 } 4090 4091 // This is for bug 6969132 4092 private static void nonBmpClassComplementTest() throws Exception { 4093 Pattern p = Pattern.compile("\\P{Lu}"); 4094 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4095 if (m.find() && m.start() == 1) 4096 failCount++; 4097 4098 // from a unicode category 4099 p = Pattern.compile("\\P{Lu}"); 4100 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4101 if (m.find()) 4102 failCount++; 4103 if (!m.hitEnd()) 4104 failCount++; 4105 4106 // block 4107 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4108 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4109 if (m.find() && m.start() == 1) 4110 failCount++; 4111 4112 report("NonBmpClassComplement"); 4113 } 4114 4115 private static void unicodePropertiesTest() throws Exception { 4116 // different forms 4117 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4118 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4119 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4120 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4121 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4122 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4123 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4124 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4125 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4126 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4127 failCount++; 4128 4129 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4130 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4131 Matcher lastSM = common; 4132 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4133 4134 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4135 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4136 Matcher lastBM = latin; 4137 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4138 4139 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4140 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4141 continue; // only pick couple code points, they are the same 4142 } 4143 4144 // Unicode Script 4145 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4146 Matcher m; 4147 String str = new String(Character.toChars(cp)); 4148 if (script == lastScript) { 4149 m = lastSM; 4150 m.reset(str); 4151 } else { 4152 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4153 } 4154 if (!m.matches()) { 4155 failCount++; 4156 } 4157 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common; 4158 other.reset(str); 4159 if (other.matches()) { 4160 failCount++; 4161 } 4162 lastSM = m; 4163 lastScript = script; 4164 4165 // Unicode Block 4166 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4167 if (block == null) { 4168 //System.out.printf("Not a Block: cp=%x%n", cp); 4169 continue; 4170 } 4171 if (block == lastBlock) { 4172 m = lastBM; 4173 m.reset(str); 4174 } else { 4175 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4176 } 4177 if (!m.matches()) { 4178 failCount++; 4179 } 4180 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4181 other.reset(str); 4182 if (other.matches()) { 4183 failCount++; 4184 } 4185 lastBM = m; 4186 lastBlock = block; 4187 } 4188 report("unicodeProperties"); 4189 } 4190 4191 private static void unicodeHexNotationTest() throws Exception { 4192 4193 // negative 4194 checkExpectedFail("\\x{-23}"); 4195 checkExpectedFail("\\x{110000}"); 4196 checkExpectedFail("\\x{}"); 4197 checkExpectedFail("\\x{AB[ef]"); 4198 4199 // codepoint 4200 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4201 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4202 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4203 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4204 4205 // in class 4206 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4207 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4208 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4209 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4210 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4211 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4212 4213 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4214 String s = "A" + new String(Character.toChars(cp)) + "B"; 4215 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp) 4216 : String.format("\\u%04x\\u%04x", 4217 (int) Character.toChars(cp)[0], 4218 (int) Character.toChars(cp)[1]); 4219 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4220 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4221 failCount++; 4222 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4223 failCount++; 4224 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4225 failCount++; 4226 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4227 failCount++; 4228 } 4229 report("unicodeHexNotation"); 4230 } 4231 4232 private static void unicodeClassesTest() throws Exception { 4233 4234 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4235 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4236 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4237 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4238 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4239 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4240 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4241 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4242 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4243 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4244 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4245 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4246 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4247 Matcher bound = Pattern.compile("\\b").matcher(""); 4248 Matcher word = Pattern.compile("\\w++").matcher(""); 4249 // UNICODE_CHARACTER_CLASS 4250 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4251 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4252 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4253 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4254 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4255 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4256 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4257 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4258 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4259 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4260 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4261 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4262 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4263 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4264 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4265 // embedded flag (?U) 4266 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4267 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4268 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4269 4270 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4271 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4272 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4273 // properties 4274 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4275 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4276 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4277 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4278 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4279 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4280 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4281 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4282 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4283 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4284 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4285 4286 // javaMethod 4287 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4288 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4289 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4290 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4291 4292 for (int cp = 1; cp < 0x30000; cp++) { 4293 String str = new String(Character.toChars(cp)); 4294 int type = Character.getType(cp); 4295 if (// lower 4296 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4297 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4298 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4299 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4300 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4301 // upper 4302 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4303 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4304 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4305 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4306 // alpha 4307 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4308 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4309 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4310 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4311 // digit 4312 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4313 Character.isDigit(cp) != digitU.reset(str).matches() || 4314 // alnum 4315 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4316 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4317 // punct 4318 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4319 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4320 // graph 4321 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4322 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4323 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4324 // blank 4325 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4326 != blank.reset(str).matches() || 4327 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4328 // print 4329 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4330 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4331 // cntrl 4332 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4333 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4334 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4335 // hexdigit 4336 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4337 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4338 // space 4339 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4340 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4341 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4342 // word 4343 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4344 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4345 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4346 // bwordb 4347 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4348 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4349 // properties 4350 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4351 Character.isLetter(cp) != letterP.reset(str).matches()|| 4352 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4353 Character.isIdeographic(cp) != ideogJ.reset(str).matches() || 4354 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 4355 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4356 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches()) 4357 failCount++; 4358 } 4359 4360 // bounds/word align 4361 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4362 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4363 failCount++; 4364 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4365 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4366 failCount++; 4367 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4368 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4369 failCount++; 4370 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4371 failCount++; 4372 report("unicodePredefinedClasses"); 4373 } 4374 4375 private static void horizontalAndVerticalWSTest() throws Exception { 4376 String hws = new String (new char[] { 4377 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4378 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4379 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4380 0x202f, 0x205f, 0x3000 }); 4381 String vws = new String (new char[] { 4382 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4383 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4384 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4385 failCount++; 4386 if (Pattern.compile("\\H").matcher(hws).find() || 4387 Pattern.compile("[\\H]").matcher(hws).find()) 4388 failCount++; 4389 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4390 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4391 failCount++; 4392 if (Pattern.compile("\\V").matcher(vws).find() || 4393 Pattern.compile("[\\V]").matcher(vws).find()) 4394 failCount++; 4395 String prefix = "abcd"; 4396 String suffix = "efgh"; 4397 String ng = "A"; 4398 for (int i = 0; i < hws.length(); i++) { 4399 String c = String.valueOf(hws.charAt(i)); 4400 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4401 if (!m.find() || !c.equals(m.group())) 4402 failCount++; 4403 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4404 if (!m.find() || !c.equals(m.group())) 4405 failCount++; 4406 4407 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4408 if (!m.find() || !ng.equals(m.group())) 4409 failCount++; 4410 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4411 if (!m.find() || !ng.equals(m.group())) 4412 failCount++; 4413 } 4414 for (int i = 0; i < vws.length(); i++) { 4415 String c = String.valueOf(vws.charAt(i)); 4416 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4417 if (!m.find() || !c.equals(m.group())) 4418 failCount++; 4419 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4420 if (!m.find() || !c.equals(m.group())) 4421 failCount++; 4422 4423 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4424 if (!m.find() || !ng.equals(m.group())) 4425 failCount++; 4426 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4427 if (!m.find() || !ng.equals(m.group())) 4428 failCount++; 4429 } 4430 // \v in range is interpreted as 0x0B. This is the undocumented behavior 4431 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4432 failCount++; 4433 report("horizontalAndVerticalWSTest"); 4434 } 4435 4436 private static void linebreakTest() throws Exception { 4437 String linebreaks = new String (new char[] { 4438 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4439 String crnl = "\r\n"; 4440 if (!Pattern.compile("\\R+").matcher(linebreaks).matches() || 4441 !Pattern.compile("\\R").matcher(crnl).matches() || 4442 Pattern.compile("\\R\\R").matcher(crnl).matches()) 4443 failCount++; 4444 report("linebreakTest"); 4445 } 4446 4447 // #7189363 4448 private static void branchTest() throws Exception { 4449 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4450 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4451 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4452 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4453 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4454 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4455 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4456 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4457 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4458 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4459 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4460 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4461 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4462 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4463 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4464 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4465 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4466 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4467 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4468 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4469 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 4470 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4471 failCount++; 4472 report("branchTest"); 4473 } 4474 4475 // This test is for 8007395 4476 private static void groupCurlyNotFoundSuppTest() throws Exception { 4477 String input = "test this as \ud83d\ude0d"; 4478 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4479 "test(.)*(@[a-zA-Z.]+)", 4480 "test([^B])+(@[a-zA-Z.]+)", 4481 "test([^B])*(@[a-zA-Z.]+)", 4482 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4483 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4484 }) { 4485 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4486 .matcher(input); 4487 try { 4488 if (m.find()) { 4489 failCount++; 4490 } 4491 } catch (Exception x) { 4492 failCount++; 4493 } 4494 } 4495 report("GroupCurly NotFoundSupp"); 4496 } 4497 4498 // This test is for 8023647 4499 private static void groupCurlyBackoffTest() throws Exception { 4500 if (!"abc1c".matches("(\\w)+1\\1") || 4501 "abc11".matches("(\\w)+1\\1")) { 4502 failCount++; 4503 } 4504 report("GroupCurly backoff"); 4505 } 4506 4507 // This test is for 8012646 4508 private static void patternAsPredicate() throws Exception { 4509 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4510 4511 if (p.test("")) { 4512 failCount++; 4513 } 4514 if (!p.test("word")) { 4515 failCount++; 4516 } 4517 if (p.test("1234")) { 4518 failCount++; 4519 } 4520 report("Pattern.asPredicate"); 4521 } 4522 4523 // This test is for 8035975 4524 private static void invalidFlags() throws Exception { 4525 for (int flag = 1; flag != 0; flag <<= 1) { 4526 switch (flag) { 4527 case Pattern.CASE_INSENSITIVE: 4528 case Pattern.MULTILINE: 4529 case Pattern.DOTALL: 4530 case Pattern.UNICODE_CASE: 4531 case Pattern.CANON_EQ: 4532 case Pattern.UNIX_LINES: 4533 case Pattern.LITERAL: 4534 case Pattern.UNICODE_CHARACTER_CLASS: 4535 case Pattern.COMMENTS: 4536 // valid flag, continue 4537 break; 4538 default: 4539 try { 4540 Pattern.compile(".", flag); 4541 failCount++; 4542 } catch (IllegalArgumentException expected) { 4543 } 4544 } 4545 } 4546 report("Invalid compile flags"); 4547 } 4548 }