1 /* 2 * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895 37 * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706 38 * 8194667 8197462 8184692 8221431 8224789 8228352 39 * 40 * @library /test/lib 41 * @library /lib/testlibrary/java/lang 42 * @build jdk.test.lib.RandomFactory 43 * @run main RegExTest 44 * @key randomness 45 */ 46 47 import java.io.BufferedReader; 48 import java.io.ByteArrayInputStream; 49 import java.io.ByteArrayOutputStream; 50 import java.io.File; 51 import java.io.FileInputStream; 52 import java.io.InputStreamReader; 53 import java.io.ObjectInputStream; 54 import java.io.ObjectOutputStream; 55 import java.math.BigInteger; 56 import java.nio.CharBuffer; 57 import java.nio.file.Files; 58 import java.util.ArrayList; 59 import java.util.Arrays; 60 import java.util.List; 61 import java.util.Random; 62 import java.util.Scanner; 63 import java.util.function.Function; 64 import java.util.function.Predicate; 65 import java.util.regex.Matcher; 66 import java.util.regex.MatchResult; 67 import java.util.regex.Pattern; 68 import java.util.regex.PatternSyntaxException; 69 import jdk.test.lib.RandomFactory; 70 71 /** 72 * This is a test class created to check the operation of 73 * the Pattern and 
Matcher classes. 74 */ 75 public class RegExTest { 76 77 private static Random generator = RandomFactory.getRandom(); 78 private static boolean failure = false; 79 private static int failCount = 0; 80 private static String firstFailure = null; 81 82 /** 83 * Main to interpret arguments and run several tests. 84 * 85 */ 86 public static void main(String[] args) throws Exception { 87 // Most of the tests are in a file 88 processFile("TestCases.txt"); 89 //processFile("PerlCases.txt"); 90 processFile("BMPTestCases.txt"); 91 processFile("SupplementaryTestCases.txt"); 92 93 // These test many randomly generated char patterns 94 bm(); 95 slice(); 96 97 // These are hard to put into the file 98 escapes(); 99 blankInput(); 100 101 // Substitition tests on randomly generated sequences 102 globalSubstitute(); 103 stringbufferSubstitute(); 104 stringbuilderSubstitute(); 105 106 substitutionBasher(); 107 substitutionBasher2(); 108 109 // Canonical Equivalence 110 ceTest(); 111 112 // Anchors 113 anchorTest(); 114 115 // boolean match calls 116 matchesTest(); 117 lookingAtTest(); 118 119 // Pattern API 120 patternMatchesTest(); 121 122 // Misc 123 lookbehindTest(); 124 nullArgumentTest(); 125 backRefTest(); 126 groupCaptureTest(); 127 caretTest(); 128 charClassTest(); 129 emptyPatternTest(); 130 findIntTest(); 131 group0Test(); 132 longPatternTest(); 133 octalTest(); 134 ampersandTest(); 135 negationTest(); 136 splitTest(); 137 appendTest(); 138 caseFoldingTest(); 139 commentsTest(); 140 unixLinesTest(); 141 replaceFirstTest(); 142 gTest(); 143 zTest(); 144 serializeTest(); 145 reluctantRepetitionTest(); 146 multilineDollarTest(); 147 dollarAtEndTest(); 148 caretBetweenTerminatorsTest(); 149 // This RFE rejected in Tiger numOccurrencesTest(); 150 javaCharClassTest(); 151 nonCaptureRepetitionTest(); 152 notCapturedGroupCurlyMatchTest(); 153 escapedSegmentTest(); 154 literalPatternTest(); 155 literalReplacementTest(); 156 regionTest(); 157 toStringTest(); 158 
negatedCharClassTest(); 159 findFromTest(); 160 boundsTest(); 161 unicodeWordBoundsTest(); 162 caretAtEndTest(); 163 wordSearchTest(); 164 hitEndTest(); 165 toMatchResultTest(); 166 toMatchResultTest2(); 167 surrogatesInClassTest(); 168 removeQEQuotingTest(); 169 namedGroupCaptureTest(); 170 nonBmpClassComplementTest(); 171 unicodePropertiesTest(); 172 unicodeHexNotationTest(); 173 unicodeClassesTest(); 174 unicodeCharacterNameTest(); 175 horizontalAndVerticalWSTest(); 176 linebreakTest(); 177 branchTest(); 178 groupCurlyNotFoundSuppTest(); 179 groupCurlyBackoffTest(); 180 patternAsPredicate(); 181 patternAsMatchPredicate(); 182 invalidFlags(); 183 embeddedFlags(); 184 grapheme(); 185 expoBacktracking(); 186 invalidGroupName(); 187 illegalRepetitionRange(); 188 surrogatePairWithCanonEq(); 189 190 if (failure) { 191 throw new 192 RuntimeException("RegExTest failed, 1st failure: " + 193 firstFailure); 194 } else { 195 System.err.println("OKAY: All tests passed."); 196 } 197 } 198 199 // Utility functions 200 201 private static String getRandomAlphaString(int length) { 202 StringBuffer buf = new StringBuffer(length); 203 for (int i=0; i<length; i++) { 204 char randChar = (char)(97 + generator.nextInt(26)); 205 buf.append(randChar); 206 } 207 return buf.toString(); 208 } 209 210 private static void check(Matcher m, String expected) { 211 m.find(); 212 if (!m.group().equals(expected)) 213 failCount++; 214 } 215 216 private static void check(Matcher m, String result, boolean expected) { 217 m.find(); 218 if (m.group().equals(result) != expected) 219 failCount++; 220 } 221 222 private static void check(Pattern p, String s, boolean expected) { 223 if (p.matcher(s).find() != expected) 224 failCount++; 225 } 226 227 private static void check(String p, String s, boolean expected) { 228 Matcher matcher = Pattern.compile(p).matcher(s); 229 if (matcher.find() != expected) 230 failCount++; 231 } 232 233 private static void check(String p, char c, boolean expected) { 234 String 
propertyPattern = expected ? "\\p" + p : "\\P" + p; 235 Pattern pattern = Pattern.compile(propertyPattern); 236 char[] ca = new char[1]; ca[0] = c; 237 Matcher matcher = pattern.matcher(new String(ca)); 238 if (!matcher.find()) 239 failCount++; 240 } 241 242 private static void check(String p, int codePoint, boolean expected) { 243 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 244 Pattern pattern = Pattern.compile(propertyPattern); 245 char[] ca = Character.toChars(codePoint); 246 Matcher matcher = pattern.matcher(new String(ca)); 247 if (!matcher.find()) 248 failCount++; 249 } 250 251 private static void check(String p, int flag, String input, String s, 252 boolean expected) 253 { 254 Pattern pattern = Pattern.compile(p, flag); 255 Matcher matcher = pattern.matcher(input); 256 if (expected) 257 check(matcher, s, expected); 258 else 259 check(pattern, input, false); 260 } 261 262 private static void report(String testName) { 263 int spacesToAdd = 30 - testName.length(); 264 StringBuffer paddedNameBuffer = new StringBuffer(testName); 265 for (int i=0; i<spacesToAdd; i++) 266 paddedNameBuffer.append(" "); 267 String paddedName = paddedNameBuffer.toString(); 268 System.err.println(paddedName + ": " + 269 (failCount==0 ? "Passed":"Failed("+failCount+")")); 270 if (failCount > 0) { 271 failure = true; 272 273 if (firstFailure == null) { 274 firstFailure = testName; 275 } 276 } 277 278 failCount = 0; 279 } 280 281 /** 282 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 283 * supplementary characters. This method does NOT fully take care 284 * of the regex syntax. 
285 */ 286 private static String toSupplementaries(String s) { 287 int length = s.length(); 288 StringBuffer sb = new StringBuffer(length * 2); 289 290 for (int i = 0; i < length; ) { 291 char c = s.charAt(i++); 292 if (c == '\\') { 293 sb.append(c); 294 if (i < length) { 295 c = s.charAt(i++); 296 sb.append(c); 297 if (c == 'u') { 298 // assume no syntax error 299 sb.append(s.charAt(i++)); 300 sb.append(s.charAt(i++)); 301 sb.append(s.charAt(i++)); 302 sb.append(s.charAt(i++)); 303 } 304 } 305 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 306 sb.append('\ud800').append((char)('\udc00'+c)); 307 } else { 308 sb.append(c); 309 } 310 } 311 return sb.toString(); 312 } 313 314 // Regular expression tests 315 316 // This is for bug 6178785 317 // Test if an expected NPE gets thrown when passing in a null argument 318 private static boolean check(Runnable test) { 319 try { 320 test.run(); 321 failCount++; 322 return false; 323 } catch (NullPointerException npe) { 324 return true; 325 } 326 } 327 328 private static void nullArgumentTest() { 329 check(() -> Pattern.compile(null)); 330 check(() -> Pattern.matches(null, null)); 331 check(() -> Pattern.matches("xyz", null)); 332 check(() -> Pattern.quote(null)); 333 check(() -> Pattern.compile("xyz").split(null)); 334 check(() -> Pattern.compile("xyz").matcher(null)); 335 336 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 337 m.matches(); 338 check(() -> m.appendTail((StringBuffer) null)); 339 check(() -> m.appendTail((StringBuilder)null)); 340 check(() -> m.replaceAll((String) null)); 341 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 342 check(() -> m.replaceFirst((String)null)); 343 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 344 check(() -> m.appendReplacement((StringBuffer)null, null)); 345 check(() -> m.appendReplacement((StringBuilder)null, null)); 346 check(() -> m.reset(null)); 347 check(() -> Matcher.quoteReplacement(null)); 348 //check(() -> 
m.usePattern(null)); 349 350 report("Null Argument"); 351 } 352 353 // This is for bug6635133 354 // Test if surrogate pair in Unicode escapes can be handled correctly. 355 private static void surrogatesInClassTest() throws Exception { 356 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 357 Matcher matcher = pattern.matcher("\ud834\udd22"); 358 if (!matcher.find()) 359 failCount++; 360 361 report("Surrogate pair in Unicode escape"); 362 } 363 364 // This is for bug6990617 365 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 366 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 367 // char is an octal digit. 368 private static void removeQEQuotingTest() throws Exception { 369 Pattern pattern = 370 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 371 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 372 if (!matcher.find()) 373 failCount++; 374 375 report("Remove Q/E Quoting"); 376 } 377 378 // This is for bug 4988891 379 // Test toMatchResult to see that it is a copy of the Matcher 380 // that is not affected by subsequent operations on the original 381 private static void toMatchResultTest() throws Exception { 382 Pattern pattern = Pattern.compile("squid"); 383 Matcher matcher = pattern.matcher( 384 "agiantsquidofdestinyasmallsquidoffate"); 385 matcher.find(); 386 int matcherStart1 = matcher.start(); 387 MatchResult mr = matcher.toMatchResult(); 388 if (mr == matcher) 389 failCount++; 390 int resultStart1 = mr.start(); 391 if (matcherStart1 != resultStart1) 392 failCount++; 393 matcher.find(); 394 int matcherStart2 = matcher.start(); 395 int resultStart2 = mr.start(); 396 if (matcherStart2 == resultStart2) 397 failCount++; 398 if (resultStart1 != resultStart2) 399 failCount++; 400 MatchResult mr2 = matcher.toMatchResult(); 401 if (mr == mr2) 402 failCount++; 403 if (mr2.start() != matcherStart2) 404 failCount++; 405 report("toMatchResult is a copy"); 406 } 407 408 private 
static void checkExpectedISE(Runnable test) { 409 try { 410 test.run(); 411 failCount++; 412 } catch (IllegalStateException x) { 413 } catch (IndexOutOfBoundsException xx) { 414 failCount++; 415 } 416 } 417 418 private static void checkExpectedIOOE(Runnable test) { 419 try { 420 test.run(); 421 failCount++; 422 } catch (IndexOutOfBoundsException x) {} 423 } 424 425 // This is for bug 8074678 426 // Test the result of toMatchResult throws ISE if no match is availble 427 private static void toMatchResultTest2() throws Exception { 428 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 429 matcher.find(); 430 MatchResult mr = matcher.toMatchResult(); 431 432 checkExpectedISE(() -> mr.start()); 433 checkExpectedISE(() -> mr.start(2)); 434 checkExpectedISE(() -> mr.end()); 435 checkExpectedISE(() -> mr.end(2)); 436 checkExpectedISE(() -> mr.group()); 437 checkExpectedISE(() -> mr.group(2)); 438 439 matcher = Pattern.compile("(match)").matcher("there is a match"); 440 matcher.find(); 441 MatchResult mr2 = matcher.toMatchResult(); 442 checkExpectedIOOE(() -> mr2.start(2)); 443 checkExpectedIOOE(() -> mr2.end(2)); 444 checkExpectedIOOE(() -> mr2.group(2)); 445 446 report("toMatchResult2 appropriate exceptions"); 447 } 448 449 // This is for bug 5013885 450 // Must test a slice to see if it reports hitEnd correctly 451 private static void hitEndTest() throws Exception { 452 // Basic test of Slice node 453 Pattern p = Pattern.compile("^squidattack"); 454 Matcher m = p.matcher("squack"); 455 m.find(); 456 if (m.hitEnd()) 457 failCount++; 458 m.reset("squid"); 459 m.find(); 460 if (!m.hitEnd()) 461 failCount++; 462 463 // Test Slice, SliceA and SliceU nodes 464 for (int i=0; i<3; i++) { 465 int flags = 0; 466 if (i==1) flags = Pattern.CASE_INSENSITIVE; 467 if (i==2) flags = Pattern.UNICODE_CASE; 468 p = Pattern.compile("^abc", flags); 469 m = p.matcher("ad"); 470 m.find(); 471 if (m.hitEnd()) 472 failCount++; 473 m.reset("ab"); 474 m.find(); 475 if 
(!m.hitEnd()) 476 failCount++; 477 } 478 479 // Test Boyer-Moore node 480 p = Pattern.compile("catattack"); 481 m = p.matcher("attack"); 482 m.find(); 483 if (!m.hitEnd()) 484 failCount++; 485 486 p = Pattern.compile("catattack"); 487 m = p.matcher("attackattackattackcatatta"); 488 m.find(); 489 if (!m.hitEnd()) 490 failCount++; 491 492 // 8184706: Matching u+0d at EOL against \R should hit-end 493 p = Pattern.compile("...\\R"); 494 m = p.matcher("cat" + (char)0x0a); 495 m.find(); 496 if (m.hitEnd()) 497 failCount++; 498 499 m = p.matcher("cat" + (char)0x0d); 500 m.find(); 501 if (!m.hitEnd()) 502 failCount++; 503 504 m = p.matcher("cat" + (char)0x0d + (char)0x0a); 505 m.find(); 506 if (m.hitEnd()) 507 failCount++; 508 509 report("hitEnd"); 510 } 511 512 // This is for bug 4997476 513 // It is weird code submitted by customer demonstrating a regression 514 private static void wordSearchTest() throws Exception { 515 String testString = new String("word1 word2 word3"); 516 Pattern p = Pattern.compile("\\b"); 517 Matcher m = p.matcher(testString); 518 int position = 0; 519 int start = 0; 520 while (m.find(position)) { 521 start = m.start(); 522 if (start == testString.length()) 523 break; 524 if (m.find(start+1)) { 525 position = m.start(); 526 } else { 527 position = testString.length(); 528 } 529 if (testString.substring(start, position).equals(" ")) 530 continue; 531 if (!testString.substring(start, position-1).startsWith("word")) 532 failCount++; 533 } 534 report("Customer word search"); 535 } 536 537 // This is for bug 4994840 538 private static void caretAtEndTest() throws Exception { 539 // Problem only occurs with multiline patterns 540 // containing a beginning-of-line caret "^" followed 541 // by an expression that also matches the empty string. 
542 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 543 Matcher matcher = pattern.matcher("\r"); 544 matcher.find(); 545 matcher.find(); 546 report("Caret at end"); 547 } 548 549 // This test is for 4979006 550 // Check to see if word boundary construct properly handles unicode 551 // non spacing marks 552 private static void unicodeWordBoundsTest() throws Exception { 553 String spaces = " "; 554 String wordChar = "a"; 555 String nsm = "\u030a"; 556 557 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 558 559 Pattern pattern = Pattern.compile("\\b"); 560 Matcher matcher = pattern.matcher(""); 561 // S=other B=word character N=non spacing mark .=word boundary 562 // SS.BB.SS 563 String input = spaces + wordChar + wordChar + spaces; 564 twoFindIndexes(input, matcher, 2, 4); 565 // SS.BBN.SS 566 input = spaces + wordChar +wordChar + nsm + spaces; 567 twoFindIndexes(input, matcher, 2, 5); 568 // SS.BN.SS 569 input = spaces + wordChar + nsm + spaces; 570 twoFindIndexes(input, matcher, 2, 4); 571 // SS.BNN.SS 572 input = spaces + wordChar + nsm + nsm + spaces; 573 twoFindIndexes(input, matcher, 2, 5); 574 // SSN.BB.SS 575 input = spaces + nsm + wordChar + wordChar + spaces; 576 twoFindIndexes(input, matcher, 3, 5); 577 // SS.BNB.SS 578 input = spaces + wordChar + nsm + wordChar + spaces; 579 twoFindIndexes(input, matcher, 2, 5); 580 // SSNNSS 581 input = spaces + nsm + nsm + spaces; 582 matcher.reset(input); 583 if (matcher.find()) 584 failCount++; 585 // SSN.BBN.SS 586 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 587 twoFindIndexes(input, matcher, 3, 6); 588 589 report("Unicode word boundary"); 590 } 591 592 private static void twoFindIndexes(String input, Matcher matcher, int a, 593 int b) throws Exception 594 { 595 matcher.reset(input); 596 matcher.find(); 597 if (matcher.start() != a) 598 failCount++; 599 matcher.find(); 600 if (matcher.start() != b) 601 failCount++; 602 } 603 604 // This test is for 6284152 605 static 
void check(String regex, String input, String[] expected) { 606 List<String> result = new ArrayList<String>(); 607 Pattern p = Pattern.compile(regex); 608 Matcher m = p.matcher(input); 609 while (m.find()) { 610 result.add(m.group()); 611 } 612 if (!Arrays.asList(expected).equals(result)) 613 failCount++; 614 } 615 616 private static void lookbehindTest() throws Exception { 617 //Positive 618 check("(?<=%.{0,5})foo\\d", 619 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 620 new String[]{"foo1", "foo2", "foo3"}); 621 622 //boundary at end of the lookbehind sub-regex should work consistently 623 //with the boundary just after the lookbehind sub-regex 624 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 625 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 626 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 627 check("(?<!abc \\b)foo", "abc foo", new String[0]); 628 629 //Negative 630 check("(?<!%.{0,5})foo\\d", 631 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 632 new String[] {"foo4", "foo5"}); 633 634 //Positive greedy 635 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 636 637 //Positive reluctant 638 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 639 640 //supplementary 641 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 642 new String[] {"fo\ud800\udc00o"}); 643 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 644 new String[] {"fo\ud800\udc00o"}); 645 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 646 new String[] {"fo\ud800\udc00o"}); 647 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 648 new String[] {"fo\ud800\udc00o"}); 649 report("Lookbehind"); 650 } 651 652 // This test is for 4938995 653 // Check to see if weak region boundaries are transparent to 654 // lookahead and lookbehind constructs 655 private static void boundsTest() throws Exception { 656 String fullMessage = "catdogcat"; 657 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 658 
Matcher matcher = pattern.matcher("catdogca"); 659 matcher.useTransparentBounds(true); 660 if (matcher.find()) 661 failCount++; 662 matcher.reset("atdogcat"); 663 if (matcher.find()) 664 failCount++; 665 matcher.reset(fullMessage); 666 if (!matcher.find()) 667 failCount++; 668 matcher.reset(fullMessage); 669 matcher.region(0,9); 670 if (!matcher.find()) 671 failCount++; 672 matcher.reset(fullMessage); 673 matcher.region(0,6); 674 if (!matcher.find()) 675 failCount++; 676 matcher.reset(fullMessage); 677 matcher.region(3,6); 678 if (!matcher.find()) 679 failCount++; 680 matcher.useTransparentBounds(false); 681 if (matcher.find()) 682 failCount++; 683 684 // Negative lookahead/lookbehind 685 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 686 matcher = pattern.matcher("dogcat"); 687 matcher.useTransparentBounds(true); 688 matcher.region(0,3); 689 if (matcher.find()) 690 failCount++; 691 matcher.reset("catdog"); 692 matcher.region(3,6); 693 if (matcher.find()) 694 failCount++; 695 matcher.useTransparentBounds(false); 696 matcher.reset("dogcat"); 697 matcher.region(0,3); 698 if (!matcher.find()) 699 failCount++; 700 matcher.reset("catdog"); 701 matcher.region(3,6); 702 if (!matcher.find()) 703 failCount++; 704 705 report("Region bounds transparency"); 706 } 707 708 // This test is for 4945394 709 private static void findFromTest() throws Exception { 710 String message = "This is 40 $0 message."; 711 Pattern pat = Pattern.compile("\\$0"); 712 Matcher match = pat.matcher(message); 713 if (!match.find()) 714 failCount++; 715 if (match.find()) 716 failCount++; 717 if (match.find()) 718 failCount++; 719 report("Check for alternating find"); 720 } 721 722 // This test is for 4872664 and 4892980 723 private static void negatedCharClassTest() throws Exception { 724 Pattern pattern = Pattern.compile("[^>]"); 725 Matcher matcher = pattern.matcher("\u203A"); 726 if (!matcher.matches()) 727 failCount++; 728 pattern = Pattern.compile("[^fr]"); 729 matcher = pattern.matcher("a"); 
730 if (!matcher.find()) 731 failCount++; 732 matcher.reset("\u203A"); 733 if (!matcher.find()) 734 failCount++; 735 String s = "for"; 736 String result[] = s.split("[^fr]"); 737 if (!result[0].equals("f")) 738 failCount++; 739 if (!result[1].equals("r")) 740 failCount++; 741 s = "f\u203Ar"; 742 result = s.split("[^fr]"); 743 if (!result[0].equals("f")) 744 failCount++; 745 if (!result[1].equals("r")) 746 failCount++; 747 748 // Test adding to bits, subtracting a node, then adding to bits again 749 pattern = Pattern.compile("[^f\u203Ar]"); 750 matcher = pattern.matcher("a"); 751 if (!matcher.find()) 752 failCount++; 753 matcher.reset("f"); 754 if (matcher.find()) 755 failCount++; 756 matcher.reset("\u203A"); 757 if (matcher.find()) 758 failCount++; 759 matcher.reset("r"); 760 if (matcher.find()) 761 failCount++; 762 matcher.reset("\u203B"); 763 if (!matcher.find()) 764 failCount++; 765 766 // Test subtracting a node, adding to bits, subtracting again 767 pattern = Pattern.compile("[^\u203Ar\u203B]"); 768 matcher = pattern.matcher("a"); 769 if (!matcher.find()) 770 failCount++; 771 matcher.reset("\u203A"); 772 if (matcher.find()) 773 failCount++; 774 matcher.reset("r"); 775 if (matcher.find()) 776 failCount++; 777 matcher.reset("\u203B"); 778 if (matcher.find()) 779 failCount++; 780 matcher.reset("\u203C"); 781 if (!matcher.find()) 782 failCount++; 783 784 report("Negated Character Class"); 785 } 786 787 // This test is for 4628291 788 private static void toStringTest() throws Exception { 789 Pattern pattern = Pattern.compile("b+"); 790 if (pattern.toString() != "b+") 791 failCount++; 792 Matcher matcher = pattern.matcher("aaabbbccc"); 793 String matcherString = matcher.toString(); // unspecified 794 matcher.find(); 795 matcherString = matcher.toString(); // unspecified 796 matcher.region(0,3); 797 matcherString = matcher.toString(); // unspecified 798 matcher.reset(); 799 matcherString = matcher.toString(); // unspecified 800 report("toString"); 801 } 802 803 // 
// This test is for 4808962
/**
 * Exercises Pattern.LITERAL and Pattern.quote(): each pattern is compiled
 * either with the LITERAL flag or from quoted text and checked with find()
 * against inputs that must or must not contain it. The first half uses BMP
 * text; the second half repeats the scenarios with ASCII letters mapped to
 * supplementary characters through toSupplementaries().
 */
private static void literalPatternTest() throws Exception {
    int flags = Pattern.LITERAL;

    Pattern pattern = Pattern.compile("abc\\t$^", flags);
    check(pattern, "abc\\t$^", true);

    pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
    check(pattern, "abc\\t$^", true);

    // With LITERAL the \Q and \E markers are ordinary text of the pattern.
    pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
    check(pattern, "\\Qa^$bcabc\\E", true);
    check(pattern, "a^$bcabc", false);

    pattern = Pattern.compile("\\\\Q\\\\E");
    check(pattern, "\\Q\\E", true);

    pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
    check(pattern, "abcefg\\Q\\Ehij", true);

    pattern = Pattern.compile("\\\\\\Q\\\\E");
    check(pattern, "\\\\\\\\", true);

    pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
    check(pattern, "\\Qa^$bcabc\\E", true);
    check(pattern, "a^$bcabc", false);

    pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
    check(pattern, "\\Qabc\\Edef", true);
    check(pattern, "abcdef", false);

    pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
    check(pattern, "abc\\Edef", true);
    check(pattern, "abcdef", false);

    pattern = Pattern.compile(Pattern.quote("\\E"));
    check(pattern, "\\E", true);

    pattern = Pattern.compile("((((abc.+?:)", flags);
    check(pattern, "((((abc.+?:)", true);

    // From here on further flags are OR-ed into LITERAL one at a time.
    flags |= Pattern.MULTILINE;

    pattern = Pattern.compile("^cat$", flags);
    check(pattern, "abc^cat$def", true);
    check(pattern, "cat", false);

    flags |= Pattern.CASE_INSENSITIVE;

    pattern = Pattern.compile("abcdef", flags);
    check(pattern, "ABCDEF", true);
    check(pattern, "AbCdEf", true);

    flags |= Pattern.DOTALL;

    pattern = Pattern.compile("a...b", flags);
    check(pattern, "A...b", true);
    check(pattern, "Axxxb", false);

    flags |= Pattern.CANON_EQ;

    // NOTE(review): the pattern compiled into 'p' is never matched; the two
    // checks below run against 'pattern', which still holds the "a...b"
    // pattern from the DOTALL step. Confirm whether 'p' was meant here and
    // what the expected results should then be.
    Pattern p = Pattern.compile("testa\u030a", flags);
    check(pattern, "testa\u030a", false);
    check(pattern, "test\u00e5", false);

    // Supplementary character test
    flags = Pattern.LITERAL;

    pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
    check(pattern, toSupplementaries("abc\\t$^"), true);

    pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
    check(pattern, toSupplementaries("abc\\t$^"), true);

    pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
    check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
    check(pattern, toSupplementaries("a^$bcabc"), false);

    pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
    check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
    check(pattern, toSupplementaries("a^$bcabc"), false);

    pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
    check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
    check(pattern, toSupplementaries("abcdef"), false);

    pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
    check(pattern, toSupplementaries("abc\\Edef"), true);
    check(pattern, toSupplementaries("abcdef"), false);

    pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
    check(pattern, toSupplementaries("((((abc.+?:)"), true);

    flags |= Pattern.MULTILINE;

    pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
    check(pattern, toSupplementaries("abc^cat$def"), true);
    check(pattern, toSupplementaries("cat"), false);

    flags |= Pattern.DOTALL;

    // note: this is case-sensitive.
    pattern = Pattern.compile(toSupplementaries("a...b"), flags);
    check(pattern, toSupplementaries("a...b"), true);
    check(pattern, toSupplementaries("axxxb"), false);

    flags |= Pattern.CANON_EQ;

    // NOTE(review): same situation as in the BMP half -- 'p' is assigned but
    // the checks use 'pattern' (the supplementary "a...b" pattern). Confirm.
    String t = toSupplementaries("test");
    p = Pattern.compile(t + "a\u030a", flags);
    check(pattern, t + "a\u030a", false);
    check(pattern, t + "\u00e5", false);

    report("Literal pattern");
}
(!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 963 failCount++; 964 965 // IAE should be thrown if backslash or '$' is the last character 966 // in replacement string 967 try { 968 "\uac00".replaceAll("\uac00", "$"); 969 failCount++; 970 } catch (IllegalArgumentException iie) { 971 } catch (Exception e) { 972 failCount++; 973 } 974 try { 975 "\uac00".replaceAll("\uac00", "\\"); 976 failCount++; 977 } catch (IllegalArgumentException iie) { 978 } catch (Exception e) { 979 failCount++; 980 } 981 report("Literal replacement"); 982 } 983 984 // This test is for 4757029 985 private static void regionTest() throws Exception { 986 Pattern pattern = Pattern.compile("abc"); 987 Matcher matcher = pattern.matcher("abcdefabc"); 988 989 matcher.region(0,9); 990 if (!matcher.find()) 991 failCount++; 992 if (!matcher.find()) 993 failCount++; 994 matcher.region(0,3); 995 if (!matcher.find()) 996 failCount++; 997 matcher.region(3,6); 998 if (matcher.find()) 999 failCount++; 1000 matcher.region(0,2); 1001 if (matcher.find()) 1002 failCount++; 1003 1004 expectRegionFail(matcher, 1, -1); 1005 expectRegionFail(matcher, -1, -1); 1006 expectRegionFail(matcher, -1, 1); 1007 expectRegionFail(matcher, 5, 3); 1008 expectRegionFail(matcher, 5, 12); 1009 expectRegionFail(matcher, 12, 12); 1010 1011 pattern = Pattern.compile("^abc$"); 1012 matcher = pattern.matcher("zzzabczzz"); 1013 matcher.region(0,9); 1014 if (matcher.find()) 1015 failCount++; 1016 matcher.region(3,6); 1017 if (!matcher.find()) 1018 failCount++; 1019 matcher.region(3,6); 1020 matcher.useAnchoringBounds(false); 1021 if (matcher.find()) 1022 failCount++; 1023 1024 // Supplementary character test 1025 pattern = Pattern.compile(toSupplementaries("abc")); 1026 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 1027 matcher.region(0,9*2); 1028 if (!matcher.find()) 1029 failCount++; 1030 if (!matcher.find()) 1031 failCount++; 1032 matcher.region(0,3*2); 1033 if (!matcher.find()) 1034 failCount++; 1035 
matcher.region(1,3*2); 1036 if (matcher.find()) 1037 failCount++; 1038 matcher.region(3*2,6*2); 1039 if (matcher.find()) 1040 failCount++; 1041 matcher.region(0,2*2); 1042 if (matcher.find()) 1043 failCount++; 1044 matcher.region(0,2*2+1); 1045 if (matcher.find()) 1046 failCount++; 1047 1048 expectRegionFail(matcher, 1*2, -1); 1049 expectRegionFail(matcher, -1, -1); 1050 expectRegionFail(matcher, -1, 1*2); 1051 expectRegionFail(matcher, 5*2, 3*2); 1052 expectRegionFail(matcher, 5*2, 12*2); 1053 expectRegionFail(matcher, 12*2, 12*2); 1054 1055 pattern = Pattern.compile(toSupplementaries("^abc$")); 1056 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1057 matcher.region(0,9*2); 1058 if (matcher.find()) 1059 failCount++; 1060 matcher.region(3*2,6*2); 1061 if (!matcher.find()) 1062 failCount++; 1063 matcher.region(3*2+1,6*2); 1064 if (matcher.find()) 1065 failCount++; 1066 matcher.region(3*2,6*2-1); 1067 if (matcher.find()) 1068 failCount++; 1069 matcher.region(3*2,6*2); 1070 matcher.useAnchoringBounds(false); 1071 if (matcher.find()) 1072 failCount++; 1073 report("Regions"); 1074 } 1075 1076 private static void expectRegionFail(Matcher matcher, int index1, 1077 int index2) 1078 { 1079 try { 1080 matcher.region(index1, index2); 1081 failCount++; 1082 } catch (IndexOutOfBoundsException ioobe) { 1083 // Correct result 1084 } catch (IllegalStateException ise) { 1085 // Correct result 1086 } 1087 } 1088 1089 // This test is for 4803197 1090 private static void escapedSegmentTest() throws Exception { 1091 1092 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1093 check(pattern, "dir1\\dir2", true); 1094 1095 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1096 check(pattern, "dir1\\dir2\\", true); 1097 1098 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1099 check(pattern, "dir1\\dir2\\", true); 1100 1101 // Supplementary character test 1102 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1103 check(pattern, 
toSupplementaries("dir1\\dir2"), true); 1104 1105 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1106 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1107 1108 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1109 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1110 1111 report("Escaped segment"); 1112 } 1113 1114 // This test is for 4792284 1115 private static void nonCaptureRepetitionTest() throws Exception { 1116 String input = "abcdefgh;"; 1117 1118 String[] patterns = new String[] { 1119 "(?:\\w{4})+;", 1120 "(?:\\w{8})*;", 1121 "(?:\\w{2}){2,4};", 1122 "(?:\\w{4}){2,};", // only matches the 1123 ".*?(?:\\w{5})+;", // specified minimum 1124 ".*?(?:\\w{9})*;", // number of reps - OK 1125 "(?:\\w{4})+?;", // lazy repetition - OK 1126 "(?:\\w{4})++;", // possessive repetition - OK 1127 "(?:\\w{2,}?)+;", // non-deterministic - OK 1128 "(\\w{4})+;", // capturing group - OK 1129 }; 1130 1131 for (int i = 0; i < patterns.length; i++) { 1132 // Check find() 1133 check(patterns[i], 0, input, input, true); 1134 // Check matches() 1135 Pattern p = Pattern.compile(patterns[i]); 1136 Matcher m = p.matcher(input); 1137 1138 if (m.matches()) { 1139 if (!m.group(0).equals(input)) 1140 failCount++; 1141 } else { 1142 failCount++; 1143 } 1144 } 1145 1146 report("Non capturing repetition"); 1147 } 1148 1149 // This test is for 6358731 1150 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1151 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1152 Matcher matcher = pattern.matcher("abcd"); 1153 if (!matcher.matches() || 1154 matcher.group(1) != null || 1155 !matcher.group(2).equals("abcd")) { 1156 failCount++; 1157 } 1158 report("Not captured GroupCurly"); 1159 } 1160 1161 // This test is for 4706545 1162 private static void javaCharClassTest() throws Exception { 1163 for (int i=0; i<1000; i++) { 1164 char c = (char)generator.nextInt(); 1165 check("{javaLowerCase}", c, 
Character.isLowerCase(c)); 1166 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1167 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1168 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1169 check("{javaDigit}", c, Character.isDigit(c)); 1170 check("{javaDefined}", c, Character.isDefined(c)); 1171 check("{javaLetter}", c, Character.isLetter(c)); 1172 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1173 check("{javaJavaIdentifierStart}", c, 1174 Character.isJavaIdentifierStart(c)); 1175 check("{javaJavaIdentifierPart}", c, 1176 Character.isJavaIdentifierPart(c)); 1177 check("{javaUnicodeIdentifierStart}", c, 1178 Character.isUnicodeIdentifierStart(c)); 1179 check("{javaUnicodeIdentifierPart}", c, 1180 Character.isUnicodeIdentifierPart(c)); 1181 check("{javaIdentifierIgnorable}", c, 1182 Character.isIdentifierIgnorable(c)); 1183 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1184 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1185 check("{javaISOControl}", c, Character.isISOControl(c)); 1186 check("{javaMirrored}", c, Character.isMirrored(c)); 1187 1188 } 1189 1190 // Supplementary character test 1191 for (int i=0; i<1000; i++) { 1192 int c = generator.nextInt(Character.MAX_CODE_POINT 1193 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1194 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1195 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1196 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1197 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1198 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1199 check("{javaDigit}", c, Character.isDigit(c)); 1200 check("{javaDefined}", c, Character.isDefined(c)); 1201 check("{javaLetter}", c, Character.isLetter(c)); 1202 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1203 check("{javaJavaIdentifierStart}", c, 1204 Character.isJavaIdentifierStart(c)); 1205 check("{javaJavaIdentifierPart}", c, 1206 Character.isJavaIdentifierPart(c)); 1207 
check("{javaUnicodeIdentifierStart}", c, 1208 Character.isUnicodeIdentifierStart(c)); 1209 check("{javaUnicodeIdentifierPart}", c, 1210 Character.isUnicodeIdentifierPart(c)); 1211 check("{javaIdentifierIgnorable}", c, 1212 Character.isIdentifierIgnorable(c)); 1213 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1214 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1215 check("{javaISOControl}", c, Character.isISOControl(c)); 1216 check("{javaMirrored}", c, Character.isMirrored(c)); 1217 } 1218 1219 report("Java character classes"); 1220 } 1221 1222 // This test is for 4523620 1223 /* 1224 private static void numOccurrencesTest() throws Exception { 1225 Pattern pattern = Pattern.compile("aaa"); 1226 1227 if (pattern.numOccurrences("aaaaaa", false) != 2) 1228 failCount++; 1229 if (pattern.numOccurrences("aaaaaa", true) != 4) 1230 failCount++; 1231 1232 pattern = Pattern.compile("^"); 1233 if (pattern.numOccurrences("aaaaaa", false) != 1) 1234 failCount++; 1235 if (pattern.numOccurrences("aaaaaa", true) != 1) 1236 failCount++; 1237 1238 report("Number of Occurrences"); 1239 } 1240 */ 1241 1242 // This test is for 4776374 1243 private static void caretBetweenTerminatorsTest() throws Exception { 1244 int flags1 = Pattern.DOTALL; 1245 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1246 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1247 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1248 1249 check("^....", flags1, "test\ntest", "test", true); 1250 check(".....^", flags1, "test\ntest", "test", false); 1251 check(".....^", flags1, "test\n", "test", false); 1252 check("....^", flags1, "test\r\n", "test", false); 1253 1254 check("^....", flags2, "test\ntest", "test", true); 1255 check("....^", flags2, "test\ntest", "test", false); 1256 check(".....^", flags2, "test\n", "test", false); 1257 check("....^", flags2, "test\r\n", "test", false); 1258 1259 check("^....", flags3, "test\ntest", "test", true); 1260 check(".....^", flags3, 
"test\ntest", "test\n", true); 1261 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1262 check(".....^", flags3, "test\n", "test", false); 1263 check(".....^", flags3, "test\r\n", "test", false); 1264 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1265 1266 check("^....", flags4, "test\ntest", "test", true); 1267 check(".....^", flags3, "test\ntest", "test\n", true); 1268 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1269 check(".....^", flags4, "test\n", "test\n", false); 1270 check(".....^", flags4, "test\r\n", "test\r", false); 1271 1272 // Supplementary character test 1273 String t = toSupplementaries("test"); 1274 check("^....", flags1, t+"\n"+t, t, true); 1275 check(".....^", flags1, t+"\n"+t, t, false); 1276 check(".....^", flags1, t+"\n", t, false); 1277 check("....^", flags1, t+"\r\n", t, false); 1278 1279 check("^....", flags2, t+"\n"+t, t, true); 1280 check("....^", flags2, t+"\n"+t, t, false); 1281 check(".....^", flags2, t+"\n", t, false); 1282 check("....^", flags2, t+"\r\n", t, false); 1283 1284 check("^....", flags3, t+"\n"+t, t, true); 1285 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1286 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1287 check(".....^", flags3, t+"\n", t, false); 1288 check(".....^", flags3, t+"\r\n", t, false); 1289 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1290 1291 check("^....", flags4, t+"\n"+t, t, true); 1292 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1293 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1294 check(".....^", flags4, t+"\n", t+"\n", false); 1295 check(".....^", flags4, t+"\r\n", t+"\r", false); 1296 1297 report("Caret between terminators"); 1298 } 1299 1300 // This test is for 4727935 1301 private static void dollarAtEndTest() throws Exception { 1302 int flags1 = Pattern.DOTALL; 1303 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1304 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1305 1306 check("....$", flags1, 
"test\n", "test", true); 1307 check("....$", flags1, "test\r\n", "test", true); 1308 check(".....$", flags1, "test\n", "test\n", true); 1309 check(".....$", flags1, "test\u0085", "test\u0085", true); 1310 check("....$", flags1, "test\u0085", "test", true); 1311 1312 check("....$", flags2, "test\n", "test", true); 1313 check(".....$", flags2, "test\n", "test\n", true); 1314 check(".....$", flags2, "test\u0085", "test\u0085", true); 1315 check("....$", flags2, "test\u0085", "est\u0085", true); 1316 1317 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1318 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1319 check("....$blah", flags3, "test\nblah", "!!!!", false); 1320 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1321 1322 // Supplementary character test 1323 String t = toSupplementaries("test"); 1324 String b = toSupplementaries("blah"); 1325 check("....$", flags1, t+"\n", t, true); 1326 check("....$", flags1, t+"\r\n", t, true); 1327 check(".....$", flags1, t+"\n", t+"\n", true); 1328 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1329 check("....$", flags1, t+"\u0085", t, true); 1330 1331 check("....$", flags2, t+"\n", t, true); 1332 check(".....$", flags2, t+"\n", t+"\n", true); 1333 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1334 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1335 1336 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1337 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1338 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1339 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1340 1341 report("Dollar at End"); 1342 } 1343 1344 // This test is for 4711773 1345 private static void multilineDollarTest() throws Exception { 1346 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1347 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1348 matcher.find(); 1349 if (matcher.start(0) != 9) 1350 failCount++; 1351 matcher.find(); 
1352 if (matcher.start(0) != 20) 1353 failCount++; 1354 1355 // Supplementary character test 1356 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1357 matcher.find(); 1358 if (matcher.start(0) != 9*2) 1359 failCount++; 1360 matcher.find(); 1361 if (matcher.start(0) != 20*2) 1362 failCount++; 1363 1364 report("Multiline Dollar"); 1365 } 1366 1367 private static void reluctantRepetitionTest() throws Exception { 1368 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1369 check(p, "1 word word word 2", true); 1370 check(p, "1 wor wo w 2", true); 1371 check(p, "1 word word 2", true); 1372 check(p, "1 word 2", true); 1373 check(p, "1 wo w w 2", true); 1374 check(p, "1 wo w 2", true); 1375 check(p, "1 wor w 2", true); 1376 1377 p = Pattern.compile("([a-z])+?c"); 1378 Matcher m = p.matcher("ababcdefdec"); 1379 check(m, "ababc"); 1380 1381 // Supplementary character test 1382 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1383 m = p.matcher(toSupplementaries("ababcdefdec")); 1384 check(m, toSupplementaries("ababc")); 1385 1386 report("Reluctant Repetition"); 1387 } 1388 1389 private static Pattern serializedPattern(Pattern p) throws Exception { 1390 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1391 ObjectOutputStream oos = new ObjectOutputStream(baos); 1392 oos.writeObject(p); 1393 oos.close(); 1394 try (ObjectInputStream ois = new ObjectInputStream( 1395 new ByteArrayInputStream(baos.toByteArray()))) { 1396 return (Pattern)ois.readObject(); 1397 } 1398 } 1399 1400 private static void serializeTest() throws Exception { 1401 String patternStr = "(b)"; 1402 String matchStr = "b"; 1403 Pattern pattern = Pattern.compile(patternStr); 1404 Pattern serializedPattern = serializedPattern(pattern); 1405 Matcher matcher = serializedPattern.matcher(matchStr); 1406 if (!matcher.matches()) 1407 failCount++; 1408 if (matcher.groupCount() != 1) 1409 failCount++; 1410 1411 pattern = Pattern.compile("a(?-i)b", 
Pattern.CASE_INSENSITIVE); 1412 serializedPattern = serializedPattern(pattern); 1413 if (!serializedPattern.matcher("Ab").matches()) 1414 failCount++; 1415 if (serializedPattern.matcher("AB").matches()) 1416 failCount++; 1417 1418 report("Serialization"); 1419 } 1420 1421 private static void gTest() { 1422 Pattern pattern = Pattern.compile("\\G\\w"); 1423 Matcher matcher = pattern.matcher("abc#x#x"); 1424 matcher.find(); 1425 matcher.find(); 1426 matcher.find(); 1427 if (matcher.find()) 1428 failCount++; 1429 1430 pattern = Pattern.compile("\\GA*"); 1431 matcher = pattern.matcher("1A2AA3"); 1432 matcher.find(); 1433 if (matcher.find()) 1434 failCount++; 1435 1436 pattern = Pattern.compile("\\GA*"); 1437 matcher = pattern.matcher("1A2AA3"); 1438 if (!matcher.find(1)) 1439 failCount++; 1440 matcher.find(); 1441 if (matcher.find()) 1442 failCount++; 1443 1444 report("\\G"); 1445 } 1446 1447 private static void zTest() { 1448 Pattern pattern = Pattern.compile("foo\\Z"); 1449 // Positives 1450 check(pattern, "foo\u0085", true); 1451 check(pattern, "foo\u2028", true); 1452 check(pattern, "foo\u2029", true); 1453 check(pattern, "foo\n", true); 1454 check(pattern, "foo\r", true); 1455 check(pattern, "foo\r\n", true); 1456 // Negatives 1457 check(pattern, "fooo", false); 1458 check(pattern, "foo\n\r", false); 1459 1460 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1461 // Positives 1462 check(pattern, "foo", true); 1463 check(pattern, "foo\n", true); 1464 // Negatives 1465 check(pattern, "foo\r", false); 1466 check(pattern, "foo\u0085", false); 1467 check(pattern, "foo\u2028", false); 1468 check(pattern, "foo\u2029", false); 1469 1470 report("\\Z"); 1471 } 1472 1473 private static void replaceFirstTest() { 1474 Pattern pattern = Pattern.compile("(ab)(c*)"); 1475 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1476 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1477 failCount++; 1478 1479 matcher.reset("zzzabccczzzabcczzzabccczzz"); 
1480 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1481 failCount++; 1482 1483 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1484 String result = matcher.replaceFirst("$1"); 1485 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1486 failCount++; 1487 1488 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1489 result = matcher.replaceFirst("$2"); 1490 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1491 failCount++; 1492 1493 pattern = Pattern.compile("a*"); 1494 matcher = pattern.matcher("aaaaaaaaaa"); 1495 if (!matcher.replaceFirst("test").equals("test")) 1496 failCount++; 1497 1498 pattern = Pattern.compile("a+"); 1499 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1500 if (!matcher.replaceFirst("test").equals("zzztest")) 1501 failCount++; 1502 1503 // Supplementary character test 1504 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1505 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1506 if (!matcher.replaceFirst(toSupplementaries("test")) 1507 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1508 failCount++; 1509 1510 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1511 if (!matcher.replaceFirst(toSupplementaries("test")). 
1512 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1513 failCount++; 1514 1515 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1516 result = matcher.replaceFirst("$1"); 1517 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1518 failCount++; 1519 1520 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1521 result = matcher.replaceFirst("$2"); 1522 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1523 failCount++; 1524 1525 pattern = Pattern.compile(toSupplementaries("a*")); 1526 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1527 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1528 failCount++; 1529 1530 pattern = Pattern.compile(toSupplementaries("a+")); 1531 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1532 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1533 failCount++; 1534 1535 report("Replace First"); 1536 } 1537 1538 private static void unixLinesTest() { 1539 Pattern pattern = Pattern.compile(".*"); 1540 Matcher matcher = pattern.matcher("aa\u2028blah"); 1541 matcher.find(); 1542 if (!matcher.group(0).equals("aa")) 1543 failCount++; 1544 1545 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1546 matcher = pattern.matcher("aa\u2028blah"); 1547 matcher.find(); 1548 if (!matcher.group(0).equals("aa\u2028blah")) 1549 failCount++; 1550 1551 pattern = Pattern.compile("[az]$", 1552 Pattern.MULTILINE | Pattern.UNIX_LINES); 1553 matcher = pattern.matcher("aa\u2028zz"); 1554 check(matcher, "a\u2028", false); 1555 1556 // Supplementary character test 1557 pattern = Pattern.compile(".*"); 1558 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1559 matcher.find(); 1560 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1561 failCount++; 1562 1563 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1564 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 
1565 matcher.find(); 1566 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1567 failCount++; 1568 1569 pattern = Pattern.compile(toSupplementaries("[az]$"), 1570 Pattern.MULTILINE | Pattern.UNIX_LINES); 1571 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1572 check(matcher, toSupplementaries("a\u2028"), false); 1573 1574 report("Unix Lines"); 1575 } 1576 1577 private static void commentsTest() { 1578 int flags = Pattern.COMMENTS; 1579 1580 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1581 Matcher matcher = pattern.matcher("aa#aa"); 1582 if (!matcher.matches()) 1583 failCount++; 1584 1585 pattern = Pattern.compile("aa # blah", flags); 1586 matcher = pattern.matcher("aa"); 1587 if (!matcher.matches()) 1588 failCount++; 1589 1590 pattern = Pattern.compile("aa blah", flags); 1591 matcher = pattern.matcher("aablah"); 1592 if (!matcher.matches()) 1593 failCount++; 1594 1595 pattern = Pattern.compile("aa # blah blech ", flags); 1596 matcher = pattern.matcher("aa"); 1597 if (!matcher.matches()) 1598 failCount++; 1599 1600 pattern = Pattern.compile("aa # blah\n ", flags); 1601 matcher = pattern.matcher("aa"); 1602 if (!matcher.matches()) 1603 failCount++; 1604 1605 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1606 matcher = pattern.matcher("aabc"); 1607 if (!matcher.matches()) 1608 failCount++; 1609 1610 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1611 matcher = pattern.matcher("aabc"); 1612 if (!matcher.matches()) 1613 failCount++; 1614 1615 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1616 matcher = pattern.matcher("aabc#blech"); 1617 if (!matcher.matches()) 1618 failCount++; 1619 1620 // Supplementary character test 1621 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1622 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1623 if (!matcher.matches()) 1624 failCount++; 1625 1626 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1627 matcher = 
pattern.matcher(toSupplementaries("aa")); 1628 if (!matcher.matches()) 1629 failCount++; 1630 1631 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1632 matcher = pattern.matcher(toSupplementaries("aablah")); 1633 if (!matcher.matches()) 1634 failCount++; 1635 1636 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1637 matcher = pattern.matcher(toSupplementaries("aa")); 1638 if (!matcher.matches()) 1639 failCount++; 1640 1641 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1642 matcher = pattern.matcher(toSupplementaries("aa")); 1643 if (!matcher.matches()) 1644 failCount++; 1645 1646 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1647 matcher = pattern.matcher(toSupplementaries("aabc")); 1648 if (!matcher.matches()) 1649 failCount++; 1650 1651 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1652 matcher = pattern.matcher(toSupplementaries("aabc")); 1653 if (!matcher.matches()) 1654 failCount++; 1655 1656 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1657 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1658 if (!matcher.matches()) 1659 failCount++; 1660 1661 report("Comments"); 1662 } 1663 1664 private static void caseFoldingTest() { // bug 4504687 1665 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1666 Pattern pattern = Pattern.compile("aa", flags); 1667 Matcher matcher = pattern.matcher("ab"); 1668 if (matcher.matches()) 1669 failCount++; 1670 1671 pattern = Pattern.compile("aA", flags); 1672 matcher = pattern.matcher("ab"); 1673 if (matcher.matches()) 1674 failCount++; 1675 1676 pattern = Pattern.compile("aa", flags); 1677 matcher = pattern.matcher("aB"); 1678 if (matcher.matches()) 1679 failCount++; 1680 matcher = pattern.matcher("Ab"); 1681 if (matcher.matches()) 1682 failCount++; 1683 1684 // ASCII "a" 1685 // Latin-1 Supplement "a" + grave 1686 // Cyrillic "a" 1687 String[] patterns 
= new String[] { 1688 //single 1689 "a", "\u00e0", "\u0430", 1690 //slice 1691 "ab", "\u00e0\u00e1", "\u0430\u0431", 1692 //class single 1693 "[a]", "[\u00e0]", "[\u0430]", 1694 //class range 1695 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1696 //back reference 1697 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1698 }; 1699 1700 String[] texts = new String[] { 1701 "A", "\u00c0", "\u0410", 1702 "AB", "\u00c0\u00c1", "\u0410\u0411", 1703 "A", "\u00c0", "\u0410", 1704 "B", "\u00c2", "\u0411", 1705 "aA", "\u00e0\u00c0", "\u0430\u0410" 1706 }; 1707 1708 boolean[] expected = new boolean[] { 1709 true, false, false, 1710 true, false, false, 1711 true, false, false, 1712 true, false, false, 1713 true, false, false 1714 }; 1715 1716 flags = Pattern.CASE_INSENSITIVE; 1717 for (int i = 0; i < patterns.length; i++) { 1718 pattern = Pattern.compile(patterns[i], flags); 1719 matcher = pattern.matcher(texts[i]); 1720 if (matcher.matches() != expected[i]) { 1721 System.out.println("<1> Failed at " + i); 1722 failCount++; 1723 } 1724 } 1725 1726 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1727 for (int i = 0; i < patterns.length; i++) { 1728 pattern = Pattern.compile(patterns[i], flags); 1729 matcher = pattern.matcher(texts[i]); 1730 if (!matcher.matches()) { 1731 System.out.println("<2> Failed at " + i); 1732 failCount++; 1733 } 1734 } 1735 // flag unicode_case alone should do nothing 1736 flags = Pattern.UNICODE_CASE; 1737 for (int i = 0; i < patterns.length; i++) { 1738 pattern = Pattern.compile(patterns[i], flags); 1739 matcher = pattern.matcher(texts[i]); 1740 if (matcher.matches()) { 1741 System.out.println("<3> Failed at " + i); 1742 failCount++; 1743 } 1744 } 1745 1746 // Special cases: i, I, u+0131 and u+0130 1747 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1748 pattern = Pattern.compile("[h-j]+", flags); 1749 if (!pattern.matcher("\u0131\u0130").matches()) 1750 failCount++; 1751 report("Case Folding"); 1752 } 1753 1754 private static void 
appendTest() { 1755 Pattern pattern = Pattern.compile("(ab)(cd)"); 1756 Matcher matcher = pattern.matcher("abcd"); 1757 String result = matcher.replaceAll("$2$1"); 1758 if (!result.equals("cdab")) 1759 failCount++; 1760 1761 String s1 = "Swap all: first = 123, second = 456"; 1762 String s2 = "Swap one: first = 123, second = 456"; 1763 String r = "$3$2$1"; 1764 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1765 matcher = pattern.matcher(s1); 1766 1767 result = matcher.replaceAll(r); 1768 if (!result.equals("Swap all: 123 = first, 456 = second")) 1769 failCount++; 1770 1771 matcher = pattern.matcher(s2); 1772 1773 if (matcher.find()) { 1774 StringBuffer sb = new StringBuffer(); 1775 matcher.appendReplacement(sb, r); 1776 matcher.appendTail(sb); 1777 result = sb.toString(); 1778 if (!result.equals("Swap one: 123 = first, second = 456")) 1779 failCount++; 1780 } 1781 1782 // Supplementary character test 1783 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1784 matcher = pattern.matcher(toSupplementaries("abcd")); 1785 result = matcher.replaceAll("$2$1"); 1786 if (!result.equals(toSupplementaries("cdab"))) 1787 failCount++; 1788 1789 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1790 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1791 r = toSupplementaries("$3$2$1"); 1792 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1793 matcher = pattern.matcher(s1); 1794 1795 result = matcher.replaceAll(r); 1796 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1797 failCount++; 1798 1799 matcher = pattern.matcher(s2); 1800 1801 if (matcher.find()) { 1802 StringBuffer sb = new StringBuffer(); 1803 matcher.appendReplacement(sb, r); 1804 matcher.appendTail(sb); 1805 result = sb.toString(); 1806 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1807 failCount++; 1808 } 1809 report("Append"); 1810 } 1811 1812 private static void splitTest() { 1813 Pattern 
pattern = Pattern.compile(":"); 1814 String[] result = pattern.split("foo:and:boo", 2); 1815 if (!result[0].equals("foo")) 1816 failCount++; 1817 if (!result[1].equals("and:boo")) 1818 failCount++; 1819 // Supplementary character test 1820 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1821 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1822 if (!result[0].equals(toSupplementaries("foo"))) 1823 failCount++; 1824 if (!result[1].equals(toSupplementaries("andXboo"))) 1825 failCount++; 1826 1827 CharBuffer cb = CharBuffer.allocate(100); 1828 cb.put("foo:and:boo"); 1829 cb.flip(); 1830 result = pattern.split(cb); 1831 if (!result[0].equals("foo")) 1832 failCount++; 1833 if (!result[1].equals("and")) 1834 failCount++; 1835 if (!result[2].equals("boo")) 1836 failCount++; 1837 1838 // Supplementary character test 1839 CharBuffer cbs = CharBuffer.allocate(100); 1840 cbs.put(toSupplementaries("fooXandXboo")); 1841 cbs.flip(); 1842 result = patternX.split(cbs); 1843 if (!result[0].equals(toSupplementaries("foo"))) 1844 failCount++; 1845 if (!result[1].equals(toSupplementaries("and"))) 1846 failCount++; 1847 if (!result[2].equals(toSupplementaries("boo"))) 1848 failCount++; 1849 1850 String source = "0123456789"; 1851 for (int limit=-2; limit<3; limit++) { 1852 for (int x=0; x<10; x++) { 1853 result = source.split(Integer.toString(x), limit); 1854 int expectedLength = limit < 1 ? 
2 : limit; 1855 1856 if ((limit == 0) && (x == 9)) { 1857 // expected dropping of "" 1858 if (result.length != 1) 1859 failCount++; 1860 if (!result[0].equals("012345678")) { 1861 failCount++; 1862 } 1863 } else { 1864 if (result.length != expectedLength) { 1865 failCount++; 1866 } 1867 if (!result[0].equals(source.substring(0,x))) { 1868 if (limit != 1) { 1869 failCount++; 1870 } else { 1871 if (!result[0].equals(source.substring(0,10))) { 1872 failCount++; 1873 } 1874 } 1875 } 1876 if (expectedLength > 1) { // Check segment 2 1877 if (!result[1].equals(source.substring(x+1,10))) 1878 failCount++; 1879 } 1880 } 1881 } 1882 } 1883 // Check the case for no match found 1884 for (int limit=-2; limit<3; limit++) { 1885 result = source.split("e", limit); 1886 if (result.length != 1) 1887 failCount++; 1888 if (!result[0].equals(source)) 1889 failCount++; 1890 } 1891 // Check the case for limit == 0, source = ""; 1892 // split() now returns 0-length for empty source "" see #6559590 1893 source = ""; 1894 result = source.split("e", 0); 1895 if (result.length != 1) 1896 failCount++; 1897 if (!result[0].equals(source)) 1898 failCount++; 1899 1900 // Check both split() and splitAsStraem(), especially for zero-lenth 1901 // input and zero-lenth match cases 1902 String[][] input = new String[][] { 1903 { " ", "Abc Efg Hij" }, // normal non-zero-match 1904 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1905 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1906 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1907 { "(?=\\p{Lu})", "AbcEfg" }, 1908 { "(?=\\p{Lu})", "Abc" }, 1909 { " ", "" }, // zero-length input 1910 { ".*", "" }, 1911 1912 // some tests from PatternStreamTest.java 1913 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1914 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1915 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1916 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1917 { "\u56da", 
"1\u56da23\u56da456\u56da7890" }, 1918 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1919 { "\u56da", "" }, 1920 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1921 { "o", "boo:and:foo" }, 1922 { "o", "booooo:and:fooooo" }, 1923 { "o", "fooooo:" }, 1924 }; 1925 1926 String[][] expected = new String[][] { 1927 { "Abc", "Efg", "Hij" }, 1928 { "", "Abc", "Efg", "Hij" }, 1929 { "Abc", "", "Efg", "Hij" }, 1930 { "Abc", "Efg", "Hij" }, 1931 { "Abc", "Efg" }, 1932 { "Abc" }, 1933 { "" }, 1934 { "" }, 1935 1936 { "awgqwefg1fefw", "vssv1vvv1" }, 1937 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1938 { "awgqwefg", "fefw4vssv", "vvv" }, 1939 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1940 { "1", "23", "456", "7890" }, 1941 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1942 { "" }, 1943 { "This", "is", "testing", "", "with", "different", "separators" }, 1944 { "b", "", ":and:f" }, 1945 { "b", "", "", "", "", ":and:f" }, 1946 { "f", "", "", "", "", ":" }, 1947 }; 1948 for (int i = 0; i < input.length; i++) { 1949 pattern = Pattern.compile(input[i][0]); 1950 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1951 failCount++; 1952 } 1953 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1954 // array for zero-length input for now 1955 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1956 expected[i])) { 1957 failCount++; 1958 } 1959 } 1960 report("Split"); 1961 } 1962 1963 private static void negationTest() { 1964 Pattern pattern = Pattern.compile("[\\[@^]+"); 1965 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1966 if (!matcher.find()) 1967 failCount++; 1968 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1969 failCount++; 1970 pattern = Pattern.compile("[@\\[^]+"); 1971 matcher = pattern.matcher("@@@@[[[[^^^^"); 1972 if (!matcher.find()) 1973 failCount++; 1974 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1975 
failCount++; 1976 pattern = Pattern.compile("[@\\[^@]+"); 1977 matcher = pattern.matcher("@@@@[[[[^^^^"); 1978 if (!matcher.find()) 1979 failCount++; 1980 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1981 failCount++; 1982 1983 pattern = Pattern.compile("\\)"); 1984 matcher = pattern.matcher("xxx)xxx"); 1985 if (!matcher.find()) 1986 failCount++; 1987 1988 report("Negation"); 1989 } 1990 1991 private static void ampersandTest() { 1992 Pattern pattern = Pattern.compile("[&@]+"); 1993 check(pattern, "@@@@&&&&", true); 1994 1995 pattern = Pattern.compile("[@&]+"); 1996 check(pattern, "@@@@&&&&", true); 1997 1998 pattern = Pattern.compile("[@\\&]+"); 1999 check(pattern, "@@@@&&&&", true); 2000 2001 report("Ampersand"); 2002 } 2003 2004 private static void octalTest() throws Exception { 2005 Pattern pattern = Pattern.compile("\\u0007"); 2006 Matcher matcher = pattern.matcher("\u0007"); 2007 if (!matcher.matches()) 2008 failCount++; 2009 pattern = Pattern.compile("\\07"); 2010 matcher = pattern.matcher("\u0007"); 2011 if (!matcher.matches()) 2012 failCount++; 2013 pattern = Pattern.compile("\\007"); 2014 matcher = pattern.matcher("\u0007"); 2015 if (!matcher.matches()) 2016 failCount++; 2017 pattern = Pattern.compile("\\0007"); 2018 matcher = pattern.matcher("\u0007"); 2019 if (!matcher.matches()) 2020 failCount++; 2021 pattern = Pattern.compile("\\040"); 2022 matcher = pattern.matcher("\u0020"); 2023 if (!matcher.matches()) 2024 failCount++; 2025 pattern = Pattern.compile("\\0403"); 2026 matcher = pattern.matcher("\u00203"); 2027 if (!matcher.matches()) 2028 failCount++; 2029 pattern = Pattern.compile("\\0103"); 2030 matcher = pattern.matcher("\u0043"); 2031 if (!matcher.matches()) 2032 failCount++; 2033 2034 report("Octal"); 2035 } 2036 2037 private static void longPatternTest() throws Exception { 2038 try { 2039 Pattern pattern = Pattern.compile( 2040 "a 32-character-long pattern xxxx"); 2041 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 2042 
pattern = Pattern.compile("a thirty four character long regex"); 2043 StringBuffer patternToBe = new StringBuffer(101); 2044 for (int i=0; i<100; i++) 2045 patternToBe.append((char)(97 + i%26)); 2046 pattern = Pattern.compile(patternToBe.toString()); 2047 } catch (PatternSyntaxException e) { 2048 failCount++; 2049 } 2050 2051 // Supplementary character test 2052 try { 2053 Pattern pattern = Pattern.compile( 2054 toSupplementaries("a 32-character-long pattern xxxx")); 2055 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 2056 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 2057 StringBuffer patternToBe = new StringBuffer(101*2); 2058 for (int i=0; i<100; i++) 2059 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 2060 + 97 + i%26)); 2061 pattern = Pattern.compile(patternToBe.toString()); 2062 } catch (PatternSyntaxException e) { 2063 failCount++; 2064 } 2065 report("LongPattern"); 2066 } 2067 2068 private static void group0Test() throws Exception { 2069 Pattern pattern = Pattern.compile("(tes)ting"); 2070 Matcher matcher = pattern.matcher("testing"); 2071 check(matcher, "testing"); 2072 2073 matcher.reset("testing"); 2074 if (matcher.lookingAt()) { 2075 if (!matcher.group(0).equals("testing")) 2076 failCount++; 2077 } else { 2078 failCount++; 2079 } 2080 2081 matcher.reset("testing"); 2082 if (matcher.matches()) { 2083 if (!matcher.group(0).equals("testing")) 2084 failCount++; 2085 } else { 2086 failCount++; 2087 } 2088 2089 pattern = Pattern.compile("(tes)ting"); 2090 matcher = pattern.matcher("testing"); 2091 if (matcher.lookingAt()) { 2092 if (!matcher.group(0).equals("testing")) 2093 failCount++; 2094 } else { 2095 failCount++; 2096 } 2097 2098 pattern = Pattern.compile("^(tes)ting"); 2099 matcher = pattern.matcher("testing"); 2100 if (matcher.matches()) { 2101 if (!matcher.group(0).equals("testing")) 2102 failCount++; 2103 } else { 2104 failCount++; 2105 } 2106 2107 
// Supplementary character test 2108 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2109 matcher = pattern.matcher(toSupplementaries("testing")); 2110 check(matcher, toSupplementaries("testing")); 2111 2112 matcher.reset(toSupplementaries("testing")); 2113 if (matcher.lookingAt()) { 2114 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2115 failCount++; 2116 } else { 2117 failCount++; 2118 } 2119 2120 matcher.reset(toSupplementaries("testing")); 2121 if (matcher.matches()) { 2122 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2123 failCount++; 2124 } else { 2125 failCount++; 2126 } 2127 2128 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2129 matcher = pattern.matcher(toSupplementaries("testing")); 2130 if (matcher.lookingAt()) { 2131 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2132 failCount++; 2133 } else { 2134 failCount++; 2135 } 2136 2137 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2138 matcher = pattern.matcher(toSupplementaries("testing")); 2139 if (matcher.matches()) { 2140 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2141 failCount++; 2142 } else { 2143 failCount++; 2144 } 2145 2146 report("Group0"); 2147 } 2148 2149 private static void findIntTest() throws Exception { 2150 Pattern p = Pattern.compile("blah"); 2151 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2152 boolean result = m.find(2); 2153 if (!result) 2154 failCount++; 2155 2156 p = Pattern.compile("$"); 2157 m = p.matcher("1234567890"); 2158 result = m.find(10); 2159 if (!result) 2160 failCount++; 2161 try { 2162 result = m.find(11); 2163 failCount++; 2164 } catch (IndexOutOfBoundsException e) { 2165 // correct result 2166 } 2167 2168 // Supplementary character test 2169 p = Pattern.compile(toSupplementaries("blah")); 2170 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2171 result = m.find(2); 2172 if (!result) 2173 failCount++; 2174 2175 report("FindInt"); 2176 } 2177 2178 private static void 
emptyPatternTest() throws Exception { 2179 Pattern p = Pattern.compile(""); 2180 Matcher m = p.matcher("foo"); 2181 2182 // Should find empty pattern at beginning of input 2183 boolean result = m.find(); 2184 if (result != true) 2185 failCount++; 2186 if (m.start() != 0) 2187 failCount++; 2188 2189 // Should not match entire input if input is not empty 2190 m.reset(); 2191 result = m.matches(); 2192 if (result == true) 2193 failCount++; 2194 2195 try { 2196 m.start(0); 2197 failCount++; 2198 } catch (IllegalStateException e) { 2199 // Correct result 2200 } 2201 2202 // Should match entire input if input is empty 2203 m.reset(""); 2204 result = m.matches(); 2205 if (result != true) 2206 failCount++; 2207 2208 result = Pattern.matches("", ""); 2209 if (result != true) 2210 failCount++; 2211 2212 result = Pattern.matches("", "foo"); 2213 if (result == true) 2214 failCount++; 2215 report("EmptyPattern"); 2216 } 2217 2218 private static void charClassTest() throws Exception { 2219 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2220 check(pattern, "blahb]blech", true); 2221 2222 pattern = Pattern.compile("[abc[def]]"); 2223 check(pattern, "b", true); 2224 2225 // Supplementary character tests 2226 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2227 check(pattern, toSupplementaries("blahb]blech"), true); 2228 2229 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2230 check(pattern, toSupplementaries("b"), true); 2231 2232 try { 2233 // u00ff when UNICODE_CASE 2234 pattern = Pattern.compile("[ab\u00ffcd]", 2235 Pattern.CASE_INSENSITIVE| 2236 Pattern.UNICODE_CASE); 2237 check(pattern, "ab\u00ffcd", true); 2238 check(pattern, "Ab\u0178Cd", true); 2239 2240 // u00b5 when UNICODE_CASE 2241 pattern = Pattern.compile("[ab\u00b5cd]", 2242 Pattern.CASE_INSENSITIVE| 2243 Pattern.UNICODE_CASE); 2244 check(pattern, "ab\u00b5cd", true); 2245 check(pattern, "Ab\u039cCd", true); 2246 } catch (Exception e) { failCount++; } 2247 2248 /* Special cases 2249 
(1)LatinSmallLetterLongS u+017f 2250 (2)LatinSmallLetterDotlessI u+0131 2251 (3)LatineCapitalLetterIWithDotAbove u+0130 2252 (4)KelvinSign u+212a 2253 (5)AngstromSign u+212b 2254 */ 2255 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2256 pattern = Pattern.compile("[sik\u00c5]+", flags); 2257 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2258 failCount++; 2259 2260 report("CharClass"); 2261 } 2262 2263 private static void caretTest() throws Exception { 2264 Pattern pattern = Pattern.compile("\\w*"); 2265 Matcher matcher = pattern.matcher("a#bc#def##g"); 2266 check(matcher, "a"); 2267 check(matcher, ""); 2268 check(matcher, "bc"); 2269 check(matcher, ""); 2270 check(matcher, "def"); 2271 check(matcher, ""); 2272 check(matcher, ""); 2273 check(matcher, "g"); 2274 check(matcher, ""); 2275 if (matcher.find()) 2276 failCount++; 2277 2278 pattern = Pattern.compile("^\\w*"); 2279 matcher = pattern.matcher("a#bc#def##g"); 2280 check(matcher, "a"); 2281 if (matcher.find()) 2282 failCount++; 2283 2284 pattern = Pattern.compile("\\w"); 2285 matcher = pattern.matcher("abc##x"); 2286 check(matcher, "a"); 2287 check(matcher, "b"); 2288 check(matcher, "c"); 2289 check(matcher, "x"); 2290 if (matcher.find()) 2291 failCount++; 2292 2293 pattern = Pattern.compile("^\\w"); 2294 matcher = pattern.matcher("abc##x"); 2295 check(matcher, "a"); 2296 if (matcher.find()) 2297 failCount++; 2298 2299 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2300 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2301 check(matcher, "abc"); 2302 if (matcher.find()) 2303 failCount++; 2304 2305 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2306 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2307 check(matcher, "abc"); 2308 check(matcher, "jkl"); 2309 if (matcher.find()) 2310 failCount++; 2311 2312 pattern = Pattern.compile("^", Pattern.MULTILINE); 2313 matcher = pattern.matcher("this is some text"); 2314 String result = matcher.replaceAll("X"); 2315 if 
(!result.equals("Xthis is some text")) 2316 failCount++; 2317 2318 pattern = Pattern.compile("^"); 2319 matcher = pattern.matcher("this is some text"); 2320 result = matcher.replaceAll("X"); 2321 if (!result.equals("Xthis is some text")) 2322 failCount++; 2323 2324 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2325 matcher = pattern.matcher("this is some text\n"); 2326 result = matcher.replaceAll("X"); 2327 if (!result.equals("Xthis is some text\n")) 2328 failCount++; 2329 2330 report("Caret"); 2331 } 2332 2333 private static void groupCaptureTest() throws Exception { 2334 // Independent group 2335 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2336 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2337 matcher.find(); 2338 try { 2339 String blah = matcher.group(1); 2340 failCount++; 2341 } catch (IndexOutOfBoundsException ioobe) { 2342 // Good result 2343 } 2344 // Pure group 2345 pattern = Pattern.compile("x+(?:y+)z+"); 2346 matcher = pattern.matcher("xxxyyyzzz"); 2347 matcher.find(); 2348 try { 2349 String blah = matcher.group(1); 2350 failCount++; 2351 } catch (IndexOutOfBoundsException ioobe) { 2352 // Good result 2353 } 2354 2355 // Supplementary character tests 2356 // Independent group 2357 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2358 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2359 matcher.find(); 2360 try { 2361 String blah = matcher.group(1); 2362 failCount++; 2363 } catch (IndexOutOfBoundsException ioobe) { 2364 // Good result 2365 } 2366 // Pure group 2367 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2368 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2369 matcher.find(); 2370 try { 2371 String blah = matcher.group(1); 2372 failCount++; 2373 } catch (IndexOutOfBoundsException ioobe) { 2374 // Good result 2375 } 2376 2377 report("GroupCapture"); 2378 } 2379 2380 private static void backRefTest() throws Exception { 2381 Pattern pattern = Pattern.compile("(a*)bc\\1"); 
2382 check(pattern, "zzzaabcazzz", true); 2383 2384 pattern = Pattern.compile("(a*)bc\\1"); 2385 check(pattern, "zzzaabcaazzz", true); 2386 2387 pattern = Pattern.compile("(abc)(def)\\1"); 2388 check(pattern, "abcdefabc", true); 2389 2390 pattern = Pattern.compile("(abc)(def)\\3"); 2391 check(pattern, "abcdefabc", false); 2392 2393 try { 2394 for (int i = 1; i < 10; i++) { 2395 // Make sure backref 1-9 are always accepted 2396 pattern = Pattern.compile("abcdef\\" + i); 2397 // and fail to match if the target group does not exit 2398 check(pattern, "abcdef", false); 2399 } 2400 } catch(PatternSyntaxException e) { 2401 failCount++; 2402 } 2403 2404 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2405 check(pattern, "abcdefghija", false); 2406 check(pattern, "abcdefghija1", true); 2407 2408 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2409 check(pattern, "abcdefghijkk", true); 2410 2411 pattern = Pattern.compile("(a)bcdefghij\\11"); 2412 check(pattern, "abcdefghija1", true); 2413 2414 // Supplementary character tests 2415 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2416 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2417 2418 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2419 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2420 2421 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2422 check(pattern, toSupplementaries("abcdefabc"), true); 2423 2424 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2425 check(pattern, toSupplementaries("abcdefabc"), false); 2426 2427 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2428 check(pattern, toSupplementaries("abcdefghija"), false); 2429 check(pattern, toSupplementaries("abcdefghija1"), true); 2430 2431 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2432 check(pattern, toSupplementaries("abcdefghijkk"), true); 2433 2434 report("BackRef"); 
2435 } 2436 2437 /** 2438 * Unicode Technical Report #18, section 2.6 End of Line 2439 * There is no empty line to be matched in the sequence \u000D\u000A 2440 * but there is an empty line in the sequence \u000A\u000D. 2441 */ 2442 private static void anchorTest() throws Exception { 2443 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2444 Matcher m = p.matcher("blah1\r\nblah2"); 2445 m.find(); 2446 m.find(); 2447 if (!m.group().equals("blah2")) 2448 failCount++; 2449 2450 m.reset("blah1\n\rblah2"); 2451 m.find(); 2452 m.find(); 2453 m.find(); 2454 if (!m.group().equals("blah2")) 2455 failCount++; 2456 2457 // Test behavior of $ with \r\n at end of input 2458 p = Pattern.compile(".+$"); 2459 m = p.matcher("blah1\r\n"); 2460 if (!m.find()) 2461 failCount++; 2462 if (!m.group().equals("blah1")) 2463 failCount++; 2464 if (m.find()) 2465 failCount++; 2466 2467 // Test behavior of $ with \r\n at end of input in multiline 2468 p = Pattern.compile(".+$", Pattern.MULTILINE); 2469 m = p.matcher("blah1\r\n"); 2470 if (!m.find()) 2471 failCount++; 2472 if (m.find()) 2473 failCount++; 2474 2475 // Test for $ recognition of \u0085 for bug 4527731 2476 p = Pattern.compile(".+$", Pattern.MULTILINE); 2477 m = p.matcher("blah1\u0085"); 2478 if (!m.find()) 2479 failCount++; 2480 2481 // Supplementary character test 2482 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2483 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2484 m.find(); 2485 m.find(); 2486 if (!m.group().equals(toSupplementaries("blah2"))) 2487 failCount++; 2488 2489 m.reset(toSupplementaries("blah1\n\rblah2")); 2490 m.find(); 2491 m.find(); 2492 m.find(); 2493 if (!m.group().equals(toSupplementaries("blah2"))) 2494 failCount++; 2495 2496 // Test behavior of $ with \r\n at end of input 2497 p = Pattern.compile(".+$"); 2498 m = p.matcher(toSupplementaries("blah1\r\n")); 2499 if (!m.find()) 2500 failCount++; 2501 if (!m.group().equals(toSupplementaries("blah1"))) 2502 failCount++; 2503 if (m.find()) 2504 
failCount++; 2505 2506 // Test behavior of $ with \r\n at end of input in multiline 2507 p = Pattern.compile(".+$", Pattern.MULTILINE); 2508 m = p.matcher(toSupplementaries("blah1\r\n")); 2509 if (!m.find()) 2510 failCount++; 2511 if (m.find()) 2512 failCount++; 2513 2514 // Test for $ recognition of \u0085 for bug 4527731 2515 p = Pattern.compile(".+$", Pattern.MULTILINE); 2516 m = p.matcher(toSupplementaries("blah1\u0085")); 2517 if (!m.find()) 2518 failCount++; 2519 2520 report("Anchors"); 2521 } 2522 2523 /** 2524 * A basic sanity test of Matcher.lookingAt(). 2525 */ 2526 private static void lookingAtTest() throws Exception { 2527 Pattern p = Pattern.compile("(ab)(c*)"); 2528 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2529 2530 if (!m.lookingAt()) 2531 failCount++; 2532 2533 if (!m.group().equals(m.group(0))) 2534 failCount++; 2535 2536 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2537 if (m.lookingAt()) 2538 failCount++; 2539 2540 // Supplementary character test 2541 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2542 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2543 2544 if (!m.lookingAt()) 2545 failCount++; 2546 2547 if (!m.group().equals(m.group(0))) 2548 failCount++; 2549 2550 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2551 if (m.lookingAt()) 2552 failCount++; 2553 2554 report("Looking At"); 2555 } 2556 2557 /** 2558 * A basic sanity test of Matcher.matches(). 
2559 */ 2560 private static void matchesTest() throws Exception { 2561 // matches() 2562 Pattern p = Pattern.compile("ulb(c*)"); 2563 Matcher m = p.matcher("ulbcccccc"); 2564 if (!m.matches()) 2565 failCount++; 2566 2567 // find() but not matches() 2568 m.reset("zzzulbcccccc"); 2569 if (m.matches()) 2570 failCount++; 2571 2572 // lookingAt() but not matches() 2573 m.reset("ulbccccccdef"); 2574 if (m.matches()) 2575 failCount++; 2576 2577 // matches() 2578 p = Pattern.compile("a|ad"); 2579 m = p.matcher("ad"); 2580 if (!m.matches()) 2581 failCount++; 2582 2583 // Supplementary character test 2584 // matches() 2585 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2586 m = p.matcher(toSupplementaries("ulbcccccc")); 2587 if (!m.matches()) 2588 failCount++; 2589 2590 // find() but not matches() 2591 m.reset(toSupplementaries("zzzulbcccccc")); 2592 if (m.matches()) 2593 failCount++; 2594 2595 // lookingAt() but not matches() 2596 m.reset(toSupplementaries("ulbccccccdef")); 2597 if (m.matches()) 2598 failCount++; 2599 2600 // matches() 2601 p = Pattern.compile(toSupplementaries("a|ad")); 2602 m = p.matcher(toSupplementaries("ad")); 2603 if (!m.matches()) 2604 failCount++; 2605 2606 report("Matches"); 2607 } 2608 2609 /** 2610 * A basic sanity test of Pattern.matches(). 
2611 */ 2612 private static void patternMatchesTest() throws Exception { 2613 // matches() 2614 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2615 toSupplementaries("ulbcccccc"))) 2616 failCount++; 2617 2618 // find() but not matches() 2619 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2620 toSupplementaries("zzzulbcccccc"))) 2621 failCount++; 2622 2623 // lookingAt() but not matches() 2624 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2625 toSupplementaries("ulbccccccdef"))) 2626 failCount++; 2627 2628 // Supplementary character test 2629 // matches() 2630 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2631 toSupplementaries("ulbcccccc"))) 2632 failCount++; 2633 2634 // find() but not matches() 2635 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2636 toSupplementaries("zzzulbcccccc"))) 2637 failCount++; 2638 2639 // lookingAt() but not matches() 2640 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2641 toSupplementaries("ulbccccccdef"))) 2642 failCount++; 2643 2644 report("Pattern Matches"); 2645 } 2646 2647 /** 2648 * Canonical equivalence testing. Tests the ability of the engine 2649 * to match sequences that are not explicitly specified in the 2650 * pattern when they are considered equivalent by the Unicode Standard. 
2651 */ 2652 private static void ceTest() throws Exception { 2653 // Decomposed char outside char classes 2654 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2655 Matcher m = p.matcher("test\u00e5"); 2656 if (!m.matches()) 2657 failCount++; 2658 2659 m.reset("testa\u030a"); 2660 if (!m.matches()) 2661 failCount++; 2662 2663 // Composed char outside char classes 2664 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2665 m = p.matcher("test\u00e5"); 2666 if (!m.matches()) 2667 failCount++; 2668 2669 m.reset("testa\u030a"); 2670 if (!m.find()) 2671 failCount++; 2672 2673 // Decomposed char inside a char class 2674 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2675 m = p.matcher("test\u00e5"); 2676 if (!m.find()) 2677 failCount++; 2678 2679 m.reset("testa\u030a"); 2680 if (!m.find()) 2681 failCount++; 2682 2683 // Composed char inside a char class 2684 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2685 m = p.matcher("test\u00e5"); 2686 if (!m.find()) 2687 failCount++; 2688 2689 m.reset("testa\u0300"); 2690 if (!m.find()) 2691 failCount++; 2692 2693 m.reset("testa\u030a"); 2694 if (!m.find()) 2695 failCount++; 2696 2697 // Marks that cannot legally change order and be equivalent 2698 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2699 check(p, "testa\u0308\u0300", true); 2700 check(p, "testa\u0300\u0308", false); 2701 2702 // Marks that can legally change order and be equivalent 2703 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2704 check(p, "testa\u0308\u0323", true); 2705 check(p, "testa\u0323\u0308", true); 2706 2707 // Test all equivalences of the sequence a\u0308\u0323\u0300 2708 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2709 check(p, "testa\u0308\u0323\u0300", true); 2710 check(p, "testa\u0323\u0308\u0300", true); 2711 check(p, "testa\u0308\u0300\u0323", true); 2712 check(p, "test\u00e4\u0323\u0300", true); 2713 check(p, "test\u00e4\u0300\u0323", true); 2714 
2715 Object[][] data = new Object[][] { 2716 2717 // JDK-4867170 2718 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true }, 2719 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true }, 2720 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true }, 2721 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true }, 2722 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true }, 2723 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true }, 2724 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true }, 2725 2726 { "\\p{IsGreek}", "ab\u1f80cd", "f", true }, 2727 { "\\p{IsGreek}", "ab\u1f81cd", "f", true }, 2728 { "\\p{IsGreek}", "ab\u1f82cd", "f", true }, 2729 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true }, 2730 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true }, 2731 2732 // backtracking, force to match "\u1f80", instead of \u1f82" 2733 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true }, 2734 2735 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true }, 2736 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true }, 2737 2738 { "[^\u1f80-\u1f82]","\u1f81", "m", false }, 2739 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false }, 2740 { "[^\u1f01\u0345]", "\u1f81", "f", false }, 2741 2742 { "[^\u1f81]+", "\u1f80\u1f82", "f", true }, 2743 { "[\u1f80]", "ab\u1f80cd", "f", true }, 2744 { "\u1f80", "ab\u1f80cd", "f", true }, 2745 { "\u1f00\u0345\u0300", "\u1f82", "m", true }, 2746 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true }, 2747 { "\u1f82", "\u1f00\u0345\u0300", "m", true }, 2748 { "\u1f82", "\u1f80\u0300", "m", true }, 2749 2750 // JDK-7080302 # compile failed 2751 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true}, 2752 2753 // JDK-6728861, same cause as above one 2754 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true}, 2755 2756 // JDK-6995635 2757 { "(\u00e9)", "e\u0301", "m", true }, 2758 2759 // JDK-6736245 2760 // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc 2761 { "\u2ADC", "\u2ADC", "m", true}, // NFC 2762 { "\u2ADC", "\u2ADD\u0338", "m", 
true}, // NFD 2763 2764 // 4916384. 2765 // Decomposed hangul (jamos) works inside clazz 2766 { "[\u1100\u1161]", "\u1100\u1161", "m", true}, 2767 { "[\u1100\u1161]", "\uac00", "m", true}, 2768 2769 { "[\uac00]", "\u1100\u1161", "m", true}, 2770 { "[\uac00]", "\uac00", "m", true}, 2771 2772 // Decomposed hangul (jamos) 2773 { "\u1100\u1161", "\u1100\u1161", "m", true}, 2774 { "\u1100\u1161", "\uac00", "m", true}, 2775 2776 // Composed hangul 2777 { "\uac00", "\u1100\u1161", "m", true }, 2778 { "\uac00", "\uac00", "m", true }, 2779 2780 /* Need a NFDSlice to nfd the source to solve this issue 2781 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2782 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165> 2783 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2784 2785 // Decomposed supplementary outside char classes 2786 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2787 // Composed supplementary outside char classes 2788 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2789 */ 2790 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2791 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2792 2793 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true }, 2794 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2795 }; 2796 2797 int failCount = 0; 2798 for (Object[] d : data) { 2799 String pn = (String)d[0]; 2800 String tt = (String)d[1]; 2801 boolean isFind = "f".equals(((String)d[2])); 2802 boolean expected = (boolean)d[3]; 2803 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find() 2804 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches(); 2805 if (ret != expected) { 2806 failCount++; 2807 continue; 2808 } 2809 } 2810 report("Canonical Equivalence"); 2811 } 2812 2813 /** 2814 * A basic sanity test of Matcher.replaceAll(). 
2815 */ 2816 private static void globalSubstitute() throws Exception { 2817 // Global substitution with a literal 2818 Pattern p = Pattern.compile("(ab)(c*)"); 2819 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2820 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2821 failCount++; 2822 2823 m.reset("zzzabccczzzabcczzzabccczzz"); 2824 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2825 failCount++; 2826 2827 // Global substitution with groups 2828 m.reset("zzzabccczzzabcczzzabccczzz"); 2829 String result = m.replaceAll("$1"); 2830 if (!result.equals("zzzabzzzabzzzabzzz")) 2831 failCount++; 2832 2833 // Supplementary character test 2834 // Global substitution with a literal 2835 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2836 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2837 if (!m.replaceAll(toSupplementaries("test")). 2838 equals(toSupplementaries("testzzztestzzztest"))) 2839 failCount++; 2840 2841 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2842 if (!m.replaceAll(toSupplementaries("test")). 2843 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2844 failCount++; 2845 2846 // Global substitution with groups 2847 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2848 result = m.replaceAll("$1"); 2849 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2850 failCount++; 2851 2852 report("Global Substitution"); 2853 } 2854 2855 /** 2856 * Tests the usage of Matcher.appendReplacement() with literal 2857 * and group substitutions. 
2858 */ 2859 private static void stringbufferSubstitute() throws Exception { 2860 // SB substitution with literal 2861 String blah = "zzzblahzzz"; 2862 Pattern p = Pattern.compile("blah"); 2863 Matcher m = p.matcher(blah); 2864 StringBuffer result = new StringBuffer(); 2865 try { 2866 m.appendReplacement(result, "blech"); 2867 failCount++; 2868 } catch (IllegalStateException e) { 2869 } 2870 m.find(); 2871 m.appendReplacement(result, "blech"); 2872 if (!result.toString().equals("zzzblech")) 2873 failCount++; 2874 2875 m.appendTail(result); 2876 if (!result.toString().equals("zzzblechzzz")) 2877 failCount++; 2878 2879 // SB substitution with groups 2880 blah = "zzzabcdzzz"; 2881 p = Pattern.compile("(ab)(cd)*"); 2882 m = p.matcher(blah); 2883 result = new StringBuffer(); 2884 try { 2885 m.appendReplacement(result, "$1"); 2886 failCount++; 2887 } catch (IllegalStateException e) { 2888 } 2889 m.find(); 2890 m.appendReplacement(result, "$1"); 2891 if (!result.toString().equals("zzzab")) 2892 failCount++; 2893 2894 m.appendTail(result); 2895 if (!result.toString().equals("zzzabzzz")) 2896 failCount++; 2897 2898 // SB substitution with 3 groups 2899 blah = "zzzabcdcdefzzz"; 2900 p = Pattern.compile("(ab)(cd)*(ef)"); 2901 m = p.matcher(blah); 2902 result = new StringBuffer(); 2903 try { 2904 m.appendReplacement(result, "$1w$2w$3"); 2905 failCount++; 2906 } catch (IllegalStateException e) { 2907 } 2908 m.find(); 2909 m.appendReplacement(result, "$1w$2w$3"); 2910 if (!result.toString().equals("zzzabwcdwef")) 2911 failCount++; 2912 2913 m.appendTail(result); 2914 if (!result.toString().equals("zzzabwcdwefzzz")) 2915 failCount++; 2916 2917 // SB substitution with groups and three matches 2918 // skipping middle match 2919 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2920 p = Pattern.compile("(ab)(cd*)"); 2921 m = p.matcher(blah); 2922 result = new StringBuffer(); 2923 try { 2924 m.appendReplacement(result, "$1"); 2925 failCount++; 2926 } catch (IllegalStateException e) { 2927 } 2928 
m.find(); 2929 m.appendReplacement(result, "$1"); 2930 if (!result.toString().equals("zzzab")) 2931 failCount++; 2932 2933 m.find(); 2934 m.find(); 2935 m.appendReplacement(result, "$2"); 2936 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2937 failCount++; 2938 2939 m.appendTail(result); 2940 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2941 failCount++; 2942 2943 // Check to make sure escaped $ is ignored 2944 blah = "zzzabcdcdefzzz"; 2945 p = Pattern.compile("(ab)(cd)*(ef)"); 2946 m = p.matcher(blah); 2947 result = new StringBuffer(); 2948 m.find(); 2949 m.appendReplacement(result, "$1w\\$2w$3"); 2950 if (!result.toString().equals("zzzabw$2wef")) 2951 failCount++; 2952 2953 m.appendTail(result); 2954 if (!result.toString().equals("zzzabw$2wefzzz")) 2955 failCount++; 2956 2957 // Check to make sure a reference to nonexistent group causes error 2958 blah = "zzzabcdcdefzzz"; 2959 p = Pattern.compile("(ab)(cd)*(ef)"); 2960 m = p.matcher(blah); 2961 result = new StringBuffer(); 2962 m.find(); 2963 try { 2964 m.appendReplacement(result, "$1w$5w$3"); 2965 failCount++; 2966 } catch (IndexOutOfBoundsException ioobe) { 2967 // Correct result 2968 } 2969 2970 // Check double digit group references 2971 blah = "zzz123456789101112zzz"; 2972 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2973 m = p.matcher(blah); 2974 result = new StringBuffer(); 2975 m.find(); 2976 m.appendReplacement(result, "$1w$11w$3"); 2977 if (!result.toString().equals("zzz1w11w3")) 2978 failCount++; 2979 2980 // Check to make sure it backs off $15 to $1 if only three groups 2981 blah = "zzzabcdcdefzzz"; 2982 p = Pattern.compile("(ab)(cd)*(ef)"); 2983 m = p.matcher(blah); 2984 result = new StringBuffer(); 2985 m.find(); 2986 m.appendReplacement(result, "$1w$15w$3"); 2987 if (!result.toString().equals("zzzabwab5wef")) 2988 failCount++; 2989 2990 2991 // Supplementary character test 2992 // SB substitution with literal 2993 blah = toSupplementaries("zzzblahzzz"); 2994 p = 
Pattern.compile(toSupplementaries("blah")); 2995 m = p.matcher(blah); 2996 result = new StringBuffer(); 2997 try { 2998 m.appendReplacement(result, toSupplementaries("blech")); 2999 failCount++; 3000 } catch (IllegalStateException e) { 3001 } 3002 m.find(); 3003 m.appendReplacement(result, toSupplementaries("blech")); 3004 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3005 failCount++; 3006 3007 m.appendTail(result); 3008 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3009 failCount++; 3010 3011 // SB substitution with groups 3012 blah = toSupplementaries("zzzabcdzzz"); 3013 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3014 m = p.matcher(blah); 3015 result = new StringBuffer(); 3016 try { 3017 m.appendReplacement(result, "$1"); 3018 failCount++; 3019 } catch (IllegalStateException e) { 3020 } 3021 m.find(); 3022 m.appendReplacement(result, "$1"); 3023 if (!result.toString().equals(toSupplementaries("zzzab"))) 3024 failCount++; 3025 3026 m.appendTail(result); 3027 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3028 failCount++; 3029 3030 // SB substitution with 3 groups 3031 blah = toSupplementaries("zzzabcdcdefzzz"); 3032 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3033 m = p.matcher(blah); 3034 result = new StringBuffer(); 3035 try { 3036 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3037 failCount++; 3038 } catch (IllegalStateException e) { 3039 } 3040 m.find(); 3041 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3042 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3043 failCount++; 3044 3045 m.appendTail(result); 3046 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3047 failCount++; 3048 3049 // SB substitution with groups and three matches 3050 // skipping middle match 3051 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3052 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3053 m = p.matcher(blah); 3054 result = new 
StringBuffer(); 3055 try { 3056 m.appendReplacement(result, "$1"); 3057 failCount++; 3058 } catch (IllegalStateException e) { 3059 } 3060 m.find(); 3061 m.appendReplacement(result, "$1"); 3062 if (!result.toString().equals(toSupplementaries("zzzab"))) 3063 failCount++; 3064 3065 m.find(); 3066 m.find(); 3067 m.appendReplacement(result, "$2"); 3068 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3069 failCount++; 3070 3071 m.appendTail(result); 3072 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3073 failCount++; 3074 3075 // Check to make sure escaped $ is ignored 3076 blah = toSupplementaries("zzzabcdcdefzzz"); 3077 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3078 m = p.matcher(blah); 3079 result = new StringBuffer(); 3080 m.find(); 3081 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3082 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3083 failCount++; 3084 3085 m.appendTail(result); 3086 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3087 failCount++; 3088 3089 // Check to make sure a reference to nonexistent group causes error 3090 blah = toSupplementaries("zzzabcdcdefzzz"); 3091 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3092 m = p.matcher(blah); 3093 result = new StringBuffer(); 3094 m.find(); 3095 try { 3096 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3097 failCount++; 3098 } catch (IndexOutOfBoundsException ioobe) { 3099 // Correct result 3100 } 3101 3102 // Check double digit group references 3103 blah = toSupplementaries("zzz123456789101112zzz"); 3104 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3105 m = p.matcher(blah); 3106 result = new StringBuffer(); 3107 m.find(); 3108 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3109 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3110 failCount++; 3111 3112 // Check to make sure it backs off $15 to $1 if only three groups 
        blah = toSupplementaries("zzzabcdcdefzzz");
        p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
        m = p.matcher(blah);
        result = new StringBuffer();
        m.find();
        m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
        if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
            failCount++;

        // Check nothing has been appended into the output buffer if
        // the replacement string triggers IllegalArgumentException.
        p = Pattern.compile("(abc)");
        m = p.matcher("abcd");
        result = new StringBuffer();
        m.find();
        try {
            m.appendReplacement(result, ("xyz$g"));
            failCount++;
        } catch (IllegalArgumentException iae) {
            if (result.length() != 0)
                failCount++;
        }

        report("SB Substitution");
    }

    /**
     * Tests the usage of Matcher.appendReplacement() and
     * Matcher.appendTail() with a StringBuilder as the output target,
     * using literal and group substitutions.
     *
     * Each scenario is run first on plain strings and then on strings
     * passed through toSupplementaries(). Every mismatch between the
     * accumulated output and the expected text increments failCount;
     * the outcome is reported under the name "SB Substitution 2".
     */
    private static void stringbuilderSubstitute() throws Exception {
        // StringBuilder substitution with literal
        String blah = "zzzblahzzz";
        Pattern p = Pattern.compile("blah");
        Matcher m = p.matcher(blah);
        StringBuilder result = new StringBuilder();
        // appendReplacement before any find() is expected to throw
        // IllegalStateException; reaching the next line is a failure.
        try {
            m.appendReplacement(result, "blech");
            failCount++;
        } catch (IllegalStateException e) {
            // Expected: no match has been attempted yet
        }
        m.find();
        m.appendReplacement(result, "blech");
        if (!result.toString().equals("zzzblech"))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals("zzzblechzzz"))
            failCount++;

        // StringBuilder substitution with groups
        blah = "zzzabcdzzz";
        p = Pattern.compile("(ab)(cd)*");
        m = p.matcher(blah);
        result = new StringBuilder();
        try {
            m.appendReplacement(result, "$1");
            failCount++;
        } catch (IllegalStateException e) {
            // Expected: no match has been attempted yet
        }
        m.find();
        m.appendReplacement(result, "$1");
        if (!result.toString().equals("zzzab"))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals("zzzabzzz"))
            failCount++;

        // StringBuilder substitution with 3 groups
        blah = "zzzabcdcdefzzz";
        p = Pattern.compile("(ab)(cd)*(ef)");
        m = p.matcher(blah);
        result = new StringBuilder();
        try {
            m.appendReplacement(result, "$1w$2w$3");
            failCount++;
        } catch (IllegalStateException e) {
            // Expected: no match has been attempted yet
        }
        m.find();
        m.appendReplacement(result, "$1w$2w$3");
        if (!result.toString().equals("zzzabwcdwef"))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals("zzzabwcdwefzzz"))
            failCount++;

        // StringBuilder substitution with groups and three matches
        // skipping middle match
        blah = "zzzabcdzzzabcddzzzabcdzzz";
        p = Pattern.compile("(ab)(cd*)");
        m = p.matcher(blah);
        result = new StringBuilder();
        try {
            m.appendReplacement(result, "$1");
            failCount++;
        } catch (IllegalStateException e) {
            // Expected: no match has been attempted yet
        }
        m.find();
        m.appendReplacement(result, "$1");
        if (!result.toString().equals("zzzab"))
            failCount++;

        // Two find() calls with no appendReplacement in between: the
        // second match is skipped, so its text is copied through unchanged.
        m.find();
        m.find();
        m.appendReplacement(result, "$2");
        if (!result.toString().equals("zzzabzzzabcddzzzcd"))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
            failCount++;

        // Check to make sure escaped $ is ignored
        blah = "zzzabcdcdefzzz";
        p = Pattern.compile("(ab)(cd)*(ef)");
        m = p.matcher(blah);
        result = new StringBuilder();
        m.find();
        m.appendReplacement(result, "$1w\\$2w$3");
        if (!result.toString().equals("zzzabw$2wef"))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals("zzzabw$2wefzzz"))
            failCount++;

        // Check to make sure a reference to nonexistent group causes error
        blah = "zzzabcdcdefzzz";
        p = Pattern.compile("(ab)(cd)*(ef)");
        m = p.matcher(blah);
        result = new StringBuilder();
        m.find();
        try {
            m.appendReplacement(result, "$1w$5w$3");
            failCount++;
        } catch (IndexOutOfBoundsException ioobe) {
            // Correct result
        }

        // Check double digit group references
        blah = "zzz123456789101112zzz";
        p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
        m = p.matcher(blah);
        result = new StringBuilder();
        m.find();
        m.appendReplacement(result, "$1w$11w$3");
        if (!result.toString().equals("zzz1w11w3"))
            failCount++;

        // Check to make sure it backs off $15 to $1 if only three groups
        blah = "zzzabcdcdefzzz";
        p = Pattern.compile("(ab)(cd)*(ef)");
        m = p.matcher(blah);
        result = new StringBuilder();
        m.find();
        m.appendReplacement(result, "$1w$15w$3");
        if (!result.toString().equals("zzzabwab5wef"))
            failCount++;


        // Supplementary character test
        // StringBuilder substitution with literal
        blah = toSupplementaries("zzzblahzzz");
        p = Pattern.compile(toSupplementaries("blah"));
        m = p.matcher(blah);
        result = new StringBuilder();
        try {
            m.appendReplacement(result, toSupplementaries("blech"));
            failCount++;
        } catch (IllegalStateException e) {
            // Expected: no match has been attempted yet
        }
        m.find();
        m.appendReplacement(result, toSupplementaries("blech"));
        if (!result.toString().equals(toSupplementaries("zzzblech")))
            failCount++;
        m.appendTail(result);
        if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
            failCount++;

        // StringBuilder substitution with groups
        blah = toSupplementaries("zzzabcdzzz");
        p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
        m = p.matcher(blah);
        result = new StringBuilder();
        try {
            m.appendReplacement(result, "$1");
            failCount++;
        } catch (IllegalStateException e) {
            // Expected: no match has been attempted yet
        }
        m.find();
        m.appendReplacement(result, "$1");
        if (!result.toString().equals(toSupplementaries("zzzab")))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals(toSupplementaries("zzzabzzz")))
            failCount++;

        // StringBuilder substitution with 3 groups
        blah = toSupplementaries("zzzabcdcdefzzz");
        p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
        m = p.matcher(blah);
        result = new StringBuilder();
        try {
            m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
            failCount++;
        } catch (IllegalStateException e) {
            // Expected: no match has been attempted yet
        }
        m.find();
        m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
        if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
            failCount++;

        // StringBuilder substitution with groups and three matches
        // skipping middle match
        blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
        p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
        m = p.matcher(blah);
        result = new StringBuilder();
        try {
            m.appendReplacement(result, "$1");
            failCount++;
        } catch (IllegalStateException e) {
            // Expected: no match has been attempted yet
        }
        m.find();
        m.appendReplacement(result, "$1");
        if (!result.toString().equals(toSupplementaries("zzzab")))
            failCount++;

        m.find();
        m.find();
        m.appendReplacement(result, "$2");
        if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
            failCount++;

        // Check to make sure escaped $ is ignored
        blah = toSupplementaries("zzzabcdcdefzzz");
        p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
        m = p.matcher(blah);
        result = new StringBuilder();
        m.find();
        m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
        if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
            failCount++;

        m.appendTail(result);
        if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
            failCount++;

        // Check to make sure a reference to nonexistent group causes error
        blah = toSupplementaries("zzzabcdcdefzzz");
        p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
        m = p.matcher(blah);
        result = new StringBuilder();
        m.find();
        try {
            m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
            failCount++;
        } catch (IndexOutOfBoundsException ioobe) {
            // Correct result
        }
        // Check double digit group references
        blah = toSupplementaries("zzz123456789101112zzz");
        p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
        m = p.matcher(blah);
        result = new StringBuilder();
        m.find();
        m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
        if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
            failCount++;

        // Check to make sure it backs off $15 to $1 if only three groups
        blah = toSupplementaries("zzzabcdcdefzzz");
        p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
        m = p.matcher(blah);
        result = new StringBuilder();
        m.find();
        m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
        if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
            failCount++;
        // Check nothing has been appended into the output buffer if
        // the replacement string triggers IllegalArgumentException.
        p = Pattern.compile("(abc)");
        m = p.matcher("abcd");
        result = new StringBuilder();
        m.find();
        try {
            m.appendReplacement(result, ("xyz$g"));
            failCount++;
        } catch (IllegalArgumentException iae) {
            if (result.length() != 0)
                failCount++;
        }
        report("SB Substitution 2");
    }

    /*
     * 5 groups of characters are created to make a substitution string.
3421 * A base string will be created including random lead chars, the 3422 * substitution string, and random trailing chars. 3423 * A pattern containing the 5 groups is searched for and replaced with: 3424 * random group + random string + random group. 3425 * The results are checked for correctness. 3426 */ 3427 private static void substitutionBasher() { 3428 for (int runs = 0; runs<1000; runs++) { 3429 // Create a base string to work in 3430 int leadingChars = generator.nextInt(10); 3431 StringBuffer baseBuffer = new StringBuffer(100); 3432 String leadingString = getRandomAlphaString(leadingChars); 3433 baseBuffer.append(leadingString); 3434 3435 // Create 5 groups of random number of random chars 3436 // Create the string to substitute 3437 // Create the pattern string to search for 3438 StringBuffer bufferToSub = new StringBuffer(25); 3439 StringBuffer bufferToPat = new StringBuffer(50); 3440 String[] groups = new String[5]; 3441 for(int i=0; i<5; i++) { 3442 int aGroupSize = generator.nextInt(5)+1; 3443 groups[i] = getRandomAlphaString(aGroupSize); 3444 bufferToSub.append(groups[i]); 3445 bufferToPat.append('('); 3446 bufferToPat.append(groups[i]); 3447 bufferToPat.append(')'); 3448 } 3449 String stringToSub = bufferToSub.toString(); 3450 String pattern = bufferToPat.toString(); 3451 3452 // Place sub string into working string at random index 3453 baseBuffer.append(stringToSub); 3454 3455 // Append random chars to end 3456 int trailingChars = generator.nextInt(10); 3457 String trailingString = getRandomAlphaString(trailingChars); 3458 baseBuffer.append(trailingString); 3459 String baseString = baseBuffer.toString(); 3460 3461 // Create test pattern and matcher 3462 Pattern p = Pattern.compile(pattern); 3463 Matcher m = p.matcher(baseString); 3464 3465 // Reject candidate if pattern happens to start early 3466 m.find(); 3467 if (m.start() < leadingChars) 3468 continue; 3469 3470 // Reject candidate if more than one match 3471 if (m.find()) 3472 continue; 3473 
3474 // Construct a replacement string with : 3475 // random group + random string + random group 3476 StringBuffer bufferToRep = new StringBuffer(); 3477 int groupIndex1 = generator.nextInt(5); 3478 bufferToRep.append("$" + (groupIndex1 + 1)); 3479 String randomMidString = getRandomAlphaString(5); 3480 bufferToRep.append(randomMidString); 3481 int groupIndex2 = generator.nextInt(5); 3482 bufferToRep.append("$" + (groupIndex2 + 1)); 3483 String replacement = bufferToRep.toString(); 3484 3485 // Do the replacement 3486 String result = m.replaceAll(replacement); 3487 3488 // Construct expected result 3489 StringBuffer bufferToRes = new StringBuffer(); 3490 bufferToRes.append(leadingString); 3491 bufferToRes.append(groups[groupIndex1]); 3492 bufferToRes.append(randomMidString); 3493 bufferToRes.append(groups[groupIndex2]); 3494 bufferToRes.append(trailingString); 3495 String expectedResult = bufferToRes.toString(); 3496 3497 // Check results 3498 if (!result.equals(expectedResult)) 3499 failCount++; 3500 } 3501 3502 report("Substitution Basher"); 3503 } 3504 3505 /* 3506 * 5 groups of characters are created to make a substitution string. 3507 * A base string will be created including random lead chars, the 3508 * substitution string, and random trailing chars. 3509 * A pattern containing the 5 groups is searched for and replaced with: 3510 * random group + random string + random group. 3511 * The results are checked for correctness. 
3512 */ 3513 private static void substitutionBasher2() { 3514 for (int runs = 0; runs<1000; runs++) { 3515 // Create a base string to work in 3516 int leadingChars = generator.nextInt(10); 3517 StringBuilder baseBuffer = new StringBuilder(100); 3518 String leadingString = getRandomAlphaString(leadingChars); 3519 baseBuffer.append(leadingString); 3520 3521 // Create 5 groups of random number of random chars 3522 // Create the string to substitute 3523 // Create the pattern string to search for 3524 StringBuilder bufferToSub = new StringBuilder(25); 3525 StringBuilder bufferToPat = new StringBuilder(50); 3526 String[] groups = new String[5]; 3527 for(int i=0; i<5; i++) { 3528 int aGroupSize = generator.nextInt(5)+1; 3529 groups[i] = getRandomAlphaString(aGroupSize); 3530 bufferToSub.append(groups[i]); 3531 bufferToPat.append('('); 3532 bufferToPat.append(groups[i]); 3533 bufferToPat.append(')'); 3534 } 3535 String stringToSub = bufferToSub.toString(); 3536 String pattern = bufferToPat.toString(); 3537 3538 // Place sub string into working string at random index 3539 baseBuffer.append(stringToSub); 3540 3541 // Append random chars to end 3542 int trailingChars = generator.nextInt(10); 3543 String trailingString = getRandomAlphaString(trailingChars); 3544 baseBuffer.append(trailingString); 3545 String baseString = baseBuffer.toString(); 3546 3547 // Create test pattern and matcher 3548 Pattern p = Pattern.compile(pattern); 3549 Matcher m = p.matcher(baseString); 3550 3551 // Reject candidate if pattern happens to start early 3552 m.find(); 3553 if (m.start() < leadingChars) 3554 continue; 3555 3556 // Reject candidate if more than one match 3557 if (m.find()) 3558 continue; 3559 3560 // Construct a replacement string with : 3561 // random group + random string + random group 3562 StringBuilder bufferToRep = new StringBuilder(); 3563 int groupIndex1 = generator.nextInt(5); 3564 bufferToRep.append("$" + (groupIndex1 + 1)); 3565 String randomMidString = 
getRandomAlphaString(5); 3566 bufferToRep.append(randomMidString); 3567 int groupIndex2 = generator.nextInt(5); 3568 bufferToRep.append("$" + (groupIndex2 + 1)); 3569 String replacement = bufferToRep.toString(); 3570 3571 // Do the replacement 3572 String result = m.replaceAll(replacement); 3573 3574 // Construct expected result 3575 StringBuilder bufferToRes = new StringBuilder(); 3576 bufferToRes.append(leadingString); 3577 bufferToRes.append(groups[groupIndex1]); 3578 bufferToRes.append(randomMidString); 3579 bufferToRes.append(groups[groupIndex2]); 3580 bufferToRes.append(trailingString); 3581 String expectedResult = bufferToRes.toString(); 3582 3583 // Check results 3584 if (!result.equals(expectedResult)) { 3585 failCount++; 3586 } 3587 } 3588 3589 report("Substitution Basher 2"); 3590 } 3591 3592 /** 3593 * Checks the handling of some escape sequences that the Pattern 3594 * class should process instead of the java compiler. These are 3595 * not in the file because the escapes should be be processed 3596 * by the Pattern class when the regex is compiled. 3597 */ 3598 private static void escapes() throws Exception { 3599 Pattern p = Pattern.compile("\\043"); 3600 Matcher m = p.matcher("#"); 3601 if (!m.find()) 3602 failCount++; 3603 3604 p = Pattern.compile("\\x23"); 3605 m = p.matcher("#"); 3606 if (!m.find()) 3607 failCount++; 3608 3609 p = Pattern.compile("\\u0023"); 3610 m = p.matcher("#"); 3611 if (!m.find()) 3612 failCount++; 3613 3614 report("Escape sequences"); 3615 } 3616 3617 /** 3618 * Checks the handling of blank input situations. These 3619 * tests are incompatible with my test file format. 
3620 */ 3621 private static void blankInput() throws Exception { 3622 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3623 Matcher m = p.matcher(""); 3624 if (m.find()) 3625 failCount++; 3626 3627 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3628 m = p.matcher(""); 3629 if (!m.find()) 3630 failCount++; 3631 3632 p = Pattern.compile("abc"); 3633 m = p.matcher(""); 3634 if (m.find()) 3635 failCount++; 3636 3637 p = Pattern.compile("a*"); 3638 m = p.matcher(""); 3639 if (!m.find()) 3640 failCount++; 3641 3642 report("Blank input"); 3643 } 3644 3645 /** 3646 * Tests the Boyer-Moore pattern matching of a character sequence 3647 * on randomly generated patterns. 3648 */ 3649 private static void bm() throws Exception { 3650 doBnM('a'); 3651 report("Boyer Moore (ASCII)"); 3652 3653 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3654 report("Boyer Moore (Supplementary)"); 3655 } 3656 3657 private static void doBnM(int baseCharacter) throws Exception { 3658 int achar=0; 3659 3660 for (int i=0; i<100; i++) { 3661 // Create a short pattern to search for 3662 int patternLength = generator.nextInt(7) + 4; 3663 StringBuffer patternBuffer = new StringBuffer(patternLength); 3664 String pattern; 3665 retry: for (;;) { 3666 for (int x=0; x<patternLength; x++) { 3667 int ch = baseCharacter + generator.nextInt(26); 3668 if (Character.isSupplementaryCodePoint(ch)) { 3669 patternBuffer.append(Character.toChars(ch)); 3670 } else { 3671 patternBuffer.append((char)ch); 3672 } 3673 } 3674 pattern = patternBuffer.toString(); 3675 3676 // Avoid patterns that start and end with the same substring 3677 // See JDK-6854417 3678 for (int x=1; x < pattern.length(); x++) { 3679 if (pattern.startsWith(pattern.substring(x))) 3680 continue retry; 3681 } 3682 break; 3683 } 3684 Pattern p = Pattern.compile(pattern); 3685 3686 // Create a buffer with random ASCII chars that does 3687 // not match the sample 3688 String toSearch = null; 3689 StringBuffer s = null; 3690 Matcher m = 
p.matcher(""); 3691 do { 3692 s = new StringBuffer(100); 3693 for (int x=0; x<100; x++) { 3694 int ch = baseCharacter + generator.nextInt(26); 3695 if (Character.isSupplementaryCodePoint(ch)) { 3696 s.append(Character.toChars(ch)); 3697 } else { 3698 s.append((char)ch); 3699 } 3700 } 3701 toSearch = s.toString(); 3702 m.reset(toSearch); 3703 } while (m.find()); 3704 3705 // Insert the pattern at a random spot 3706 int insertIndex = generator.nextInt(99); 3707 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3708 insertIndex++; 3709 s = s.insert(insertIndex, pattern); 3710 toSearch = s.toString(); 3711 3712 // Make sure that the pattern is found 3713 m.reset(toSearch); 3714 if (!m.find()) 3715 failCount++; 3716 3717 // Make sure that the match text is the pattern 3718 if (!m.group().equals(pattern)) 3719 failCount++; 3720 3721 // Make sure match occured at insertion point 3722 if (m.start() != insertIndex) 3723 failCount++; 3724 } 3725 } 3726 3727 /** 3728 * Tests the matching of slices on randomly generated patterns. 3729 * The Boyer-Moore optimization is not done on these patterns 3730 * because it uses unicode case folding. 
3731 */ 3732 private static void slice() throws Exception { 3733 doSlice(Character.MAX_VALUE); 3734 report("Slice"); 3735 3736 doSlice(Character.MAX_CODE_POINT); 3737 report("Slice (Supplementary)"); 3738 } 3739 3740 private static void doSlice(int maxCharacter) throws Exception { 3741 Random generator = new Random(); 3742 int achar=0; 3743 3744 for (int i=0; i<100; i++) { 3745 // Create a short pattern to search for 3746 int patternLength = generator.nextInt(7) + 4; 3747 StringBuffer patternBuffer = new StringBuffer(patternLength); 3748 for (int x=0; x<patternLength; x++) { 3749 int randomChar = 0; 3750 while (!Character.isLetterOrDigit(randomChar)) 3751 randomChar = generator.nextInt(maxCharacter); 3752 if (Character.isSupplementaryCodePoint(randomChar)) { 3753 patternBuffer.append(Character.toChars(randomChar)); 3754 } else { 3755 patternBuffer.append((char) randomChar); 3756 } 3757 } 3758 String pattern = patternBuffer.toString(); 3759 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3760 3761 // Create a buffer with random chars that does not match the sample 3762 String toSearch = null; 3763 StringBuffer s = null; 3764 Matcher m = p.matcher(""); 3765 do { 3766 s = new StringBuffer(100); 3767 for (int x=0; x<100; x++) { 3768 int randomChar = 0; 3769 while (!Character.isLetterOrDigit(randomChar)) 3770 randomChar = generator.nextInt(maxCharacter); 3771 if (Character.isSupplementaryCodePoint(randomChar)) { 3772 s.append(Character.toChars(randomChar)); 3773 } else { 3774 s.append((char) randomChar); 3775 } 3776 } 3777 toSearch = s.toString(); 3778 m.reset(toSearch); 3779 } while (m.find()); 3780 3781 // Insert the pattern at a random spot 3782 int insertIndex = generator.nextInt(99); 3783 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3784 insertIndex++; 3785 s = s.insert(insertIndex, pattern); 3786 toSearch = s.toString(); 3787 3788 // Make sure that the pattern is found 3789 m.reset(toSearch); 3790 if (!m.find()) 3791 failCount++; 3792 3793 // 
Make sure that the match text is the pattern 3794 if (!m.group().equals(pattern)) 3795 failCount++; 3796 3797 // Make sure match occured at insertion point 3798 if (m.start() != insertIndex) 3799 failCount++; 3800 } 3801 } 3802 3803 private static void explainFailure(String pattern, String data, 3804 String expected, String actual) { 3805 System.err.println("----------------------------------------"); 3806 System.err.println("Pattern = "+pattern); 3807 System.err.println("Data = "+data); 3808 System.err.println("Expected = " + expected); 3809 System.err.println("Actual = " + actual); 3810 } 3811 3812 private static void explainFailure(String pattern, String data, 3813 Throwable t) { 3814 System.err.println("----------------------------------------"); 3815 System.err.println("Pattern = "+pattern); 3816 System.err.println("Data = "+data); 3817 t.printStackTrace(System.err); 3818 } 3819 3820 // Testing examples from a file 3821 3822 /** 3823 * Goes through the file "TestCases.txt" and creates many patterns 3824 * described in the file, matching the patterns against input lines in 3825 * the file, and comparing the results against the correct results 3826 * also found in the file. The file format is described in comments 3827 * at the head of the file. 3828 */ 3829 private static void processFile(String fileName) throws Exception { 3830 File testCases = new File(System.getProperty("test.src", "."), 3831 fileName); 3832 FileInputStream in = new FileInputStream(testCases); 3833 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3834 3835 // Process next test case. 
3836 String aLine; 3837 while((aLine = r.readLine()) != null) { 3838 // Read a line for pattern 3839 String patternString = grabLine(r); 3840 Pattern p = null; 3841 try { 3842 p = compileTestPattern(patternString); 3843 } catch (PatternSyntaxException e) { 3844 String dataString = grabLine(r); 3845 String expectedResult = grabLine(r); 3846 if (expectedResult.startsWith("error")) 3847 continue; 3848 explainFailure(patternString, dataString, e); 3849 failCount++; 3850 continue; 3851 } 3852 3853 // Read a line for input string 3854 String dataString = grabLine(r); 3855 Matcher m = p.matcher(dataString); 3856 StringBuffer result = new StringBuffer(); 3857 3858 // Check for IllegalStateExceptions before a match 3859 failCount += preMatchInvariants(m); 3860 3861 boolean found = m.find(); 3862 3863 if (found) 3864 failCount += postTrueMatchInvariants(m); 3865 else 3866 failCount += postFalseMatchInvariants(m); 3867 3868 if (found) { 3869 result.append("true "); 3870 result.append(m.group(0) + " "); 3871 } else { 3872 result.append("false "); 3873 } 3874 3875 result.append(m.groupCount()); 3876 3877 if (found) { 3878 for (int i=1; i<m.groupCount()+1; i++) 3879 if (m.group(i) != null) 3880 result.append(" " +m.group(i)); 3881 } 3882 3883 // Read a line for the expected result 3884 String expectedResult = grabLine(r); 3885 3886 if (!result.toString().equals(expectedResult)) { 3887 explainFailure(patternString, dataString, expectedResult, result.toString()); 3888 failCount++; 3889 } 3890 } 3891 3892 report(fileName); 3893 } 3894 3895 private static int preMatchInvariants(Matcher m) { 3896 int failCount = 0; 3897 try { 3898 m.start(); 3899 failCount++; 3900 } catch (IllegalStateException ise) {} 3901 try { 3902 m.end(); 3903 failCount++; 3904 } catch (IllegalStateException ise) {} 3905 try { 3906 m.group(); 3907 failCount++; 3908 } catch (IllegalStateException ise) {} 3909 return failCount; 3910 } 3911 3912 private static int postFalseMatchInvariants(Matcher m) { 3913 int 
failCount = 0; 3914 try { 3915 m.group(); 3916 failCount++; 3917 } catch (IllegalStateException ise) {} 3918 try { 3919 m.start(); 3920 failCount++; 3921 } catch (IllegalStateException ise) {} 3922 try { 3923 m.end(); 3924 failCount++; 3925 } catch (IllegalStateException ise) {} 3926 return failCount; 3927 } 3928 3929 private static int postTrueMatchInvariants(Matcher m) { 3930 int failCount = 0; 3931 //assert(m.start() = m.start(0); 3932 if (m.start() != m.start(0)) 3933 failCount++; 3934 //assert(m.end() = m.end(0); 3935 if (m.start() != m.start(0)) 3936 failCount++; 3937 //assert(m.group() = m.group(0); 3938 if (!m.group().equals(m.group(0))) 3939 failCount++; 3940 try { 3941 m.group(50); 3942 failCount++; 3943 } catch (IndexOutOfBoundsException ise) {} 3944 3945 return failCount; 3946 } 3947 3948 private static Pattern compileTestPattern(String patternString) { 3949 if (!patternString.startsWith("'")) { 3950 return Pattern.compile(patternString); 3951 } 3952 int break1 = patternString.lastIndexOf("'"); 3953 String flagString = patternString.substring( 3954 break1+1, patternString.length()); 3955 patternString = patternString.substring(1, break1); 3956 3957 if (flagString.equals("i")) 3958 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3959 3960 if (flagString.equals("m")) 3961 return Pattern.compile(patternString, Pattern.MULTILINE); 3962 3963 return Pattern.compile(patternString); 3964 } 3965 3966 /** 3967 * Reads a line from the input file. Keeps reading lines until a non 3968 * empty non comment line is read. If the line contains a \n then 3969 * these two characters are replaced by a newline char. If a \\uxxxx 3970 * sequence is read then the sequence is replaced by the unicode char. 
3971 */ 3972 private static String grabLine(BufferedReader r) throws Exception { 3973 int index = 0; 3974 String line = r.readLine(); 3975 while (line.startsWith("//") || line.length() < 1) 3976 line = r.readLine(); 3977 while ((index = line.indexOf("\\n")) != -1) { 3978 StringBuffer temp = new StringBuffer(line); 3979 temp.replace(index, index+2, "\n"); 3980 line = temp.toString(); 3981 } 3982 while ((index = line.indexOf("\\u")) != -1) { 3983 StringBuffer temp = new StringBuffer(line); 3984 String value = temp.substring(index+2, index+6); 3985 char aChar = (char)Integer.parseInt(value, 16); 3986 String unicodeChar = "" + aChar; 3987 temp.replace(index, index+6, unicodeChar); 3988 line = temp.toString(); 3989 } 3990 3991 return line; 3992 } 3993 3994 private static void check(Pattern p, String s, String g, String expected) { 3995 Matcher m = p.matcher(s); 3996 m.find(); 3997 if (!m.group(g).equals(expected) || 3998 s.charAt(m.start(g)) != expected.charAt(0) || 3999 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 4000 failCount++; 4001 } 4002 4003 private static void checkReplaceFirst(String p, String s, String r, String expected) 4004 { 4005 if (!expected.equals(Pattern.compile(p) 4006 .matcher(s) 4007 .replaceFirst(r))) 4008 failCount++; 4009 } 4010 4011 private static void checkReplaceAll(String p, String s, String r, String expected) 4012 { 4013 if (!expected.equals(Pattern.compile(p) 4014 .matcher(s) 4015 .replaceAll(r))) 4016 failCount++; 4017 } 4018 4019 private static void checkExpectedFail(String p) { 4020 try { 4021 Pattern.compile(p); 4022 } catch (PatternSyntaxException pse) { 4023 //pse.printStackTrace(); 4024 return; 4025 } 4026 failCount++; 4027 } 4028 4029 private static void checkExpectedIAE(Matcher m, String g) { 4030 m.find(); 4031 try { 4032 m.group(g); 4033 } catch (IllegalArgumentException x) { 4034 //iae.printStackTrace(); 4035 try { 4036 m.start(g); 4037 } catch (IllegalArgumentException xx) { 4038 try { 4039 m.start(g); 
4040 } catch (IllegalArgumentException xxx) { 4041 return; 4042 } 4043 } 4044 } 4045 failCount++; 4046 } 4047 4048 private static void checkExpectedNPE(Matcher m) { 4049 m.find(); 4050 try { 4051 m.group(null); 4052 } catch (NullPointerException x) { 4053 try { 4054 m.start(null); 4055 } catch (NullPointerException xx) { 4056 try { 4057 m.end(null); 4058 } catch (NullPointerException xxx) { 4059 return; 4060 } 4061 } 4062 } 4063 failCount++; 4064 } 4065 4066 private static void namedGroupCaptureTest() throws Exception { 4067 check(Pattern.compile("x+(?<gname>y+)z+"), 4068 "xxxyyyzzz", 4069 "gname", 4070 "yyy"); 4071 4072 check(Pattern.compile("x+(?<gname8>y+)z+"), 4073 "xxxyyyzzz", 4074 "gname8", 4075 "yyy"); 4076 4077 //backref 4078 Pattern pattern = Pattern.compile("(a*)bc\\1"); 4079 check(pattern, "zzzaabcazzz", true); // found "abca" 4080 4081 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 4082 "zzzaabcaazzz", true); 4083 4084 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 4085 "abcdefabc", true); 4086 4087 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 4088 "abcdefghijkk", true); 4089 4090 // Supplementary character tests 4091 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4092 toSupplementaries("zzzaabcazzz"), true); 4093 4094 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4095 toSupplementaries("zzzaabcaazzz"), true); 4096 4097 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 4098 toSupplementaries("abcdefabc"), true); 4099 4100 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 4101 "(?<gname>" + 4102 toSupplementaries("k)") + "\\k<gname>"), 4103 toSupplementaries("abcdefghijkk"), true); 4104 4105 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 4106 "xxxyyyzzzyyy", 4107 "gname", 4108 "yyy"); 4109 4110 //replaceFirst/All 4111 checkReplaceFirst("(?<gn>ab)(c*)", 4112 
"abccczzzabcczzzabccc", 4113 "${gn}", 4114 "abzzzabcczzzabccc"); 4115 4116 checkReplaceAll("(?<gn>ab)(c*)", 4117 "abccczzzabcczzzabccc", 4118 "${gn}", 4119 "abzzzabzzzab"); 4120 4121 4122 checkReplaceFirst("(?<gn>ab)(c*)", 4123 "zzzabccczzzabcczzzabccczzz", 4124 "${gn}", 4125 "zzzabzzzabcczzzabccczzz"); 4126 4127 checkReplaceAll("(?<gn>ab)(c*)", 4128 "zzzabccczzzabcczzzabccczzz", 4129 "${gn}", 4130 "zzzabzzzabzzzabzzz"); 4131 4132 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4133 "zzzabccczzzabcczzzabccczzz", 4134 "${gn2}", 4135 "zzzccczzzabcczzzabccczzz"); 4136 4137 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4138 "zzzabccczzzabcczzzabccczzz", 4139 "${gn2}", 4140 "zzzccczzzcczzzccczzz"); 4141 4142 //toSupplementaries("(ab)(c*)")); 4143 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4144 ")(?<gn2>" + toSupplementaries("c") + "*)", 4145 toSupplementaries("abccczzzabcczzzabccc"), 4146 "${gn1}", 4147 toSupplementaries("abzzzabcczzzabccc")); 4148 4149 4150 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4151 ")(?<gn2>" + toSupplementaries("c") + "*)", 4152 toSupplementaries("abccczzzabcczzzabccc"), 4153 "${gn1}", 4154 toSupplementaries("abzzzabzzzab")); 4155 4156 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4157 ")(?<gn2>" + toSupplementaries("c") + "*)", 4158 toSupplementaries("abccczzzabcczzzabccc"), 4159 "${gn2}", 4160 toSupplementaries("ccczzzabcczzzabccc")); 4161 4162 4163 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4164 ")(?<gn2>" + toSupplementaries("c") + "*)", 4165 toSupplementaries("abccczzzabcczzzabccc"), 4166 "${gn2}", 4167 toSupplementaries("ccczzzcczzzccc")); 4168 4169 checkReplaceFirst("(?<dog>Dog)AndCat", 4170 "zzzDogAndCatzzzDogAndCatzzz", 4171 "${dog}", 4172 "zzzDogzzzDogAndCatzzz"); 4173 4174 4175 checkReplaceAll("(?<dog>Dog)AndCat", 4176 "zzzDogAndCatzzzDogAndCatzzz", 4177 "${dog}", 4178 "zzzDogzzzDogzzz"); 4179 4180 // backref in Matcher & String 4181 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", 
"${gn}").equals("abefij") || 4182 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4183 failCount++; 4184 4185 // negative 4186 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4187 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4188 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4189 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4190 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4191 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4192 "gnameX"); 4193 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4194 report("NamedGroupCapture"); 4195 } 4196 4197 // This is for bug 6919132 4198 private static void nonBmpClassComplementTest() throws Exception { 4199 Pattern p = Pattern.compile("\\P{Lu}"); 4200 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4201 4202 if (m.find() && m.start() == 1) 4203 failCount++; 4204 4205 // from a unicode category 4206 p = Pattern.compile("\\P{Lu}"); 4207 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4208 if (m.find()) 4209 failCount++; 4210 if (!m.hitEnd()) 4211 failCount++; 4212 4213 // block 4214 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4215 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4216 if (m.find() && m.start() == 1) 4217 failCount++; 4218 4219 p = Pattern.compile("\\P{sc=GRANTHA}"); 4220 m = p.matcher(new String(new int[] {0x11350}, 0, 1)); 4221 if (m.find() && m.start() == 1) 4222 failCount++; 4223 4224 report("NonBmpClassComplement"); 4225 } 4226 4227 private static void unicodePropertiesTest() throws Exception { 4228 // different forms 4229 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4230 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4231 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4232 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4233 
!Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4234 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4235 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4236 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4237 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4238 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4239 failCount++; 4240 4241 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4242 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4243 Matcher lastSM = common; 4244 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4245 4246 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4247 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4248 Matcher lastBM = latin; 4249 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4250 4251 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4252 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4253 continue; // only pick couple code points, they are the same 4254 } 4255 4256 // Unicode Script 4257 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4258 Matcher m; 4259 String str = new String(Character.toChars(cp)); 4260 if (script == lastScript) { 4261 m = lastSM; 4262 m.reset(str); 4263 } else { 4264 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4265 } 4266 if (!m.matches()) { 4267 failCount++; 4268 } 4269 Matcher other = (script == Character.UnicodeScript.COMMON)? 
unknown : common; 4270 other.reset(str); 4271 if (other.matches()) { 4272 failCount++; 4273 } 4274 lastSM = m; 4275 lastScript = script; 4276 4277 // Unicode Block 4278 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4279 if (block == null) { 4280 //System.out.printf("Not a Block: cp=%x%n", cp); 4281 continue; 4282 } 4283 if (block == lastBlock) { 4284 m = lastBM; 4285 m.reset(str); 4286 } else { 4287 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4288 } 4289 if (!m.matches()) { 4290 failCount++; 4291 } 4292 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4293 other.reset(str); 4294 if (other.matches()) { 4295 failCount++; 4296 } 4297 lastBM = m; 4298 lastBlock = block; 4299 } 4300 report("unicodeProperties"); 4301 } 4302 4303 private static void unicodeHexNotationTest() throws Exception { 4304 4305 // negative 4306 checkExpectedFail("\\x{-23}"); 4307 checkExpectedFail("\\x{110000}"); 4308 checkExpectedFail("\\x{}"); 4309 checkExpectedFail("\\x{AB[ef]"); 4310 4311 // codepoint 4312 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4313 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4314 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4315 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4316 4317 // in class 4318 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4319 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4320 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4321 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4322 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4323 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4324 4325 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4326 String s = "A" + new String(Character.toChars(cp)) + "B"; 4327 String hexUTF16 = (cp <= 0xFFFF)? 
String.format("\\u%04x", cp) 4328 : String.format("\\u%04x\\u%04x", 4329 (int) Character.toChars(cp)[0], 4330 (int) Character.toChars(cp)[1]); 4331 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4332 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4333 failCount++; 4334 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4335 failCount++; 4336 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4337 failCount++; 4338 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4339 failCount++; 4340 } 4341 report("unicodeHexNotation"); 4342 } 4343 4344 private static void unicodeClassesTest() throws Exception { 4345 4346 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4347 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4348 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4349 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4350 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4351 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4352 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4353 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4354 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4355 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4356 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4357 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4358 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4359 Matcher bound = Pattern.compile("\\b").matcher(""); 4360 Matcher word = Pattern.compile("\\w++").matcher(""); 4361 // UNICODE_CHARACTER_CLASS 4362 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4363 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4364 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4365 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 
4366 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4367 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4368 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4369 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4370 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4371 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4372 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4373 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4374 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4375 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4376 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4377 // embedded flag (?U) 4378 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4379 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4380 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4381 4382 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4383 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4384 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4385 // properties 4386 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4387 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4388 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4389 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4390 Matcher alphaP = 
Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4391 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4392 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4393 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4394 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4395 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4396 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4397 // javaMethod 4398 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4399 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4400 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4401 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4402 // GC/C 4403 Matcher gcC = Pattern.compile("\\p{C}").matcher(""); 4404 4405 for (int cp = 1; cp < 0x30000; cp++) { 4406 String str = new String(Character.toChars(cp)); 4407 int type = Character.getType(cp); 4408 if (// lower 4409 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4410 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4411 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4412 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4413 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4414 // upper 4415 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4416 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4417 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4418 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4419 // alpha 4420 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4421 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4422 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4423 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4424 // digit 4425 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4426 
Character.isDigit(cp) != digitU.reset(str).matches() || 4427 // alnum 4428 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4429 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4430 // punct 4431 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4432 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4433 // graph 4434 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4435 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4436 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4437 // blank 4438 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4439 != blank.reset(str).matches() || 4440 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4441 // print 4442 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4443 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4444 // cntrl 4445 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4446 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4447 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4448 // hexdigit 4449 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4450 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4451 // space 4452 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4453 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4454 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4455 // word 4456 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4457 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4458 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4459 // bwordb 4460 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4461 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4462 // properties 4463 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4464 Character.isLetter(cp) != letterP.reset(str).matches()|| 4465 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4466 Character.isIdeographic(cp) != 
ideogJ.reset(str).matches() || 4467 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 4468 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4469 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() || 4470 // gc_C 4471 (Character.CONTROL == type || Character.FORMAT == type || 4472 Character.PRIVATE_USE == type || Character.SURROGATE == type || 4473 Character.UNASSIGNED == type) 4474 != gcC.reset(str).matches()) { 4475 failCount++; 4476 } 4477 } 4478 4479 // bounds/word align 4480 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4481 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4482 failCount++; 4483 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4484 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4485 failCount++; 4486 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4487 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4488 failCount++; 4489 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4490 failCount++; 4491 report("unicodePredefinedClasses"); 4492 } 4493 4494 private static void unicodeCharacterNameTest() throws Exception { 4495 4496 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { 4497 if (!Character.isValidCodePoint(cp) || 4498 Character.getType(cp) == Character.UNASSIGNED) 4499 continue; 4500 String str = new String(Character.toChars(cp)); 4501 // single 4502 String p = "\\N{" + Character.getName(cp) + "}"; 4503 if (!Pattern.compile(p).matcher(str).matches()) { 4504 failCount++; 4505 } 4506 // class[c] 4507 p = "[\\N{" + Character.getName(cp) + "}]"; 4508 if (!Pattern.compile(p).matcher(str).matches()) { 4509 failCount++; 4510 } 4511 } 4512 4513 // range 4514 for (int i = 0; i < 10; i++) { 4515 int start = generator.nextInt(20); 4516 int end = start + generator.nextInt(200); 4517 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; 4518 String str; 4519 for (int cp = start; cp < end; cp++) { 4520 str = new 
String(Character.toChars(cp)); 4521 if (!Pattern.compile(p).matcher(str).matches()) { 4522 failCount++; 4523 } 4524 } 4525 str = new String(Character.toChars(end + 10)); 4526 if (Pattern.compile(p).matcher(str).matches()) { 4527 failCount++; 4528 } 4529 } 4530 4531 // slice 4532 for (int i = 0; i < 10; i++) { 4533 int n = generator.nextInt(256); 4534 int[] buf = new int[n]; 4535 StringBuffer sb = new StringBuffer(1024); 4536 for (int j = 0; j < n; j++) { 4537 int cp = generator.nextInt(1000); 4538 if (!Character.isValidCodePoint(cp) || 4539 Character.getType(cp) == Character.UNASSIGNED) 4540 cp = 0x4e00; // just use 4e00 4541 sb.append("\\N{" + Character.getName(cp) + "}"); 4542 buf[j] = cp; 4543 } 4544 String p = sb.toString(); 4545 String str = new String(buf, 0, buf.length); 4546 if (!Pattern.compile(p).matcher(str).matches()) { 4547 failCount++; 4548 } 4549 } 4550 report("unicodeCharacterName"); 4551 } 4552 4553 private static void horizontalAndVerticalWSTest() throws Exception { 4554 String hws = new String (new char[] { 4555 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4556 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4557 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4558 0x202f, 0x205f, 0x3000 }); 4559 String vws = new String (new char[] { 4560 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4561 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4562 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4563 failCount++; 4564 if (Pattern.compile("\\H").matcher(hws).find() || 4565 Pattern.compile("[\\H]").matcher(hws).find()) 4566 failCount++; 4567 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4568 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4569 failCount++; 4570 if (Pattern.compile("\\V").matcher(vws).find() || 4571 Pattern.compile("[\\V]").matcher(vws).find()) 4572 failCount++; 4573 String prefix = "abcd"; 4574 String suffix = "efgh"; 4575 String ng = "A"; 4576 for (int i = 0; i < hws.length(); i++) { 4577 String c = String.valueOf(hws.charAt(i)); 
4578 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4579 if (!m.find() || !c.equals(m.group())) 4580 failCount++; 4581 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4582 if (!m.find() || !c.equals(m.group())) 4583 failCount++; 4584 4585 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4586 if (!m.find() || !ng.equals(m.group())) 4587 failCount++; 4588 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4589 if (!m.find() || !ng.equals(m.group())) 4590 failCount++; 4591 } 4592 for (int i = 0; i < vws.length(); i++) { 4593 String c = String.valueOf(vws.charAt(i)); 4594 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4595 if (!m.find() || !c.equals(m.group())) 4596 failCount++; 4597 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4598 if (!m.find() || !c.equals(m.group())) 4599 failCount++; 4600 4601 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4602 if (!m.find() || !ng.equals(m.group())) 4603 failCount++; 4604 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4605 if (!m.find() || !ng.equals(m.group())) 4606 failCount++; 4607 } 4608 // \v in range is interpreted as 0x0B. 
This is the undocumented behavior 4609 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4610 failCount++; 4611 report("horizontalAndVerticalWSTest"); 4612 } 4613 4614 private static void linebreakTest() throws Exception { 4615 String linebreaks = new String (new char[] { 4616 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4617 String crnl = "\r\n"; 4618 if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() && 4619 Pattern.compile("\\R").matcher(crnl).matches() && 4620 Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() && 4621 Pattern.compile("\\Rabc").matcher("\rabc").matches() && 4622 Pattern.compile("\\R\\R").matcher(crnl).matches() && // backtracking 4623 Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking 4624 !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029 4625 failCount++; 4626 } 4627 report("linebreakTest"); 4628 } 4629 4630 // #7189363 4631 private static void branchTest() throws Exception { 4632 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4633 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4634 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4635 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4636 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4637 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4638 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4639 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4640 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4641 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4642 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4643 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4644 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4645 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4646 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4647 
!Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4648 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4649 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4650 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4651 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4652 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 4653 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4654 failCount++; 4655 report("branchTest"); 4656 } 4657 4658 // This test is for 8007395 4659 private static void groupCurlyNotFoundSuppTest() throws Exception { 4660 String input = "test this as \ud83d\ude0d"; 4661 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4662 "test(.)*(@[a-zA-Z.]+)", 4663 "test([^B])+(@[a-zA-Z.]+)", 4664 "test([^B])*(@[a-zA-Z.]+)", 4665 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4666 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4667 }) { 4668 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4669 .matcher(input); 4670 try { 4671 if (m.find()) { 4672 failCount++; 4673 } 4674 } catch (Exception x) { 4675 failCount++; 4676 } 4677 } 4678 report("GroupCurly NotFoundSupp"); 4679 } 4680 4681 // This test is for 8023647 4682 private static void groupCurlyBackoffTest() throws Exception { 4683 if (!"abc1c".matches("(\\w)+1\\1") || 4684 "abc11".matches("(\\w)+1\\1")) { 4685 failCount++; 4686 } 4687 report("GroupCurly backoff"); 4688 } 4689 4690 // This test is for 8012646 4691 private static void patternAsPredicate() throws Exception { 4692 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4693 4694 if (p.test("")) { 4695 failCount++; 4696 } 4697 if (!p.test("word")) { 4698 failCount++; 4699 } 4700 if (p.test("1234")) { 4701 failCount++; 4702 } 4703 if (!p.test("word1234")) { 4704 failCount++; 4705 } 4706 report("Pattern.asPredicate"); 4707 } 4708 4709 // This test is for 8184692 4710 private static void patternAsMatchPredicate() throws Exception { 4711 Predicate<String> p = 
Pattern.compile("[a-z]+").asMatchPredicate(); 4712 4713 if (p.test("")) { 4714 failCount++; 4715 } 4716 if (!p.test("word")) { 4717 failCount++; 4718 } 4719 if (p.test("1234word")) { 4720 failCount++; 4721 } 4722 if (p.test("1234")) { 4723 failCount++; 4724 } 4725 report("Pattern.asMatchPredicate"); 4726 } 4727 4728 4729 // This test is for 8035975 4730 private static void invalidFlags() throws Exception { 4731 for (int flag = 1; flag != 0; flag <<= 1) { 4732 switch (flag) { 4733 case Pattern.CASE_INSENSITIVE: 4734 case Pattern.MULTILINE: 4735 case Pattern.DOTALL: 4736 case Pattern.UNICODE_CASE: 4737 case Pattern.CANON_EQ: 4738 case Pattern.UNIX_LINES: 4739 case Pattern.LITERAL: 4740 case Pattern.UNICODE_CHARACTER_CLASS: 4741 case Pattern.COMMENTS: 4742 // valid flag, continue 4743 break; 4744 default: 4745 try { 4746 Pattern.compile(".", flag); 4747 failCount++; 4748 } catch (IllegalArgumentException expected) { 4749 } 4750 } 4751 } 4752 report("Invalid compile flags"); 4753 } 4754 4755 // This test is for 8158482 4756 private static void embeddedFlags() throws Exception { 4757 try { 4758 Pattern.compile("(?i).(?-i)."); 4759 Pattern.compile("(?m).(?-m)."); 4760 Pattern.compile("(?s).(?-s)."); 4761 Pattern.compile("(?d).(?-d)."); 4762 Pattern.compile("(?u).(?-u)."); 4763 Pattern.compile("(?c).(?-c)."); 4764 Pattern.compile("(?x).(?-x)."); 4765 Pattern.compile("(?U).(?-U)."); 4766 Pattern.compile("(?imsducxU).(?-imsducxU)."); 4767 } catch (PatternSyntaxException x) { 4768 failCount++; 4769 } 4770 report("Embedded flags"); 4771 } 4772 4773 private static void grapheme() throws Exception { 4774 Files.lines(UCDFiles.GRAPHEME_BREAK_TEST) 4775 .filter( ln -> ln.length() != 0 && !ln.startsWith("#") ) 4776 .forEach( ln -> { 4777 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); 4778 // System.out.println(str); 4779 String[] strs = ln.split("\u00f7|\u00d7"); 4780 StringBuilder src = new StringBuilder(); 4781 ArrayList<String> graphemes = new 
ArrayList<>(); 4782 StringBuilder buf = new StringBuilder(); 4783 int offBk = 0; 4784 for (String str : strs) { 4785 if (str.length() == 0) // first empty str 4786 continue; 4787 int cp = Integer.parseInt(str, 16); 4788 src.appendCodePoint(cp); 4789 buf.appendCodePoint(cp); 4790 offBk += (str.length() + 1); 4791 if (ln.charAt(offBk) == '\u00f7') { // DIV 4792 graphemes.add(buf.toString()); 4793 buf = new StringBuilder(); 4794 } 4795 } 4796 Pattern p = Pattern.compile("\\X"); 4797 Matcher m = p.matcher(src.toString()); 4798 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4799 for (String g : graphemes) { 4800 // System.out.printf(" grapheme:=[%s]%n", g); 4801 // (1) test \\X directly 4802 if (!m.find() || !m.group().equals(g)) { 4803 System.out.println("Failed \\X [" + ln + "] : " + g); 4804 failCount++; 4805 } 4806 // (2) test \\b{g} + \\X via Scanner 4807 boolean hasNext = s.hasNext(p); 4808 // if (!s.hasNext() || !s.next().equals(next)) { 4809 if (!s.hasNext(p) || !s.next(p).equals(g)) { 4810 System.out.println("Failed b{g} [" + ln + "] : " + g); 4811 failCount++; 4812 } 4813 } 4814 }); 4815 // some sanity checks 4816 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() || 4817 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() || 4818 !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches()) 4819 failCount++; 4820 // make sure "\b{n}" still works 4821 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches()) 4822 failCount++; 4823 report("Unicode extended grapheme cluster"); 4824 } 4825 4826 // hangup/timeout if go into exponential backtracking 4827 private static void expoBacktracking() throws Exception { 4828 4829 Object[][] patternMatchers = { 4830 // 6328855 4831 { "(.*\n*)*", 4832 "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)", 4833 false }, 4834 // 
6192895 4835 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4836 "Hello World this is a test this is a test this is a test A", 4837 true }, 4838 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4839 "Hello World this is a test this is a test this is a test \u4e00 ", 4840 false }, 4841 { " *([a-z0-9]+ *)+", 4842 "hello world this is a test this is a test this is a test A", 4843 false }, 4844 // 4771934 [FIXED] #5013651? 4845 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4846 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com", 4847 true }, 4848 // 4866249 [FIXED] 4849 { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>", 4850 "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">", 4851 true }, 4852 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4853 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com", 4854 false }, 4855 // 6345469 4856 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4857 " < br/> < / p> <p> <html> <adfasfdasdf> </p>", 4858 true }, // --> matched 4859 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4860 " < br/> < / p> <p> <html> <adfasfdasdf> p </p>", 4861 false }, 4862 // 5026912 4863 { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$", 4864 "156580451111112225588087755221111111566969655555555", 4865 false}, 4866 // 6988218 4867 { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')", 4868 "'%)) order by ANGEBOT.ID", 4869 false}, // find 4870 // 6693451 4871 { "^(\\s*foo\\s*)*$", 4872 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo", 4873 true }, 4874 { "^(\\s*foo\\s*)*$", 4875 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo 
foo foo fo", 4876 false 4877 }, 4878 // 7006761 4879 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true}, 4880 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false}, 4881 // 8140212 4882 { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)", 4883 "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()", 4884 false 4885 }, 4886 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true}, 4887 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false}, 4888 4889 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true }, 4890 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4891 4892 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true}, 4893 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4894 4895 { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false}, 4896 4897 /* not fixed 4898 //8132141 ---> second level exponential backtracking 4899 { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*", 4900 "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" }, 4901 */ 4902 }; 4903 4904 for (Object[] pm : patternMatchers) { 4905 String p = (String)pm[0]; 4906 String s = (String)pm[1]; 4907 boolean r = (Boolean)pm[2]; 4908 if (r != Pattern.compile(p).matcher(s).matches()) { 4909 failCount++; 4910 } 4911 } 4912 } 4913 4914 private static void invalidGroupName() { 4915 // Invalid start of a group name 4916 for (String groupName : List.of("", ".", "0", "\u0040", "\u005b", 4917 "\u0060", "\u007b", "\u0416")) { 4918 for (String pat : List.of("(?<" + 
groupName + ">)", 4919 "\\k<" + groupName + ">")) { 4920 try { 4921 Pattern.compile(pat); 4922 failCount++; 4923 } catch (PatternSyntaxException e) { 4924 if (!e.getMessage().startsWith( 4925 "capturing group name does not start with a" 4926 + " Latin letter")) { 4927 failCount++; 4928 } 4929 } 4930 } 4931 } 4932 // Invalid char in a group name 4933 for (String groupName : List.of("a.", "b\u0040", "c\u005b", 4934 "d\u0060", "e\u007b", "f\u0416")) { 4935 for (String pat : List.of("(?<" + groupName + ">)", 4936 "\\k<" + groupName + ">")) { 4937 try { 4938 Pattern.compile(pat); 4939 failCount++; 4940 } catch (PatternSyntaxException e) { 4941 if (!e.getMessage().startsWith( 4942 "named capturing group is missing trailing '>'")) { 4943 failCount++; 4944 } 4945 } 4946 } 4947 } 4948 report("Invalid capturing group names"); 4949 } 4950 4951 private static void illegalRepetitionRange() { 4952 // huge integers > (2^31 - 1) 4953 String n = BigInteger.valueOf(1L << 32) 4954 .toString(); 4955 String m = BigInteger.valueOf(1L << 31) 4956 .add(new BigInteger(80, generator)) 4957 .toString(); 4958 for (String rep : List.of("", "x", ".", ",", "-1", "2,1", 4959 n, n + ",", "0," + n, n + "," + m, m, m + ",", "0," + m)) { 4960 String pat = ".{" + rep + "}"; 4961 try { 4962 Pattern.compile(pat); 4963 failCount++; 4964 System.out.println("Expected to fail. 
Pattern: " + pat); 4965 } catch (PatternSyntaxException e) { 4966 if (!e.getMessage().startsWith("Illegal repetition")) { 4967 failCount++; 4968 System.out.println("Unexpected error message: " + e.getMessage()); 4969 } 4970 } catch (Throwable t) { 4971 failCount++; 4972 System.out.println("Unexpected exception: " + t); 4973 } 4974 } 4975 report("illegalRepetitionRange"); 4976 } 4977 4978 private static void surrogatePairWithCanonEq() { 4979 try { 4980 Pattern.compile("\ud834\udd21", Pattern.CANON_EQ); 4981 } catch (Throwable t) { 4982 failCount++; 4983 System.out.println("Unexpected exception: " + t); 4984 } 4985 report("surrogatePairWithCanonEq"); 4986 } 4987 }