1 /* 2 * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895 37 * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706 38 * 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812 39 * 40 * @library /test/lib 41 * @library /lib/testlibrary/java/lang 42 * @build jdk.test.lib.RandomFactory 43 * @run main RegExTest 44 * @key randomness 45 */ 46 47 import java.io.BufferedReader; 48 import java.io.ByteArrayInputStream; 49 import java.io.ByteArrayOutputStream; 50 import java.io.File; 51 import java.io.FileInputStream; 52 import java.io.InputStreamReader; 53 import java.io.ObjectInputStream; 54 import java.io.ObjectOutputStream; 55 import java.math.BigInteger; 56 import java.nio.CharBuffer; 57 import java.nio.file.Files; 58 import java.util.ArrayList; 59 import java.util.Arrays; 60 import java.util.HashMap; 61 import java.util.List; 62 import java.util.Map; 63 import java.util.Random; 64 import java.util.Scanner; 65 import java.util.function.Function; 66 import java.util.function.Predicate; 67 import java.util.regex.Matcher; 68 import java.util.regex.MatchResult; 69 import java.util.regex.Pattern; 70 import java.util.regex.PatternSyntaxException; 71 import jdk.test.lib.RandomFactory; 72 73 /** 74 * This is a test class created to check the operation of 75 * the Pattern and Matcher classes. 76 */ 77 public class RegExTest { 78 79 private static Random generator = RandomFactory.getRandom(); 80 private static boolean failure = false; 81 private static int failCount = 0; 82 private static String firstFailure = null; 83 84 /** 85 * Main to interpret arguments and run several tests. 86 * 87 */ 88 public static void main(String[] args) throws Exception { 89 // Most of the tests are in a file 90 processFile("TestCases.txt"); 91 //processFile("PerlCases.txt"); 92 processFile("BMPTestCases.txt"); 93 processFile("SupplementaryTestCases.txt"); 94 95 // These test many randomly generated char patterns 96 bm(); 97 slice(); 98 99 // These are hard to put into the file 100 escapes(); 101 blankInput(); 102 103 // Substitition tests on randomly generated sequences 104 globalSubstitute(); 105 stringbufferSubstitute(); 106 stringbuilderSubstitute(); 107 108 substitutionBasher(); 109 substitutionBasher2(); 110 111 // Canonical Equivalence 112 ceTest(); 113 114 // Anchors 115 anchorTest(); 116 117 // boolean match calls 118 matchesTest(); 119 lookingAtTest(); 120 121 // Pattern API 122 patternMatchesTest(); 123 124 // Misc 125 lookbehindTest(); 126 nullArgumentTest(); 127 backRefTest(); 128 groupCaptureTest(); 129 caretTest(); 130 charClassTest(); 131 emptyPatternTest(); 132 findIntTest(); 133 group0Test(); 134 longPatternTest(); 135 octalTest(); 136 ampersandTest(); 137 negationTest(); 138 splitTest(); 139 appendTest(); 140 caseFoldingTest(); 141 commentsTest(); 142 unixLinesTest(); 143 replaceFirstTest(); 144 gTest(); 145 zTest(); 146 serializeTest(); 147 reluctantRepetitionTest(); 148 multilineDollarTest(); 149 dollarAtEndTest(); 150 caretBetweenTerminatorsTest(); 151 // This RFE rejected in Tiger numOccurrencesTest(); 152 javaCharClassTest(); 153 nonCaptureRepetitionTest(); 154 notCapturedGroupCurlyMatchTest(); 155 escapedSegmentTest(); 156 literalPatternTest(); 157 literalReplacementTest(); 158 regionTest(); 159 toStringTest(); 160 negatedCharClassTest(); 161 findFromTest(); 162 boundsTest(); 163 unicodeWordBoundsTest(); 164 caretAtEndTest(); 165 wordSearchTest(); 166 hitEndTest(); 167 toMatchResultTest(); 168 toMatchResultTest2(); 169 surrogatesInClassTest(); 170 removeQEQuotingTest(); 171 namedGroupCaptureTest(); 172 nonBmpClassComplementTest(); 173 unicodePropertiesTest(); 174 unicodeHexNotationTest(); 175 unicodeClassesTest(); 176 unicodeCharacterNameTest(); 177 horizontalAndVerticalWSTest(); 178 linebreakTest(); 179 branchTest(); 180 groupCurlyNotFoundSuppTest(); 181 groupCurlyBackoffTest(); 182 patternAsPredicate(); 183 patternAsMatchPredicate(); 184 invalidFlags(); 185 embeddedFlags(); 186 grapheme(); 187 expoBacktracking(); 188 invalidGroupName(); 189 illegalRepetitionRange(); 190 surrogatePairWithCanonEq(); 191 lineBreakWithQuantifier(); 192 193 if (failure) { 194 throw new 195 RuntimeException("RegExTest failed, 1st failure: " + 196 firstFailure); 197 } else { 198 System.err.println("OKAY: All tests passed."); 199 } 200 } 201 202 // Utility functions 203 204 private static String getRandomAlphaString(int length) { 205 StringBuffer buf = new StringBuffer(length); 206 for (int i=0; i<length; i++) { 207 char randChar = (char)(97 + generator.nextInt(26)); 208 buf.append(randChar); 209 } 210 return buf.toString(); 211 } 212 213 private static void check(Matcher m, String expected) { 214 m.find(); 215 if (!m.group().equals(expected)) 216 failCount++; 217 } 218 219 private static void check(Matcher m, String result, boolean expected) { 220 m.find(); 221 if (m.group().equals(result) != expected) 222 failCount++; 223 } 224 225 private static void check(Pattern p, String s, boolean expected) { 226 if (p.matcher(s).find() != expected) 227 failCount++; 228 } 229 230 private static void check(String p, String s, boolean expected) { 231 Matcher matcher = Pattern.compile(p).matcher(s); 232 if (matcher.find() != expected) 233 failCount++; 234 } 235 236 private static void check(String p, char c, boolean expected) { 237 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 238 Pattern pattern = Pattern.compile(propertyPattern); 239 char[] ca = new char[1]; ca[0] = c; 240 Matcher matcher = pattern.matcher(new String(ca)); 241 if (!matcher.find()) 242 failCount++; 243 } 244 245 private static void check(String p, int codePoint, boolean expected) { 246 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 247 Pattern pattern = Pattern.compile(propertyPattern); 248 char[] ca = Character.toChars(codePoint); 249 Matcher matcher = pattern.matcher(new String(ca)); 250 if (!matcher.find()) 251 failCount++; 252 } 253 254 private static void check(String p, int flag, String input, String s, 255 boolean expected) 256 { 257 Pattern pattern = Pattern.compile(p, flag); 258 Matcher matcher = pattern.matcher(input); 259 if (expected) 260 check(matcher, s, expected); 261 else 262 check(pattern, input, false); 263 } 264 265 private static void report(String testName) { 266 int spacesToAdd = 30 - testName.length(); 267 StringBuffer paddedNameBuffer = new StringBuffer(testName); 268 for (int i=0; i<spacesToAdd; i++) 269 paddedNameBuffer.append(" "); 270 String paddedName = paddedNameBuffer.toString(); 271 System.err.println(paddedName + ": " + 272 (failCount==0 ? "Passed":"Failed("+failCount+")")); 273 if (failCount > 0) { 274 failure = true; 275 276 if (firstFailure == null) { 277 firstFailure = testName; 278 } 279 } 280 281 failCount = 0; 282 } 283 284 /** 285 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 286 * supplementary characters. This method does NOT fully take care 287 * of the regex syntax. 288 */ 289 private static String toSupplementaries(String s) { 290 int length = s.length(); 291 StringBuffer sb = new StringBuffer(length * 2); 292 293 for (int i = 0; i < length; ) { 294 char c = s.charAt(i++); 295 if (c == '\\') { 296 sb.append(c); 297 if (i < length) { 298 c = s.charAt(i++); 299 sb.append(c); 300 if (c == 'u') { 301 // assume no syntax error 302 sb.append(s.charAt(i++)); 303 sb.append(s.charAt(i++)); 304 sb.append(s.charAt(i++)); 305 sb.append(s.charAt(i++)); 306 } 307 } 308 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 309 sb.append('\ud800').append((char)('\udc00'+c)); 310 } else { 311 sb.append(c); 312 } 313 } 314 return sb.toString(); 315 } 316 317 // Regular expression tests 318 319 // This is for bug 6178785 320 // Test if an expected NPE gets thrown when passing in a null argument 321 private static boolean check(Runnable test) { 322 try { 323 test.run(); 324 failCount++; 325 return false; 326 } catch (NullPointerException npe) { 327 return true; 328 } 329 } 330 331 private static void nullArgumentTest() { 332 check(() -> Pattern.compile(null)); 333 check(() -> Pattern.matches(null, null)); 334 check(() -> Pattern.matches("xyz", null)); 335 check(() -> Pattern.quote(null)); 336 check(() -> Pattern.compile("xyz").split(null)); 337 check(() -> Pattern.compile("xyz").matcher(null)); 338 339 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 340 m.matches(); 341 check(() -> m.appendTail((StringBuffer) null)); 342 check(() -> m.appendTail((StringBuilder)null)); 343 check(() -> m.replaceAll((String) null)); 344 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 345 check(() -> m.replaceFirst((String)null)); 346 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 347 check(() -> m.appendReplacement((StringBuffer)null, null)); 348 check(() -> m.appendReplacement((StringBuilder)null, null)); 349 check(() -> m.reset(null)); 350 check(() -> Matcher.quoteReplacement(null)); 351 //check(() -> m.usePattern(null)); 352 353 report("Null Argument"); 354 } 355 356 // This is for bug6635133 357 // Test if surrogate pair in Unicode escapes can be handled correctly. 358 private static void surrogatesInClassTest() throws Exception { 359 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 360 Matcher matcher = pattern.matcher("\ud834\udd22"); 361 if (!matcher.find()) 362 failCount++; 363 364 report("Surrogate pair in Unicode escape"); 365 } 366 367 // This is for bug6990617 368 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 369 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 370 // char is an octal digit. 371 private static void removeQEQuotingTest() throws Exception { 372 Pattern pattern = 373 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 374 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 375 if (!matcher.find()) 376 failCount++; 377 378 report("Remove Q/E Quoting"); 379 } 380 381 // This is for bug 4988891 382 // Test toMatchResult to see that it is a copy of the Matcher 383 // that is not affected by subsequent operations on the original 384 private static void toMatchResultTest() throws Exception { 385 Pattern pattern = Pattern.compile("squid"); 386 Matcher matcher = pattern.matcher( 387 "agiantsquidofdestinyasmallsquidoffate"); 388 matcher.find(); 389 int matcherStart1 = matcher.start(); 390 MatchResult mr = matcher.toMatchResult(); 391 if (mr == matcher) 392 failCount++; 393 int resultStart1 = mr.start(); 394 if (matcherStart1 != resultStart1) 395 failCount++; 396 matcher.find(); 397 int matcherStart2 = matcher.start(); 398 int resultStart2 = mr.start(); 399 if (matcherStart2 == resultStart2) 400 failCount++; 401 if (resultStart1 != resultStart2) 402 failCount++; 403 MatchResult mr2 = matcher.toMatchResult(); 404 if (mr == mr2) 405 failCount++; 406 if (mr2.start() != matcherStart2) 407 failCount++; 408 report("toMatchResult is a copy"); 409 } 410 411 private static void checkExpectedISE(Runnable test) { 412 try { 413 test.run(); 414 failCount++; 415 } catch (IllegalStateException x) { 416 } catch (IndexOutOfBoundsException xx) { 417 failCount++; 418 } 419 } 420 421 private static void checkExpectedIOOE(Runnable test) { 422 try { 423 test.run(); 424 failCount++; 425 } catch (IndexOutOfBoundsException x) {} 426 } 427 428 // This is for bug 8074678 429 // Test the result of toMatchResult throws ISE if no match is availble 430 private static void toMatchResultTest2() throws Exception { 431 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 432 matcher.find(); 433 MatchResult mr = matcher.toMatchResult(); 434 435 checkExpectedISE(() -> mr.start()); 436 checkExpectedISE(() -> mr.start(2)); 437 checkExpectedISE(() -> mr.end()); 438 checkExpectedISE(() -> mr.end(2)); 439 checkExpectedISE(() -> mr.group()); 440 checkExpectedISE(() -> mr.group(2)); 441 442 matcher = Pattern.compile("(match)").matcher("there is a match"); 443 matcher.find(); 444 MatchResult mr2 = matcher.toMatchResult(); 445 checkExpectedIOOE(() -> mr2.start(2)); 446 checkExpectedIOOE(() -> mr2.end(2)); 447 checkExpectedIOOE(() -> mr2.group(2)); 448 449 report("toMatchResult2 appropriate exceptions"); 450 } 451 452 // This is for bug 5013885 453 // Must test a slice to see if it reports hitEnd correctly 454 private static void hitEndTest() throws Exception { 455 // Basic test of Slice node 456 Pattern p = Pattern.compile("^squidattack"); 457 Matcher m = p.matcher("squack"); 458 m.find(); 459 if (m.hitEnd()) 460 failCount++; 461 m.reset("squid"); 462 m.find(); 463 if (!m.hitEnd()) 464 failCount++; 465 466 // Test Slice, SliceA and SliceU nodes 467 for (int i=0; i<3; i++) { 468 int flags = 0; 469 if (i==1) flags = Pattern.CASE_INSENSITIVE; 470 if (i==2) flags = Pattern.UNICODE_CASE; 471 p = Pattern.compile("^abc", flags); 472 m = p.matcher("ad"); 473 m.find(); 474 if (m.hitEnd()) 475 failCount++; 476 m.reset("ab"); 477 m.find(); 478 if (!m.hitEnd()) 479 failCount++; 480 } 481 482 // Test Boyer-Moore node 483 p = Pattern.compile("catattack"); 484 m = p.matcher("attack"); 485 m.find(); 486 if (!m.hitEnd()) 487 failCount++; 488 489 p = Pattern.compile("catattack"); 490 m = p.matcher("attackattackattackcatatta"); 491 m.find(); 492 if (!m.hitEnd()) 493 failCount++; 494 495 // 8184706: Matching u+0d at EOL against \R should hit-end 496 p = Pattern.compile("...\\R"); 497 m = p.matcher("cat" + (char)0x0a); 498 m.find(); 499 if (m.hitEnd()) 500 failCount++; 501 502 m = p.matcher("cat" + (char)0x0d); 503 m.find(); 504 if (!m.hitEnd()) 505 failCount++; 506 507 m = p.matcher("cat" + (char)0x0d + (char)0x0a); 508 m.find(); 509 if (m.hitEnd()) 510 failCount++; 511 512 report("hitEnd"); 513 } 514 515 // This is for bug 4997476 516 // It is weird code submitted by customer demonstrating a regression 517 private static void wordSearchTest() throws Exception { 518 String testString = new String("word1 word2 word3"); 519 Pattern p = Pattern.compile("\\b"); 520 Matcher m = p.matcher(testString); 521 int position = 0; 522 int start = 0; 523 while (m.find(position)) { 524 start = m.start(); 525 if (start == testString.length()) 526 break; 527 if (m.find(start+1)) { 528 position = m.start(); 529 } else { 530 position = testString.length(); 531 } 532 if (testString.substring(start, position).equals(" ")) 533 continue; 534 if (!testString.substring(start, position-1).startsWith("word")) 535 failCount++; 536 } 537 report("Customer word search"); 538 } 539 540 // This is for bug 4994840 541 private static void caretAtEndTest() throws Exception { 542 // Problem only occurs with multiline patterns 543 // containing a beginning-of-line caret "^" followed 544 // by an expression that also matches the empty string. 545 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 546 Matcher matcher = pattern.matcher("\r"); 547 matcher.find(); 548 matcher.find(); 549 report("Caret at end"); 550 } 551 552 // This test is for 4979006 553 // Check to see if word boundary construct properly handles unicode 554 // non spacing marks 555 private static void unicodeWordBoundsTest() throws Exception { 556 String spaces = " "; 557 String wordChar = "a"; 558 String nsm = "\u030a"; 559 560 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 561 562 Pattern pattern = Pattern.compile("\\b"); 563 Matcher matcher = pattern.matcher(""); 564 // S=other B=word character N=non spacing mark .=word boundary 565 // SS.BB.SS 566 String input = spaces + wordChar + wordChar + spaces; 567 twoFindIndexes(input, matcher, 2, 4); 568 // SS.BBN.SS 569 input = spaces + wordChar +wordChar + nsm + spaces; 570 twoFindIndexes(input, matcher, 2, 5); 571 // SS.BN.SS 572 input = spaces + wordChar + nsm + spaces; 573 twoFindIndexes(input, matcher, 2, 4); 574 // SS.BNN.SS 575 input = spaces + wordChar + nsm + nsm + spaces; 576 twoFindIndexes(input, matcher, 2, 5); 577 // SSN.BB.SS 578 input = spaces + nsm + wordChar + wordChar + spaces; 579 twoFindIndexes(input, matcher, 3, 5); 580 // SS.BNB.SS 581 input = spaces + wordChar + nsm + wordChar + spaces; 582 twoFindIndexes(input, matcher, 2, 5); 583 // SSNNSS 584 input = spaces + nsm + nsm + spaces; 585 matcher.reset(input); 586 if (matcher.find()) 587 failCount++; 588 // SSN.BBN.SS 589 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 590 twoFindIndexes(input, matcher, 3, 6); 591 592 report("Unicode word boundary"); 593 } 594 595 private static void twoFindIndexes(String input, Matcher matcher, int a, 596 int b) throws Exception 597 { 598 matcher.reset(input); 599 matcher.find(); 600 if (matcher.start() != a) 601 failCount++; 602 matcher.find(); 603 if (matcher.start() != b) 604 failCount++; 605 } 606 607 // This test is for 6284152 608 static void check(String regex, String input, String[] expected) { 609 List<String> result = new ArrayList<String>(); 610 Pattern p = Pattern.compile(regex); 611 Matcher m = p.matcher(input); 612 while (m.find()) { 613 result.add(m.group()); 614 } 615 if (!Arrays.asList(expected).equals(result)) 616 failCount++; 617 } 618 619 private static void lookbehindTest() throws Exception { 620 //Positive 621 check("(?<=%.{0,5})foo\\d", 622 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 623 new String[]{"foo1", "foo2", "foo3"}); 624 625 //boundary at end of the lookbehind sub-regex should work consistently 626 //with the boundary just after the lookbehind sub-regex 627 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 628 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 629 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 630 check("(?<!abc \\b)foo", "abc foo", new String[0]); 631 632 //Negative 633 check("(?<!%.{0,5})foo\\d", 634 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 635 new String[] {"foo4", "foo5"}); 636 637 //Positive greedy 638 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 639 640 //Positive reluctant 641 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 642 643 //supplementary 644 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 645 new String[] {"fo\ud800\udc00o"}); 646 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 647 new String[] {"fo\ud800\udc00o"}); 648 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 649 new String[] {"fo\ud800\udc00o"}); 650 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 651 new String[] {"fo\ud800\udc00o"}); 652 report("Lookbehind"); 653 } 654 655 // This test is for 4938995 656 // Check to see if weak region boundaries are transparent to 657 // lookahead and lookbehind constructs 658 private static void boundsTest() throws Exception { 659 String fullMessage = "catdogcat"; 660 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 661 Matcher matcher = pattern.matcher("catdogca"); 662 matcher.useTransparentBounds(true); 663 if (matcher.find()) 664 failCount++; 665 matcher.reset("atdogcat"); 666 if (matcher.find()) 667 failCount++; 668 matcher.reset(fullMessage); 669 if (!matcher.find()) 670 failCount++; 671 matcher.reset(fullMessage); 672 matcher.region(0,9); 673 if (!matcher.find()) 674 failCount++; 675 matcher.reset(fullMessage); 676 matcher.region(0,6); 677 if (!matcher.find()) 678 failCount++; 679 matcher.reset(fullMessage); 680 matcher.region(3,6); 681 if (!matcher.find()) 682 failCount++; 683 matcher.useTransparentBounds(false); 684 if (matcher.find()) 685 failCount++; 686 687 // Negative lookahead/lookbehind 688 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 689 matcher = pattern.matcher("dogcat"); 690 matcher.useTransparentBounds(true); 691 matcher.region(0,3); 692 if (matcher.find()) 693 failCount++; 694 matcher.reset("catdog"); 695 matcher.region(3,6); 696 if (matcher.find()) 697 failCount++; 698 matcher.useTransparentBounds(false); 699 matcher.reset("dogcat"); 700 matcher.region(0,3); 701 if (!matcher.find()) 702 failCount++; 703 matcher.reset("catdog"); 704 matcher.region(3,6); 705 if (!matcher.find()) 706 failCount++; 707 708 report("Region bounds transparency"); 709 } 710 711 // This test is for 4945394 712 private static void findFromTest() throws Exception { 713 String message = "This is 40 $0 message."; 714 Pattern pat = Pattern.compile("\\$0"); 715 Matcher match = pat.matcher(message); 716 if (!match.find()) 717 failCount++; 718 if (match.find()) 719 failCount++; 720 if (match.find()) 721 failCount++; 722 report("Check for alternating find"); 723 } 724 725 // This test is for 4872664 and 4892980 726 private static void negatedCharClassTest() throws Exception { 727 Pattern pattern = Pattern.compile("[^>]"); 728 Matcher matcher = pattern.matcher("\u203A"); 729 if (!matcher.matches()) 730 failCount++; 731 pattern = Pattern.compile("[^fr]"); 732 matcher = pattern.matcher("a"); 733 if (!matcher.find()) 734 failCount++; 735 matcher.reset("\u203A"); 736 if (!matcher.find()) 737 failCount++; 738 String s = "for"; 739 String result[] = s.split("[^fr]"); 740 if (!result[0].equals("f")) 741 failCount++; 742 if (!result[1].equals("r")) 743 failCount++; 744 s = "f\u203Ar"; 745 result = s.split("[^fr]"); 746 if (!result[0].equals("f")) 747 failCount++; 748 if (!result[1].equals("r")) 749 failCount++; 750 751 // Test adding to bits, subtracting a node, then adding to bits again 752 pattern = Pattern.compile("[^f\u203Ar]"); 753 matcher = pattern.matcher("a"); 754 if (!matcher.find()) 755 failCount++; 756 matcher.reset("f"); 757 if (matcher.find()) 758 failCount++; 759 matcher.reset("\u203A"); 760 if (matcher.find()) 761 failCount++; 762 matcher.reset("r"); 763 if (matcher.find()) 764 failCount++; 765 matcher.reset("\u203B"); 766 if (!matcher.find()) 767 failCount++; 768 769 // Test subtracting a node, adding to bits, subtracting again 770 pattern = Pattern.compile("[^\u203Ar\u203B]"); 771 matcher = pattern.matcher("a"); 772 if (!matcher.find()) 773 failCount++; 774 matcher.reset("\u203A"); 775 if (matcher.find()) 776 failCount++; 777 matcher.reset("r"); 778 if (matcher.find()) 779 failCount++; 780 matcher.reset("\u203B"); 781 if (matcher.find()) 782 failCount++; 783 matcher.reset("\u203C"); 784 if (!matcher.find()) 785 failCount++; 786 787 report("Negated Character Class"); 788 } 789 790 // This test is for 4628291 791 private static void toStringTest() throws Exception { 792 Pattern pattern = Pattern.compile("b+"); 793 if (pattern.toString() != "b+") 794 failCount++; 795 Matcher matcher = pattern.matcher("aaabbbccc"); 796 String matcherString = matcher.toString(); // unspecified 797 matcher.find(); 798 matcherString = matcher.toString(); // unspecified 799 matcher.region(0,3); 800 matcherString = matcher.toString(); // unspecified 801 matcher.reset(); 802 matcherString = matcher.toString(); // unspecified 803 report("toString"); 804 } 805 806 // This test is for 4808962 807 private static void literalPatternTest() throws Exception { 808 int flags = Pattern.LITERAL; 809 810 Pattern pattern = Pattern.compile("abc\\t$^", flags); 811 check(pattern, "abc\\t$^", true); 812 813 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 814 check(pattern, "abc\\t$^", true); 815 816 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 817 check(pattern, "\\Qa^$bcabc\\E", true); 818 check(pattern, "a^$bcabc", false); 819 820 pattern = Pattern.compile("\\\\Q\\\\E"); 821 check(pattern, "\\Q\\E", true); 822 823 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 824 check(pattern, "abcefg\\Q\\Ehij", true); 825 826 pattern = Pattern.compile("\\\\\\Q\\\\E"); 827 check(pattern, "\\\\\\\\", true); 828 829 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 830 check(pattern, "\\Qa^$bcabc\\E", true); 831 check(pattern, "a^$bcabc", false); 832 833 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 834 check(pattern, "\\Qabc\\Edef", true); 835 check(pattern, "abcdef", false); 836 837 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 838 check(pattern, "abc\\Edef", true); 839 check(pattern, "abcdef", false); 840 841 pattern = Pattern.compile(Pattern.quote("\\E")); 842 check(pattern, "\\E", true); 843 844 pattern = Pattern.compile("((((abc.+?:)", flags); 845 check(pattern, "((((abc.+?:)", true); 846 847 flags |= Pattern.MULTILINE; 848 849 pattern = Pattern.compile("^cat$", flags); 850 check(pattern, "abc^cat$def", true); 851 check(pattern, "cat", false); 852 853 flags |= Pattern.CASE_INSENSITIVE; 854 855 pattern = Pattern.compile("abcdef", flags); 856 check(pattern, "ABCDEF", true); 857 check(pattern, "AbCdEf", true); 858 859 flags |= Pattern.DOTALL; 860 861 pattern = Pattern.compile("a...b", flags); 862 check(pattern, "A...b", true); 863 check(pattern, "Axxxb", false); 864 865 flags |= Pattern.CANON_EQ; 866 867 Pattern p = Pattern.compile("testa\u030a", flags); 868 check(pattern, "testa\u030a", false); 869 check(pattern, "test\u00e5", false); 870 871 // Supplementary character test 872 flags = Pattern.LITERAL; 873 874 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 875 check(pattern, toSupplementaries("abc\\t$^"), true); 876 877 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 878 check(pattern, toSupplementaries("abc\\t$^"), true); 879 880 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 881 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 882 check(pattern, toSupplementaries("a^$bcabc"), false); 883 884 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 885 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 886 check(pattern, toSupplementaries("a^$bcabc"), false); 887 888 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 889 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 890 check(pattern, toSupplementaries("abcdef"), false); 891 892 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 893 check(pattern, toSupplementaries("abc\\Edef"), true); 894 check(pattern, toSupplementaries("abcdef"), false); 895 896 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 897 check(pattern, toSupplementaries("((((abc.+?:)"), true); 898 899 flags |= Pattern.MULTILINE; 900 901 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 902 check(pattern, toSupplementaries("abc^cat$def"), true); 903 check(pattern, toSupplementaries("cat"), false); 904 905 flags |= Pattern.DOTALL; 906 907 // note: this is case-sensitive. 908 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 909 check(pattern, toSupplementaries("a...b"), true); 910 check(pattern, toSupplementaries("axxxb"), false); 911 912 flags |= Pattern.CANON_EQ; 913 914 String t = toSupplementaries("test"); 915 p = Pattern.compile(t + "a\u030a", flags); 916 check(pattern, t + "a\u030a", false); 917 check(pattern, t + "\u00e5", false); 918 919 report("Literal pattern"); 920 } 921 922 // This test is for 4803179 923 // This test is also for 4808962, replacement parts 924 private static void literalReplacementTest() throws Exception { 925 int flags = Pattern.LITERAL; 926 927 Pattern pattern = Pattern.compile("abc", flags); 928 Matcher matcher = pattern.matcher("zzzabczzz"); 929 String replaceTest = "$0"; 930 String result = matcher.replaceAll(replaceTest); 931 if (!result.equals("zzzabczzz")) 932 failCount++; 933 934 matcher.reset(); 935 String literalReplacement = matcher.quoteReplacement(replaceTest); 936 result = matcher.replaceAll(literalReplacement); 937 if (!result.equals("zzz$0zzz")) 938 failCount++; 939 940 matcher.reset(); 941 replaceTest = "\\t$\\$"; 942 literalReplacement = matcher.quoteReplacement(replaceTest); 943 result = matcher.replaceAll(literalReplacement); 944 if (!result.equals("zzz\\t$\\$zzz")) 945 failCount++; 946 947 // Supplementary character test 948 pattern = Pattern.compile(toSupplementaries("abc"), flags); 949 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 950 replaceTest = "$0"; 951 result = matcher.replaceAll(replaceTest); 952 if (!result.equals(toSupplementaries("zzzabczzz"))) 953 failCount++; 954 955 matcher.reset(); 956 literalReplacement = matcher.quoteReplacement(replaceTest); 957 result = matcher.replaceAll(literalReplacement); 958 if (!result.equals(toSupplementaries("zzz$0zzz"))) 959 failCount++; 960 961 matcher.reset(); 962 replaceTest = "\\t$\\$"; 963 literalReplacement = matcher.quoteReplacement(replaceTest); 964 result = matcher.replaceAll(literalReplacement); 965 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 966 failCount++; 967 968 // IAE should be thrown if backslash or '$' is the last character 969 // in replacement string 970 try { 971 "\uac00".replaceAll("\uac00", "$"); 972 failCount++; 973 } catch (IllegalArgumentException iie) { 974 } catch (Exception e) { 975 failCount++; 976 } 977 try { 978 "\uac00".replaceAll("\uac00", "\\"); 979 failCount++; 980 } catch (IllegalArgumentException iie) { 981 } catch (Exception e) { 982 failCount++; 983 } 984 report("Literal replacement"); 985 } 986 987 // This test is for 4757029 988 private static void regionTest() throws Exception { 989 Pattern pattern = Pattern.compile("abc"); 990 Matcher matcher = pattern.matcher("abcdefabc"); 991 992 matcher.region(0,9); 993 if (!matcher.find()) 994 failCount++; 995 if (!matcher.find()) 996 failCount++; 997 matcher.region(0,3); 998 if (!matcher.find()) 999 failCount++; 1000 matcher.region(3,6); 1001 if (matcher.find()) 1002 failCount++; 1003 matcher.region(0,2); 1004 if (matcher.find()) 1005 failCount++; 1006 1007 expectRegionFail(matcher, 1, -1); 1008 expectRegionFail(matcher, -1, -1); 1009 expectRegionFail(matcher, -1, 1); 1010 expectRegionFail(matcher, 5, 3); 1011 expectRegionFail(matcher, 5, 12); 1012 expectRegionFail(matcher, 12, 12); 1013 1014 pattern = Pattern.compile("^abc$"); 1015 matcher = pattern.matcher("zzzabczzz"); 1016 matcher.region(0,9); 1017 if (matcher.find()) 1018 failCount++; 1019 matcher.region(3,6); 1020 if (!matcher.find()) 1021 failCount++; 1022 matcher.region(3,6); 1023 matcher.useAnchoringBounds(false); 1024 if (matcher.find()) 1025 failCount++; 1026 1027 // Supplementary character test 1028 pattern = Pattern.compile(toSupplementaries("abc")); 1029 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 1030 matcher.region(0,9*2); 1031 if (!matcher.find()) 1032 failCount++; 1033 if (!matcher.find()) 1034 failCount++; 1035 matcher.region(0,3*2); 1036 if (!matcher.find()) 1037 failCount++; 1038 matcher.region(1,3*2); 1039 if (matcher.find()) 1040 failCount++; 1041 matcher.region(3*2,6*2); 1042 if (matcher.find()) 1043 failCount++; 1044 matcher.region(0,2*2); 1045 if (matcher.find()) 1046 failCount++; 1047 matcher.region(0,2*2+1); 1048 if (matcher.find()) 1049 failCount++; 1050 1051 expectRegionFail(matcher, 1*2, -1); 1052 expectRegionFail(matcher, -1, -1); 1053 expectRegionFail(matcher, -1, 1*2); 1054 expectRegionFail(matcher, 5*2, 3*2); 1055 expectRegionFail(matcher, 5*2, 12*2); 1056 expectRegionFail(matcher, 12*2, 12*2); 1057 1058 pattern = Pattern.compile(toSupplementaries("^abc$")); 1059 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1060 matcher.region(0,9*2); 1061 if (matcher.find()) 1062 failCount++; 1063 matcher.region(3*2,6*2); 1064 if (!matcher.find()) 1065 failCount++; 1066 matcher.region(3*2+1,6*2); 1067 if (matcher.find()) 1068 failCount++; 1069 matcher.region(3*2,6*2-1); 1070 if (matcher.find()) 1071 failCount++; 1072 matcher.region(3*2,6*2); 1073 matcher.useAnchoringBounds(false); 1074 if (matcher.find()) 1075 failCount++; 1076 1077 // JDK-8230829 1078 pattern = Pattern.compile("\\ud800\\udc61"); 1079 matcher = pattern.matcher("\ud800\udc61"); 1080 matcher.region(0, 1); 1081 if (matcher.find()) { 1082 failCount++; 1083 System.out.println("Matched a surrogate pair" + 1084 " that crosses border of region"); 1085 } 1086 if (!matcher.hitEnd()) { 1087 failCount++; 1088 System.out.println("Expected to hit the end when" + 1089 " matching a surrogate pair crossing region"); 1090 } 1091 1092 report("Regions"); 1093 } 1094 1095 private static void expectRegionFail(Matcher matcher, int index1, 1096 int index2) 1097 { 1098 try { 1099 matcher.region(index1, index2); 1100 failCount++; 1101 } catch (IndexOutOfBoundsException ioobe) { 1102 // Correct result 1103 } catch (IllegalStateException ise) { 1104 // Correct result 1105 } 1106 } 1107 1108 // This test is for 4803197 1109 private static void escapedSegmentTest() throws Exception { 1110 1111 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1112 check(pattern, "dir1\\dir2", true); 1113 1114 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1115 check(pattern, "dir1\\dir2\\", true); 1116 1117 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1118 check(pattern, "dir1\\dir2\\", true); 1119 1120 // Supplementary character test 1121 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1122 check(pattern, toSupplementaries("dir1\\dir2"), true); 1123 1124 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1125 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1126 1127 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1128 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1129 1130 report("Escaped segment"); 1131 } 1132 1133 // This test is for 4792284 1134 private static void nonCaptureRepetitionTest() throws Exception { 1135 String input = "abcdefgh;"; 1136 1137 String[] patterns = new String[] { 1138 "(?:\\w{4})+;", 1139 "(?:\\w{8})*;", 1140 "(?:\\w{2}){2,4};", 1141 "(?:\\w{4}){2,};", // only matches the 1142 ".*?(?:\\w{5})+;", // specified minimum 1143 ".*?(?:\\w{9})*;", // number of reps - OK 1144 "(?:\\w{4})+?;", // lazy repetition - OK 1145 "(?:\\w{4})++;", // possessive repetition - OK 1146 "(?:\\w{2,}?)+;", // non-deterministic - OK 1147 "(\\w{4})+;", // capturing group - OK 1148 }; 1149 1150 for (int i = 0; i < patterns.length; i++) { 1151 // Check find() 1152 check(patterns[i], 0, input, input, true); 1153 // Check matches() 1154 Pattern p = Pattern.compile(patterns[i]); 1155 Matcher m = p.matcher(input); 1156 1157 if (m.matches()) { 1158 if (!m.group(0).equals(input)) 1159 failCount++; 1160 } else { 1161 failCount++; 1162 } 1163 } 1164 1165 report("Non capturing repetition"); 1166 } 1167 1168 // This test is for 6358731 1169 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1170 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1171 Matcher matcher = pattern.matcher("abcd"); 1172 if (!matcher.matches() || 1173 matcher.group(1) != null || 1174 !matcher.group(2).equals("abcd")) { 1175 failCount++; 1176 } 1177 report("Not captured GroupCurly"); 1178 } 1179 1180 // This test is for 4706545 1181 private static void javaCharClassTest() throws Exception { 1182 for (int i=0; i<1000; i++) { 1183 char c = (char)generator.nextInt(); 1184 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1185 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1186 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1187 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1188 check("{javaDigit}", c, Character.isDigit(c)); 1189 check("{javaDefined}", c, Character.isDefined(c)); 1190 check("{javaLetter}", c, Character.isLetter(c)); 1191 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1192 check("{javaJavaIdentifierStart}", c, 1193 Character.isJavaIdentifierStart(c)); 1194 check("{javaJavaIdentifierPart}", c, 1195 Character.isJavaIdentifierPart(c)); 1196 check("{javaUnicodeIdentifierStart}", c, 1197 Character.isUnicodeIdentifierStart(c)); 1198 check("{javaUnicodeIdentifierPart}", c, 1199 Character.isUnicodeIdentifierPart(c)); 1200 check("{javaIdentifierIgnorable}", c, 1201 Character.isIdentifierIgnorable(c)); 1202 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1203 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1204 check("{javaISOControl}", c, Character.isISOControl(c)); 1205 check("{javaMirrored}", c, Character.isMirrored(c)); 1206 1207 } 1208 1209 // Supplementary character test 1210 for (int i=0; i<1000; i++) { 1211 int c = generator.nextInt(Character.MAX_CODE_POINT 1212 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1213 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1214 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1215 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1216 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1217 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1218 check("{javaDigit}", c, Character.isDigit(c)); 1219 check("{javaDefined}", c, Character.isDefined(c)); 1220 check("{javaLetter}", c, Character.isLetter(c)); 1221 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1222 check("{javaJavaIdentifierStart}", c, 1223 Character.isJavaIdentifierStart(c)); 1224 check("{javaJavaIdentifierPart}", c, 1225 Character.isJavaIdentifierPart(c)); 1226 check("{javaUnicodeIdentifierStart}", c, 1227 Character.isUnicodeIdentifierStart(c)); 1228 check("{javaUnicodeIdentifierPart}", c, 1229 Character.isUnicodeIdentifierPart(c)); 1230 check("{javaIdentifierIgnorable}", c, 1231 Character.isIdentifierIgnorable(c)); 1232 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1233 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1234 check("{javaISOControl}", c, Character.isISOControl(c)); 1235 check("{javaMirrored}", c, Character.isMirrored(c)); 1236 } 1237 1238 report("Java character classes"); 1239 } 1240 1241 // This test is for 4523620 1242 /* 1243 private static void numOccurrencesTest() throws Exception { 1244 Pattern pattern = Pattern.compile("aaa"); 1245 1246 if (pattern.numOccurrences("aaaaaa", false) != 2) 1247 failCount++; 1248 if (pattern.numOccurrences("aaaaaa", true) != 4) 1249 failCount++; 1250 1251 pattern = Pattern.compile("^"); 1252 if (pattern.numOccurrences("aaaaaa", false) != 1) 1253 failCount++; 1254 if (pattern.numOccurrences("aaaaaa", true) != 1) 1255 failCount++; 1256 1257 report("Number of Occurrences"); 1258 } 1259 */ 1260 1261 // This test is for 4776374 1262 private static void caretBetweenTerminatorsTest() throws Exception { 1263 int flags1 = Pattern.DOTALL; 1264 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1265 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1266 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1267 1268 check("^....", flags1, "test\ntest", "test", true); 1269 check(".....^", flags1, "test\ntest", "test", false); 1270 check(".....^", flags1, "test\n", "test", false); 1271 check("....^", flags1, "test\r\n", "test", false); 1272 1273 check("^....", flags2, "test\ntest", "test", true); 1274 check("....^", flags2, "test\ntest", "test", false); 1275 check(".....^", flags2, "test\n", "test", false); 1276 check("....^", flags2, "test\r\n", "test", false); 1277 1278 check("^....", flags3, "test\ntest", "test", true); 1279 check(".....^", flags3, "test\ntest", "test\n", true); 1280 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1281 check(".....^", flags3, "test\n", "test", false); 1282 check(".....^", flags3, "test\r\n", "test", false); 1283 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1284 1285 check("^....", flags4, "test\ntest", "test", true); 1286 check(".....^", flags3, "test\ntest", "test\n", true); 1287 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1288 check(".....^", flags4, "test\n", "test\n", false); 1289 check(".....^", flags4, "test\r\n", "test\r", false); 1290 1291 // Supplementary character test 1292 String t = toSupplementaries("test"); 1293 check("^....", flags1, t+"\n"+t, t, true); 1294 check(".....^", flags1, t+"\n"+t, t, false); 1295 check(".....^", flags1, t+"\n", t, false); 1296 check("....^", flags1, t+"\r\n", t, false); 1297 1298 check("^....", flags2, t+"\n"+t, t, true); 1299 check("....^", flags2, t+"\n"+t, t, false); 1300 check(".....^", flags2, t+"\n", t, false); 1301 check("....^", flags2, t+"\r\n", t, false); 1302 1303 check("^....", flags3, t+"\n"+t, t, true); 1304 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1305 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1306 check(".....^", flags3, t+"\n", t, false); 1307 check(".....^", flags3, t+"\r\n", t, false); 1308 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1309 1310 check("^....", flags4, t+"\n"+t, t, true); 1311 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1312 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1313 check(".....^", flags4, t+"\n", t+"\n", false); 1314 check(".....^", flags4, t+"\r\n", t+"\r", false); 1315 1316 report("Caret between terminators"); 1317 } 1318 1319 // This test is for 4727935 1320 private static void dollarAtEndTest() throws Exception { 1321 int flags1 = Pattern.DOTALL; 1322 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1323 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1324 1325 check("....$", flags1, "test\n", "test", true); 1326 check("....$", flags1, "test\r\n", "test", true); 1327 check(".....$", flags1, "test\n", "test\n", true); 1328 check(".....$", flags1, "test\u0085", "test\u0085", true); 1329 check("....$", flags1, "test\u0085", "test", true); 1330 1331 check("....$", flags2, "test\n", "test", true); 1332 check(".....$", flags2, "test\n", "test\n", true); 1333 check(".....$", flags2, "test\u0085", "test\u0085", true); 1334 check("....$", flags2, "test\u0085", "est\u0085", true); 1335 1336 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1337 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1338 check("....$blah", flags3, "test\nblah", "!!!!", false); 1339 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1340 1341 // Supplementary character test 1342 String t = toSupplementaries("test"); 1343 String b = toSupplementaries("blah"); 1344 check("....$", flags1, t+"\n", t, true); 1345 check("....$", flags1, t+"\r\n", t, true); 1346 check(".....$", flags1, t+"\n", t+"\n", true); 1347 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1348 check("....$", flags1, t+"\u0085", t, true); 1349 1350 check("....$", flags2, t+"\n", t, true); 1351 check(".....$", flags2, t+"\n", t+"\n", true); 1352 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1353 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1354 1355 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1356 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1357 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1358 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1359 1360 report("Dollar at End"); 1361 } 1362 1363 // This test is for 4711773 1364 private static void multilineDollarTest() throws Exception { 1365 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1366 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1367 matcher.find(); 1368 if (matcher.start(0) != 9) 1369 failCount++; 1370 matcher.find(); 1371 if (matcher.start(0) != 20) 1372 failCount++; 1373 1374 // Supplementary character test 1375 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1376 matcher.find(); 1377 if (matcher.start(0) != 9*2) 1378 failCount++; 1379 matcher.find(); 1380 if (matcher.start(0) != 20*2) 1381 failCount++; 1382 1383 report("Multiline Dollar"); 1384 } 1385 1386 private static void reluctantRepetitionTest() throws Exception { 1387 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1388 check(p, "1 word word word 2", true); 1389 check(p, "1 wor wo w 2", true); 1390 check(p, "1 word word 2", true); 1391 check(p, "1 word 2", true); 1392 check(p, "1 wo w w 2", true); 1393 check(p, "1 wo w 2", true); 1394 check(p, "1 wor w 2", true); 1395 1396 p = Pattern.compile("([a-z])+?c"); 1397 Matcher m = p.matcher("ababcdefdec"); 1398 check(m, "ababc"); 1399 1400 // Supplementary character test 1401 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1402 m = p.matcher(toSupplementaries("ababcdefdec")); 1403 check(m, toSupplementaries("ababc")); 1404 1405 report("Reluctant Repetition"); 1406 } 1407 1408 private static Pattern serializedPattern(Pattern p) throws Exception { 1409 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1410 ObjectOutputStream oos = new ObjectOutputStream(baos); 1411 oos.writeObject(p); 1412 oos.close(); 1413 try (ObjectInputStream ois = new ObjectInputStream( 1414 new ByteArrayInputStream(baos.toByteArray()))) { 1415 return (Pattern)ois.readObject(); 1416 } 1417 } 1418 1419 private static void serializeTest() throws Exception { 1420 String patternStr = "(b)"; 1421 String matchStr = "b"; 1422 Pattern pattern = Pattern.compile(patternStr); 1423 Pattern serializedPattern = serializedPattern(pattern); 1424 Matcher matcher = serializedPattern.matcher(matchStr); 1425 if (!matcher.matches()) 1426 failCount++; 1427 if (matcher.groupCount() != 1) 1428 failCount++; 1429 1430 pattern = Pattern.compile("a(?-i)b", Pattern.CASE_INSENSITIVE); 1431 serializedPattern = serializedPattern(pattern); 1432 if (!serializedPattern.matcher("Ab").matches()) 1433 failCount++; 1434 if (serializedPattern.matcher("AB").matches()) 1435 failCount++; 1436 1437 report("Serialization"); 1438 } 1439 1440 private static void gTest() { 1441 Pattern pattern = Pattern.compile("\\G\\w"); 1442 Matcher matcher = pattern.matcher("abc#x#x"); 1443 matcher.find(); 1444 matcher.find(); 1445 matcher.find(); 1446 if (matcher.find()) 1447 failCount++; 1448 1449 pattern = Pattern.compile("\\GA*"); 1450 matcher = pattern.matcher("1A2AA3"); 1451 matcher.find(); 1452 if (matcher.find()) 1453 failCount++; 1454 1455 pattern = Pattern.compile("\\GA*"); 1456 matcher = pattern.matcher("1A2AA3"); 1457 if (!matcher.find(1)) 1458 failCount++; 1459 matcher.find(); 1460 if (matcher.find()) 1461 failCount++; 1462 1463 report("\\G"); 1464 } 1465 1466 private static void zTest() { 1467 Pattern pattern = Pattern.compile("foo\\Z"); 1468 // Positives 1469 check(pattern, "foo\u0085", true); 1470 check(pattern, "foo\u2028", true); 1471 check(pattern, "foo\u2029", true); 1472 check(pattern, "foo\n", true); 1473 check(pattern, "foo\r", true); 1474 check(pattern, "foo\r\n", true); 1475 // Negatives 1476 check(pattern, "fooo", false); 1477 check(pattern, "foo\n\r", false); 1478 1479 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1480 // Positives 1481 check(pattern, "foo", true); 1482 check(pattern, "foo\n", true); 1483 // Negatives 1484 check(pattern, "foo\r", false); 1485 check(pattern, "foo\u0085", false); 1486 check(pattern, "foo\u2028", false); 1487 check(pattern, "foo\u2029", false); 1488 1489 report("\\Z"); 1490 } 1491 1492 private static void replaceFirstTest() { 1493 Pattern pattern = Pattern.compile("(ab)(c*)"); 1494 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1495 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1496 failCount++; 1497 1498 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1499 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1500 failCount++; 1501 1502 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1503 String result = matcher.replaceFirst("$1"); 1504 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1505 failCount++; 1506 1507 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1508 result = matcher.replaceFirst("$2"); 1509 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1510 failCount++; 1511 1512 pattern = Pattern.compile("a*"); 1513 matcher = pattern.matcher("aaaaaaaaaa"); 1514 if (!matcher.replaceFirst("test").equals("test")) 1515 failCount++; 1516 1517 pattern = Pattern.compile("a+"); 1518 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1519 if (!matcher.replaceFirst("test").equals("zzztest")) 1520 failCount++; 1521 1522 // Supplementary character test 1523 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1524 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1525 if (!matcher.replaceFirst(toSupplementaries("test")) 1526 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1527 failCount++; 1528 1529 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1530 if (!matcher.replaceFirst(toSupplementaries("test")). 1531 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1532 failCount++; 1533 1534 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1535 result = matcher.replaceFirst("$1"); 1536 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1537 failCount++; 1538 1539 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1540 result = matcher.replaceFirst("$2"); 1541 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1542 failCount++; 1543 1544 pattern = Pattern.compile(toSupplementaries("a*")); 1545 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1546 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1547 failCount++; 1548 1549 pattern = Pattern.compile(toSupplementaries("a+")); 1550 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1551 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1552 failCount++; 1553 1554 report("Replace First"); 1555 } 1556 1557 private static void unixLinesTest() { 1558 Pattern pattern = Pattern.compile(".*"); 1559 Matcher matcher = pattern.matcher("aa\u2028blah"); 1560 matcher.find(); 1561 if (!matcher.group(0).equals("aa")) 1562 failCount++; 1563 1564 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1565 matcher = pattern.matcher("aa\u2028blah"); 1566 matcher.find(); 1567 if (!matcher.group(0).equals("aa\u2028blah")) 1568 failCount++; 1569 1570 pattern = Pattern.compile("[az]$", 1571 Pattern.MULTILINE | Pattern.UNIX_LINES); 1572 matcher = pattern.matcher("aa\u2028zz"); 1573 check(matcher, "a\u2028", false); 1574 1575 // Supplementary character test 1576 pattern = Pattern.compile(".*"); 1577 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1578 matcher.find(); 1579 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1580 failCount++; 1581 1582 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1583 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1584 matcher.find(); 1585 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1586 failCount++; 1587 1588 pattern = Pattern.compile(toSupplementaries("[az]$"), 1589 Pattern.MULTILINE | Pattern.UNIX_LINES); 1590 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1591 check(matcher, toSupplementaries("a\u2028"), false); 1592 1593 report("Unix Lines"); 1594 } 1595 1596 private static void commentsTest() { 1597 int flags = Pattern.COMMENTS; 1598 1599 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1600 Matcher matcher = pattern.matcher("aa#aa"); 1601 if (!matcher.matches()) 1602 failCount++; 1603 1604 pattern = Pattern.compile("aa # blah", flags); 1605 matcher = pattern.matcher("aa"); 1606 if (!matcher.matches()) 1607 failCount++; 1608 1609 pattern = Pattern.compile("aa blah", flags); 1610 matcher = pattern.matcher("aablah"); 1611 if (!matcher.matches()) 1612 failCount++; 1613 1614 pattern = Pattern.compile("aa # blah blech ", flags); 1615 matcher = pattern.matcher("aa"); 1616 if (!matcher.matches()) 1617 failCount++; 1618 1619 pattern = Pattern.compile("aa # blah\n ", flags); 1620 matcher = pattern.matcher("aa"); 1621 if (!matcher.matches()) 1622 failCount++; 1623 1624 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1625 matcher = pattern.matcher("aabc"); 1626 if (!matcher.matches()) 1627 failCount++; 1628 1629 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1630 matcher = pattern.matcher("aabc"); 1631 if (!matcher.matches()) 1632 failCount++; 1633 1634 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1635 matcher = pattern.matcher("aabc#blech"); 1636 if (!matcher.matches()) 1637 failCount++; 1638 1639 // Supplementary character test 1640 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1641 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1642 if (!matcher.matches()) 1643 failCount++; 1644 1645 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1646 matcher = pattern.matcher(toSupplementaries("aa")); 1647 if (!matcher.matches()) 1648 failCount++; 1649 1650 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1651 matcher = pattern.matcher(toSupplementaries("aablah")); 1652 if (!matcher.matches()) 1653 failCount++; 1654 1655 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1656 matcher = pattern.matcher(toSupplementaries("aa")); 1657 if (!matcher.matches()) 1658 failCount++; 1659 1660 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1661 matcher = pattern.matcher(toSupplementaries("aa")); 1662 if (!matcher.matches()) 1663 failCount++; 1664 1665 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1666 matcher = pattern.matcher(toSupplementaries("aabc")); 1667 if (!matcher.matches()) 1668 failCount++; 1669 1670 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1671 matcher = pattern.matcher(toSupplementaries("aabc")); 1672 if (!matcher.matches()) 1673 failCount++; 1674 1675 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1676 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1677 if (!matcher.matches()) 1678 failCount++; 1679 1680 report("Comments"); 1681 } 1682 1683 private static void caseFoldingTest() { // bug 4504687 1684 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1685 Pattern pattern = Pattern.compile("aa", flags); 1686 Matcher matcher = pattern.matcher("ab"); 1687 if (matcher.matches()) 1688 failCount++; 1689 1690 pattern = Pattern.compile("aA", flags); 1691 matcher = pattern.matcher("ab"); 1692 if (matcher.matches()) 1693 failCount++; 1694 1695 pattern = Pattern.compile("aa", flags); 1696 matcher = pattern.matcher("aB"); 1697 if (matcher.matches()) 1698 failCount++; 1699 matcher = pattern.matcher("Ab"); 1700 if (matcher.matches()) 1701 failCount++; 1702 1703 // ASCII "a" 1704 // Latin-1 Supplement "a" + grave 1705 // Cyrillic "a" 1706 String[] patterns = new String[] { 1707 //single 1708 "a", "\u00e0", "\u0430", 1709 //slice 1710 "ab", "\u00e0\u00e1", "\u0430\u0431", 1711 //class single 1712 "[a]", "[\u00e0]", "[\u0430]", 1713 //class range 1714 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1715 //back reference 1716 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1717 }; 1718 1719 String[] texts = new String[] { 1720 "A", "\u00c0", "\u0410", 1721 "AB", "\u00c0\u00c1", "\u0410\u0411", 1722 "A", "\u00c0", "\u0410", 1723 "B", "\u00c2", "\u0411", 1724 "aA", "\u00e0\u00c0", "\u0430\u0410" 1725 }; 1726 1727 boolean[] expected = new boolean[] { 1728 true, false, false, 1729 true, false, false, 1730 true, false, false, 1731 true, false, false, 1732 true, false, false 1733 }; 1734 1735 flags = Pattern.CASE_INSENSITIVE; 1736 for (int i = 0; i < patterns.length; i++) { 1737 pattern = Pattern.compile(patterns[i], flags); 1738 matcher = pattern.matcher(texts[i]); 1739 if (matcher.matches() != expected[i]) { 1740 System.out.println("<1> Failed at " + i); 1741 failCount++; 1742 } 1743 } 1744 1745 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1746 for (int i = 0; i < patterns.length; i++) { 1747 pattern = Pattern.compile(patterns[i], flags); 1748 matcher = pattern.matcher(texts[i]); 1749 if (!matcher.matches()) { 1750 System.out.println("<2> Failed at " + i); 1751 failCount++; 1752 } 1753 } 1754 // flag unicode_case alone should do nothing 1755 flags = Pattern.UNICODE_CASE; 1756 for (int i = 0; i < patterns.length; i++) { 1757 pattern = Pattern.compile(patterns[i], flags); 1758 matcher = pattern.matcher(texts[i]); 1759 if (matcher.matches()) { 1760 System.out.println("<3> Failed at " + i); 1761 failCount++; 1762 } 1763 } 1764 1765 // Special cases: i, I, u+0131 and u+0130 1766 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1767 pattern = Pattern.compile("[h-j]+", flags); 1768 if (!pattern.matcher("\u0131\u0130").matches()) 1769 failCount++; 1770 report("Case Folding"); 1771 } 1772 1773 private static void appendTest() { 1774 Pattern pattern = Pattern.compile("(ab)(cd)"); 1775 Matcher matcher = pattern.matcher("abcd"); 1776 String result = matcher.replaceAll("$2$1"); 1777 if (!result.equals("cdab")) 1778 failCount++; 1779 1780 String s1 = "Swap all: first = 123, second = 456"; 1781 String s2 = "Swap one: first = 123, second = 456"; 1782 String r = "$3$2$1"; 1783 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1784 matcher = pattern.matcher(s1); 1785 1786 result = matcher.replaceAll(r); 1787 if (!result.equals("Swap all: 123 = first, 456 = second")) 1788 failCount++; 1789 1790 matcher = pattern.matcher(s2); 1791 1792 if (matcher.find()) { 1793 StringBuffer sb = new StringBuffer(); 1794 matcher.appendReplacement(sb, r); 1795 matcher.appendTail(sb); 1796 result = sb.toString(); 1797 if (!result.equals("Swap one: 123 = first, second = 456")) 1798 failCount++; 1799 } 1800 1801 // Supplementary character test 1802 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1803 matcher = pattern.matcher(toSupplementaries("abcd")); 1804 result = matcher.replaceAll("$2$1"); 1805 if (!result.equals(toSupplementaries("cdab"))) 1806 failCount++; 1807 1808 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1809 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1810 r = toSupplementaries("$3$2$1"); 1811 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1812 matcher = pattern.matcher(s1); 1813 1814 result = matcher.replaceAll(r); 1815 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1816 failCount++; 1817 1818 matcher = pattern.matcher(s2); 1819 1820 if (matcher.find()) { 1821 StringBuffer sb = new StringBuffer(); 1822 matcher.appendReplacement(sb, r); 1823 matcher.appendTail(sb); 1824 result = sb.toString(); 1825 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1826 failCount++; 1827 } 1828 report("Append"); 1829 } 1830 1831 private static void splitTest() { 1832 Pattern pattern = Pattern.compile(":"); 1833 String[] result = pattern.split("foo:and:boo", 2); 1834 if (!result[0].equals("foo")) 1835 failCount++; 1836 if (!result[1].equals("and:boo")) 1837 failCount++; 1838 // Supplementary character test 1839 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1840 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1841 if (!result[0].equals(toSupplementaries("foo"))) 1842 failCount++; 1843 if (!result[1].equals(toSupplementaries("andXboo"))) 1844 failCount++; 1845 1846 CharBuffer cb = CharBuffer.allocate(100); 1847 cb.put("foo:and:boo"); 1848 cb.flip(); 1849 result = pattern.split(cb); 1850 if (!result[0].equals("foo")) 1851 failCount++; 1852 if (!result[1].equals("and")) 1853 failCount++; 1854 if (!result[2].equals("boo")) 1855 failCount++; 1856 1857 // Supplementary character test 1858 CharBuffer cbs = CharBuffer.allocate(100); 1859 cbs.put(toSupplementaries("fooXandXboo")); 1860 cbs.flip(); 1861 result = patternX.split(cbs); 1862 if (!result[0].equals(toSupplementaries("foo"))) 1863 failCount++; 1864 if (!result[1].equals(toSupplementaries("and"))) 1865 failCount++; 1866 if (!result[2].equals(toSupplementaries("boo"))) 1867 failCount++; 1868 1869 String source = "0123456789"; 1870 for (int limit=-2; limit<3; limit++) { 1871 for (int x=0; x<10; x++) { 1872 result = source.split(Integer.toString(x), limit); 1873 int expectedLength = limit < 1 ? 2 : limit; 1874 1875 if ((limit == 0) && (x == 9)) { 1876 // expected dropping of "" 1877 if (result.length != 1) 1878 failCount++; 1879 if (!result[0].equals("012345678")) { 1880 failCount++; 1881 } 1882 } else { 1883 if (result.length != expectedLength) { 1884 failCount++; 1885 } 1886 if (!result[0].equals(source.substring(0,x))) { 1887 if (limit != 1) { 1888 failCount++; 1889 } else { 1890 if (!result[0].equals(source.substring(0,10))) { 1891 failCount++; 1892 } 1893 } 1894 } 1895 if (expectedLength > 1) { // Check segment 2 1896 if (!result[1].equals(source.substring(x+1,10))) 1897 failCount++; 1898 } 1899 } 1900 } 1901 } 1902 // Check the case for no match found 1903 for (int limit=-2; limit<3; limit++) { 1904 result = source.split("e", limit); 1905 if (result.length != 1) 1906 failCount++; 1907 if (!result[0].equals(source)) 1908 failCount++; 1909 } 1910 // Check the case for limit == 0, source = ""; 1911 // split() now returns 0-length for empty source "" see #6559590 1912 source = ""; 1913 result = source.split("e", 0); 1914 if (result.length != 1) 1915 failCount++; 1916 if (!result[0].equals(source)) 1917 failCount++; 1918 1919 // Check both split() and splitAsStraem(), especially for zero-lenth 1920 // input and zero-lenth match cases 1921 String[][] input = new String[][] { 1922 { " ", "Abc Efg Hij" }, // normal non-zero-match 1923 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1924 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1925 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1926 { "(?=\\p{Lu})", "AbcEfg" }, 1927 { "(?=\\p{Lu})", "Abc" }, 1928 { " ", "" }, // zero-length input 1929 { ".*", "" }, 1930 1931 // some tests from PatternStreamTest.java 1932 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1933 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1934 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1935 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1936 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1937 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1938 { "\u56da", "" }, 1939 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1940 { "o", "boo:and:foo" }, 1941 { "o", "booooo:and:fooooo" }, 1942 { "o", "fooooo:" }, 1943 }; 1944 1945 String[][] expected = new String[][] { 1946 { "Abc", "Efg", "Hij" }, 1947 { "", "Abc", "Efg", "Hij" }, 1948 { "Abc", "", "Efg", "Hij" }, 1949 { "Abc", "Efg", "Hij" }, 1950 { "Abc", "Efg" }, 1951 { "Abc" }, 1952 { "" }, 1953 { "" }, 1954 1955 { "awgqwefg1fefw", "vssv1vvv1" }, 1956 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1957 { "awgqwefg", "fefw4vssv", "vvv" }, 1958 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1959 { "1", "23", "456", "7890" }, 1960 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1961 { "" }, 1962 { "This", "is", "testing", "", "with", "different", "separators" }, 1963 { "b", "", ":and:f" }, 1964 { "b", "", "", "", "", ":and:f" }, 1965 { "f", "", "", "", "", ":" }, 1966 }; 1967 for (int i = 0; i < input.length; i++) { 1968 pattern = Pattern.compile(input[i][0]); 1969 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1970 failCount++; 1971 } 1972 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1973 // array for zero-length input for now 1974 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1975 expected[i])) { 1976 failCount++; 1977 } 1978 } 1979 report("Split"); 1980 } 1981 1982 private static void negationTest() { 1983 Pattern pattern = Pattern.compile("[\\[@^]+"); 1984 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1985 if (!matcher.find()) 1986 failCount++; 1987 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1988 failCount++; 1989 pattern = Pattern.compile("[@\\[^]+"); 1990 matcher = pattern.matcher("@@@@[[[[^^^^"); 1991 if (!matcher.find()) 1992 failCount++; 1993 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1994 failCount++; 1995 pattern = Pattern.compile("[@\\[^@]+"); 1996 matcher = pattern.matcher("@@@@[[[[^^^^"); 1997 if (!matcher.find()) 1998 failCount++; 1999 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 2000 failCount++; 2001 2002 pattern = Pattern.compile("\\)"); 2003 matcher = pattern.matcher("xxx)xxx"); 2004 if (!matcher.find()) 2005 failCount++; 2006 2007 report("Negation"); 2008 } 2009 2010 private static void ampersandTest() { 2011 Pattern pattern = Pattern.compile("[&@]+"); 2012 check(pattern, "@@@@&&&&", true); 2013 2014 pattern = Pattern.compile("[@&]+"); 2015 check(pattern, "@@@@&&&&", true); 2016 2017 pattern = Pattern.compile("[@\\&]+"); 2018 check(pattern, "@@@@&&&&", true); 2019 2020 report("Ampersand"); 2021 } 2022 2023 private static void octalTest() throws Exception { 2024 Pattern pattern = Pattern.compile("\\u0007"); 2025 Matcher matcher = pattern.matcher("\u0007"); 2026 if (!matcher.matches()) 2027 failCount++; 2028 pattern = Pattern.compile("\\07"); 2029 matcher = pattern.matcher("\u0007"); 2030 if (!matcher.matches()) 2031 failCount++; 2032 pattern = Pattern.compile("\\007"); 2033 matcher = pattern.matcher("\u0007"); 2034 if (!matcher.matches()) 2035 failCount++; 2036 pattern = Pattern.compile("\\0007"); 2037 matcher = pattern.matcher("\u0007"); 2038 if (!matcher.matches()) 2039 failCount++; 2040 pattern = Pattern.compile("\\040"); 2041 matcher = pattern.matcher("\u0020"); 2042 if (!matcher.matches()) 2043 failCount++; 2044 pattern = Pattern.compile("\\0403"); 2045 matcher = pattern.matcher("\u00203"); 2046 if (!matcher.matches()) 2047 failCount++; 2048 pattern = Pattern.compile("\\0103"); 2049 matcher = pattern.matcher("\u0043"); 2050 if (!matcher.matches()) 2051 failCount++; 2052 2053 report("Octal"); 2054 } 2055 2056 private static void longPatternTest() throws Exception { 2057 try { 2058 Pattern pattern = Pattern.compile( 2059 "a 32-character-long pattern xxxx"); 2060 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 2061 pattern = Pattern.compile("a thirty four character long regex"); 2062 StringBuffer patternToBe = new StringBuffer(101); 2063 for (int i=0; i<100; i++) 2064 patternToBe.append((char)(97 + i%26)); 2065 pattern = Pattern.compile(patternToBe.toString()); 2066 } catch (PatternSyntaxException e) { 2067 failCount++; 2068 } 2069 2070 // Supplementary character test 2071 try { 2072 Pattern pattern = Pattern.compile( 2073 toSupplementaries("a 32-character-long pattern xxxx")); 2074 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 2075 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 2076 StringBuffer patternToBe = new StringBuffer(101*2); 2077 for (int i=0; i<100; i++) 2078 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 2079 + 97 + i%26)); 2080 pattern = Pattern.compile(patternToBe.toString()); 2081 } catch (PatternSyntaxException e) { 2082 failCount++; 2083 } 2084 report("LongPattern"); 2085 } 2086 2087 private static void group0Test() throws Exception { 2088 Pattern pattern = Pattern.compile("(tes)ting"); 2089 Matcher matcher = pattern.matcher("testing"); 2090 check(matcher, "testing"); 2091 2092 matcher.reset("testing"); 2093 if (matcher.lookingAt()) { 2094 if (!matcher.group(0).equals("testing")) 2095 failCount++; 2096 } else { 2097 failCount++; 2098 } 2099 2100 matcher.reset("testing"); 2101 if (matcher.matches()) { 2102 if (!matcher.group(0).equals("testing")) 2103 failCount++; 2104 } else { 2105 failCount++; 2106 } 2107 2108 pattern = Pattern.compile("(tes)ting"); 2109 matcher = pattern.matcher("testing"); 2110 if (matcher.lookingAt()) { 2111 if (!matcher.group(0).equals("testing")) 2112 failCount++; 2113 } else { 2114 failCount++; 2115 } 2116 2117 pattern = Pattern.compile("^(tes)ting"); 2118 matcher = pattern.matcher("testing"); 2119 if (matcher.matches()) { 2120 if (!matcher.group(0).equals("testing")) 2121 failCount++; 2122 } else { 2123 failCount++; 2124 } 2125 2126 // Supplementary character test 2127 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2128 matcher = pattern.matcher(toSupplementaries("testing")); 2129 check(matcher, toSupplementaries("testing")); 2130 2131 matcher.reset(toSupplementaries("testing")); 2132 if (matcher.lookingAt()) { 2133 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2134 failCount++; 2135 } else { 2136 failCount++; 2137 } 2138 2139 matcher.reset(toSupplementaries("testing")); 2140 if (matcher.matches()) { 2141 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2142 failCount++; 2143 } else { 2144 failCount++; 2145 } 2146 2147 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2148 matcher = pattern.matcher(toSupplementaries("testing")); 2149 if (matcher.lookingAt()) { 2150 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2151 failCount++; 2152 } else { 2153 failCount++; 2154 } 2155 2156 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2157 matcher = pattern.matcher(toSupplementaries("testing")); 2158 if (matcher.matches()) { 2159 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2160 failCount++; 2161 } else { 2162 failCount++; 2163 } 2164 2165 report("Group0"); 2166 } 2167 2168 private static void findIntTest() throws Exception { 2169 Pattern p = Pattern.compile("blah"); 2170 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2171 boolean result = m.find(2); 2172 if (!result) 2173 failCount++; 2174 2175 p = Pattern.compile("$"); 2176 m = p.matcher("1234567890"); 2177 result = m.find(10); 2178 if (!result) 2179 failCount++; 2180 try { 2181 result = m.find(11); 2182 failCount++; 2183 } catch (IndexOutOfBoundsException e) { 2184 // correct result 2185 } 2186 2187 // Supplementary character test 2188 p = Pattern.compile(toSupplementaries("blah")); 2189 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2190 result = m.find(2); 2191 if (!result) 2192 failCount++; 2193 2194 report("FindInt"); 2195 } 2196 2197 private static void emptyPatternTest() throws Exception { 2198 Pattern p = Pattern.compile(""); 2199 Matcher m = p.matcher("foo"); 2200 2201 // Should find empty pattern at beginning of input 2202 boolean result = m.find(); 2203 if (result != true) 2204 failCount++; 2205 if (m.start() != 0) 2206 failCount++; 2207 2208 // Should not match entire input if input is not empty 2209 m.reset(); 2210 result = m.matches(); 2211 if (result == true) 2212 failCount++; 2213 2214 try { 2215 m.start(0); 2216 failCount++; 2217 } catch (IllegalStateException e) { 2218 // Correct result 2219 } 2220 2221 // Should match entire input if input is empty 2222 m.reset(""); 2223 result = m.matches(); 2224 if (result != true) 2225 failCount++; 2226 2227 result = Pattern.matches("", ""); 2228 if (result != true) 2229 failCount++; 2230 2231 result = Pattern.matches("", "foo"); 2232 if (result == true) 2233 failCount++; 2234 report("EmptyPattern"); 2235 } 2236 2237 private static void charClassTest() throws Exception { 2238 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2239 check(pattern, "blahb]blech", true); 2240 2241 pattern = Pattern.compile("[abc[def]]"); 2242 check(pattern, "b", true); 2243 2244 // Supplementary character tests 2245 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2246 check(pattern, toSupplementaries("blahb]blech"), true); 2247 2248 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2249 check(pattern, toSupplementaries("b"), true); 2250 2251 try { 2252 // u00ff when UNICODE_CASE 2253 pattern = Pattern.compile("[ab\u00ffcd]", 2254 Pattern.CASE_INSENSITIVE| 2255 Pattern.UNICODE_CASE); 2256 check(pattern, "ab\u00ffcd", true); 2257 check(pattern, "Ab\u0178Cd", true); 2258 2259 // u00b5 when UNICODE_CASE 2260 pattern = Pattern.compile("[ab\u00b5cd]", 2261 Pattern.CASE_INSENSITIVE| 2262 Pattern.UNICODE_CASE); 2263 check(pattern, "ab\u00b5cd", true); 2264 check(pattern, "Ab\u039cCd", true); 2265 } catch (Exception e) { failCount++; } 2266 2267 /* Special cases 2268 (1)LatinSmallLetterLongS u+017f 2269 (2)LatinSmallLetterDotlessI u+0131 2270 (3)LatineCapitalLetterIWithDotAbove u+0130 2271 (4)KelvinSign u+212a 2272 (5)AngstromSign u+212b 2273 */ 2274 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2275 pattern = Pattern.compile("[sik\u00c5]+", flags); 2276 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2277 failCount++; 2278 2279 report("CharClass"); 2280 } 2281 2282 private static void caretTest() throws Exception { 2283 Pattern pattern = Pattern.compile("\\w*"); 2284 Matcher matcher = pattern.matcher("a#bc#def##g"); 2285 check(matcher, "a"); 2286 check(matcher, ""); 2287 check(matcher, "bc"); 2288 check(matcher, ""); 2289 check(matcher, "def"); 2290 check(matcher, ""); 2291 check(matcher, ""); 2292 check(matcher, "g"); 2293 check(matcher, ""); 2294 if (matcher.find()) 2295 failCount++; 2296 2297 pattern = Pattern.compile("^\\w*"); 2298 matcher = pattern.matcher("a#bc#def##g"); 2299 check(matcher, "a"); 2300 if (matcher.find()) 2301 failCount++; 2302 2303 pattern = Pattern.compile("\\w"); 2304 matcher = pattern.matcher("abc##x"); 2305 check(matcher, "a"); 2306 check(matcher, "b"); 2307 check(matcher, "c"); 2308 check(matcher, "x"); 2309 if (matcher.find()) 2310 failCount++; 2311 2312 pattern = Pattern.compile("^\\w"); 2313 matcher = pattern.matcher("abc##x"); 2314 check(matcher, "a"); 2315 if (matcher.find()) 2316 failCount++; 2317 2318 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2319 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2320 check(matcher, "abc"); 2321 if (matcher.find()) 2322 failCount++; 2323 2324 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2325 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2326 check(matcher, "abc"); 2327 check(matcher, "jkl"); 2328 if (matcher.find()) 2329 failCount++; 2330 2331 pattern = Pattern.compile("^", Pattern.MULTILINE); 2332 matcher = pattern.matcher("this is some text"); 2333 String result = matcher.replaceAll("X"); 2334 if (!result.equals("Xthis is some text")) 2335 failCount++; 2336 2337 pattern = Pattern.compile("^"); 2338 matcher = pattern.matcher("this is some text"); 2339 result = matcher.replaceAll("X"); 2340 if (!result.equals("Xthis is some text")) 2341 failCount++; 2342 2343 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2344 matcher = pattern.matcher("this is some text\n"); 2345 result = matcher.replaceAll("X"); 2346 if (!result.equals("Xthis is some text\n")) 2347 failCount++; 2348 2349 report("Caret"); 2350 } 2351 2352 private static void groupCaptureTest() throws Exception { 2353 // Independent group 2354 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2355 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2356 matcher.find(); 2357 try { 2358 String blah = matcher.group(1); 2359 failCount++; 2360 } catch (IndexOutOfBoundsException ioobe) { 2361 // Good result 2362 } 2363 // Pure group 2364 pattern = Pattern.compile("x+(?:y+)z+"); 2365 matcher = pattern.matcher("xxxyyyzzz"); 2366 matcher.find(); 2367 try { 2368 String blah = matcher.group(1); 2369 failCount++; 2370 } catch (IndexOutOfBoundsException ioobe) { 2371 // Good result 2372 } 2373 2374 // Supplementary character tests 2375 // Independent group 2376 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2377 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2378 matcher.find(); 2379 try { 2380 String blah = matcher.group(1); 2381 failCount++; 2382 } catch (IndexOutOfBoundsException ioobe) { 2383 // Good result 2384 } 2385 // Pure group 2386 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2387 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2388 matcher.find(); 2389 try { 2390 String blah = matcher.group(1); 2391 failCount++; 2392 } catch (IndexOutOfBoundsException ioobe) { 2393 // Good result 2394 } 2395 2396 report("GroupCapture"); 2397 } 2398 2399 private static void backRefTest() throws Exception { 2400 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2401 check(pattern, "zzzaabcazzz", true); 2402 2403 pattern = Pattern.compile("(a*)bc\\1"); 2404 check(pattern, "zzzaabcaazzz", true); 2405 2406 pattern = Pattern.compile("(abc)(def)\\1"); 2407 check(pattern, "abcdefabc", true); 2408 2409 pattern = Pattern.compile("(abc)(def)\\3"); 2410 check(pattern, "abcdefabc", false); 2411 2412 try { 2413 for (int i = 1; i < 10; i++) { 2414 // Make sure backref 1-9 are always accepted 2415 pattern = Pattern.compile("abcdef\\" + i); 2416 // and fail to match if the target group does not exit 2417 check(pattern, "abcdef", false); 2418 } 2419 } catch(PatternSyntaxException e) { 2420 failCount++; 2421 } 2422 2423 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2424 check(pattern, "abcdefghija", false); 2425 check(pattern, "abcdefghija1", true); 2426 2427 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2428 check(pattern, "abcdefghijkk", true); 2429 2430 pattern = Pattern.compile("(a)bcdefghij\\11"); 2431 check(pattern, "abcdefghija1", true); 2432 2433 // Supplementary character tests 2434 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2435 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2436 2437 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2438 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2439 2440 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2441 check(pattern, toSupplementaries("abcdefabc"), true); 2442 2443 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2444 check(pattern, toSupplementaries("abcdefabc"), false); 2445 2446 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2447 check(pattern, toSupplementaries("abcdefghija"), false); 2448 check(pattern, toSupplementaries("abcdefghija1"), true); 2449 2450 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2451 check(pattern, toSupplementaries("abcdefghijkk"), true); 2452 2453 report("BackRef"); 2454 } 2455 2456 /** 2457 * Unicode Technical Report #18, section 2.6 End of Line 2458 * There is no empty line to be matched in the sequence \u000D\u000A 2459 * but there is an empty line in the sequence \u000A\u000D. 2460 */ 2461 private static void anchorTest() throws Exception { 2462 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2463 Matcher m = p.matcher("blah1\r\nblah2"); 2464 m.find(); 2465 m.find(); 2466 if (!m.group().equals("blah2")) 2467 failCount++; 2468 2469 m.reset("blah1\n\rblah2"); 2470 m.find(); 2471 m.find(); 2472 m.find(); 2473 if (!m.group().equals("blah2")) 2474 failCount++; 2475 2476 // Test behavior of $ with \r\n at end of input 2477 p = Pattern.compile(".+$"); 2478 m = p.matcher("blah1\r\n"); 2479 if (!m.find()) 2480 failCount++; 2481 if (!m.group().equals("blah1")) 2482 failCount++; 2483 if (m.find()) 2484 failCount++; 2485 2486 // Test behavior of $ with \r\n at end of input in multiline 2487 p = Pattern.compile(".+$", Pattern.MULTILINE); 2488 m = p.matcher("blah1\r\n"); 2489 if (!m.find()) 2490 failCount++; 2491 if (m.find()) 2492 failCount++; 2493 2494 // Test for $ recognition of \u0085 for bug 4527731 2495 p = Pattern.compile(".+$", Pattern.MULTILINE); 2496 m = p.matcher("blah1\u0085"); 2497 if (!m.find()) 2498 failCount++; 2499 2500 // Supplementary character test 2501 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2502 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2503 m.find(); 2504 m.find(); 2505 if (!m.group().equals(toSupplementaries("blah2"))) 2506 failCount++; 2507 2508 m.reset(toSupplementaries("blah1\n\rblah2")); 2509 m.find(); 2510 m.find(); 2511 m.find(); 2512 if (!m.group().equals(toSupplementaries("blah2"))) 2513 failCount++; 2514 2515 // Test behavior of $ with \r\n at end of input 2516 p = Pattern.compile(".+$"); 2517 m = p.matcher(toSupplementaries("blah1\r\n")); 2518 if (!m.find()) 2519 failCount++; 2520 if (!m.group().equals(toSupplementaries("blah1"))) 2521 failCount++; 2522 if (m.find()) 2523 failCount++; 2524 2525 // Test behavior of $ with \r\n at end of input in multiline 2526 p = Pattern.compile(".+$", Pattern.MULTILINE); 2527 m = p.matcher(toSupplementaries("blah1\r\n")); 2528 if (!m.find()) 2529 failCount++; 2530 if (m.find()) 2531 failCount++; 2532 2533 // Test for $ recognition of \u0085 for bug 4527731 2534 p = Pattern.compile(".+$", Pattern.MULTILINE); 2535 m = p.matcher(toSupplementaries("blah1\u0085")); 2536 if (!m.find()) 2537 failCount++; 2538 2539 report("Anchors"); 2540 } 2541 2542 /** 2543 * A basic sanity test of Matcher.lookingAt(). 2544 */ 2545 private static void lookingAtTest() throws Exception { 2546 Pattern p = Pattern.compile("(ab)(c*)"); 2547 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2548 2549 if (!m.lookingAt()) 2550 failCount++; 2551 2552 if (!m.group().equals(m.group(0))) 2553 failCount++; 2554 2555 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2556 if (m.lookingAt()) 2557 failCount++; 2558 2559 // Supplementary character test 2560 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2561 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2562 2563 if (!m.lookingAt()) 2564 failCount++; 2565 2566 if (!m.group().equals(m.group(0))) 2567 failCount++; 2568 2569 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2570 if (m.lookingAt()) 2571 failCount++; 2572 2573 report("Looking At"); 2574 } 2575 2576 /** 2577 * A basic sanity test of Matcher.matches(). 2578 */ 2579 private static void matchesTest() throws Exception { 2580 // matches() 2581 Pattern p = Pattern.compile("ulb(c*)"); 2582 Matcher m = p.matcher("ulbcccccc"); 2583 if (!m.matches()) 2584 failCount++; 2585 2586 // find() but not matches() 2587 m.reset("zzzulbcccccc"); 2588 if (m.matches()) 2589 failCount++; 2590 2591 // lookingAt() but not matches() 2592 m.reset("ulbccccccdef"); 2593 if (m.matches()) 2594 failCount++; 2595 2596 // matches() 2597 p = Pattern.compile("a|ad"); 2598 m = p.matcher("ad"); 2599 if (!m.matches()) 2600 failCount++; 2601 2602 // Supplementary character test 2603 // matches() 2604 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2605 m = p.matcher(toSupplementaries("ulbcccccc")); 2606 if (!m.matches()) 2607 failCount++; 2608 2609 // find() but not matches() 2610 m.reset(toSupplementaries("zzzulbcccccc")); 2611 if (m.matches()) 2612 failCount++; 2613 2614 // lookingAt() but not matches() 2615 m.reset(toSupplementaries("ulbccccccdef")); 2616 if (m.matches()) 2617 failCount++; 2618 2619 // matches() 2620 p = Pattern.compile(toSupplementaries("a|ad")); 2621 m = p.matcher(toSupplementaries("ad")); 2622 if (!m.matches()) 2623 failCount++; 2624 2625 report("Matches"); 2626 } 2627 2628 /** 2629 * A basic sanity test of Pattern.matches(). 2630 */ 2631 private static void patternMatchesTest() throws Exception { 2632 // matches() 2633 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2634 toSupplementaries("ulbcccccc"))) 2635 failCount++; 2636 2637 // find() but not matches() 2638 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2639 toSupplementaries("zzzulbcccccc"))) 2640 failCount++; 2641 2642 // lookingAt() but not matches() 2643 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2644 toSupplementaries("ulbccccccdef"))) 2645 failCount++; 2646 2647 // Supplementary character test 2648 // matches() 2649 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2650 toSupplementaries("ulbcccccc"))) 2651 failCount++; 2652 2653 // find() but not matches() 2654 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2655 toSupplementaries("zzzulbcccccc"))) 2656 failCount++; 2657 2658 // lookingAt() but not matches() 2659 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2660 toSupplementaries("ulbccccccdef"))) 2661 failCount++; 2662 2663 report("Pattern Matches"); 2664 } 2665 2666 /** 2667 * Canonical equivalence testing. Tests the ability of the engine 2668 * to match sequences that are not explicitly specified in the 2669 * pattern when they are considered equivalent by the Unicode Standard. 2670 */ 2671 private static void ceTest() throws Exception { 2672 // Decomposed char outside char classes 2673 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2674 Matcher m = p.matcher("test\u00e5"); 2675 if (!m.matches()) 2676 failCount++; 2677 2678 m.reset("testa\u030a"); 2679 if (!m.matches()) 2680 failCount++; 2681 2682 // Composed char outside char classes 2683 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2684 m = p.matcher("test\u00e5"); 2685 if (!m.matches()) 2686 failCount++; 2687 2688 m.reset("testa\u030a"); 2689 if (!m.find()) 2690 failCount++; 2691 2692 // Decomposed char inside a char class 2693 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2694 m = p.matcher("test\u00e5"); 2695 if (!m.find()) 2696 failCount++; 2697 2698 m.reset("testa\u030a"); 2699 if (!m.find()) 2700 failCount++; 2701 2702 // Composed char inside a char class 2703 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2704 m = p.matcher("test\u00e5"); 2705 if (!m.find()) 2706 failCount++; 2707 2708 m.reset("testa\u0300"); 2709 if (!m.find()) 2710 failCount++; 2711 2712 m.reset("testa\u030a"); 2713 if (!m.find()) 2714 failCount++; 2715 2716 // Marks that cannot legally change order and be equivalent 2717 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2718 check(p, "testa\u0308\u0300", true); 2719 check(p, "testa\u0300\u0308", false); 2720 2721 // Marks that can legally change order and be equivalent 2722 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2723 check(p, "testa\u0308\u0323", true); 2724 check(p, "testa\u0323\u0308", true); 2725 2726 // Test all equivalences of the sequence a\u0308\u0323\u0300 2727 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2728 check(p, "testa\u0308\u0323\u0300", true); 2729 check(p, "testa\u0323\u0308\u0300", true); 2730 check(p, "testa\u0308\u0300\u0323", true); 2731 check(p, "test\u00e4\u0323\u0300", true); 2732 check(p, "test\u00e4\u0300\u0323", true); 2733 2734 Object[][] data = new Object[][] { 2735 2736 // JDK-4867170 2737 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true }, 2738 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true }, 2739 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true }, 2740 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true }, 2741 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true }, 2742 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true }, 2743 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true }, 2744 2745 { "\\p{IsGreek}", "ab\u1f80cd", "f", true }, 2746 { "\\p{IsGreek}", "ab\u1f81cd", "f", true }, 2747 { "\\p{IsGreek}", "ab\u1f82cd", "f", true }, 2748 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true }, 2749 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true }, 2750 2751 // backtracking, force to match "\u1f80", instead of \u1f82" 2752 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true }, 2753 2754 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true }, 2755 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true }, 2756 2757 { "[^\u1f80-\u1f82]","\u1f81", "m", false }, 2758 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false }, 2759 { "[^\u1f01\u0345]", "\u1f81", "f", false }, 2760 2761 { "[^\u1f81]+", "\u1f80\u1f82", "f", true }, 2762 { "[\u1f80]", "ab\u1f80cd", "f", true }, 2763 { "\u1f80", "ab\u1f80cd", "f", true }, 2764 { "\u1f00\u0345\u0300", "\u1f82", "m", true }, 2765 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true }, 2766 { "\u1f82", "\u1f00\u0345\u0300", "m", true }, 2767 { "\u1f82", "\u1f80\u0300", "m", true }, 2768 2769 // JDK-7080302 # compile failed 2770 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true}, 2771 2772 // JDK-6728861, same cause as above one 2773 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true}, 2774 2775 // JDK-6995635 2776 { "(\u00e9)", "e\u0301", "m", true }, 2777 2778 // JDK-6736245 2779 // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc 2780 { "\u2ADC", "\u2ADC", "m", true}, // NFC 2781 { "\u2ADC", "\u2ADD\u0338", "m", true}, // NFD 2782 2783 // 4916384. 2784 // Decomposed hangul (jamos) works inside clazz 2785 { "[\u1100\u1161]", "\u1100\u1161", "m", true}, 2786 { "[\u1100\u1161]", "\uac00", "m", true}, 2787 2788 { "[\uac00]", "\u1100\u1161", "m", true}, 2789 { "[\uac00]", "\uac00", "m", true}, 2790 2791 // Decomposed hangul (jamos) 2792 { "\u1100\u1161", "\u1100\u1161", "m", true}, 2793 { "\u1100\u1161", "\uac00", "m", true}, 2794 2795 // Composed hangul 2796 { "\uac00", "\u1100\u1161", "m", true }, 2797 { "\uac00", "\uac00", "m", true }, 2798 2799 /* Need a NFDSlice to nfd the source to solve this issue 2800 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2801 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165> 2802 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2803 2804 // Decomposed supplementary outside char classes 2805 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2806 // Composed supplementary outside char classes 2807 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2808 */ 2809 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2810 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2811 2812 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true }, 2813 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2814 }; 2815 2816 int failCount = 0; 2817 for (Object[] d : data) { 2818 String pn = (String)d[0]; 2819 String tt = (String)d[1]; 2820 boolean isFind = "f".equals(((String)d[2])); 2821 boolean expected = (boolean)d[3]; 2822 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find() 2823 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches(); 2824 if (ret != expected) { 2825 failCount++; 2826 continue; 2827 } 2828 } 2829 report("Canonical Equivalence"); 2830 } 2831 2832 /** 2833 * A basic sanity test of Matcher.replaceAll(). 2834 */ 2835 private static void globalSubstitute() throws Exception { 2836 // Global substitution with a literal 2837 Pattern p = Pattern.compile("(ab)(c*)"); 2838 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2839 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2840 failCount++; 2841 2842 m.reset("zzzabccczzzabcczzzabccczzz"); 2843 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2844 failCount++; 2845 2846 // Global substitution with groups 2847 m.reset("zzzabccczzzabcczzzabccczzz"); 2848 String result = m.replaceAll("$1"); 2849 if (!result.equals("zzzabzzzabzzzabzzz")) 2850 failCount++; 2851 2852 // Supplementary character test 2853 // Global substitution with a literal 2854 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2855 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2856 if (!m.replaceAll(toSupplementaries("test")). 2857 equals(toSupplementaries("testzzztestzzztest"))) 2858 failCount++; 2859 2860 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2861 if (!m.replaceAll(toSupplementaries("test")). 2862 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2863 failCount++; 2864 2865 // Global substitution with groups 2866 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2867 result = m.replaceAll("$1"); 2868 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2869 failCount++; 2870 2871 report("Global Substitution"); 2872 } 2873 2874 /** 2875 * Tests the usage of Matcher.appendReplacement() with literal 2876 * and group substitutions. 2877 */ 2878 private static void stringbufferSubstitute() throws Exception { 2879 // SB substitution with literal 2880 String blah = "zzzblahzzz"; 2881 Pattern p = Pattern.compile("blah"); 2882 Matcher m = p.matcher(blah); 2883 StringBuffer result = new StringBuffer(); 2884 try { 2885 m.appendReplacement(result, "blech"); 2886 failCount++; 2887 } catch (IllegalStateException e) { 2888 } 2889 m.find(); 2890 m.appendReplacement(result, "blech"); 2891 if (!result.toString().equals("zzzblech")) 2892 failCount++; 2893 2894 m.appendTail(result); 2895 if (!result.toString().equals("zzzblechzzz")) 2896 failCount++; 2897 2898 // SB substitution with groups 2899 blah = "zzzabcdzzz"; 2900 p = Pattern.compile("(ab)(cd)*"); 2901 m = p.matcher(blah); 2902 result = new StringBuffer(); 2903 try { 2904 m.appendReplacement(result, "$1"); 2905 failCount++; 2906 } catch (IllegalStateException e) { 2907 } 2908 m.find(); 2909 m.appendReplacement(result, "$1"); 2910 if (!result.toString().equals("zzzab")) 2911 failCount++; 2912 2913 m.appendTail(result); 2914 if (!result.toString().equals("zzzabzzz")) 2915 failCount++; 2916 2917 // SB substitution with 3 groups 2918 blah = "zzzabcdcdefzzz"; 2919 p = Pattern.compile("(ab)(cd)*(ef)"); 2920 m = p.matcher(blah); 2921 result = new StringBuffer(); 2922 try { 2923 m.appendReplacement(result, "$1w$2w$3"); 2924 failCount++; 2925 } catch (IllegalStateException e) { 2926 } 2927 m.find(); 2928 m.appendReplacement(result, "$1w$2w$3"); 2929 if (!result.toString().equals("zzzabwcdwef")) 2930 failCount++; 2931 2932 m.appendTail(result); 2933 if (!result.toString().equals("zzzabwcdwefzzz")) 2934 failCount++; 2935 2936 // SB substitution with groups and three matches 2937 // skipping middle match 2938 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2939 p = Pattern.compile("(ab)(cd*)"); 2940 m = p.matcher(blah); 2941 result = new StringBuffer(); 2942 try { 2943 m.appendReplacement(result, "$1"); 2944 failCount++; 2945 } catch (IllegalStateException e) { 2946 } 2947 m.find(); 2948 m.appendReplacement(result, "$1"); 2949 if (!result.toString().equals("zzzab")) 2950 failCount++; 2951 2952 m.find(); 2953 m.find(); 2954 m.appendReplacement(result, "$2"); 2955 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2956 failCount++; 2957 2958 m.appendTail(result); 2959 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2960 failCount++; 2961 2962 // Check to make sure escaped $ is ignored 2963 blah = "zzzabcdcdefzzz"; 2964 p = Pattern.compile("(ab)(cd)*(ef)"); 2965 m = p.matcher(blah); 2966 result = new StringBuffer(); 2967 m.find(); 2968 m.appendReplacement(result, "$1w\\$2w$3"); 2969 if (!result.toString().equals("zzzabw$2wef")) 2970 failCount++; 2971 2972 m.appendTail(result); 2973 if (!result.toString().equals("zzzabw$2wefzzz")) 2974 failCount++; 2975 2976 // Check to make sure a reference to nonexistent group causes error 2977 blah = "zzzabcdcdefzzz"; 2978 p = Pattern.compile("(ab)(cd)*(ef)"); 2979 m = p.matcher(blah); 2980 result = new StringBuffer(); 2981 m.find(); 2982 try { 2983 m.appendReplacement(result, "$1w$5w$3"); 2984 failCount++; 2985 } catch (IndexOutOfBoundsException ioobe) { 2986 // Correct result 2987 } 2988 2989 // Check double digit group references 2990 blah = "zzz123456789101112zzz"; 2991 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2992 m = p.matcher(blah); 2993 result = new StringBuffer(); 2994 m.find(); 2995 m.appendReplacement(result, "$1w$11w$3"); 2996 if (!result.toString().equals("zzz1w11w3")) 2997 failCount++; 2998 2999 // Check to make sure it backs off $15 to $1 if only three groups 3000 blah = "zzzabcdcdefzzz"; 3001 p = Pattern.compile("(ab)(cd)*(ef)"); 3002 m = p.matcher(blah); 3003 result = new StringBuffer(); 3004 m.find(); 3005 m.appendReplacement(result, "$1w$15w$3"); 3006 if (!result.toString().equals("zzzabwab5wef")) 3007 failCount++; 3008 3009 3010 // Supplementary character test 3011 // SB substitution with literal 3012 blah = toSupplementaries("zzzblahzzz"); 3013 p = Pattern.compile(toSupplementaries("blah")); 3014 m = p.matcher(blah); 3015 result = new StringBuffer(); 3016 try { 3017 m.appendReplacement(result, toSupplementaries("blech")); 3018 failCount++; 3019 } catch (IllegalStateException e) { 3020 } 3021 m.find(); 3022 m.appendReplacement(result, toSupplementaries("blech")); 3023 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3024 failCount++; 3025 3026 m.appendTail(result); 3027 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3028 failCount++; 3029 3030 // SB substitution with groups 3031 blah = toSupplementaries("zzzabcdzzz"); 3032 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3033 m = p.matcher(blah); 3034 result = new StringBuffer(); 3035 try { 3036 m.appendReplacement(result, "$1"); 3037 failCount++; 3038 } catch (IllegalStateException e) { 3039 } 3040 m.find(); 3041 m.appendReplacement(result, "$1"); 3042 if (!result.toString().equals(toSupplementaries("zzzab"))) 3043 failCount++; 3044 3045 m.appendTail(result); 3046 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3047 failCount++; 3048 3049 // SB substitution with 3 groups 3050 blah = toSupplementaries("zzzabcdcdefzzz"); 3051 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3052 m = p.matcher(blah); 3053 result = new StringBuffer(); 3054 try { 3055 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3056 failCount++; 3057 } catch (IllegalStateException e) { 3058 } 3059 m.find(); 3060 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3061 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3062 failCount++; 3063 3064 m.appendTail(result); 3065 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3066 failCount++; 3067 3068 // SB substitution with groups and three matches 3069 // skipping middle match 3070 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3071 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3072 m = p.matcher(blah); 3073 result = new StringBuffer(); 3074 try { 3075 m.appendReplacement(result, "$1"); 3076 failCount++; 3077 } catch (IllegalStateException e) { 3078 } 3079 m.find(); 3080 m.appendReplacement(result, "$1"); 3081 if (!result.toString().equals(toSupplementaries("zzzab"))) 3082 failCount++; 3083 3084 m.find(); 3085 m.find(); 3086 m.appendReplacement(result, "$2"); 3087 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3088 failCount++; 3089 3090 m.appendTail(result); 3091 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3092 failCount++; 3093 3094 // Check to make sure escaped $ is ignored 3095 blah = toSupplementaries("zzzabcdcdefzzz"); 3096 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3097 m = p.matcher(blah); 3098 result = new StringBuffer(); 3099 m.find(); 3100 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3101 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3102 failCount++; 3103 3104 m.appendTail(result); 3105 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3106 failCount++; 3107 3108 // Check to make sure a reference to nonexistent group causes error 3109 blah = toSupplementaries("zzzabcdcdefzzz"); 3110 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3111 m = p.matcher(blah); 3112 result = new StringBuffer(); 3113 m.find(); 3114 try { 3115 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3116 failCount++; 3117 } catch (IndexOutOfBoundsException ioobe) { 3118 // Correct result 3119 } 3120 3121 // Check double digit group references 3122 blah = toSupplementaries("zzz123456789101112zzz"); 3123 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3124 m = p.matcher(blah); 3125 result = new StringBuffer(); 3126 m.find(); 3127 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3128 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3129 failCount++; 3130 3131 // Check to make sure it backs off $15 to $1 if only three groups 3132 blah = toSupplementaries("zzzabcdcdefzzz"); 3133 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3134 m = p.matcher(blah); 3135 result = new StringBuffer(); 3136 m.find(); 3137 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3138 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3139 failCount++; 3140 3141 // Check nothing has been appended into the output buffer if 3142 // the replacement string triggers IllegalArgumentException. 3143 p = Pattern.compile("(abc)"); 3144 m = p.matcher("abcd"); 3145 result = new StringBuffer(); 3146 m.find(); 3147 try { 3148 m.appendReplacement(result, ("xyz$g")); 3149 failCount++; 3150 } catch (IllegalArgumentException iae) { 3151 if (result.length() != 0) 3152 failCount++; 3153 } 3154 3155 report("SB Substitution"); 3156 } 3157 3158 /** 3159 * Tests the usage of Matcher.appendReplacement() with literal 3160 * and group substitutions. 3161 */ 3162 private static void stringbuilderSubstitute() throws Exception { 3163 // SB substitution with literal 3164 String blah = "zzzblahzzz"; 3165 Pattern p = Pattern.compile("blah"); 3166 Matcher m = p.matcher(blah); 3167 StringBuilder result = new StringBuilder(); 3168 try { 3169 m.appendReplacement(result, "blech"); 3170 failCount++; 3171 } catch (IllegalStateException e) { 3172 } 3173 m.find(); 3174 m.appendReplacement(result, "blech"); 3175 if (!result.toString().equals("zzzblech")) 3176 failCount++; 3177 3178 m.appendTail(result); 3179 if (!result.toString().equals("zzzblechzzz")) 3180 failCount++; 3181 3182 // SB substitution with groups 3183 blah = "zzzabcdzzz"; 3184 p = Pattern.compile("(ab)(cd)*"); 3185 m = p.matcher(blah); 3186 result = new StringBuilder(); 3187 try { 3188 m.appendReplacement(result, "$1"); 3189 failCount++; 3190 } catch (IllegalStateException e) { 3191 } 3192 m.find(); 3193 m.appendReplacement(result, "$1"); 3194 if (!result.toString().equals("zzzab")) 3195 failCount++; 3196 3197 m.appendTail(result); 3198 if (!result.toString().equals("zzzabzzz")) 3199 failCount++; 3200 3201 // SB substitution with 3 groups 3202 blah = "zzzabcdcdefzzz"; 3203 p = Pattern.compile("(ab)(cd)*(ef)"); 3204 m = p.matcher(blah); 3205 result = new StringBuilder(); 3206 try { 3207 m.appendReplacement(result, "$1w$2w$3"); 3208 failCount++; 3209 } catch (IllegalStateException e) { 3210 } 3211 m.find(); 3212 m.appendReplacement(result, "$1w$2w$3"); 3213 if (!result.toString().equals("zzzabwcdwef")) 3214 failCount++; 3215 3216 m.appendTail(result); 3217 if (!result.toString().equals("zzzabwcdwefzzz")) 3218 failCount++; 3219 3220 // SB substitution with groups and three matches 3221 // skipping middle match 3222 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3223 p = Pattern.compile("(ab)(cd*)"); 3224 m = p.matcher(blah); 3225 result = new StringBuilder(); 3226 try { 3227 m.appendReplacement(result, "$1"); 3228 failCount++; 3229 } catch (IllegalStateException e) { 3230 } 3231 m.find(); 3232 m.appendReplacement(result, "$1"); 3233 if (!result.toString().equals("zzzab")) 3234 failCount++; 3235 3236 m.find(); 3237 m.find(); 3238 m.appendReplacement(result, "$2"); 3239 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3240 failCount++; 3241 3242 m.appendTail(result); 3243 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3244 failCount++; 3245 3246 // Check to make sure escaped $ is ignored 3247 blah = "zzzabcdcdefzzz"; 3248 p = Pattern.compile("(ab)(cd)*(ef)"); 3249 m = p.matcher(blah); 3250 result = new StringBuilder(); 3251 m.find(); 3252 m.appendReplacement(result, "$1w\\$2w$3"); 3253 if (!result.toString().equals("zzzabw$2wef")) 3254 failCount++; 3255 3256 m.appendTail(result); 3257 if (!result.toString().equals("zzzabw$2wefzzz")) 3258 failCount++; 3259 3260 // Check to make sure a reference to nonexistent group causes error 3261 blah = "zzzabcdcdefzzz"; 3262 p = Pattern.compile("(ab)(cd)*(ef)"); 3263 m = p.matcher(blah); 3264 result = new StringBuilder(); 3265 m.find(); 3266 try { 3267 m.appendReplacement(result, "$1w$5w$3"); 3268 failCount++; 3269 } catch (IndexOutOfBoundsException ioobe) { 3270 // Correct result 3271 } 3272 3273 // Check double digit group references 3274 blah = "zzz123456789101112zzz"; 3275 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3276 m = p.matcher(blah); 3277 result = new StringBuilder(); 3278 m.find(); 3279 m.appendReplacement(result, "$1w$11w$3"); 3280 if (!result.toString().equals("zzz1w11w3")) 3281 failCount++; 3282 3283 // Check to make sure it backs off $15 to $1 if only three groups 3284 blah = "zzzabcdcdefzzz"; 3285 p = Pattern.compile("(ab)(cd)*(ef)"); 3286 m = p.matcher(blah); 3287 result = new StringBuilder(); 3288 m.find(); 3289 m.appendReplacement(result, "$1w$15w$3"); 3290 if (!result.toString().equals("zzzabwab5wef")) 3291 failCount++; 3292 3293 3294 // Supplementary character test 3295 // SB substitution with literal 3296 blah = toSupplementaries("zzzblahzzz"); 3297 p = Pattern.compile(toSupplementaries("blah")); 3298 m = p.matcher(blah); 3299 result = new StringBuilder(); 3300 try { 3301 m.appendReplacement(result, toSupplementaries("blech")); 3302 failCount++; 3303 } catch (IllegalStateException e) { 3304 } 3305 m.find(); 3306 m.appendReplacement(result, toSupplementaries("blech")); 3307 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3308 failCount++; 3309 m.appendTail(result); 3310 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3311 failCount++; 3312 3313 // SB substitution with groups 3314 blah = toSupplementaries("zzzabcdzzz"); 3315 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3316 m = p.matcher(blah); 3317 result = new StringBuilder(); 3318 try { 3319 m.appendReplacement(result, "$1"); 3320 failCount++; 3321 } catch (IllegalStateException e) { 3322 } 3323 m.find(); 3324 m.appendReplacement(result, "$1"); 3325 if (!result.toString().equals(toSupplementaries("zzzab"))) 3326 failCount++; 3327 3328 m.appendTail(result); 3329 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3330 failCount++; 3331 3332 // SB substitution with 3 groups 3333 blah = toSupplementaries("zzzabcdcdefzzz"); 3334 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3335 m = p.matcher(blah); 3336 result = new StringBuilder(); 3337 try { 3338 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3339 failCount++; 3340 } catch (IllegalStateException e) { 3341 } 3342 m.find(); 3343 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3344 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3345 failCount++; 3346 3347 m.appendTail(result); 3348 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3349 failCount++; 3350 3351 // SB substitution with groups and three matches 3352 // skipping middle match 3353 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3354 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3355 m = p.matcher(blah); 3356 result = new StringBuilder(); 3357 try { 3358 m.appendReplacement(result, "$1"); 3359 failCount++; 3360 } catch (IllegalStateException e) { 3361 } 3362 m.find(); 3363 m.appendReplacement(result, "$1"); 3364 if (!result.toString().equals(toSupplementaries("zzzab"))) 3365 failCount++; 3366 3367 m.find(); 3368 m.find(); 3369 m.appendReplacement(result, "$2"); 3370 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3371 failCount++; 3372 3373 m.appendTail(result); 3374 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3375 failCount++; 3376 3377 // Check to make sure escaped $ is ignored 3378 blah = toSupplementaries("zzzabcdcdefzzz"); 3379 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3380 m = p.matcher(blah); 3381 result = new StringBuilder(); 3382 m.find(); 3383 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3384 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3385 failCount++; 3386 3387 m.appendTail(result); 3388 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3389 failCount++; 3390 3391 // Check to make sure a reference to nonexistent group causes error 3392 blah = toSupplementaries("zzzabcdcdefzzz"); 3393 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3394 m = p.matcher(blah); 3395 result = new StringBuilder(); 3396 m.find(); 3397 try { 3398 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3399 failCount++; 3400 } catch (IndexOutOfBoundsException ioobe) { 3401 // Correct result 3402 } 3403 // Check double digit group references 3404 blah = toSupplementaries("zzz123456789101112zzz"); 3405 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3406 m = p.matcher(blah); 3407 result = new StringBuilder(); 3408 m.find(); 3409 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3410 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3411 failCount++; 3412 3413 // Check to make sure it backs off $15 to $1 if only three groups 3414 blah = toSupplementaries("zzzabcdcdefzzz"); 3415 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3416 m = p.matcher(blah); 3417 result = new StringBuilder(); 3418 m.find(); 3419 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3420 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3421 failCount++; 3422 // Check nothing has been appended into the output buffer if 3423 // the replacement string triggers IllegalArgumentException. 3424 p = Pattern.compile("(abc)"); 3425 m = p.matcher("abcd"); 3426 result = new StringBuilder(); 3427 m.find(); 3428 try { 3429 m.appendReplacement(result, ("xyz$g")); 3430 failCount++; 3431 } catch (IllegalArgumentException iae) { 3432 if (result.length() != 0) 3433 failCount++; 3434 } 3435 report("SB Substitution 2"); 3436 } 3437 3438 /* 3439 * 5 groups of characters are created to make a substitution string. 3440 * A base string will be created including random lead chars, the 3441 * substitution string, and random trailing chars. 3442 * A pattern containing the 5 groups is searched for and replaced with: 3443 * random group + random string + random group. 3444 * The results are checked for correctness. 3445 */ 3446 private static void substitutionBasher() { 3447 for (int runs = 0; runs<1000; runs++) { 3448 // Create a base string to work in 3449 int leadingChars = generator.nextInt(10); 3450 StringBuffer baseBuffer = new StringBuffer(100); 3451 String leadingString = getRandomAlphaString(leadingChars); 3452 baseBuffer.append(leadingString); 3453 3454 // Create 5 groups of random number of random chars 3455 // Create the string to substitute 3456 // Create the pattern string to search for 3457 StringBuffer bufferToSub = new StringBuffer(25); 3458 StringBuffer bufferToPat = new StringBuffer(50); 3459 String[] groups = new String[5]; 3460 for(int i=0; i<5; i++) { 3461 int aGroupSize = generator.nextInt(5)+1; 3462 groups[i] = getRandomAlphaString(aGroupSize); 3463 bufferToSub.append(groups[i]); 3464 bufferToPat.append('('); 3465 bufferToPat.append(groups[i]); 3466 bufferToPat.append(')'); 3467 } 3468 String stringToSub = bufferToSub.toString(); 3469 String pattern = bufferToPat.toString(); 3470 3471 // Place sub string into working string at random index 3472 baseBuffer.append(stringToSub); 3473 3474 // Append random chars to end 3475 int trailingChars = generator.nextInt(10); 3476 String trailingString = getRandomAlphaString(trailingChars); 3477 baseBuffer.append(trailingString); 3478 String baseString = baseBuffer.toString(); 3479 3480 // Create test pattern and matcher 3481 Pattern p = Pattern.compile(pattern); 3482 Matcher m = p.matcher(baseString); 3483 3484 // Reject candidate if pattern happens to start early 3485 m.find(); 3486 if (m.start() < leadingChars) 3487 continue; 3488 3489 // Reject candidate if more than one match 3490 if (m.find()) 3491 continue; 3492 3493 // Construct a replacement string with : 3494 // random group + random string + random group 3495 StringBuffer bufferToRep = new StringBuffer(); 3496 int groupIndex1 = generator.nextInt(5); 3497 bufferToRep.append("$" + (groupIndex1 + 1)); 3498 String randomMidString = getRandomAlphaString(5); 3499 bufferToRep.append(randomMidString); 3500 int groupIndex2 = generator.nextInt(5); 3501 bufferToRep.append("$" + (groupIndex2 + 1)); 3502 String replacement = bufferToRep.toString(); 3503 3504 // Do the replacement 3505 String result = m.replaceAll(replacement); 3506 3507 // Construct expected result 3508 StringBuffer bufferToRes = new StringBuffer(); 3509 bufferToRes.append(leadingString); 3510 bufferToRes.append(groups[groupIndex1]); 3511 bufferToRes.append(randomMidString); 3512 bufferToRes.append(groups[groupIndex2]); 3513 bufferToRes.append(trailingString); 3514 String expectedResult = bufferToRes.toString(); 3515 3516 // Check results 3517 if (!result.equals(expectedResult)) 3518 failCount++; 3519 } 3520 3521 report("Substitution Basher"); 3522 } 3523 3524 /* 3525 * 5 groups of characters are created to make a substitution string. 3526 * A base string will be created including random lead chars, the 3527 * substitution string, and random trailing chars. 3528 * A pattern containing the 5 groups is searched for and replaced with: 3529 * random group + random string + random group. 3530 * The results are checked for correctness. 3531 */ 3532 private static void substitutionBasher2() { 3533 for (int runs = 0; runs<1000; runs++) { 3534 // Create a base string to work in 3535 int leadingChars = generator.nextInt(10); 3536 StringBuilder baseBuffer = new StringBuilder(100); 3537 String leadingString = getRandomAlphaString(leadingChars); 3538 baseBuffer.append(leadingString); 3539 3540 // Create 5 groups of random number of random chars 3541 // Create the string to substitute 3542 // Create the pattern string to search for 3543 StringBuilder bufferToSub = new StringBuilder(25); 3544 StringBuilder bufferToPat = new StringBuilder(50); 3545 String[] groups = new String[5]; 3546 for(int i=0; i<5; i++) { 3547 int aGroupSize = generator.nextInt(5)+1; 3548 groups[i] = getRandomAlphaString(aGroupSize); 3549 bufferToSub.append(groups[i]); 3550 bufferToPat.append('('); 3551 bufferToPat.append(groups[i]); 3552 bufferToPat.append(')'); 3553 } 3554 String stringToSub = bufferToSub.toString(); 3555 String pattern = bufferToPat.toString(); 3556 3557 // Place sub string into working string at random index 3558 baseBuffer.append(stringToSub); 3559 3560 // Append random chars to end 3561 int trailingChars = generator.nextInt(10); 3562 String trailingString = getRandomAlphaString(trailingChars); 3563 baseBuffer.append(trailingString); 3564 String baseString = baseBuffer.toString(); 3565 3566 // Create test pattern and matcher 3567 Pattern p = Pattern.compile(pattern); 3568 Matcher m = p.matcher(baseString); 3569 3570 // Reject candidate if pattern happens to start early 3571 m.find(); 3572 if (m.start() < leadingChars) 3573 continue; 3574 3575 // Reject candidate if more than one match 3576 if (m.find()) 3577 continue; 3578 3579 // Construct a replacement string with : 3580 // random group + random string + random group 3581 StringBuilder bufferToRep = new StringBuilder(); 3582 int groupIndex1 = generator.nextInt(5); 3583 bufferToRep.append("$" + (groupIndex1 + 1)); 3584 String randomMidString = getRandomAlphaString(5); 3585 bufferToRep.append(randomMidString); 3586 int groupIndex2 = generator.nextInt(5); 3587 bufferToRep.append("$" + (groupIndex2 + 1)); 3588 String replacement = bufferToRep.toString(); 3589 3590 // Do the replacement 3591 String result = m.replaceAll(replacement); 3592 3593 // Construct expected result 3594 StringBuilder bufferToRes = new StringBuilder(); 3595 bufferToRes.append(leadingString); 3596 bufferToRes.append(groups[groupIndex1]); 3597 bufferToRes.append(randomMidString); 3598 bufferToRes.append(groups[groupIndex2]); 3599 bufferToRes.append(trailingString); 3600 String expectedResult = bufferToRes.toString(); 3601 3602 // Check results 3603 if (!result.equals(expectedResult)) { 3604 failCount++; 3605 } 3606 } 3607 3608 report("Substitution Basher 2"); 3609 } 3610 3611 /** 3612 * Checks the handling of some escape sequences that the Pattern 3613 * class should process instead of the java compiler. These are 3614 * not in the file because the escapes should be be processed 3615 * by the Pattern class when the regex is compiled. 3616 */ 3617 private static void escapes() throws Exception { 3618 Pattern p = Pattern.compile("\\043"); 3619 Matcher m = p.matcher("#"); 3620 if (!m.find()) 3621 failCount++; 3622 3623 p = Pattern.compile("\\x23"); 3624 m = p.matcher("#"); 3625 if (!m.find()) 3626 failCount++; 3627 3628 p = Pattern.compile("\\u0023"); 3629 m = p.matcher("#"); 3630 if (!m.find()) 3631 failCount++; 3632 3633 report("Escape sequences"); 3634 } 3635 3636 /** 3637 * Checks the handling of blank input situations. These 3638 * tests are incompatible with my test file format. 3639 */ 3640 private static void blankInput() throws Exception { 3641 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3642 Matcher m = p.matcher(""); 3643 if (m.find()) 3644 failCount++; 3645 3646 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3647 m = p.matcher(""); 3648 if (!m.find()) 3649 failCount++; 3650 3651 p = Pattern.compile("abc"); 3652 m = p.matcher(""); 3653 if (m.find()) 3654 failCount++; 3655 3656 p = Pattern.compile("a*"); 3657 m = p.matcher(""); 3658 if (!m.find()) 3659 failCount++; 3660 3661 report("Blank input"); 3662 } 3663 3664 /** 3665 * Tests the Boyer-Moore pattern matching of a character sequence 3666 * on randomly generated patterns. 3667 */ 3668 private static void bm() throws Exception { 3669 doBnM('a'); 3670 report("Boyer Moore (ASCII)"); 3671 3672 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3673 report("Boyer Moore (Supplementary)"); 3674 } 3675 3676 private static void doBnM(int baseCharacter) throws Exception { 3677 int achar=0; 3678 3679 for (int i=0; i<100; i++) { 3680 // Create a short pattern to search for 3681 int patternLength = generator.nextInt(7) + 4; 3682 StringBuffer patternBuffer = new StringBuffer(patternLength); 3683 String pattern; 3684 retry: for (;;) { 3685 for (int x=0; x<patternLength; x++) { 3686 int ch = baseCharacter + generator.nextInt(26); 3687 if (Character.isSupplementaryCodePoint(ch)) { 3688 patternBuffer.append(Character.toChars(ch)); 3689 } else { 3690 patternBuffer.append((char)ch); 3691 } 3692 } 3693 pattern = patternBuffer.toString(); 3694 3695 // Avoid patterns that start and end with the same substring 3696 // See JDK-6854417 3697 for (int x=1; x < pattern.length(); x++) { 3698 if (pattern.startsWith(pattern.substring(x))) 3699 continue retry; 3700 } 3701 break; 3702 } 3703 Pattern p = Pattern.compile(pattern); 3704 3705 // Create a buffer with random ASCII chars that does 3706 // not match the sample 3707 String toSearch = null; 3708 StringBuffer s = null; 3709 Matcher m = p.matcher(""); 3710 do { 3711 s = new StringBuffer(100); 3712 for (int x=0; x<100; x++) { 3713 int ch = baseCharacter + generator.nextInt(26); 3714 if (Character.isSupplementaryCodePoint(ch)) { 3715 s.append(Character.toChars(ch)); 3716 } else { 3717 s.append((char)ch); 3718 } 3719 } 3720 toSearch = s.toString(); 3721 m.reset(toSearch); 3722 } while (m.find()); 3723 3724 // Insert the pattern at a random spot 3725 int insertIndex = generator.nextInt(99); 3726 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3727 insertIndex++; 3728 s = s.insert(insertIndex, pattern); 3729 toSearch = s.toString(); 3730 3731 // Make sure that the pattern is found 3732 m.reset(toSearch); 3733 if (!m.find()) 3734 failCount++; 3735 3736 // Make sure that the match text is the pattern 3737 if (!m.group().equals(pattern)) 3738 failCount++; 3739 3740 // Make sure match occured at insertion point 3741 if (m.start() != insertIndex) 3742 failCount++; 3743 } 3744 } 3745 3746 /** 3747 * Tests the matching of slices on randomly generated patterns. 3748 * The Boyer-Moore optimization is not done on these patterns 3749 * because it uses unicode case folding. 3750 */ 3751 private static void slice() throws Exception { 3752 doSlice(Character.MAX_VALUE); 3753 report("Slice"); 3754 3755 doSlice(Character.MAX_CODE_POINT); 3756 report("Slice (Supplementary)"); 3757 } 3758 3759 private static void doSlice(int maxCharacter) throws Exception { 3760 Random generator = new Random(); 3761 int achar=0; 3762 3763 for (int i=0; i<100; i++) { 3764 // Create a short pattern to search for 3765 int patternLength = generator.nextInt(7) + 4; 3766 StringBuffer patternBuffer = new StringBuffer(patternLength); 3767 for (int x=0; x<patternLength; x++) { 3768 int randomChar = 0; 3769 while (!Character.isLetterOrDigit(randomChar)) 3770 randomChar = generator.nextInt(maxCharacter); 3771 if (Character.isSupplementaryCodePoint(randomChar)) { 3772 patternBuffer.append(Character.toChars(randomChar)); 3773 } else { 3774 patternBuffer.append((char) randomChar); 3775 } 3776 } 3777 String pattern = patternBuffer.toString(); 3778 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3779 3780 // Create a buffer with random chars that does not match the sample 3781 String toSearch = null; 3782 StringBuffer s = null; 3783 Matcher m = p.matcher(""); 3784 do { 3785 s = new StringBuffer(100); 3786 for (int x=0; x<100; x++) { 3787 int randomChar = 0; 3788 while (!Character.isLetterOrDigit(randomChar)) 3789 randomChar = generator.nextInt(maxCharacter); 3790 if (Character.isSupplementaryCodePoint(randomChar)) { 3791 s.append(Character.toChars(randomChar)); 3792 } else { 3793 s.append((char) randomChar); 3794 } 3795 } 3796 toSearch = s.toString(); 3797 m.reset(toSearch); 3798 } while (m.find()); 3799 3800 // Insert the pattern at a random spot 3801 int insertIndex = generator.nextInt(99); 3802 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3803 insertIndex++; 3804 s = s.insert(insertIndex, pattern); 3805 toSearch = s.toString(); 3806 3807 // Make sure that the pattern is found 3808 m.reset(toSearch); 3809 if (!m.find()) 3810 failCount++; 3811 3812 // Make sure that the match text is the pattern 3813 if (!m.group().equals(pattern)) 3814 failCount++; 3815 3816 // Make sure match occured at insertion point 3817 if (m.start() != insertIndex) 3818 failCount++; 3819 } 3820 } 3821 3822 private static void explainFailure(String pattern, String data, 3823 String expected, String actual) { 3824 System.err.println("----------------------------------------"); 3825 System.err.println("Pattern = "+pattern); 3826 System.err.println("Data = "+data); 3827 System.err.println("Expected = " + expected); 3828 System.err.println("Actual = " + actual); 3829 } 3830 3831 private static void explainFailure(String pattern, String data, 3832 Throwable t) { 3833 System.err.println("----------------------------------------"); 3834 System.err.println("Pattern = "+pattern); 3835 System.err.println("Data = "+data); 3836 t.printStackTrace(System.err); 3837 } 3838 3839 // Testing examples from a file 3840 3841 /** 3842 * Goes through the file "TestCases.txt" and creates many patterns 3843 * described in the file, matching the patterns against input lines in 3844 * the file, and comparing the results against the correct results 3845 * also found in the file. The file format is described in comments 3846 * at the head of the file. 3847 */ 3848 private static void processFile(String fileName) throws Exception { 3849 File testCases = new File(System.getProperty("test.src", "."), 3850 fileName); 3851 FileInputStream in = new FileInputStream(testCases); 3852 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3853 3854 // Process next test case. 3855 String aLine; 3856 while((aLine = r.readLine()) != null) { 3857 // Read a line for pattern 3858 String patternString = grabLine(r); 3859 Pattern p = null; 3860 try { 3861 p = compileTestPattern(patternString); 3862 } catch (PatternSyntaxException e) { 3863 String dataString = grabLine(r); 3864 String expectedResult = grabLine(r); 3865 if (expectedResult.startsWith("error")) 3866 continue; 3867 explainFailure(patternString, dataString, e); 3868 failCount++; 3869 continue; 3870 } 3871 3872 // Read a line for input string 3873 String dataString = grabLine(r); 3874 Matcher m = p.matcher(dataString); 3875 StringBuffer result = new StringBuffer(); 3876 3877 // Check for IllegalStateExceptions before a match 3878 failCount += preMatchInvariants(m); 3879 3880 boolean found = m.find(); 3881 3882 if (found) 3883 failCount += postTrueMatchInvariants(m); 3884 else 3885 failCount += postFalseMatchInvariants(m); 3886 3887 if (found) { 3888 result.append("true "); 3889 result.append(m.group(0) + " "); 3890 } else { 3891 result.append("false "); 3892 } 3893 3894 result.append(m.groupCount()); 3895 3896 if (found) { 3897 for (int i=1; i<m.groupCount()+1; i++) 3898 if (m.group(i) != null) 3899 result.append(" " +m.group(i)); 3900 } 3901 3902 // Read a line for the expected result 3903 String expectedResult = grabLine(r); 3904 3905 if (!result.toString().equals(expectedResult)) { 3906 explainFailure(patternString, dataString, expectedResult, result.toString()); 3907 failCount++; 3908 } 3909 } 3910 3911 report(fileName); 3912 } 3913 3914 private static int preMatchInvariants(Matcher m) { 3915 int failCount = 0; 3916 try { 3917 m.start(); 3918 failCount++; 3919 } catch (IllegalStateException ise) {} 3920 try { 3921 m.end(); 3922 failCount++; 3923 } catch (IllegalStateException ise) {} 3924 try { 3925 m.group(); 3926 failCount++; 3927 } catch (IllegalStateException ise) {} 3928 return failCount; 3929 } 3930 3931 private static int postFalseMatchInvariants(Matcher m) { 3932 int failCount = 0; 3933 try { 3934 m.group(); 3935 failCount++; 3936 } catch (IllegalStateException ise) {} 3937 try { 3938 m.start(); 3939 failCount++; 3940 } catch (IllegalStateException ise) {} 3941 try { 3942 m.end(); 3943 failCount++; 3944 } catch (IllegalStateException ise) {} 3945 return failCount; 3946 } 3947 3948 private static int postTrueMatchInvariants(Matcher m) { 3949 int failCount = 0; 3950 //assert(m.start() = m.start(0); 3951 if (m.start() != m.start(0)) 3952 failCount++; 3953 //assert(m.end() = m.end(0); 3954 if (m.start() != m.start(0)) 3955 failCount++; 3956 //assert(m.group() = m.group(0); 3957 if (!m.group().equals(m.group(0))) 3958 failCount++; 3959 try { 3960 m.group(50); 3961 failCount++; 3962 } catch (IndexOutOfBoundsException ise) {} 3963 3964 return failCount; 3965 } 3966 3967 private static Pattern compileTestPattern(String patternString) { 3968 if (!patternString.startsWith("'")) { 3969 return Pattern.compile(patternString); 3970 } 3971 int break1 = patternString.lastIndexOf("'"); 3972 String flagString = patternString.substring( 3973 break1+1, patternString.length()); 3974 patternString = patternString.substring(1, break1); 3975 3976 if (flagString.equals("i")) 3977 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3978 3979 if (flagString.equals("m")) 3980 return Pattern.compile(patternString, Pattern.MULTILINE); 3981 3982 return Pattern.compile(patternString); 3983 } 3984 3985 /** 3986 * Reads a line from the input file. Keeps reading lines until a non 3987 * empty non comment line is read. If the line contains a \n then 3988 * these two characters are replaced by a newline char. If a \\uxxxx 3989 * sequence is read then the sequence is replaced by the unicode char. 3990 */ 3991 private static String grabLine(BufferedReader r) throws Exception { 3992 int index = 0; 3993 String line = r.readLine(); 3994 while (line.startsWith("//") || line.length() < 1) 3995 line = r.readLine(); 3996 while ((index = line.indexOf("\\n")) != -1) { 3997 StringBuffer temp = new StringBuffer(line); 3998 temp.replace(index, index+2, "\n"); 3999 line = temp.toString(); 4000 } 4001 while ((index = line.indexOf("\\u")) != -1) { 4002 StringBuffer temp = new StringBuffer(line); 4003 String value = temp.substring(index+2, index+6); 4004 char aChar = (char)Integer.parseInt(value, 16); 4005 String unicodeChar = "" + aChar; 4006 temp.replace(index, index+6, unicodeChar); 4007 line = temp.toString(); 4008 } 4009 4010 return line; 4011 } 4012 4013 private static void check(Pattern p, String s, String g, String expected) { 4014 Matcher m = p.matcher(s); 4015 m.find(); 4016 if (!m.group(g).equals(expected) || 4017 s.charAt(m.start(g)) != expected.charAt(0) || 4018 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 4019 failCount++; 4020 } 4021 4022 private static void checkReplaceFirst(String p, String s, String r, String expected) 4023 { 4024 if (!expected.equals(Pattern.compile(p) 4025 .matcher(s) 4026 .replaceFirst(r))) 4027 failCount++; 4028 } 4029 4030 private static void checkReplaceAll(String p, String s, String r, String expected) 4031 { 4032 if (!expected.equals(Pattern.compile(p) 4033 .matcher(s) 4034 .replaceAll(r))) 4035 failCount++; 4036 } 4037 4038 private static void checkExpectedFail(String p) { 4039 try { 4040 Pattern.compile(p); 4041 } catch (PatternSyntaxException pse) { 4042 //pse.printStackTrace(); 4043 return; 4044 } 4045 failCount++; 4046 } 4047 4048 private static void checkExpectedIAE(Matcher m, String g) { 4049 m.find(); 4050 try { 4051 m.group(g); 4052 } catch (IllegalArgumentException x) { 4053 //iae.printStackTrace(); 4054 try { 4055 m.start(g); 4056 } catch (IllegalArgumentException xx) { 4057 try { 4058 m.start(g); 4059 } catch (IllegalArgumentException xxx) { 4060 return; 4061 } 4062 } 4063 } 4064 failCount++; 4065 } 4066 4067 private static void checkExpectedNPE(Matcher m) { 4068 m.find(); 4069 try { 4070 m.group(null); 4071 } catch (NullPointerException x) { 4072 try { 4073 m.start(null); 4074 } catch (NullPointerException xx) { 4075 try { 4076 m.end(null); 4077 } catch (NullPointerException xxx) { 4078 return; 4079 } 4080 } 4081 } 4082 failCount++; 4083 } 4084 4085 private static void namedGroupCaptureTest() throws Exception { 4086 check(Pattern.compile("x+(?<gname>y+)z+"), 4087 "xxxyyyzzz", 4088 "gname", 4089 "yyy"); 4090 4091 check(Pattern.compile("x+(?<gname8>y+)z+"), 4092 "xxxyyyzzz", 4093 "gname8", 4094 "yyy"); 4095 4096 //backref 4097 Pattern pattern = Pattern.compile("(a*)bc\\1"); 4098 check(pattern, "zzzaabcazzz", true); // found "abca" 4099 4100 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 4101 "zzzaabcaazzz", true); 4102 4103 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 4104 "abcdefabc", true); 4105 4106 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 4107 "abcdefghijkk", true); 4108 4109 // Supplementary character tests 4110 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4111 toSupplementaries("zzzaabcazzz"), true); 4112 4113 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4114 toSupplementaries("zzzaabcaazzz"), true); 4115 4116 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 4117 toSupplementaries("abcdefabc"), true); 4118 4119 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 4120 "(?<gname>" + 4121 toSupplementaries("k)") + "\\k<gname>"), 4122 toSupplementaries("abcdefghijkk"), true); 4123 4124 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 4125 "xxxyyyzzzyyy", 4126 "gname", 4127 "yyy"); 4128 4129 //replaceFirst/All 4130 checkReplaceFirst("(?<gn>ab)(c*)", 4131 "abccczzzabcczzzabccc", 4132 "${gn}", 4133 "abzzzabcczzzabccc"); 4134 4135 checkReplaceAll("(?<gn>ab)(c*)", 4136 "abccczzzabcczzzabccc", 4137 "${gn}", 4138 "abzzzabzzzab"); 4139 4140 4141 checkReplaceFirst("(?<gn>ab)(c*)", 4142 "zzzabccczzzabcczzzabccczzz", 4143 "${gn}", 4144 "zzzabzzzabcczzzabccczzz"); 4145 4146 checkReplaceAll("(?<gn>ab)(c*)", 4147 "zzzabccczzzabcczzzabccczzz", 4148 "${gn}", 4149 "zzzabzzzabzzzabzzz"); 4150 4151 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4152 "zzzabccczzzabcczzzabccczzz", 4153 "${gn2}", 4154 "zzzccczzzabcczzzabccczzz"); 4155 4156 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4157 "zzzabccczzzabcczzzabccczzz", 4158 "${gn2}", 4159 "zzzccczzzcczzzccczzz"); 4160 4161 //toSupplementaries("(ab)(c*)")); 4162 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4163 ")(?<gn2>" + toSupplementaries("c") + "*)", 4164 toSupplementaries("abccczzzabcczzzabccc"), 4165 "${gn1}", 4166 toSupplementaries("abzzzabcczzzabccc")); 4167 4168 4169 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4170 ")(?<gn2>" + toSupplementaries("c") + "*)", 4171 toSupplementaries("abccczzzabcczzzabccc"), 4172 "${gn1}", 4173 toSupplementaries("abzzzabzzzab")); 4174 4175 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4176 ")(?<gn2>" + toSupplementaries("c") + "*)", 4177 toSupplementaries("abccczzzabcczzzabccc"), 4178 "${gn2}", 4179 toSupplementaries("ccczzzabcczzzabccc")); 4180 4181 4182 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4183 ")(?<gn2>" + toSupplementaries("c") + "*)", 4184 toSupplementaries("abccczzzabcczzzabccc"), 4185 "${gn2}", 4186 toSupplementaries("ccczzzcczzzccc")); 4187 4188 checkReplaceFirst("(?<dog>Dog)AndCat", 4189 "zzzDogAndCatzzzDogAndCatzzz", 4190 "${dog}", 4191 "zzzDogzzzDogAndCatzzz"); 4192 4193 4194 checkReplaceAll("(?<dog>Dog)AndCat", 4195 "zzzDogAndCatzzzDogAndCatzzz", 4196 "${dog}", 4197 "zzzDogzzzDogzzz"); 4198 4199 // backref in Matcher & String 4200 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") || 4201 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4202 failCount++; 4203 4204 // negative 4205 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4206 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4207 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4208 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4209 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4210 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4211 "gnameX"); 4212 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4213 report("NamedGroupCapture"); 4214 } 4215 4216 // This is for bug 6919132 4217 private static void nonBmpClassComplementTest() throws Exception { 4218 Pattern p = Pattern.compile("\\P{Lu}"); 4219 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4220 4221 if (m.find() && m.start() == 1) 4222 failCount++; 4223 4224 // from a unicode category 4225 p = Pattern.compile("\\P{Lu}"); 4226 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4227 if (m.find()) 4228 failCount++; 4229 if (!m.hitEnd()) 4230 failCount++; 4231 4232 // block 4233 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4234 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4235 if (m.find() && m.start() == 1) 4236 failCount++; 4237 4238 p = Pattern.compile("\\P{sc=GRANTHA}"); 4239 m = p.matcher(new String(new int[] {0x11350}, 0, 1)); 4240 if (m.find() && m.start() == 1) 4241 failCount++; 4242 4243 report("NonBmpClassComplement"); 4244 } 4245 4246 private static void unicodePropertiesTest() throws Exception { 4247 // different forms 4248 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4249 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4250 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4251 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4252 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4253 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4254 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4255 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4256 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4257 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4258 failCount++; 4259 4260 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4261 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4262 Matcher lastSM = common; 4263 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4264 4265 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4266 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4267 Matcher lastBM = latin; 4268 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4269 4270 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4271 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4272 continue; // only pick couple code points, they are the same 4273 } 4274 4275 // Unicode Script 4276 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4277 Matcher m; 4278 String str = new String(Character.toChars(cp)); 4279 if (script == lastScript) { 4280 m = lastSM; 4281 m.reset(str); 4282 } else { 4283 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4284 } 4285 if (!m.matches()) { 4286 failCount++; 4287 } 4288 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common; 4289 other.reset(str); 4290 if (other.matches()) { 4291 failCount++; 4292 } 4293 lastSM = m; 4294 lastScript = script; 4295 4296 // Unicode Block 4297 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4298 if (block == null) { 4299 //System.out.printf("Not a Block: cp=%x%n", cp); 4300 continue; 4301 } 4302 if (block == lastBlock) { 4303 m = lastBM; 4304 m.reset(str); 4305 } else { 4306 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4307 } 4308 if (!m.matches()) { 4309 failCount++; 4310 } 4311 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4312 other.reset(str); 4313 if (other.matches()) { 4314 failCount++; 4315 } 4316 lastBM = m; 4317 lastBlock = block; 4318 } 4319 report("unicodeProperties"); 4320 } 4321 4322 private static void unicodeHexNotationTest() throws Exception { 4323 4324 // negative 4325 checkExpectedFail("\\x{-23}"); 4326 checkExpectedFail("\\x{110000}"); 4327 checkExpectedFail("\\x{}"); 4328 checkExpectedFail("\\x{AB[ef]"); 4329 4330 // codepoint 4331 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4332 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4333 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4334 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4335 4336 // in class 4337 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4338 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4339 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4340 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4341 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4342 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4343 4344 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4345 String s = "A" + new String(Character.toChars(cp)) + "B"; 4346 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp) 4347 : String.format("\\u%04x\\u%04x", 4348 (int) Character.toChars(cp)[0], 4349 (int) Character.toChars(cp)[1]); 4350 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4351 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4352 failCount++; 4353 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4354 failCount++; 4355 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4356 failCount++; 4357 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4358 failCount++; 4359 } 4360 report("unicodeHexNotation"); 4361 } 4362 4363 private static void unicodeClassesTest() throws Exception { 4364 4365 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4366 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4367 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4368 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4369 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4370 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4371 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4372 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4373 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4374 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4375 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4376 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4377 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4378 Matcher bound = Pattern.compile("\\b").matcher(""); 4379 Matcher word = Pattern.compile("\\w++").matcher(""); 4380 // UNICODE_CHARACTER_CLASS 4381 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4382 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4383 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4384 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4385 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4386 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4387 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4388 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4389 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4390 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4391 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4392 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4393 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4394 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4395 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4396 // embedded flag (?U) 4397 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4398 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4399 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4400 4401 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4402 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4403 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4404 // properties 4405 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4406 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4407 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4408 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4409 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4410 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4411 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4412 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4413 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4414 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4415 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4416 // javaMethod 4417 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4418 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4419 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4420 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4421 // GC/C 4422 Matcher gcC = Pattern.compile("\\p{C}").matcher(""); 4423 4424 for (int cp = 1; cp < 0x30000; cp++) { 4425 String str = new String(Character.toChars(cp)); 4426 int type = Character.getType(cp); 4427 if (// lower 4428 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4429 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4430 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4431 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4432 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4433 // upper 4434 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4435 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4436 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4437 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4438 // alpha 4439 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4440 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4441 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4442 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4443 // digit 4444 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4445 Character.isDigit(cp) != digitU.reset(str).matches() || 4446 // alnum 4447 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4448 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4449 // punct 4450 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4451 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4452 // graph 4453 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4454 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4455 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4456 // blank 4457 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4458 != blank.reset(str).matches() || 4459 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4460 // print 4461 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4462 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4463 // cntrl 4464 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4465 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4466 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4467 // hexdigit 4468 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4469 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4470 // space 4471 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4472 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4473 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4474 // word 4475 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4476 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4477 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4478 // bwordb 4479 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4480 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4481 // properties 4482 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4483 Character.isLetter(cp) != letterP.reset(str).matches()|| 4484 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4485 Character.isIdeographic(cp) != ideogJ.reset(str).matches() || 4486 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 4487 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4488 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() || 4489 // gc_C 4490 (Character.CONTROL == type || Character.FORMAT == type || 4491 Character.PRIVATE_USE == type || Character.SURROGATE == type || 4492 Character.UNASSIGNED == type) 4493 != gcC.reset(str).matches()) { 4494 failCount++; 4495 } 4496 } 4497 4498 // bounds/word align 4499 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4500 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4501 failCount++; 4502 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4503 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4504 failCount++; 4505 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4506 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4507 failCount++; 4508 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4509 failCount++; 4510 report("unicodePredefinedClasses"); 4511 } 4512 4513 private static void unicodeCharacterNameTest() throws Exception { 4514 4515 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { 4516 if (!Character.isValidCodePoint(cp) || 4517 Character.getType(cp) == Character.UNASSIGNED) 4518 continue; 4519 String str = new String(Character.toChars(cp)); 4520 // single 4521 String p = "\\N{" + Character.getName(cp) + "}"; 4522 if (!Pattern.compile(p).matcher(str).matches()) { 4523 failCount++; 4524 } 4525 // class[c] 4526 p = "[\\N{" + Character.getName(cp) + "}]"; 4527 if (!Pattern.compile(p).matcher(str).matches()) { 4528 failCount++; 4529 } 4530 } 4531 4532 // range 4533 for (int i = 0; i < 10; i++) { 4534 int start = generator.nextInt(20); 4535 int end = start + generator.nextInt(200); 4536 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; 4537 String str; 4538 for (int cp = start; cp < end; cp++) { 4539 str = new String(Character.toChars(cp)); 4540 if (!Pattern.compile(p).matcher(str).matches()) { 4541 failCount++; 4542 } 4543 } 4544 str = new String(Character.toChars(end + 10)); 4545 if (Pattern.compile(p).matcher(str).matches()) { 4546 failCount++; 4547 } 4548 } 4549 4550 // slice 4551 for (int i = 0; i < 10; i++) { 4552 int n = generator.nextInt(256); 4553 int[] buf = new int[n]; 4554 StringBuffer sb = new StringBuffer(1024); 4555 for (int j = 0; j < n; j++) { 4556 int cp = generator.nextInt(1000); 4557 if (!Character.isValidCodePoint(cp) || 4558 Character.getType(cp) == Character.UNASSIGNED) 4559 cp = 0x4e00; // just use 4e00 4560 sb.append("\\N{" + Character.getName(cp) + "}"); 4561 buf[j] = cp; 4562 } 4563 String p = sb.toString(); 4564 String str = new String(buf, 0, buf.length); 4565 if (!Pattern.compile(p).matcher(str).matches()) { 4566 failCount++; 4567 } 4568 } 4569 report("unicodeCharacterName"); 4570 } 4571 4572 private static void horizontalAndVerticalWSTest() throws Exception { 4573 String hws = new String (new char[] { 4574 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4575 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4576 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4577 0x202f, 0x205f, 0x3000 }); 4578 String vws = new String (new char[] { 4579 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4580 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4581 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4582 failCount++; 4583 if (Pattern.compile("\\H").matcher(hws).find() || 4584 Pattern.compile("[\\H]").matcher(hws).find()) 4585 failCount++; 4586 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4587 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4588 failCount++; 4589 if (Pattern.compile("\\V").matcher(vws).find() || 4590 Pattern.compile("[\\V]").matcher(vws).find()) 4591 failCount++; 4592 String prefix = "abcd"; 4593 String suffix = "efgh"; 4594 String ng = "A"; 4595 for (int i = 0; i < hws.length(); i++) { 4596 String c = String.valueOf(hws.charAt(i)); 4597 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4598 if (!m.find() || !c.equals(m.group())) 4599 failCount++; 4600 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4601 if (!m.find() || !c.equals(m.group())) 4602 failCount++; 4603 4604 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4605 if (!m.find() || !ng.equals(m.group())) 4606 failCount++; 4607 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4608 if (!m.find() || !ng.equals(m.group())) 4609 failCount++; 4610 } 4611 for (int i = 0; i < vws.length(); i++) { 4612 String c = String.valueOf(vws.charAt(i)); 4613 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4614 if (!m.find() || !c.equals(m.group())) 4615 failCount++; 4616 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4617 if (!m.find() || !c.equals(m.group())) 4618 failCount++; 4619 4620 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4621 if (!m.find() || !ng.equals(m.group())) 4622 failCount++; 4623 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4624 if (!m.find() || !ng.equals(m.group())) 4625 failCount++; 4626 } 4627 // \v in range is interpreted as 0x0B. This is the undocumented behavior 4628 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4629 failCount++; 4630 report("horizontalAndVerticalWSTest"); 4631 } 4632 4633 private static void linebreakTest() throws Exception { 4634 String linebreaks = new String (new char[] { 4635 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4636 String crnl = "\r\n"; 4637 if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() && 4638 Pattern.compile("\\R").matcher(crnl).matches() && 4639 Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() && 4640 Pattern.compile("\\Rabc").matcher("\rabc").matches() && 4641 Pattern.compile("\\R\\R").matcher(crnl).matches() && // backtracking 4642 Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking 4643 !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029 4644 failCount++; 4645 } 4646 report("linebreakTest"); 4647 } 4648 4649 // #7189363 4650 private static void branchTest() throws Exception { 4651 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4652 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4653 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4654 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4655 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4656 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4657 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4658 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4659 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4660 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4661 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4662 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4663 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4664 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4665 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4666 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4667 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4668 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4669 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4670 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4671 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 4672 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4673 failCount++; 4674 report("branchTest"); 4675 } 4676 4677 // This test is for 8007395 4678 private static void groupCurlyNotFoundSuppTest() throws Exception { 4679 String input = "test this as \ud83d\ude0d"; 4680 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4681 "test(.)*(@[a-zA-Z.]+)", 4682 "test([^B])+(@[a-zA-Z.]+)", 4683 "test([^B])*(@[a-zA-Z.]+)", 4684 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4685 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4686 }) { 4687 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4688 .matcher(input); 4689 try { 4690 if (m.find()) { 4691 failCount++; 4692 } 4693 } catch (Exception x) { 4694 failCount++; 4695 } 4696 } 4697 report("GroupCurly NotFoundSupp"); 4698 } 4699 4700 // This test is for 8023647 4701 private static void groupCurlyBackoffTest() throws Exception { 4702 if (!"abc1c".matches("(\\w)+1\\1") || 4703 "abc11".matches("(\\w)+1\\1")) { 4704 failCount++; 4705 } 4706 report("GroupCurly backoff"); 4707 } 4708 4709 // This test is for 8012646 4710 private static void patternAsPredicate() throws Exception { 4711 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4712 4713 if (p.test("")) { 4714 failCount++; 4715 } 4716 if (!p.test("word")) { 4717 failCount++; 4718 } 4719 if (p.test("1234")) { 4720 failCount++; 4721 } 4722 if (!p.test("word1234")) { 4723 failCount++; 4724 } 4725 report("Pattern.asPredicate"); 4726 } 4727 4728 // This test is for 8184692 4729 private static void patternAsMatchPredicate() throws Exception { 4730 Predicate<String> p = Pattern.compile("[a-z]+").asMatchPredicate(); 4731 4732 if (p.test("")) { 4733 failCount++; 4734 } 4735 if (!p.test("word")) { 4736 failCount++; 4737 } 4738 if (p.test("1234word")) { 4739 failCount++; 4740 } 4741 if (p.test("1234")) { 4742 failCount++; 4743 } 4744 report("Pattern.asMatchPredicate"); 4745 } 4746 4747 4748 // This test is for 8035975 4749 private static void invalidFlags() throws Exception { 4750 for (int flag = 1; flag != 0; flag <<= 1) { 4751 switch (flag) { 4752 case Pattern.CASE_INSENSITIVE: 4753 case Pattern.MULTILINE: 4754 case Pattern.DOTALL: 4755 case Pattern.UNICODE_CASE: 4756 case Pattern.CANON_EQ: 4757 case Pattern.UNIX_LINES: 4758 case Pattern.LITERAL: 4759 case Pattern.UNICODE_CHARACTER_CLASS: 4760 case Pattern.COMMENTS: 4761 // valid flag, continue 4762 break; 4763 default: 4764 try { 4765 Pattern.compile(".", flag); 4766 failCount++; 4767 } catch (IllegalArgumentException expected) { 4768 } 4769 } 4770 } 4771 report("Invalid compile flags"); 4772 } 4773 4774 // This test is for 8158482 4775 private static void embeddedFlags() throws Exception { 4776 try { 4777 Pattern.compile("(?i).(?-i)."); 4778 Pattern.compile("(?m).(?-m)."); 4779 Pattern.compile("(?s).(?-s)."); 4780 Pattern.compile("(?d).(?-d)."); 4781 Pattern.compile("(?u).(?-u)."); 4782 Pattern.compile("(?c).(?-c)."); 4783 Pattern.compile("(?x).(?-x)."); 4784 Pattern.compile("(?U).(?-U)."); 4785 Pattern.compile("(?imsducxU).(?-imsducxU)."); 4786 } catch (PatternSyntaxException x) { 4787 failCount++; 4788 } 4789 report("Embedded flags"); 4790 } 4791 4792 private static void grapheme() throws Exception { 4793 Files.lines(UCDFiles.GRAPHEME_BREAK_TEST) 4794 .filter( ln -> ln.length() != 0 && !ln.startsWith("#") ) 4795 .forEach( ln -> { 4796 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); 4797 // System.out.println(str); 4798 String[] strs = ln.split("\u00f7|\u00d7"); 4799 StringBuilder src = new StringBuilder(); 4800 ArrayList<String> graphemes = new ArrayList<>(); 4801 StringBuilder buf = new StringBuilder(); 4802 int offBk = 0; 4803 for (String str : strs) { 4804 if (str.length() == 0) // first empty str 4805 continue; 4806 int cp = Integer.parseInt(str, 16); 4807 src.appendCodePoint(cp); 4808 buf.appendCodePoint(cp); 4809 offBk += (str.length() + 1); 4810 if (ln.charAt(offBk) == '\u00f7') { // DIV 4811 graphemes.add(buf.toString()); 4812 buf = new StringBuilder(); 4813 } 4814 } 4815 Pattern p = Pattern.compile("\\X"); 4816 Matcher m = p.matcher(src.toString()); 4817 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4818 for (String g : graphemes) { 4819 // System.out.printf(" grapheme:=[%s]%n", g); 4820 // (1) test \\X directly 4821 if (!m.find() || !m.group().equals(g)) { 4822 System.out.println("Failed \\X [" + ln + "] : " + g); 4823 failCount++; 4824 } 4825 // (2) test \\b{g} + \\X via Scanner 4826 boolean hasNext = s.hasNext(p); 4827 // if (!s.hasNext() || !s.next().equals(next)) { 4828 if (!s.hasNext(p) || !s.next(p).equals(g)) { 4829 System.out.println("Failed b{g} [" + ln + "] : " + g); 4830 failCount++; 4831 } 4832 } 4833 }); 4834 // some sanity checks 4835 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() || 4836 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() || 4837 !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches()) 4838 failCount++; 4839 // make sure "\b{n}" still works 4840 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches()) 4841 failCount++; 4842 report("Unicode extended grapheme cluster"); 4843 } 4844 4845 // hangup/timeout if go into exponential backtracking 4846 private static void expoBacktracking() throws Exception { 4847 4848 Object[][] patternMatchers = { 4849 // 6328855 4850 { "(.*\n*)*", 4851 "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)", 4852 false }, 4853 // 6192895 4854 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4855 "Hello World this is a test this is a test this is a test A", 4856 true }, 4857 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4858 "Hello World this is a test this is a test this is a test \u4e00 ", 4859 false }, 4860 { " *([a-z0-9]+ *)+", 4861 "hello world this is a test this is a test this is a test A", 4862 false }, 4863 // 4771934 [FIXED] #5013651? 4864 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4865 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com", 4866 true }, 4867 // 4866249 [FIXED] 4868 { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>", 4869 "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">", 4870 true }, 4871 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4872 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com", 4873 false }, 4874 // 6345469 4875 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4876 " < br/> < / p> <p> <html> <adfasfdasdf> </p>", 4877 true }, // --> matched 4878 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4879 " < br/> < / p> <p> <html> <adfasfdasdf> p </p>", 4880 false }, 4881 // 5026912 4882 { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$", 4883 "156580451111112225588087755221111111566969655555555", 4884 false}, 4885 // 6988218 4886 { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')", 4887 "'%)) order by ANGEBOT.ID", 4888 false}, // find 4889 // 6693451 4890 { "^(\\s*foo\\s*)*$", 4891 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo", 4892 true }, 4893 { "^(\\s*foo\\s*)*$", 4894 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo", 4895 false 4896 }, 4897 // 7006761 4898 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true}, 4899 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false}, 4900 // 8140212 4901 { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)", 4902 "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()", 4903 false 4904 }, 4905 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true}, 4906 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false}, 4907 4908 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true }, 4909 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4910 4911 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true}, 4912 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4913 4914 { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false}, 4915 4916 /* not fixed 4917 //8132141 ---> second level exponential backtracking 4918 { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*", 4919 "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" }, 4920 */ 4921 }; 4922 4923 for (Object[] pm : patternMatchers) { 4924 String p = (String)pm[0]; 4925 String s = (String)pm[1]; 4926 boolean r = (Boolean)pm[2]; 4927 if (r != Pattern.compile(p).matcher(s).matches()) { 4928 failCount++; 4929 } 4930 } 4931 } 4932 4933 private static void invalidGroupName() { 4934 // Invalid start of a group name 4935 for (String groupName : List.of("", ".", "0", "\u0040", "\u005b", 4936 "\u0060", "\u007b", "\u0416")) { 4937 for (String pat : List.of("(?<" + groupName + ">)", 4938 "\\k<" + groupName + ">")) { 4939 try { 4940 Pattern.compile(pat); 4941 failCount++; 4942 } catch (PatternSyntaxException e) { 4943 if (!e.getMessage().startsWith( 4944 "capturing group name does not start with a" 4945 + " Latin letter")) { 4946 failCount++; 4947 } 4948 } 4949 } 4950 } 4951 // Invalid char in a group name 4952 for (String groupName : List.of("a.", "b\u0040", "c\u005b", 4953 "d\u0060", "e\u007b", "f\u0416")) { 4954 for (String pat : List.of("(?<" + groupName + ">)", 4955 "\\k<" + groupName + ">")) { 4956 try { 4957 Pattern.compile(pat); 4958 failCount++; 4959 } catch (PatternSyntaxException e) { 4960 if (!e.getMessage().startsWith( 4961 "named capturing group is missing trailing '>'")) { 4962 failCount++; 4963 } 4964 } 4965 } 4966 } 4967 report("Invalid capturing group names"); 4968 } 4969 4970 private static void illegalRepetitionRange() { 4971 // huge integers > (2^31 - 1) 4972 String n = BigInteger.valueOf(1L << 32) 4973 .toString(); 4974 String m = BigInteger.valueOf(1L << 31) 4975 .add(new BigInteger(80, generator)) 4976 .toString(); 4977 for (String rep : List.of("", "x", ".", ",", "-1", "2,1", 4978 n, n + ",", "0," + n, n + "," + m, m, m + ",", "0," + m)) { 4979 String pat = ".{" + rep + "}"; 4980 try { 4981 Pattern.compile(pat); 4982 failCount++; 4983 System.out.println("Expected to fail. Pattern: " + pat); 4984 } catch (PatternSyntaxException e) { 4985 if (!e.getMessage().startsWith("Illegal repetition")) { 4986 failCount++; 4987 System.out.println("Unexpected error message: " + e.getMessage()); 4988 } 4989 } catch (Throwable t) { 4990 failCount++; 4991 System.out.println("Unexpected exception: " + t); 4992 } 4993 } 4994 report("illegalRepetitionRange"); 4995 } 4996 4997 private static void surrogatePairWithCanonEq() { 4998 try { 4999 Pattern.compile("\ud834\udd21", Pattern.CANON_EQ); 5000 } catch (Throwable t) { 5001 failCount++; 5002 System.out.println("Unexpected exception: " + t); 5003 } 5004 report("surrogatePairWithCanonEq"); 5005 } 5006 5007 // This test is for 8235812 5008 private static void lineBreakWithQuantifier() { 5009 // key: pattern 5010 // value: lengths of input that must match the pattern 5011 Map<String, List<Integer>> cases = Map.ofEntries( 5012 Map.entry("\\R?", List.of(0, 1)), 5013 Map.entry("\\R*", List.of(0, 1, 2, 3)), 5014 Map.entry("\\R+", List.of(1, 2, 3)), 5015 Map.entry("\\R{0}", List.of(0)), 5016 Map.entry("\\R{1}", List.of(1)), 5017 Map.entry("\\R{2}", List.of(2)), 5018 Map.entry("\\R{3}", List.of(3)), 5019 Map.entry("\\R{0,}", List.of(0, 1, 2, 3)), 5020 Map.entry("\\R{1,}", List.of(1, 2, 3)), 5021 Map.entry("\\R{2,}", List.of(2, 3)), 5022 Map.entry("\\R{3,}", List.of(3)), 5023 Map.entry("\\R{0,0}", List.of(0)), 5024 Map.entry("\\R{0,1}", List.of(0, 1)), 5025 Map.entry("\\R{0,2}", List.of(0, 1, 2)), 5026 Map.entry("\\R{0,3}", List.of(0, 1, 2, 3)), 5027 Map.entry("\\R{1,1}", List.of(1)), 5028 Map.entry("\\R{1,2}", List.of(1, 2)), 5029 Map.entry("\\R{1,3}", List.of(1, 2, 3)), 5030 Map.entry("\\R{2,2}", List.of(2)), 5031 Map.entry("\\R{2,3}", List.of(2, 3)), 5032 Map.entry("\\R{3,3}", List.of(3)), 5033 Map.entry("\\R", List.of(1)), 5034 Map.entry("\\R\\R", List.of(2)), 5035 Map.entry("\\R\\R\\R", List.of(3)) 5036 ); 5037 5038 // key: length of input 5039 // value: all possible inputs of given length 5040 Map<Integer, List<String>> inputs = new HashMap<>(); 5041 String[] Rs = { "\r\n", "\r", "\n", 5042 "\u000B", "\u000C", "\u0085", "\u2028", "\u2029" }; 5043 StringBuilder sb = new StringBuilder(); 5044 for (int len = 0; len <= 3; ++len) { 5045 int[] idx = new int[len + 1]; 5046 do { 5047 sb.setLength(0); 5048 for (int j = 0; j < len; ++j) 5049 sb.append(Rs[idx[j]]); 5050 inputs.computeIfAbsent(len, ArrayList::new).add(sb.toString()); 5051 idx[0]++; 5052 for (int j = 0; j < len; ++j) { 5053 if (idx[j] < Rs.length) 5054 break; 5055 idx[j] = 0; 5056 idx[j+1]++; 5057 } 5058 } while (idx[len] == 0); 5059 } 5060 5061 // exhaustive testing 5062 for (String patStr : cases.keySet()) { 5063 Pattern[] pats = patStr.endsWith("R") 5064 ? new Pattern[] { Pattern.compile(patStr) } // no quantifiers 5065 : new Pattern[] { Pattern.compile(patStr), // greedy 5066 Pattern.compile(patStr + "?") }; // reluctant 5067 Matcher m = pats[0].matcher(""); 5068 for (Pattern p : pats) { 5069 m.usePattern(p); 5070 for (int len : cases.get(patStr)) { 5071 for (String in : inputs.get(len)) { 5072 if (!m.reset(in).matches()) { 5073 failCount++; 5074 System.out.println("Expected to match '" + 5075 in + "' =~ /" + p + "/"); 5076 } 5077 } 5078 } 5079 } 5080 } 5081 report("lineBreakWithQuantifier"); 5082 } 5083 }