1 /* 2 * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895 37 * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706 38 * 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812 39 * 8216332 8214245 8237599 40 * 41 * @library /test/lib 42 * @library /lib/testlibrary/java/lang 43 * @build jdk.test.lib.RandomFactory 44 * @run main RegExTest 45 * @key randomness 46 */ 47 48 import java.io.BufferedReader; 49 import java.io.ByteArrayInputStream; 50 import java.io.ByteArrayOutputStream; 51 import java.io.File; 52 import java.io.FileInputStream; 53 import java.io.InputStreamReader; 54 import java.io.ObjectInputStream; 55 import java.io.ObjectOutputStream; 56 import java.math.BigInteger; 57 import java.nio.CharBuffer; 58 import java.nio.file.Files; 59 import java.nio.file.Path; 60 import java.nio.file.Paths; 61 import java.util.ArrayList; 62 import java.util.Arrays; 63 import java.util.HashMap; 64 import java.util.List; 65 import java.util.Map; 66 import java.util.Random; 67 import java.util.Scanner; 68 import java.util.function.Function; 69 import java.util.function.Predicate; 70 import java.util.regex.Matcher; 71 import java.util.regex.MatchResult; 72 import java.util.regex.Pattern; 73 import 
java.util.regex.PatternSyntaxException; 74 import java.util.stream.Stream; 75 76 import jdk.test.lib.RandomFactory; 77 78 /** 79 * This is a test class created to check the operation of 80 * the Pattern and Matcher classes. 81 */ 82 public class RegExTest { 83 84 private static Random generator = RandomFactory.getRandom(); 85 private static boolean failure = false; 86 private static int failCount = 0; 87 private static String firstFailure = null; 88 89 /** 90 * Main to interpret arguments and run several tests. 91 * 92 */ 93 public static void main(String[] args) throws Exception { 94 // Most of the tests are in a file 95 processFile("TestCases.txt"); 96 //processFile("PerlCases.txt"); 97 processFile("BMPTestCases.txt"); 98 processFile("SupplementaryTestCases.txt"); 99 100 // These test many randomly generated char patterns 101 bm(); 102 slice(); 103 104 // These are hard to put into the file 105 escapes(); 106 blankInput(); 107 108 // Substitition tests on randomly generated sequences 109 globalSubstitute(); 110 stringbufferSubstitute(); 111 stringbuilderSubstitute(); 112 113 substitutionBasher(); 114 substitutionBasher2(); 115 116 // Canonical Equivalence 117 ceTest(); 118 119 // Anchors 120 anchorTest(); 121 122 // boolean match calls 123 matchesTest(); 124 lookingAtTest(); 125 126 // Pattern API 127 patternMatchesTest(); 128 129 // Misc 130 lookbehindTest(); 131 nullArgumentTest(); 132 backRefTest(); 133 groupCaptureTest(); 134 caretTest(); 135 charClassTest(); 136 emptyPatternTest(); 137 findIntTest(); 138 group0Test(); 139 longPatternTest(); 140 octalTest(); 141 ampersandTest(); 142 negationTest(); 143 splitTest(); 144 appendTest(); 145 caseFoldingTest(); 146 commentsTest(); 147 unixLinesTest(); 148 replaceFirstTest(); 149 gTest(); 150 zTest(); 151 serializeTest(); 152 reluctantRepetitionTest(); 153 multilineDollarTest(); 154 dollarAtEndTest(); 155 caretBetweenTerminatorsTest(); 156 // This RFE rejected in Tiger numOccurrencesTest(); 157 javaCharClassTest(); 
158 nonCaptureRepetitionTest(); 159 notCapturedGroupCurlyMatchTest(); 160 escapedSegmentTest(); 161 literalPatternTest(); 162 literalReplacementTest(); 163 regionTest(); 164 toStringTest(); 165 negatedCharClassTest(); 166 findFromTest(); 167 boundsTest(); 168 unicodeWordBoundsTest(); 169 caretAtEndTest(); 170 wordSearchTest(); 171 hitEndTest(); 172 toMatchResultTest(); 173 toMatchResultTest2(); 174 surrogatesInClassTest(); 175 removeQEQuotingTest(); 176 namedGroupCaptureTest(); 177 nonBmpClassComplementTest(); 178 unicodePropertiesTest(); 179 unicodeHexNotationTest(); 180 unicodeClassesTest(); 181 unicodeCharacterNameTest(); 182 horizontalAndVerticalWSTest(); 183 linebreakTest(); 184 branchTest(); 185 groupCurlyNotFoundSuppTest(); 186 groupCurlyBackoffTest(); 187 patternAsPredicate(); 188 patternAsMatchPredicate(); 189 invalidFlags(); 190 embeddedFlags(); 191 grapheme(); 192 expoBacktracking(); 193 invalidGroupName(); 194 illegalRepetitionRange(); 195 surrogatePairWithCanonEq(); 196 lineBreakWithQuantifier(); 197 caseInsensitivePMatch(); 198 surrogatePairOverlapRegion(); 199 200 if (failure) { 201 throw new 202 RuntimeException("RegExTest failed, 1st failure: " + 203 firstFailure); 204 } else { 205 System.err.println("OKAY: All tests passed."); 206 } 207 } 208 209 // Utility functions 210 211 private static String getRandomAlphaString(int length) { 212 StringBuffer buf = new StringBuffer(length); 213 for (int i=0; i<length; i++) { 214 char randChar = (char)(97 + generator.nextInt(26)); 215 buf.append(randChar); 216 } 217 return buf.toString(); 218 } 219 220 private static void check(Matcher m, String expected) { 221 m.find(); 222 if (!m.group().equals(expected)) 223 failCount++; 224 } 225 226 private static void check(Matcher m, String result, boolean expected) { 227 m.find(); 228 if (m.group().equals(result) != expected) 229 failCount++; 230 } 231 232 private static void check(Pattern p, String s, boolean expected) { 233 if (p.matcher(s).find() != expected) 234 
failCount++; 235 } 236 237 private static void check(String p, String s, boolean expected) { 238 Matcher matcher = Pattern.compile(p).matcher(s); 239 if (matcher.find() != expected) 240 failCount++; 241 } 242 243 private static void check(String p, char c, boolean expected) { 244 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 245 Pattern pattern = Pattern.compile(propertyPattern); 246 char[] ca = new char[1]; ca[0] = c; 247 Matcher matcher = pattern.matcher(new String(ca)); 248 if (!matcher.find()) 249 failCount++; 250 } 251 252 private static void check(String p, int codePoint, boolean expected) { 253 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 254 Pattern pattern = Pattern.compile(propertyPattern); 255 char[] ca = Character.toChars(codePoint); 256 Matcher matcher = pattern.matcher(new String(ca)); 257 if (!matcher.find()) 258 failCount++; 259 } 260 261 private static void check(String p, int flag, String input, String s, 262 boolean expected) 263 { 264 Pattern pattern = Pattern.compile(p, flag); 265 Matcher matcher = pattern.matcher(input); 266 if (expected) 267 check(matcher, s, expected); 268 else 269 check(pattern, input, false); 270 } 271 272 private static void report(String testName) { 273 int spacesToAdd = 30 - testName.length(); 274 StringBuffer paddedNameBuffer = new StringBuffer(testName); 275 for (int i=0; i<spacesToAdd; i++) 276 paddedNameBuffer.append(" "); 277 String paddedName = paddedNameBuffer.toString(); 278 System.err.println(paddedName + ": " + 279 (failCount==0 ? "Passed":"Failed("+failCount+")")); 280 if (failCount > 0) { 281 failure = true; 282 283 if (firstFailure == null) { 284 firstFailure = testName; 285 } 286 } 287 288 failCount = 0; 289 } 290 291 /** 292 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 293 * supplementary characters. This method does NOT fully take care 294 * of the regex syntax. 
295 */ 296 private static String toSupplementaries(String s) { 297 int length = s.length(); 298 StringBuffer sb = new StringBuffer(length * 2); 299 300 for (int i = 0; i < length; ) { 301 char c = s.charAt(i++); 302 if (c == '\\') { 303 sb.append(c); 304 if (i < length) { 305 c = s.charAt(i++); 306 sb.append(c); 307 if (c == 'u') { 308 // assume no syntax error 309 sb.append(s.charAt(i++)); 310 sb.append(s.charAt(i++)); 311 sb.append(s.charAt(i++)); 312 sb.append(s.charAt(i++)); 313 } 314 } 315 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 316 sb.append('\ud800').append((char)('\udc00'+c)); 317 } else { 318 sb.append(c); 319 } 320 } 321 return sb.toString(); 322 } 323 324 // Regular expression tests 325 326 // This is for bug 6178785 327 // Test if an expected NPE gets thrown when passing in a null argument 328 private static boolean check(Runnable test) { 329 try { 330 test.run(); 331 failCount++; 332 return false; 333 } catch (NullPointerException npe) { 334 return true; 335 } 336 } 337 338 private static void nullArgumentTest() { 339 check(() -> Pattern.compile(null)); 340 check(() -> Pattern.matches(null, null)); 341 check(() -> Pattern.matches("xyz", null)); 342 check(() -> Pattern.quote(null)); 343 check(() -> Pattern.compile("xyz").split(null)); 344 check(() -> Pattern.compile("xyz").matcher(null)); 345 346 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 347 m.matches(); 348 check(() -> m.appendTail((StringBuffer) null)); 349 check(() -> m.appendTail((StringBuilder)null)); 350 check(() -> m.replaceAll((String) null)); 351 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 352 check(() -> m.replaceFirst((String)null)); 353 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 354 check(() -> m.appendReplacement((StringBuffer)null, null)); 355 check(() -> m.appendReplacement((StringBuilder)null, null)); 356 check(() -> m.reset(null)); 357 check(() -> Matcher.quoteReplacement(null)); 358 //check(() -> 
m.usePattern(null)); 359 360 report("Null Argument"); 361 } 362 363 // This is for bug6635133 364 // Test if surrogate pair in Unicode escapes can be handled correctly. 365 private static void surrogatesInClassTest() throws Exception { 366 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 367 Matcher matcher = pattern.matcher("\ud834\udd22"); 368 if (!matcher.find()) 369 failCount++; 370 371 report("Surrogate pair in Unicode escape"); 372 } 373 374 // This is for bug6990617 375 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 376 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 377 // char is an octal digit. 378 private static void removeQEQuotingTest() throws Exception { 379 Pattern pattern = 380 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 381 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 382 if (!matcher.find()) 383 failCount++; 384 385 report("Remove Q/E Quoting"); 386 } 387 388 // This is for bug 4988891 389 // Test toMatchResult to see that it is a copy of the Matcher 390 // that is not affected by subsequent operations on the original 391 private static void toMatchResultTest() throws Exception { 392 Pattern pattern = Pattern.compile("squid"); 393 Matcher matcher = pattern.matcher( 394 "agiantsquidofdestinyasmallsquidoffate"); 395 matcher.find(); 396 int matcherStart1 = matcher.start(); 397 MatchResult mr = matcher.toMatchResult(); 398 if (mr == matcher) 399 failCount++; 400 int resultStart1 = mr.start(); 401 if (matcherStart1 != resultStart1) 402 failCount++; 403 matcher.find(); 404 int matcherStart2 = matcher.start(); 405 int resultStart2 = mr.start(); 406 if (matcherStart2 == resultStart2) 407 failCount++; 408 if (resultStart1 != resultStart2) 409 failCount++; 410 MatchResult mr2 = matcher.toMatchResult(); 411 if (mr == mr2) 412 failCount++; 413 if (mr2.start() != matcherStart2) 414 failCount++; 415 report("toMatchResult is a copy"); 416 } 417 418 private 
static void checkExpectedISE(Runnable test) { 419 try { 420 test.run(); 421 failCount++; 422 } catch (IllegalStateException x) { 423 } catch (IndexOutOfBoundsException xx) { 424 failCount++; 425 } 426 } 427 428 private static void checkExpectedIOOE(Runnable test) { 429 try { 430 test.run(); 431 failCount++; 432 } catch (IndexOutOfBoundsException x) {} 433 } 434 435 // This is for bug 8074678 436 // Test the result of toMatchResult throws ISE if no match is availble 437 private static void toMatchResultTest2() throws Exception { 438 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 439 matcher.find(); 440 MatchResult mr = matcher.toMatchResult(); 441 442 checkExpectedISE(() -> mr.start()); 443 checkExpectedISE(() -> mr.start(2)); 444 checkExpectedISE(() -> mr.end()); 445 checkExpectedISE(() -> mr.end(2)); 446 checkExpectedISE(() -> mr.group()); 447 checkExpectedISE(() -> mr.group(2)); 448 449 matcher = Pattern.compile("(match)").matcher("there is a match"); 450 matcher.find(); 451 MatchResult mr2 = matcher.toMatchResult(); 452 checkExpectedIOOE(() -> mr2.start(2)); 453 checkExpectedIOOE(() -> mr2.end(2)); 454 checkExpectedIOOE(() -> mr2.group(2)); 455 456 report("toMatchResult2 appropriate exceptions"); 457 } 458 459 // This is for bug 5013885 460 // Must test a slice to see if it reports hitEnd correctly 461 private static void hitEndTest() throws Exception { 462 // Basic test of Slice node 463 Pattern p = Pattern.compile("^squidattack"); 464 Matcher m = p.matcher("squack"); 465 m.find(); 466 if (m.hitEnd()) 467 failCount++; 468 m.reset("squid"); 469 m.find(); 470 if (!m.hitEnd()) 471 failCount++; 472 473 // Test Slice, SliceA and SliceU nodes 474 for (int i=0; i<3; i++) { 475 int flags = 0; 476 if (i==1) flags = Pattern.CASE_INSENSITIVE; 477 if (i==2) flags = Pattern.UNICODE_CASE; 478 p = Pattern.compile("^abc", flags); 479 m = p.matcher("ad"); 480 m.find(); 481 if (m.hitEnd()) 482 failCount++; 483 m.reset("ab"); 484 m.find(); 485 if 
(!m.hitEnd()) 486 failCount++; 487 } 488 489 // Test Boyer-Moore node 490 p = Pattern.compile("catattack"); 491 m = p.matcher("attack"); 492 m.find(); 493 if (!m.hitEnd()) 494 failCount++; 495 496 p = Pattern.compile("catattack"); 497 m = p.matcher("attackattackattackcatatta"); 498 m.find(); 499 if (!m.hitEnd()) 500 failCount++; 501 502 // 8184706: Matching u+0d at EOL against \R should hit-end 503 p = Pattern.compile("...\\R"); 504 m = p.matcher("cat" + (char)0x0a); 505 m.find(); 506 if (m.hitEnd()) 507 failCount++; 508 509 m = p.matcher("cat" + (char)0x0d); 510 m.find(); 511 if (!m.hitEnd()) 512 failCount++; 513 514 m = p.matcher("cat" + (char)0x0d + (char)0x0a); 515 m.find(); 516 if (m.hitEnd()) 517 failCount++; 518 519 report("hitEnd"); 520 } 521 522 // This is for bug 4997476 523 // It is weird code submitted by customer demonstrating a regression 524 private static void wordSearchTest() throws Exception { 525 String testString = new String("word1 word2 word3"); 526 Pattern p = Pattern.compile("\\b"); 527 Matcher m = p.matcher(testString); 528 int position = 0; 529 int start = 0; 530 while (m.find(position)) { 531 start = m.start(); 532 if (start == testString.length()) 533 break; 534 if (m.find(start+1)) { 535 position = m.start(); 536 } else { 537 position = testString.length(); 538 } 539 if (testString.substring(start, position).equals(" ")) 540 continue; 541 if (!testString.substring(start, position-1).startsWith("word")) 542 failCount++; 543 } 544 report("Customer word search"); 545 } 546 547 // This is for bug 4994840 548 private static void caretAtEndTest() throws Exception { 549 // Problem only occurs with multiline patterns 550 // containing a beginning-of-line caret "^" followed 551 // by an expression that also matches the empty string. 
552 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 553 Matcher matcher = pattern.matcher("\r"); 554 matcher.find(); 555 matcher.find(); 556 report("Caret at end"); 557 } 558 559 // This test is for 4979006 560 // Check to see if word boundary construct properly handles unicode 561 // non spacing marks 562 private static void unicodeWordBoundsTest() throws Exception { 563 String spaces = " "; 564 String wordChar = "a"; 565 String nsm = "\u030a"; 566 567 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 568 569 Pattern pattern = Pattern.compile("\\b"); 570 Matcher matcher = pattern.matcher(""); 571 // S=other B=word character N=non spacing mark .=word boundary 572 // SS.BB.SS 573 String input = spaces + wordChar + wordChar + spaces; 574 twoFindIndexes(input, matcher, 2, 4); 575 // SS.BBN.SS 576 input = spaces + wordChar +wordChar + nsm + spaces; 577 twoFindIndexes(input, matcher, 2, 5); 578 // SS.BN.SS 579 input = spaces + wordChar + nsm + spaces; 580 twoFindIndexes(input, matcher, 2, 4); 581 // SS.BNN.SS 582 input = spaces + wordChar + nsm + nsm + spaces; 583 twoFindIndexes(input, matcher, 2, 5); 584 // SSN.BB.SS 585 input = spaces + nsm + wordChar + wordChar + spaces; 586 twoFindIndexes(input, matcher, 3, 5); 587 // SS.BNB.SS 588 input = spaces + wordChar + nsm + wordChar + spaces; 589 twoFindIndexes(input, matcher, 2, 5); 590 // SSNNSS 591 input = spaces + nsm + nsm + spaces; 592 matcher.reset(input); 593 if (matcher.find()) 594 failCount++; 595 // SSN.BBN.SS 596 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 597 twoFindIndexes(input, matcher, 3, 6); 598 599 report("Unicode word boundary"); 600 } 601 602 private static void twoFindIndexes(String input, Matcher matcher, int a, 603 int b) throws Exception 604 { 605 matcher.reset(input); 606 matcher.find(); 607 if (matcher.start() != a) 608 failCount++; 609 matcher.find(); 610 if (matcher.start() != b) 611 failCount++; 612 } 613 614 // This test is for 6284152 615 static 
void check(String regex, String input, String[] expected) { 616 List<String> result = new ArrayList<String>(); 617 Pattern p = Pattern.compile(regex); 618 Matcher m = p.matcher(input); 619 while (m.find()) { 620 result.add(m.group()); 621 } 622 if (!Arrays.asList(expected).equals(result)) 623 failCount++; 624 } 625 626 private static void lookbehindTest() throws Exception { 627 //Positive 628 check("(?<=%.{0,5})foo\\d", 629 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 630 new String[]{"foo1", "foo2", "foo3"}); 631 632 //boundary at end of the lookbehind sub-regex should work consistently 633 //with the boundary just after the lookbehind sub-regex 634 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 635 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 636 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 637 check("(?<!abc \\b)foo", "abc foo", new String[0]); 638 639 //Negative 640 check("(?<!%.{0,5})foo\\d", 641 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 642 new String[] {"foo4", "foo5"}); 643 644 //Positive greedy 645 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 646 647 //Positive reluctant 648 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 649 650 //supplementary 651 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 652 new String[] {"fo\ud800\udc00o"}); 653 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 654 new String[] {"fo\ud800\udc00o"}); 655 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 656 new String[] {"fo\ud800\udc00o"}); 657 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 658 new String[] {"fo\ud800\udc00o"}); 659 report("Lookbehind"); 660 } 661 662 // This test is for 4938995 663 // Check to see if weak region boundaries are transparent to 664 // lookahead and lookbehind constructs 665 private static void boundsTest() throws Exception { 666 String fullMessage = "catdogcat"; 667 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 668 
Matcher matcher = pattern.matcher("catdogca"); 669 matcher.useTransparentBounds(true); 670 if (matcher.find()) 671 failCount++; 672 matcher.reset("atdogcat"); 673 if (matcher.find()) 674 failCount++; 675 matcher.reset(fullMessage); 676 if (!matcher.find()) 677 failCount++; 678 matcher.reset(fullMessage); 679 matcher.region(0,9); 680 if (!matcher.find()) 681 failCount++; 682 matcher.reset(fullMessage); 683 matcher.region(0,6); 684 if (!matcher.find()) 685 failCount++; 686 matcher.reset(fullMessage); 687 matcher.region(3,6); 688 if (!matcher.find()) 689 failCount++; 690 matcher.useTransparentBounds(false); 691 if (matcher.find()) 692 failCount++; 693 694 // Negative lookahead/lookbehind 695 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 696 matcher = pattern.matcher("dogcat"); 697 matcher.useTransparentBounds(true); 698 matcher.region(0,3); 699 if (matcher.find()) 700 failCount++; 701 matcher.reset("catdog"); 702 matcher.region(3,6); 703 if (matcher.find()) 704 failCount++; 705 matcher.useTransparentBounds(false); 706 matcher.reset("dogcat"); 707 matcher.region(0,3); 708 if (!matcher.find()) 709 failCount++; 710 matcher.reset("catdog"); 711 matcher.region(3,6); 712 if (!matcher.find()) 713 failCount++; 714 715 report("Region bounds transparency"); 716 } 717 718 // This test is for 4945394 719 private static void findFromTest() throws Exception { 720 String message = "This is 40 $0 message."; 721 Pattern pat = Pattern.compile("\\$0"); 722 Matcher match = pat.matcher(message); 723 if (!match.find()) 724 failCount++; 725 if (match.find()) 726 failCount++; 727 if (match.find()) 728 failCount++; 729 report("Check for alternating find"); 730 } 731 732 // This test is for 4872664 and 4892980 733 private static void negatedCharClassTest() throws Exception { 734 Pattern pattern = Pattern.compile("[^>]"); 735 Matcher matcher = pattern.matcher("\u203A"); 736 if (!matcher.matches()) 737 failCount++; 738 pattern = Pattern.compile("[^fr]"); 739 matcher = pattern.matcher("a"); 
740 if (!matcher.find()) 741 failCount++; 742 matcher.reset("\u203A"); 743 if (!matcher.find()) 744 failCount++; 745 String s = "for"; 746 String result[] = s.split("[^fr]"); 747 if (!result[0].equals("f")) 748 failCount++; 749 if (!result[1].equals("r")) 750 failCount++; 751 s = "f\u203Ar"; 752 result = s.split("[^fr]"); 753 if (!result[0].equals("f")) 754 failCount++; 755 if (!result[1].equals("r")) 756 failCount++; 757 758 // Test adding to bits, subtracting a node, then adding to bits again 759 pattern = Pattern.compile("[^f\u203Ar]"); 760 matcher = pattern.matcher("a"); 761 if (!matcher.find()) 762 failCount++; 763 matcher.reset("f"); 764 if (matcher.find()) 765 failCount++; 766 matcher.reset("\u203A"); 767 if (matcher.find()) 768 failCount++; 769 matcher.reset("r"); 770 if (matcher.find()) 771 failCount++; 772 matcher.reset("\u203B"); 773 if (!matcher.find()) 774 failCount++; 775 776 // Test subtracting a node, adding to bits, subtracting again 777 pattern = Pattern.compile("[^\u203Ar\u203B]"); 778 matcher = pattern.matcher("a"); 779 if (!matcher.find()) 780 failCount++; 781 matcher.reset("\u203A"); 782 if (matcher.find()) 783 failCount++; 784 matcher.reset("r"); 785 if (matcher.find()) 786 failCount++; 787 matcher.reset("\u203B"); 788 if (matcher.find()) 789 failCount++; 790 matcher.reset("\u203C"); 791 if (!matcher.find()) 792 failCount++; 793 794 report("Negated Character Class"); 795 } 796 797 // This test is for 4628291 798 private static void toStringTest() throws Exception { 799 Pattern pattern = Pattern.compile("b+"); 800 if (pattern.toString() != "b+") 801 failCount++; 802 Matcher matcher = pattern.matcher("aaabbbccc"); 803 String matcherString = matcher.toString(); // unspecified 804 matcher.find(); 805 matcherString = matcher.toString(); // unspecified 806 matcher.region(0,3); 807 matcherString = matcher.toString(); // unspecified 808 matcher.reset(); 809 matcherString = matcher.toString(); // unspecified 810 report("toString"); 811 } 812 813 // 
// This test is for 4808962
// Exercises Pattern.LITERAL and Pattern.quote(): each section compiles a
// pattern and uses check(Pattern, String, boolean) to assert whether a
// find on the given input succeeds. The second half repeats the same
// cases with ASCII letters mapped through toSupplementaries().
private static void literalPatternTest() throws Exception {
    int flags = Pattern.LITERAL;

    // With LITERAL, metacharacters and escapes match themselves
    Pattern pattern = Pattern.compile("abc\\t$^", flags);
    check(pattern, "abc\\t$^", true);

    pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
    check(pattern, "abc\\t$^", true);

    // With LITERAL, the Q...E markers are part of the text to match
    pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
    check(pattern, "\\Qa^$bcabc\\E", true);
    check(pattern, "a^$bcabc", false);

    // No LITERAL flag from here: escaped backslashes next to Q and E
    pattern = Pattern.compile("\\\\Q\\\\E");
    check(pattern, "\\Q\\E", true);

    pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
    check(pattern, "abcefg\\Q\\Ehij", true);

    pattern = Pattern.compile("\\\\\\Q\\\\E");
    check(pattern, "\\\\\\\\", true);

    // Pattern.quote() of text that itself contains Q/E markers
    pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
    check(pattern, "\\Qa^$bcabc\\E", true);
    check(pattern, "a^$bcabc", false);

    pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
    check(pattern, "\\Qabc\\Edef", true);
    check(pattern, "abcdef", false);

    pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
    check(pattern, "abc\\Edef", true);
    check(pattern, "abcdef", false);

    pattern = Pattern.compile(Pattern.quote("\\E"));
    check(pattern, "\\E", true);

    // Unbalanced parentheses compile and match as plain text under LITERAL
    pattern = Pattern.compile("((((abc.+?:)", flags);
    check(pattern, "((((abc.+?:)", true);

    // From here on, further flags are OR-ed onto LITERAL one at a time
    flags |= Pattern.MULTILINE;

    pattern = Pattern.compile("^cat$", flags);
    check(pattern, "abc^cat$def", true);
    check(pattern, "cat", false);

    flags |= Pattern.CASE_INSENSITIVE;

    pattern = Pattern.compile("abcdef", flags);
    check(pattern, "ABCDEF", true);
    check(pattern, "AbCdEf", true);

    flags |= Pattern.DOTALL;

    pattern = Pattern.compile("a...b", flags);
    check(pattern, "A...b", true);
    check(pattern, "Axxxb", false);

    flags |= Pattern.CANON_EQ;

    // NOTE(review): 'p' is compiled here but never checked. The two
    // checks below still run against 'pattern' (the "a...b" literal
    // compiled above), so they pass regardless of how LITERAL and
    // CANON_EQ interact. Confirm the intended expectations before
    // switching them to 'p'.
    Pattern p = Pattern.compile("testa\u030a", flags);
    check(pattern, "testa\u030a", false);
    check(pattern, "test\u00e5", false);

    // Supplementary character test
    flags = Pattern.LITERAL;

    pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
    check(pattern, toSupplementaries("abc\\t$^"), true);

    pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
    check(pattern, toSupplementaries("abc\\t$^"), true);

    pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
    check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
    check(pattern, toSupplementaries("a^$bcabc"), false);

    pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
    check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
    check(pattern, toSupplementaries("a^$bcabc"), false);

    pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
    check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
    check(pattern, toSupplementaries("abcdef"), false);

    pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
    check(pattern, toSupplementaries("abc\\Edef"), true);
    check(pattern, toSupplementaries("abcdef"), false);

    pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
    check(pattern, toSupplementaries("((((abc.+?:)"), true);

    flags |= Pattern.MULTILINE;

    pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
    check(pattern, toSupplementaries("abc^cat$def"), true);
    check(pattern, toSupplementaries("cat"), false);

    flags |= Pattern.DOTALL;

    // note: this is case-sensitive.
    pattern = Pattern.compile(toSupplementaries("a...b"), flags);
    check(pattern, toSupplementaries("a...b"), true);
    check(pattern, toSupplementaries("axxxb"), false);

    flags |= Pattern.CANON_EQ;

    // NOTE(review): same situation as in the BMP section above. 'p' is
    // reassigned but the checks still use 'pattern' (the supplementary
    // "a...b" literal).
    String t = toSupplementaries("test");
    p = Pattern.compile(t + "a\u030a", flags);
    check(pattern, t + "a\u030a", false);
    check(pattern, t + "\u00e5", false);

    report("Literal pattern");
}
(!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 973 failCount++; 974 975 // IAE should be thrown if backslash or '$' is the last character 976 // in replacement string 977 try { 978 "\uac00".replaceAll("\uac00", "$"); 979 failCount++; 980 } catch (IllegalArgumentException iie) { 981 } catch (Exception e) { 982 failCount++; 983 } 984 try { 985 "\uac00".replaceAll("\uac00", "\\"); 986 failCount++; 987 } catch (IllegalArgumentException iie) { 988 } catch (Exception e) { 989 failCount++; 990 } 991 report("Literal replacement"); 992 } 993 994 // This test is for 4757029 995 private static void regionTest() throws Exception { 996 Pattern pattern = Pattern.compile("abc"); 997 Matcher matcher = pattern.matcher("abcdefabc"); 998 999 matcher.region(0,9); 1000 if (!matcher.find()) 1001 failCount++; 1002 if (!matcher.find()) 1003 failCount++; 1004 matcher.region(0,3); 1005 if (!matcher.find()) 1006 failCount++; 1007 matcher.region(3,6); 1008 if (matcher.find()) 1009 failCount++; 1010 matcher.region(0,2); 1011 if (matcher.find()) 1012 failCount++; 1013 1014 expectRegionFail(matcher, 1, -1); 1015 expectRegionFail(matcher, -1, -1); 1016 expectRegionFail(matcher, -1, 1); 1017 expectRegionFail(matcher, 5, 3); 1018 expectRegionFail(matcher, 5, 12); 1019 expectRegionFail(matcher, 12, 12); 1020 1021 pattern = Pattern.compile("^abc$"); 1022 matcher = pattern.matcher("zzzabczzz"); 1023 matcher.region(0,9); 1024 if (matcher.find()) 1025 failCount++; 1026 matcher.region(3,6); 1027 if (!matcher.find()) 1028 failCount++; 1029 matcher.region(3,6); 1030 matcher.useAnchoringBounds(false); 1031 if (matcher.find()) 1032 failCount++; 1033 1034 // Supplementary character test 1035 pattern = Pattern.compile(toSupplementaries("abc")); 1036 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 1037 matcher.region(0,9*2); 1038 if (!matcher.find()) 1039 failCount++; 1040 if (!matcher.find()) 1041 failCount++; 1042 matcher.region(0,3*2); 1043 if (!matcher.find()) 1044 failCount++; 1045 
matcher.region(1,3*2); 1046 if (matcher.find()) 1047 failCount++; 1048 matcher.region(3*2,6*2); 1049 if (matcher.find()) 1050 failCount++; 1051 matcher.region(0,2*2); 1052 if (matcher.find()) 1053 failCount++; 1054 matcher.region(0,2*2+1); 1055 if (matcher.find()) 1056 failCount++; 1057 1058 expectRegionFail(matcher, 1*2, -1); 1059 expectRegionFail(matcher, -1, -1); 1060 expectRegionFail(matcher, -1, 1*2); 1061 expectRegionFail(matcher, 5*2, 3*2); 1062 expectRegionFail(matcher, 5*2, 12*2); 1063 expectRegionFail(matcher, 12*2, 12*2); 1064 1065 pattern = Pattern.compile(toSupplementaries("^abc$")); 1066 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1067 matcher.region(0,9*2); 1068 if (matcher.find()) 1069 failCount++; 1070 matcher.region(3*2,6*2); 1071 if (!matcher.find()) 1072 failCount++; 1073 matcher.region(3*2+1,6*2); 1074 if (matcher.find()) 1075 failCount++; 1076 matcher.region(3*2,6*2-1); 1077 if (matcher.find()) 1078 failCount++; 1079 matcher.region(3*2,6*2); 1080 matcher.useAnchoringBounds(false); 1081 if (matcher.find()) 1082 failCount++; 1083 1084 // JDK-8230829 1085 pattern = Pattern.compile("\\ud800\\udc61"); 1086 matcher = pattern.matcher("\ud800\udc61"); 1087 matcher.region(0, 1); 1088 if (matcher.find()) { 1089 failCount++; 1090 System.out.println("Matched a surrogate pair" + 1091 " that crosses border of region"); 1092 } 1093 if (!matcher.hitEnd()) { 1094 failCount++; 1095 System.out.println("Expected to hit the end when" + 1096 " matching a surrogate pair crossing region"); 1097 } 1098 1099 report("Regions"); 1100 } 1101 1102 private static void expectRegionFail(Matcher matcher, int index1, 1103 int index2) 1104 { 1105 try { 1106 matcher.region(index1, index2); 1107 failCount++; 1108 } catch (IndexOutOfBoundsException ioobe) { 1109 // Correct result 1110 } catch (IllegalStateException ise) { 1111 // Correct result 1112 } 1113 } 1114 1115 // This test is for 4803197 1116 private static void escapedSegmentTest() throws Exception { 1117 
1118 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1119 check(pattern, "dir1\\dir2", true); 1120 1121 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1122 check(pattern, "dir1\\dir2\\", true); 1123 1124 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1125 check(pattern, "dir1\\dir2\\", true); 1126 1127 // Supplementary character test 1128 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1129 check(pattern, toSupplementaries("dir1\\dir2"), true); 1130 1131 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1132 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1133 1134 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1135 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1136 1137 report("Escaped segment"); 1138 } 1139 1140 // This test is for 4792284 1141 private static void nonCaptureRepetitionTest() throws Exception { 1142 String input = "abcdefgh;"; 1143 1144 String[] patterns = new String[] { 1145 "(?:\\w{4})+;", 1146 "(?:\\w{8})*;", 1147 "(?:\\w{2}){2,4};", 1148 "(?:\\w{4}){2,};", // only matches the 1149 ".*?(?:\\w{5})+;", // specified minimum 1150 ".*?(?:\\w{9})*;", // number of reps - OK 1151 "(?:\\w{4})+?;", // lazy repetition - OK 1152 "(?:\\w{4})++;", // possessive repetition - OK 1153 "(?:\\w{2,}?)+;", // non-deterministic - OK 1154 "(\\w{4})+;", // capturing group - OK 1155 }; 1156 1157 for (int i = 0; i < patterns.length; i++) { 1158 // Check find() 1159 check(patterns[i], 0, input, input, true); 1160 // Check matches() 1161 Pattern p = Pattern.compile(patterns[i]); 1162 Matcher m = p.matcher(input); 1163 1164 if (m.matches()) { 1165 if (!m.group(0).equals(input)) 1166 failCount++; 1167 } else { 1168 failCount++; 1169 } 1170 } 1171 1172 report("Non capturing repetition"); 1173 } 1174 1175 // This test is for 6358731 1176 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1177 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1178 Matcher 
matcher = pattern.matcher("abcd"); 1179 if (!matcher.matches() || 1180 matcher.group(1) != null || 1181 !matcher.group(2).equals("abcd")) { 1182 failCount++; 1183 } 1184 report("Not captured GroupCurly"); 1185 } 1186 1187 // This test is for 4706545 1188 private static void javaCharClassTest() throws Exception { 1189 for (int i=0; i<1000; i++) { 1190 char c = (char)generator.nextInt(); 1191 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1192 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1193 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1194 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1195 check("{javaDigit}", c, Character.isDigit(c)); 1196 check("{javaDefined}", c, Character.isDefined(c)); 1197 check("{javaLetter}", c, Character.isLetter(c)); 1198 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1199 check("{javaJavaIdentifierStart}", c, 1200 Character.isJavaIdentifierStart(c)); 1201 check("{javaJavaIdentifierPart}", c, 1202 Character.isJavaIdentifierPart(c)); 1203 check("{javaUnicodeIdentifierStart}", c, 1204 Character.isUnicodeIdentifierStart(c)); 1205 check("{javaUnicodeIdentifierPart}", c, 1206 Character.isUnicodeIdentifierPart(c)); 1207 check("{javaIdentifierIgnorable}", c, 1208 Character.isIdentifierIgnorable(c)); 1209 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1210 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1211 check("{javaISOControl}", c, Character.isISOControl(c)); 1212 check("{javaMirrored}", c, Character.isMirrored(c)); 1213 1214 } 1215 1216 // Supplementary character test 1217 for (int i=0; i<1000; i++) { 1218 int c = generator.nextInt(Character.MAX_CODE_POINT 1219 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1220 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1221 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1222 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1223 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1224 check("{javaTitleCase}", c, 
Character.isTitleCase(c)); 1225 check("{javaDigit}", c, Character.isDigit(c)); 1226 check("{javaDefined}", c, Character.isDefined(c)); 1227 check("{javaLetter}", c, Character.isLetter(c)); 1228 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1229 check("{javaJavaIdentifierStart}", c, 1230 Character.isJavaIdentifierStart(c)); 1231 check("{javaJavaIdentifierPart}", c, 1232 Character.isJavaIdentifierPart(c)); 1233 check("{javaUnicodeIdentifierStart}", c, 1234 Character.isUnicodeIdentifierStart(c)); 1235 check("{javaUnicodeIdentifierPart}", c, 1236 Character.isUnicodeIdentifierPart(c)); 1237 check("{javaIdentifierIgnorable}", c, 1238 Character.isIdentifierIgnorable(c)); 1239 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1240 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1241 check("{javaISOControl}", c, Character.isISOControl(c)); 1242 check("{javaMirrored}", c, Character.isMirrored(c)); 1243 } 1244 1245 report("Java character classes"); 1246 } 1247 1248 // This test is for 4523620 1249 /* 1250 private static void numOccurrencesTest() throws Exception { 1251 Pattern pattern = Pattern.compile("aaa"); 1252 1253 if (pattern.numOccurrences("aaaaaa", false) != 2) 1254 failCount++; 1255 if (pattern.numOccurrences("aaaaaa", true) != 4) 1256 failCount++; 1257 1258 pattern = Pattern.compile("^"); 1259 if (pattern.numOccurrences("aaaaaa", false) != 1) 1260 failCount++; 1261 if (pattern.numOccurrences("aaaaaa", true) != 1) 1262 failCount++; 1263 1264 report("Number of Occurrences"); 1265 } 1266 */ 1267 1268 // This test is for 4776374 1269 private static void caretBetweenTerminatorsTest() throws Exception { 1270 int flags1 = Pattern.DOTALL; 1271 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1272 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1273 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1274 1275 check("^....", flags1, "test\ntest", "test", true); 1276 check(".....^", flags1, "test\ntest", "test", false); 1277 
check(".....^", flags1, "test\n", "test", false); 1278 check("....^", flags1, "test\r\n", "test", false); 1279 1280 check("^....", flags2, "test\ntest", "test", true); 1281 check("....^", flags2, "test\ntest", "test", false); 1282 check(".....^", flags2, "test\n", "test", false); 1283 check("....^", flags2, "test\r\n", "test", false); 1284 1285 check("^....", flags3, "test\ntest", "test", true); 1286 check(".....^", flags3, "test\ntest", "test\n", true); 1287 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1288 check(".....^", flags3, "test\n", "test", false); 1289 check(".....^", flags3, "test\r\n", "test", false); 1290 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1291 1292 check("^....", flags4, "test\ntest", "test", true); 1293 check(".....^", flags3, "test\ntest", "test\n", true); 1294 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1295 check(".....^", flags4, "test\n", "test\n", false); 1296 check(".....^", flags4, "test\r\n", "test\r", false); 1297 1298 // Supplementary character test 1299 String t = toSupplementaries("test"); 1300 check("^....", flags1, t+"\n"+t, t, true); 1301 check(".....^", flags1, t+"\n"+t, t, false); 1302 check(".....^", flags1, t+"\n", t, false); 1303 check("....^", flags1, t+"\r\n", t, false); 1304 1305 check("^....", flags2, t+"\n"+t, t, true); 1306 check("....^", flags2, t+"\n"+t, t, false); 1307 check(".....^", flags2, t+"\n", t, false); 1308 check("....^", flags2, t+"\r\n", t, false); 1309 1310 check("^....", flags3, t+"\n"+t, t, true); 1311 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1312 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1313 check(".....^", flags3, t+"\n", t, false); 1314 check(".....^", flags3, t+"\r\n", t, false); 1315 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1316 1317 check("^....", flags4, t+"\n"+t, t, true); 1318 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1319 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1320 
check(".....^", flags4, t+"\n", t+"\n", false); 1321 check(".....^", flags4, t+"\r\n", t+"\r", false); 1322 1323 report("Caret between terminators"); 1324 } 1325 1326 // This test is for 4727935 1327 private static void dollarAtEndTest() throws Exception { 1328 int flags1 = Pattern.DOTALL; 1329 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1330 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1331 1332 check("....$", flags1, "test\n", "test", true); 1333 check("....$", flags1, "test\r\n", "test", true); 1334 check(".....$", flags1, "test\n", "test\n", true); 1335 check(".....$", flags1, "test\u0085", "test\u0085", true); 1336 check("....$", flags1, "test\u0085", "test", true); 1337 1338 check("....$", flags2, "test\n", "test", true); 1339 check(".....$", flags2, "test\n", "test\n", true); 1340 check(".....$", flags2, "test\u0085", "test\u0085", true); 1341 check("....$", flags2, "test\u0085", "est\u0085", true); 1342 1343 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1344 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1345 check("....$blah", flags3, "test\nblah", "!!!!", false); 1346 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1347 1348 // Supplementary character test 1349 String t = toSupplementaries("test"); 1350 String b = toSupplementaries("blah"); 1351 check("....$", flags1, t+"\n", t, true); 1352 check("....$", flags1, t+"\r\n", t, true); 1353 check(".....$", flags1, t+"\n", t+"\n", true); 1354 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1355 check("....$", flags1, t+"\u0085", t, true); 1356 1357 check("....$", flags2, t+"\n", t, true); 1358 check(".....$", flags2, t+"\n", t+"\n", true); 1359 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1360 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1361 1362 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1363 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1364 check("....$"+b, flags3, t+"\n"+b, 
"!!!!", false); 1365 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1366 1367 report("Dollar at End"); 1368 } 1369 1370 // This test is for 4711773 1371 private static void multilineDollarTest() throws Exception { 1372 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1373 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1374 matcher.find(); 1375 if (matcher.start(0) != 9) 1376 failCount++; 1377 matcher.find(); 1378 if (matcher.start(0) != 20) 1379 failCount++; 1380 1381 // Supplementary character test 1382 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1383 matcher.find(); 1384 if (matcher.start(0) != 9*2) 1385 failCount++; 1386 matcher.find(); 1387 if (matcher.start(0) != 20*2) 1388 failCount++; 1389 1390 report("Multiline Dollar"); 1391 } 1392 1393 private static void reluctantRepetitionTest() throws Exception { 1394 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1395 check(p, "1 word word word 2", true); 1396 check(p, "1 wor wo w 2", true); 1397 check(p, "1 word word 2", true); 1398 check(p, "1 word 2", true); 1399 check(p, "1 wo w w 2", true); 1400 check(p, "1 wo w 2", true); 1401 check(p, "1 wor w 2", true); 1402 1403 p = Pattern.compile("([a-z])+?c"); 1404 Matcher m = p.matcher("ababcdefdec"); 1405 check(m, "ababc"); 1406 1407 // Supplementary character test 1408 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1409 m = p.matcher(toSupplementaries("ababcdefdec")); 1410 check(m, toSupplementaries("ababc")); 1411 1412 report("Reluctant Repetition"); 1413 } 1414 1415 private static Pattern serializedPattern(Pattern p) throws Exception { 1416 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1417 ObjectOutputStream oos = new ObjectOutputStream(baos); 1418 oos.writeObject(p); 1419 oos.close(); 1420 try (ObjectInputStream ois = new ObjectInputStream( 1421 new ByteArrayInputStream(baos.toByteArray()))) { 1422 return (Pattern)ois.readObject(); 1423 } 1424 } 1425 1426 private 
static void serializeTest() throws Exception { 1427 String patternStr = "(b)"; 1428 String matchStr = "b"; 1429 Pattern pattern = Pattern.compile(patternStr); 1430 Pattern serializedPattern = serializedPattern(pattern); 1431 Matcher matcher = serializedPattern.matcher(matchStr); 1432 if (!matcher.matches()) 1433 failCount++; 1434 if (matcher.groupCount() != 1) 1435 failCount++; 1436 1437 pattern = Pattern.compile("a(?-i)b", Pattern.CASE_INSENSITIVE); 1438 serializedPattern = serializedPattern(pattern); 1439 if (!serializedPattern.matcher("Ab").matches()) 1440 failCount++; 1441 if (serializedPattern.matcher("AB").matches()) 1442 failCount++; 1443 1444 report("Serialization"); 1445 } 1446 1447 private static void gTest() { 1448 Pattern pattern = Pattern.compile("\\G\\w"); 1449 Matcher matcher = pattern.matcher("abc#x#x"); 1450 matcher.find(); 1451 matcher.find(); 1452 matcher.find(); 1453 if (matcher.find()) 1454 failCount++; 1455 1456 pattern = Pattern.compile("\\GA*"); 1457 matcher = pattern.matcher("1A2AA3"); 1458 matcher.find(); 1459 if (matcher.find()) 1460 failCount++; 1461 1462 pattern = Pattern.compile("\\GA*"); 1463 matcher = pattern.matcher("1A2AA3"); 1464 if (!matcher.find(1)) 1465 failCount++; 1466 matcher.find(); 1467 if (matcher.find()) 1468 failCount++; 1469 1470 report("\\G"); 1471 } 1472 1473 private static void zTest() { 1474 Pattern pattern = Pattern.compile("foo\\Z"); 1475 // Positives 1476 check(pattern, "foo\u0085", true); 1477 check(pattern, "foo\u2028", true); 1478 check(pattern, "foo\u2029", true); 1479 check(pattern, "foo\n", true); 1480 check(pattern, "foo\r", true); 1481 check(pattern, "foo\r\n", true); 1482 // Negatives 1483 check(pattern, "fooo", false); 1484 check(pattern, "foo\n\r", false); 1485 1486 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1487 // Positives 1488 check(pattern, "foo", true); 1489 check(pattern, "foo\n", true); 1490 // Negatives 1491 check(pattern, "foo\r", false); 1492 check(pattern, "foo\u0085", 
false); 1493 check(pattern, "foo\u2028", false); 1494 check(pattern, "foo\u2029", false); 1495 1496 report("\\Z"); 1497 } 1498 1499 private static void replaceFirstTest() { 1500 Pattern pattern = Pattern.compile("(ab)(c*)"); 1501 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1502 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1503 failCount++; 1504 1505 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1506 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1507 failCount++; 1508 1509 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1510 String result = matcher.replaceFirst("$1"); 1511 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1512 failCount++; 1513 1514 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1515 result = matcher.replaceFirst("$2"); 1516 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1517 failCount++; 1518 1519 pattern = Pattern.compile("a*"); 1520 matcher = pattern.matcher("aaaaaaaaaa"); 1521 if (!matcher.replaceFirst("test").equals("test")) 1522 failCount++; 1523 1524 pattern = Pattern.compile("a+"); 1525 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1526 if (!matcher.replaceFirst("test").equals("zzztest")) 1527 failCount++; 1528 1529 // Supplementary character test 1530 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1531 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1532 if (!matcher.replaceFirst(toSupplementaries("test")) 1533 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1534 failCount++; 1535 1536 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1537 if (!matcher.replaceFirst(toSupplementaries("test")). 
1538 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1539 failCount++; 1540 1541 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1542 result = matcher.replaceFirst("$1"); 1543 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1544 failCount++; 1545 1546 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1547 result = matcher.replaceFirst("$2"); 1548 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1549 failCount++; 1550 1551 pattern = Pattern.compile(toSupplementaries("a*")); 1552 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1553 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1554 failCount++; 1555 1556 pattern = Pattern.compile(toSupplementaries("a+")); 1557 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1558 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1559 failCount++; 1560 1561 report("Replace First"); 1562 } 1563 1564 private static void unixLinesTest() { 1565 Pattern pattern = Pattern.compile(".*"); 1566 Matcher matcher = pattern.matcher("aa\u2028blah"); 1567 matcher.find(); 1568 if (!matcher.group(0).equals("aa")) 1569 failCount++; 1570 1571 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1572 matcher = pattern.matcher("aa\u2028blah"); 1573 matcher.find(); 1574 if (!matcher.group(0).equals("aa\u2028blah")) 1575 failCount++; 1576 1577 pattern = Pattern.compile("[az]$", 1578 Pattern.MULTILINE | Pattern.UNIX_LINES); 1579 matcher = pattern.matcher("aa\u2028zz"); 1580 check(matcher, "a\u2028", false); 1581 1582 // Supplementary character test 1583 pattern = Pattern.compile(".*"); 1584 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1585 matcher.find(); 1586 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1587 failCount++; 1588 1589 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1590 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 
1591 matcher.find(); 1592 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1593 failCount++; 1594 1595 pattern = Pattern.compile(toSupplementaries("[az]$"), 1596 Pattern.MULTILINE | Pattern.UNIX_LINES); 1597 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1598 check(matcher, toSupplementaries("a\u2028"), false); 1599 1600 report("Unix Lines"); 1601 } 1602 1603 private static void commentsTest() { 1604 int flags = Pattern.COMMENTS; 1605 1606 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1607 Matcher matcher = pattern.matcher("aa#aa"); 1608 if (!matcher.matches()) 1609 failCount++; 1610 1611 pattern = Pattern.compile("aa # blah", flags); 1612 matcher = pattern.matcher("aa"); 1613 if (!matcher.matches()) 1614 failCount++; 1615 1616 pattern = Pattern.compile("aa blah", flags); 1617 matcher = pattern.matcher("aablah"); 1618 if (!matcher.matches()) 1619 failCount++; 1620 1621 pattern = Pattern.compile("aa # blah blech ", flags); 1622 matcher = pattern.matcher("aa"); 1623 if (!matcher.matches()) 1624 failCount++; 1625 1626 pattern = Pattern.compile("aa # blah\n ", flags); 1627 matcher = pattern.matcher("aa"); 1628 if (!matcher.matches()) 1629 failCount++; 1630 1631 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1632 matcher = pattern.matcher("aabc"); 1633 if (!matcher.matches()) 1634 failCount++; 1635 1636 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1637 matcher = pattern.matcher("aabc"); 1638 if (!matcher.matches()) 1639 failCount++; 1640 1641 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1642 matcher = pattern.matcher("aabc#blech"); 1643 if (!matcher.matches()) 1644 failCount++; 1645 1646 // Supplementary character test 1647 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1648 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1649 if (!matcher.matches()) 1650 failCount++; 1651 1652 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1653 matcher = 
pattern.matcher(toSupplementaries("aa")); 1654 if (!matcher.matches()) 1655 failCount++; 1656 1657 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1658 matcher = pattern.matcher(toSupplementaries("aablah")); 1659 if (!matcher.matches()) 1660 failCount++; 1661 1662 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1663 matcher = pattern.matcher(toSupplementaries("aa")); 1664 if (!matcher.matches()) 1665 failCount++; 1666 1667 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1668 matcher = pattern.matcher(toSupplementaries("aa")); 1669 if (!matcher.matches()) 1670 failCount++; 1671 1672 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1673 matcher = pattern.matcher(toSupplementaries("aabc")); 1674 if (!matcher.matches()) 1675 failCount++; 1676 1677 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1678 matcher = pattern.matcher(toSupplementaries("aabc")); 1679 if (!matcher.matches()) 1680 failCount++; 1681 1682 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1683 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1684 if (!matcher.matches()) 1685 failCount++; 1686 1687 report("Comments"); 1688 } 1689 1690 private static void caseFoldingTest() { // bug 4504687 1691 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1692 Pattern pattern = Pattern.compile("aa", flags); 1693 Matcher matcher = pattern.matcher("ab"); 1694 if (matcher.matches()) 1695 failCount++; 1696 1697 pattern = Pattern.compile("aA", flags); 1698 matcher = pattern.matcher("ab"); 1699 if (matcher.matches()) 1700 failCount++; 1701 1702 pattern = Pattern.compile("aa", flags); 1703 matcher = pattern.matcher("aB"); 1704 if (matcher.matches()) 1705 failCount++; 1706 matcher = pattern.matcher("Ab"); 1707 if (matcher.matches()) 1708 failCount++; 1709 1710 // ASCII "a" 1711 // Latin-1 Supplement "a" + grave 1712 // Cyrillic "a" 1713 String[] patterns 
= new String[] { 1714 //single 1715 "a", "\u00e0", "\u0430", 1716 //slice 1717 "ab", "\u00e0\u00e1", "\u0430\u0431", 1718 //class single 1719 "[a]", "[\u00e0]", "[\u0430]", 1720 //class range 1721 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1722 //back reference 1723 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1724 }; 1725 1726 String[] texts = new String[] { 1727 "A", "\u00c0", "\u0410", 1728 "AB", "\u00c0\u00c1", "\u0410\u0411", 1729 "A", "\u00c0", "\u0410", 1730 "B", "\u00c2", "\u0411", 1731 "aA", "\u00e0\u00c0", "\u0430\u0410" 1732 }; 1733 1734 boolean[] expected = new boolean[] { 1735 true, false, false, 1736 true, false, false, 1737 true, false, false, 1738 true, false, false, 1739 true, false, false 1740 }; 1741 1742 flags = Pattern.CASE_INSENSITIVE; 1743 for (int i = 0; i < patterns.length; i++) { 1744 pattern = Pattern.compile(patterns[i], flags); 1745 matcher = pattern.matcher(texts[i]); 1746 if (matcher.matches() != expected[i]) { 1747 System.out.println("<1> Failed at " + i); 1748 failCount++; 1749 } 1750 } 1751 1752 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1753 for (int i = 0; i < patterns.length; i++) { 1754 pattern = Pattern.compile(patterns[i], flags); 1755 matcher = pattern.matcher(texts[i]); 1756 if (!matcher.matches()) { 1757 System.out.println("<2> Failed at " + i); 1758 failCount++; 1759 } 1760 } 1761 // flag unicode_case alone should do nothing 1762 flags = Pattern.UNICODE_CASE; 1763 for (int i = 0; i < patterns.length; i++) { 1764 pattern = Pattern.compile(patterns[i], flags); 1765 matcher = pattern.matcher(texts[i]); 1766 if (matcher.matches()) { 1767 System.out.println("<3> Failed at " + i); 1768 failCount++; 1769 } 1770 } 1771 1772 // Special cases: i, I, u+0131 and u+0130 1773 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1774 pattern = Pattern.compile("[h-j]+", flags); 1775 if (!pattern.matcher("\u0131\u0130").matches()) 1776 failCount++; 1777 report("Case Folding"); 1778 } 1779 1780 private static void 
appendTest() { 1781 Pattern pattern = Pattern.compile("(ab)(cd)"); 1782 Matcher matcher = pattern.matcher("abcd"); 1783 String result = matcher.replaceAll("$2$1"); 1784 if (!result.equals("cdab")) 1785 failCount++; 1786 1787 String s1 = "Swap all: first = 123, second = 456"; 1788 String s2 = "Swap one: first = 123, second = 456"; 1789 String r = "$3$2$1"; 1790 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1791 matcher = pattern.matcher(s1); 1792 1793 result = matcher.replaceAll(r); 1794 if (!result.equals("Swap all: 123 = first, 456 = second")) 1795 failCount++; 1796 1797 matcher = pattern.matcher(s2); 1798 1799 if (matcher.find()) { 1800 StringBuffer sb = new StringBuffer(); 1801 matcher.appendReplacement(sb, r); 1802 matcher.appendTail(sb); 1803 result = sb.toString(); 1804 if (!result.equals("Swap one: 123 = first, second = 456")) 1805 failCount++; 1806 } 1807 1808 // Supplementary character test 1809 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1810 matcher = pattern.matcher(toSupplementaries("abcd")); 1811 result = matcher.replaceAll("$2$1"); 1812 if (!result.equals(toSupplementaries("cdab"))) 1813 failCount++; 1814 1815 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1816 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1817 r = toSupplementaries("$3$2$1"); 1818 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1819 matcher = pattern.matcher(s1); 1820 1821 result = matcher.replaceAll(r); 1822 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1823 failCount++; 1824 1825 matcher = pattern.matcher(s2); 1826 1827 if (matcher.find()) { 1828 StringBuffer sb = new StringBuffer(); 1829 matcher.appendReplacement(sb, r); 1830 matcher.appendTail(sb); 1831 result = sb.toString(); 1832 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1833 failCount++; 1834 } 1835 report("Append"); 1836 } 1837 1838 private static void splitTest() { 1839 Pattern 
pattern = Pattern.compile(":"); 1840 String[] result = pattern.split("foo:and:boo", 2); 1841 if (!result[0].equals("foo")) 1842 failCount++; 1843 if (!result[1].equals("and:boo")) 1844 failCount++; 1845 // Supplementary character test 1846 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1847 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1848 if (!result[0].equals(toSupplementaries("foo"))) 1849 failCount++; 1850 if (!result[1].equals(toSupplementaries("andXboo"))) 1851 failCount++; 1852 1853 CharBuffer cb = CharBuffer.allocate(100); 1854 cb.put("foo:and:boo"); 1855 cb.flip(); 1856 result = pattern.split(cb); 1857 if (!result[0].equals("foo")) 1858 failCount++; 1859 if (!result[1].equals("and")) 1860 failCount++; 1861 if (!result[2].equals("boo")) 1862 failCount++; 1863 1864 // Supplementary character test 1865 CharBuffer cbs = CharBuffer.allocate(100); 1866 cbs.put(toSupplementaries("fooXandXboo")); 1867 cbs.flip(); 1868 result = patternX.split(cbs); 1869 if (!result[0].equals(toSupplementaries("foo"))) 1870 failCount++; 1871 if (!result[1].equals(toSupplementaries("and"))) 1872 failCount++; 1873 if (!result[2].equals(toSupplementaries("boo"))) 1874 failCount++; 1875 1876 String source = "0123456789"; 1877 for (int limit=-2; limit<3; limit++) { 1878 for (int x=0; x<10; x++) { 1879 result = source.split(Integer.toString(x), limit); 1880 int expectedLength = limit < 1 ? 
2 : limit; 1881 1882 if ((limit == 0) && (x == 9)) { 1883 // expected dropping of "" 1884 if (result.length != 1) 1885 failCount++; 1886 if (!result[0].equals("012345678")) { 1887 failCount++; 1888 } 1889 } else { 1890 if (result.length != expectedLength) { 1891 failCount++; 1892 } 1893 if (!result[0].equals(source.substring(0,x))) { 1894 if (limit != 1) { 1895 failCount++; 1896 } else { 1897 if (!result[0].equals(source.substring(0,10))) { 1898 failCount++; 1899 } 1900 } 1901 } 1902 if (expectedLength > 1) { // Check segment 2 1903 if (!result[1].equals(source.substring(x+1,10))) 1904 failCount++; 1905 } 1906 } 1907 } 1908 } 1909 // Check the case for no match found 1910 for (int limit=-2; limit<3; limit++) { 1911 result = source.split("e", limit); 1912 if (result.length != 1) 1913 failCount++; 1914 if (!result[0].equals(source)) 1915 failCount++; 1916 } 1917 // Check the case for limit == 0, source = ""; 1918 // split() now returns 0-length for empty source "" see #6559590 1919 source = ""; 1920 result = source.split("e", 0); 1921 if (result.length != 1) 1922 failCount++; 1923 if (!result[0].equals(source)) 1924 failCount++; 1925 1926 // Check both split() and splitAsStraem(), especially for zero-lenth 1927 // input and zero-lenth match cases 1928 String[][] input = new String[][] { 1929 { " ", "Abc Efg Hij" }, // normal non-zero-match 1930 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1931 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1932 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1933 { "(?=\\p{Lu})", "AbcEfg" }, 1934 { "(?=\\p{Lu})", "Abc" }, 1935 { " ", "" }, // zero-length input 1936 { ".*", "" }, 1937 1938 // some tests from PatternStreamTest.java 1939 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1940 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1941 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1942 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1943 { "\u56da", 
"1\u56da23\u56da456\u56da7890" }, 1944 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1945 { "\u56da", "" }, 1946 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1947 { "o", "boo:and:foo" }, 1948 { "o", "booooo:and:fooooo" }, 1949 { "o", "fooooo:" }, 1950 }; 1951 1952 String[][] expected = new String[][] { 1953 { "Abc", "Efg", "Hij" }, 1954 { "", "Abc", "Efg", "Hij" }, 1955 { "Abc", "", "Efg", "Hij" }, 1956 { "Abc", "Efg", "Hij" }, 1957 { "Abc", "Efg" }, 1958 { "Abc" }, 1959 { "" }, 1960 { "" }, 1961 1962 { "awgqwefg1fefw", "vssv1vvv1" }, 1963 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1964 { "awgqwefg", "fefw4vssv", "vvv" }, 1965 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1966 { "1", "23", "456", "7890" }, 1967 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1968 { "" }, 1969 { "This", "is", "testing", "", "with", "different", "separators" }, 1970 { "b", "", ":and:f" }, 1971 { "b", "", "", "", "", ":and:f" }, 1972 { "f", "", "", "", "", ":" }, 1973 }; 1974 for (int i = 0; i < input.length; i++) { 1975 pattern = Pattern.compile(input[i][0]); 1976 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1977 failCount++; 1978 } 1979 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1980 // array for zero-length input for now 1981 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1982 expected[i])) { 1983 failCount++; 1984 } 1985 } 1986 report("Split"); 1987 } 1988 1989 private static void negationTest() { 1990 Pattern pattern = Pattern.compile("[\\[@^]+"); 1991 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1992 if (!matcher.find()) 1993 failCount++; 1994 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1995 failCount++; 1996 pattern = Pattern.compile("[@\\[^]+"); 1997 matcher = pattern.matcher("@@@@[[[[^^^^"); 1998 if (!matcher.find()) 1999 failCount++; 2000 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 2001 
failCount++; 2002 pattern = Pattern.compile("[@\\[^@]+"); 2003 matcher = pattern.matcher("@@@@[[[[^^^^"); 2004 if (!matcher.find()) 2005 failCount++; 2006 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 2007 failCount++; 2008 2009 pattern = Pattern.compile("\\)"); 2010 matcher = pattern.matcher("xxx)xxx"); 2011 if (!matcher.find()) 2012 failCount++; 2013 2014 report("Negation"); 2015 } 2016 2017 private static void ampersandTest() { 2018 Pattern pattern = Pattern.compile("[&@]+"); 2019 check(pattern, "@@@@&&&&", true); 2020 2021 pattern = Pattern.compile("[@&]+"); 2022 check(pattern, "@@@@&&&&", true); 2023 2024 pattern = Pattern.compile("[@\\&]+"); 2025 check(pattern, "@@@@&&&&", true); 2026 2027 report("Ampersand"); 2028 } 2029 2030 private static void octalTest() throws Exception { 2031 Pattern pattern = Pattern.compile("\\u0007"); 2032 Matcher matcher = pattern.matcher("\u0007"); 2033 if (!matcher.matches()) 2034 failCount++; 2035 pattern = Pattern.compile("\\07"); 2036 matcher = pattern.matcher("\u0007"); 2037 if (!matcher.matches()) 2038 failCount++; 2039 pattern = Pattern.compile("\\007"); 2040 matcher = pattern.matcher("\u0007"); 2041 if (!matcher.matches()) 2042 failCount++; 2043 pattern = Pattern.compile("\\0007"); 2044 matcher = pattern.matcher("\u0007"); 2045 if (!matcher.matches()) 2046 failCount++; 2047 pattern = Pattern.compile("\\040"); 2048 matcher = pattern.matcher("\u0020"); 2049 if (!matcher.matches()) 2050 failCount++; 2051 pattern = Pattern.compile("\\0403"); 2052 matcher = pattern.matcher("\u00203"); 2053 if (!matcher.matches()) 2054 failCount++; 2055 pattern = Pattern.compile("\\0103"); 2056 matcher = pattern.matcher("\u0043"); 2057 if (!matcher.matches()) 2058 failCount++; 2059 2060 report("Octal"); 2061 } 2062 2063 private static void longPatternTest() throws Exception { 2064 try { 2065 Pattern pattern = Pattern.compile( 2066 "a 32-character-long pattern xxxx"); 2067 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 2068 
pattern = Pattern.compile("a thirty four character long regex"); 2069 StringBuffer patternToBe = new StringBuffer(101); 2070 for (int i=0; i<100; i++) 2071 patternToBe.append((char)(97 + i%26)); 2072 pattern = Pattern.compile(patternToBe.toString()); 2073 } catch (PatternSyntaxException e) { 2074 failCount++; 2075 } 2076 2077 // Supplementary character test 2078 try { 2079 Pattern pattern = Pattern.compile( 2080 toSupplementaries("a 32-character-long pattern xxxx")); 2081 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 2082 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 2083 StringBuffer patternToBe = new StringBuffer(101*2); 2084 for (int i=0; i<100; i++) 2085 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 2086 + 97 + i%26)); 2087 pattern = Pattern.compile(patternToBe.toString()); 2088 } catch (PatternSyntaxException e) { 2089 failCount++; 2090 } 2091 report("LongPattern"); 2092 } 2093 2094 private static void group0Test() throws Exception { 2095 Pattern pattern = Pattern.compile("(tes)ting"); 2096 Matcher matcher = pattern.matcher("testing"); 2097 check(matcher, "testing"); 2098 2099 matcher.reset("testing"); 2100 if (matcher.lookingAt()) { 2101 if (!matcher.group(0).equals("testing")) 2102 failCount++; 2103 } else { 2104 failCount++; 2105 } 2106 2107 matcher.reset("testing"); 2108 if (matcher.matches()) { 2109 if (!matcher.group(0).equals("testing")) 2110 failCount++; 2111 } else { 2112 failCount++; 2113 } 2114 2115 pattern = Pattern.compile("(tes)ting"); 2116 matcher = pattern.matcher("testing"); 2117 if (matcher.lookingAt()) { 2118 if (!matcher.group(0).equals("testing")) 2119 failCount++; 2120 } else { 2121 failCount++; 2122 } 2123 2124 pattern = Pattern.compile("^(tes)ting"); 2125 matcher = pattern.matcher("testing"); 2126 if (matcher.matches()) { 2127 if (!matcher.group(0).equals("testing")) 2128 failCount++; 2129 } else { 2130 failCount++; 2131 } 2132 2133 
// Supplementary character test 2134 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2135 matcher = pattern.matcher(toSupplementaries("testing")); 2136 check(matcher, toSupplementaries("testing")); 2137 2138 matcher.reset(toSupplementaries("testing")); 2139 if (matcher.lookingAt()) { 2140 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2141 failCount++; 2142 } else { 2143 failCount++; 2144 } 2145 2146 matcher.reset(toSupplementaries("testing")); 2147 if (matcher.matches()) { 2148 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2149 failCount++; 2150 } else { 2151 failCount++; 2152 } 2153 2154 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2155 matcher = pattern.matcher(toSupplementaries("testing")); 2156 if (matcher.lookingAt()) { 2157 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2158 failCount++; 2159 } else { 2160 failCount++; 2161 } 2162 2163 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2164 matcher = pattern.matcher(toSupplementaries("testing")); 2165 if (matcher.matches()) { 2166 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2167 failCount++; 2168 } else { 2169 failCount++; 2170 } 2171 2172 report("Group0"); 2173 } 2174 2175 private static void findIntTest() throws Exception { 2176 Pattern p = Pattern.compile("blah"); 2177 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2178 boolean result = m.find(2); 2179 if (!result) 2180 failCount++; 2181 2182 p = Pattern.compile("$"); 2183 m = p.matcher("1234567890"); 2184 result = m.find(10); 2185 if (!result) 2186 failCount++; 2187 try { 2188 result = m.find(11); 2189 failCount++; 2190 } catch (IndexOutOfBoundsException e) { 2191 // correct result 2192 } 2193 2194 // Supplementary character test 2195 p = Pattern.compile(toSupplementaries("blah")); 2196 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2197 result = m.find(2); 2198 if (!result) 2199 failCount++; 2200 2201 report("FindInt"); 2202 } 2203 2204 private static void 
emptyPatternTest() throws Exception { 2205 Pattern p = Pattern.compile(""); 2206 Matcher m = p.matcher("foo"); 2207 2208 // Should find empty pattern at beginning of input 2209 boolean result = m.find(); 2210 if (result != true) 2211 failCount++; 2212 if (m.start() != 0) 2213 failCount++; 2214 2215 // Should not match entire input if input is not empty 2216 m.reset(); 2217 result = m.matches(); 2218 if (result == true) 2219 failCount++; 2220 2221 try { 2222 m.start(0); 2223 failCount++; 2224 } catch (IllegalStateException e) { 2225 // Correct result 2226 } 2227 2228 // Should match entire input if input is empty 2229 m.reset(""); 2230 result = m.matches(); 2231 if (result != true) 2232 failCount++; 2233 2234 result = Pattern.matches("", ""); 2235 if (result != true) 2236 failCount++; 2237 2238 result = Pattern.matches("", "foo"); 2239 if (result == true) 2240 failCount++; 2241 report("EmptyPattern"); 2242 } 2243 2244 private static void charClassTest() throws Exception { 2245 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2246 check(pattern, "blahb]blech", true); 2247 2248 pattern = Pattern.compile("[abc[def]]"); 2249 check(pattern, "b", true); 2250 2251 // Supplementary character tests 2252 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2253 check(pattern, toSupplementaries("blahb]blech"), true); 2254 2255 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2256 check(pattern, toSupplementaries("b"), true); 2257 2258 try { 2259 // u00ff when UNICODE_CASE 2260 pattern = Pattern.compile("[ab\u00ffcd]", 2261 Pattern.CASE_INSENSITIVE| 2262 Pattern.UNICODE_CASE); 2263 check(pattern, "ab\u00ffcd", true); 2264 check(pattern, "Ab\u0178Cd", true); 2265 2266 // u00b5 when UNICODE_CASE 2267 pattern = Pattern.compile("[ab\u00b5cd]", 2268 Pattern.CASE_INSENSITIVE| 2269 Pattern.UNICODE_CASE); 2270 check(pattern, "ab\u00b5cd", true); 2271 check(pattern, "Ab\u039cCd", true); 2272 } catch (Exception e) { failCount++; } 2273 2274 /* Special cases 2275 
(1)LatinSmallLetterLongS u+017f 2276 (2)LatinSmallLetterDotlessI u+0131 2277 (3)LatineCapitalLetterIWithDotAbove u+0130 2278 (4)KelvinSign u+212a 2279 (5)AngstromSign u+212b 2280 */ 2281 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2282 pattern = Pattern.compile("[sik\u00c5]+", flags); 2283 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2284 failCount++; 2285 2286 report("CharClass"); 2287 } 2288 2289 private static void caretTest() throws Exception { 2290 Pattern pattern = Pattern.compile("\\w*"); 2291 Matcher matcher = pattern.matcher("a#bc#def##g"); 2292 check(matcher, "a"); 2293 check(matcher, ""); 2294 check(matcher, "bc"); 2295 check(matcher, ""); 2296 check(matcher, "def"); 2297 check(matcher, ""); 2298 check(matcher, ""); 2299 check(matcher, "g"); 2300 check(matcher, ""); 2301 if (matcher.find()) 2302 failCount++; 2303 2304 pattern = Pattern.compile("^\\w*"); 2305 matcher = pattern.matcher("a#bc#def##g"); 2306 check(matcher, "a"); 2307 if (matcher.find()) 2308 failCount++; 2309 2310 pattern = Pattern.compile("\\w"); 2311 matcher = pattern.matcher("abc##x"); 2312 check(matcher, "a"); 2313 check(matcher, "b"); 2314 check(matcher, "c"); 2315 check(matcher, "x"); 2316 if (matcher.find()) 2317 failCount++; 2318 2319 pattern = Pattern.compile("^\\w"); 2320 matcher = pattern.matcher("abc##x"); 2321 check(matcher, "a"); 2322 if (matcher.find()) 2323 failCount++; 2324 2325 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2326 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2327 check(matcher, "abc"); 2328 if (matcher.find()) 2329 failCount++; 2330 2331 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2332 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2333 check(matcher, "abc"); 2334 check(matcher, "jkl"); 2335 if (matcher.find()) 2336 failCount++; 2337 2338 pattern = Pattern.compile("^", Pattern.MULTILINE); 2339 matcher = pattern.matcher("this is some text"); 2340 String result = matcher.replaceAll("X"); 2341 if 
(!result.equals("Xthis is some text")) 2342 failCount++; 2343 2344 pattern = Pattern.compile("^"); 2345 matcher = pattern.matcher("this is some text"); 2346 result = matcher.replaceAll("X"); 2347 if (!result.equals("Xthis is some text")) 2348 failCount++; 2349 2350 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2351 matcher = pattern.matcher("this is some text\n"); 2352 result = matcher.replaceAll("X"); 2353 if (!result.equals("Xthis is some text\n")) 2354 failCount++; 2355 2356 report("Caret"); 2357 } 2358 2359 private static void groupCaptureTest() throws Exception { 2360 // Independent group 2361 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2362 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2363 matcher.find(); 2364 try { 2365 String blah = matcher.group(1); 2366 failCount++; 2367 } catch (IndexOutOfBoundsException ioobe) { 2368 // Good result 2369 } 2370 // Pure group 2371 pattern = Pattern.compile("x+(?:y+)z+"); 2372 matcher = pattern.matcher("xxxyyyzzz"); 2373 matcher.find(); 2374 try { 2375 String blah = matcher.group(1); 2376 failCount++; 2377 } catch (IndexOutOfBoundsException ioobe) { 2378 // Good result 2379 } 2380 2381 // Supplementary character tests 2382 // Independent group 2383 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2384 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2385 matcher.find(); 2386 try { 2387 String blah = matcher.group(1); 2388 failCount++; 2389 } catch (IndexOutOfBoundsException ioobe) { 2390 // Good result 2391 } 2392 // Pure group 2393 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2394 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2395 matcher.find(); 2396 try { 2397 String blah = matcher.group(1); 2398 failCount++; 2399 } catch (IndexOutOfBoundsException ioobe) { 2400 // Good result 2401 } 2402 2403 report("GroupCapture"); 2404 } 2405 2406 private static void backRefTest() throws Exception { 2407 Pattern pattern = Pattern.compile("(a*)bc\\1"); 
2408 check(pattern, "zzzaabcazzz", true); 2409 2410 pattern = Pattern.compile("(a*)bc\\1"); 2411 check(pattern, "zzzaabcaazzz", true); 2412 2413 pattern = Pattern.compile("(abc)(def)\\1"); 2414 check(pattern, "abcdefabc", true); 2415 2416 pattern = Pattern.compile("(abc)(def)\\3"); 2417 check(pattern, "abcdefabc", false); 2418 2419 try { 2420 for (int i = 1; i < 10; i++) { 2421 // Make sure backref 1-9 are always accepted 2422 pattern = Pattern.compile("abcdef\\" + i); 2423 // and fail to match if the target group does not exit 2424 check(pattern, "abcdef", false); 2425 } 2426 } catch(PatternSyntaxException e) { 2427 failCount++; 2428 } 2429 2430 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2431 check(pattern, "abcdefghija", false); 2432 check(pattern, "abcdefghija1", true); 2433 2434 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2435 check(pattern, "abcdefghijkk", true); 2436 2437 pattern = Pattern.compile("(a)bcdefghij\\11"); 2438 check(pattern, "abcdefghija1", true); 2439 2440 // Supplementary character tests 2441 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2442 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2443 2444 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2445 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2446 2447 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2448 check(pattern, toSupplementaries("abcdefabc"), true); 2449 2450 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2451 check(pattern, toSupplementaries("abcdefabc"), false); 2452 2453 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2454 check(pattern, toSupplementaries("abcdefghija"), false); 2455 check(pattern, toSupplementaries("abcdefghija1"), true); 2456 2457 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2458 check(pattern, toSupplementaries("abcdefghijkk"), true); 2459 2460 report("BackRef"); 
2461 } 2462 2463 /** 2464 * Unicode Technical Report #18, section 2.6 End of Line 2465 * There is no empty line to be matched in the sequence \u000D\u000A 2466 * but there is an empty line in the sequence \u000A\u000D. 2467 */ 2468 private static void anchorTest() throws Exception { 2469 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2470 Matcher m = p.matcher("blah1\r\nblah2"); 2471 m.find(); 2472 m.find(); 2473 if (!m.group().equals("blah2")) 2474 failCount++; 2475 2476 m.reset("blah1\n\rblah2"); 2477 m.find(); 2478 m.find(); 2479 m.find(); 2480 if (!m.group().equals("blah2")) 2481 failCount++; 2482 2483 // Test behavior of $ with \r\n at end of input 2484 p = Pattern.compile(".+$"); 2485 m = p.matcher("blah1\r\n"); 2486 if (!m.find()) 2487 failCount++; 2488 if (!m.group().equals("blah1")) 2489 failCount++; 2490 if (m.find()) 2491 failCount++; 2492 2493 // Test behavior of $ with \r\n at end of input in multiline 2494 p = Pattern.compile(".+$", Pattern.MULTILINE); 2495 m = p.matcher("blah1\r\n"); 2496 if (!m.find()) 2497 failCount++; 2498 if (m.find()) 2499 failCount++; 2500 2501 // Test for $ recognition of \u0085 for bug 4527731 2502 p = Pattern.compile(".+$", Pattern.MULTILINE); 2503 m = p.matcher("blah1\u0085"); 2504 if (!m.find()) 2505 failCount++; 2506 2507 // Supplementary character test 2508 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2509 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2510 m.find(); 2511 m.find(); 2512 if (!m.group().equals(toSupplementaries("blah2"))) 2513 failCount++; 2514 2515 m.reset(toSupplementaries("blah1\n\rblah2")); 2516 m.find(); 2517 m.find(); 2518 m.find(); 2519 if (!m.group().equals(toSupplementaries("blah2"))) 2520 failCount++; 2521 2522 // Test behavior of $ with \r\n at end of input 2523 p = Pattern.compile(".+$"); 2524 m = p.matcher(toSupplementaries("blah1\r\n")); 2525 if (!m.find()) 2526 failCount++; 2527 if (!m.group().equals(toSupplementaries("blah1"))) 2528 failCount++; 2529 if (m.find()) 2530 
failCount++; 2531 2532 // Test behavior of $ with \r\n at end of input in multiline 2533 p = Pattern.compile(".+$", Pattern.MULTILINE); 2534 m = p.matcher(toSupplementaries("blah1\r\n")); 2535 if (!m.find()) 2536 failCount++; 2537 if (m.find()) 2538 failCount++; 2539 2540 // Test for $ recognition of \u0085 for bug 4527731 2541 p = Pattern.compile(".+$", Pattern.MULTILINE); 2542 m = p.matcher(toSupplementaries("blah1\u0085")); 2543 if (!m.find()) 2544 failCount++; 2545 2546 report("Anchors"); 2547 } 2548 2549 /** 2550 * A basic sanity test of Matcher.lookingAt(). 2551 */ 2552 private static void lookingAtTest() throws Exception { 2553 Pattern p = Pattern.compile("(ab)(c*)"); 2554 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2555 2556 if (!m.lookingAt()) 2557 failCount++; 2558 2559 if (!m.group().equals(m.group(0))) 2560 failCount++; 2561 2562 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2563 if (m.lookingAt()) 2564 failCount++; 2565 2566 // Supplementary character test 2567 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2568 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2569 2570 if (!m.lookingAt()) 2571 failCount++; 2572 2573 if (!m.group().equals(m.group(0))) 2574 failCount++; 2575 2576 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2577 if (m.lookingAt()) 2578 failCount++; 2579 2580 report("Looking At"); 2581 } 2582 2583 /** 2584 * A basic sanity test of Matcher.matches(). 
2585 */ 2586 private static void matchesTest() throws Exception { 2587 // matches() 2588 Pattern p = Pattern.compile("ulb(c*)"); 2589 Matcher m = p.matcher("ulbcccccc"); 2590 if (!m.matches()) 2591 failCount++; 2592 2593 // find() but not matches() 2594 m.reset("zzzulbcccccc"); 2595 if (m.matches()) 2596 failCount++; 2597 2598 // lookingAt() but not matches() 2599 m.reset("ulbccccccdef"); 2600 if (m.matches()) 2601 failCount++; 2602 2603 // matches() 2604 p = Pattern.compile("a|ad"); 2605 m = p.matcher("ad"); 2606 if (!m.matches()) 2607 failCount++; 2608 2609 // Supplementary character test 2610 // matches() 2611 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2612 m = p.matcher(toSupplementaries("ulbcccccc")); 2613 if (!m.matches()) 2614 failCount++; 2615 2616 // find() but not matches() 2617 m.reset(toSupplementaries("zzzulbcccccc")); 2618 if (m.matches()) 2619 failCount++; 2620 2621 // lookingAt() but not matches() 2622 m.reset(toSupplementaries("ulbccccccdef")); 2623 if (m.matches()) 2624 failCount++; 2625 2626 // matches() 2627 p = Pattern.compile(toSupplementaries("a|ad")); 2628 m = p.matcher(toSupplementaries("ad")); 2629 if (!m.matches()) 2630 failCount++; 2631 2632 report("Matches"); 2633 } 2634 2635 /** 2636 * A basic sanity test of Pattern.matches(). 
2637 */ 2638 private static void patternMatchesTest() throws Exception { 2639 // matches() 2640 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2641 toSupplementaries("ulbcccccc"))) 2642 failCount++; 2643 2644 // find() but not matches() 2645 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2646 toSupplementaries("zzzulbcccccc"))) 2647 failCount++; 2648 2649 // lookingAt() but not matches() 2650 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2651 toSupplementaries("ulbccccccdef"))) 2652 failCount++; 2653 2654 // Supplementary character test 2655 // matches() 2656 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2657 toSupplementaries("ulbcccccc"))) 2658 failCount++; 2659 2660 // find() but not matches() 2661 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2662 toSupplementaries("zzzulbcccccc"))) 2663 failCount++; 2664 2665 // lookingAt() but not matches() 2666 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2667 toSupplementaries("ulbccccccdef"))) 2668 failCount++; 2669 2670 report("Pattern Matches"); 2671 } 2672 2673 /** 2674 * Canonical equivalence testing. Tests the ability of the engine 2675 * to match sequences that are not explicitly specified in the 2676 * pattern when they are considered equivalent by the Unicode Standard. 
2677 */ 2678 private static void ceTest() throws Exception { 2679 // Decomposed char outside char classes 2680 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2681 Matcher m = p.matcher("test\u00e5"); 2682 if (!m.matches()) 2683 failCount++; 2684 2685 m.reset("testa\u030a"); 2686 if (!m.matches()) 2687 failCount++; 2688 2689 // Composed char outside char classes 2690 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2691 m = p.matcher("test\u00e5"); 2692 if (!m.matches()) 2693 failCount++; 2694 2695 m.reset("testa\u030a"); 2696 if (!m.find()) 2697 failCount++; 2698 2699 // Decomposed char inside a char class 2700 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2701 m = p.matcher("test\u00e5"); 2702 if (!m.find()) 2703 failCount++; 2704 2705 m.reset("testa\u030a"); 2706 if (!m.find()) 2707 failCount++; 2708 2709 // Composed char inside a char class 2710 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2711 m = p.matcher("test\u00e5"); 2712 if (!m.find()) 2713 failCount++; 2714 2715 m.reset("testa\u0300"); 2716 if (!m.find()) 2717 failCount++; 2718 2719 m.reset("testa\u030a"); 2720 if (!m.find()) 2721 failCount++; 2722 2723 // Marks that cannot legally change order and be equivalent 2724 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2725 check(p, "testa\u0308\u0300", true); 2726 check(p, "testa\u0300\u0308", false); 2727 2728 // Marks that can legally change order and be equivalent 2729 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2730 check(p, "testa\u0308\u0323", true); 2731 check(p, "testa\u0323\u0308", true); 2732 2733 // Test all equivalences of the sequence a\u0308\u0323\u0300 2734 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2735 check(p, "testa\u0308\u0323\u0300", true); 2736 check(p, "testa\u0323\u0308\u0300", true); 2737 check(p, "testa\u0308\u0300\u0323", true); 2738 check(p, "test\u00e4\u0323\u0300", true); 2739 check(p, "test\u00e4\u0300\u0323", true); 2740 
2741 Object[][] data = new Object[][] { 2742 2743 // JDK-4867170 2744 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true }, 2745 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true }, 2746 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true }, 2747 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true }, 2748 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true }, 2749 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true }, 2750 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true }, 2751 2752 { "\\p{IsGreek}", "ab\u1f80cd", "f", true }, 2753 { "\\p{IsGreek}", "ab\u1f81cd", "f", true }, 2754 { "\\p{IsGreek}", "ab\u1f82cd", "f", true }, 2755 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true }, 2756 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true }, 2757 2758 // backtracking, force to match "\u1f80", instead of \u1f82" 2759 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true }, 2760 2761 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true }, 2762 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true }, 2763 2764 { "[^\u1f80-\u1f82]","\u1f81", "m", false }, 2765 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false }, 2766 { "[^\u1f01\u0345]", "\u1f81", "f", false }, 2767 2768 { "[^\u1f81]+", "\u1f80\u1f82", "f", true }, 2769 { "[\u1f80]", "ab\u1f80cd", "f", true }, 2770 { "\u1f80", "ab\u1f80cd", "f", true }, 2771 { "\u1f00\u0345\u0300", "\u1f82", "m", true }, 2772 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true }, 2773 { "\u1f82", "\u1f00\u0345\u0300", "m", true }, 2774 { "\u1f82", "\u1f80\u0300", "m", true }, 2775 2776 // JDK-7080302 # compile failed 2777 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true}, 2778 2779 // JDK-6728861, same cause as above one 2780 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true}, 2781 2782 // JDK-6995635 2783 { "(\u00e9)", "e\u0301", "m", true }, 2784 2785 // JDK-6736245 2786 // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc 2787 { "\u2ADC", "\u2ADC", "m", true}, // NFC 2788 { "\u2ADC", "\u2ADD\u0338", "m", 
true}, // NFD 2789 2790 // 4916384. 2791 // Decomposed hangul (jamos) works inside clazz 2792 { "[\u1100\u1161]", "\u1100\u1161", "m", true}, 2793 { "[\u1100\u1161]", "\uac00", "m", true}, 2794 2795 { "[\uac00]", "\u1100\u1161", "m", true}, 2796 { "[\uac00]", "\uac00", "m", true}, 2797 2798 // Decomposed hangul (jamos) 2799 { "\u1100\u1161", "\u1100\u1161", "m", true}, 2800 { "\u1100\u1161", "\uac00", "m", true}, 2801 2802 // Composed hangul 2803 { "\uac00", "\u1100\u1161", "m", true }, 2804 { "\uac00", "\uac00", "m", true }, 2805 2806 /* Need a NFDSlice to nfd the source to solve this issue 2807 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2808 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165> 2809 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2810 2811 // Decomposed supplementary outside char classes 2812 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2813 // Composed supplementary outside char classes 2814 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2815 */ 2816 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2817 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2818 2819 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true }, 2820 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2821 }; 2822 2823 int failCount = 0; 2824 for (Object[] d : data) { 2825 String pn = (String)d[0]; 2826 String tt = (String)d[1]; 2827 boolean isFind = "f".equals(((String)d[2])); 2828 boolean expected = (boolean)d[3]; 2829 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find() 2830 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches(); 2831 if (ret != expected) { 2832 failCount++; 2833 continue; 2834 } 2835 } 2836 report("Canonical Equivalence"); 2837 } 2838 2839 /** 2840 * A basic sanity test of Matcher.replaceAll(). 
2841 */ 2842 private static void globalSubstitute() throws Exception { 2843 // Global substitution with a literal 2844 Pattern p = Pattern.compile("(ab)(c*)"); 2845 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2846 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2847 failCount++; 2848 2849 m.reset("zzzabccczzzabcczzzabccczzz"); 2850 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2851 failCount++; 2852 2853 // Global substitution with groups 2854 m.reset("zzzabccczzzabcczzzabccczzz"); 2855 String result = m.replaceAll("$1"); 2856 if (!result.equals("zzzabzzzabzzzabzzz")) 2857 failCount++; 2858 2859 // Supplementary character test 2860 // Global substitution with a literal 2861 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2862 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2863 if (!m.replaceAll(toSupplementaries("test")). 2864 equals(toSupplementaries("testzzztestzzztest"))) 2865 failCount++; 2866 2867 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2868 if (!m.replaceAll(toSupplementaries("test")). 2869 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2870 failCount++; 2871 2872 // Global substitution with groups 2873 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2874 result = m.replaceAll("$1"); 2875 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2876 failCount++; 2877 2878 report("Global Substitution"); 2879 } 2880 2881 /** 2882 * Tests the usage of Matcher.appendReplacement() with literal 2883 * and group substitutions. 
2884 */ 2885 private static void stringbufferSubstitute() throws Exception { 2886 // SB substitution with literal 2887 String blah = "zzzblahzzz"; 2888 Pattern p = Pattern.compile("blah"); 2889 Matcher m = p.matcher(blah); 2890 StringBuffer result = new StringBuffer(); 2891 try { 2892 m.appendReplacement(result, "blech"); 2893 failCount++; 2894 } catch (IllegalStateException e) { 2895 } 2896 m.find(); 2897 m.appendReplacement(result, "blech"); 2898 if (!result.toString().equals("zzzblech")) 2899 failCount++; 2900 2901 m.appendTail(result); 2902 if (!result.toString().equals("zzzblechzzz")) 2903 failCount++; 2904 2905 // SB substitution with groups 2906 blah = "zzzabcdzzz"; 2907 p = Pattern.compile("(ab)(cd)*"); 2908 m = p.matcher(blah); 2909 result = new StringBuffer(); 2910 try { 2911 m.appendReplacement(result, "$1"); 2912 failCount++; 2913 } catch (IllegalStateException e) { 2914 } 2915 m.find(); 2916 m.appendReplacement(result, "$1"); 2917 if (!result.toString().equals("zzzab")) 2918 failCount++; 2919 2920 m.appendTail(result); 2921 if (!result.toString().equals("zzzabzzz")) 2922 failCount++; 2923 2924 // SB substitution with 3 groups 2925 blah = "zzzabcdcdefzzz"; 2926 p = Pattern.compile("(ab)(cd)*(ef)"); 2927 m = p.matcher(blah); 2928 result = new StringBuffer(); 2929 try { 2930 m.appendReplacement(result, "$1w$2w$3"); 2931 failCount++; 2932 } catch (IllegalStateException e) { 2933 } 2934 m.find(); 2935 m.appendReplacement(result, "$1w$2w$3"); 2936 if (!result.toString().equals("zzzabwcdwef")) 2937 failCount++; 2938 2939 m.appendTail(result); 2940 if (!result.toString().equals("zzzabwcdwefzzz")) 2941 failCount++; 2942 2943 // SB substitution with groups and three matches 2944 // skipping middle match 2945 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2946 p = Pattern.compile("(ab)(cd*)"); 2947 m = p.matcher(blah); 2948 result = new StringBuffer(); 2949 try { 2950 m.appendReplacement(result, "$1"); 2951 failCount++; 2952 } catch (IllegalStateException e) { 2953 } 2954 
m.find(); 2955 m.appendReplacement(result, "$1"); 2956 if (!result.toString().equals("zzzab")) 2957 failCount++; 2958 2959 m.find(); 2960 m.find(); 2961 m.appendReplacement(result, "$2"); 2962 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2963 failCount++; 2964 2965 m.appendTail(result); 2966 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2967 failCount++; 2968 2969 // Check to make sure escaped $ is ignored 2970 blah = "zzzabcdcdefzzz"; 2971 p = Pattern.compile("(ab)(cd)*(ef)"); 2972 m = p.matcher(blah); 2973 result = new StringBuffer(); 2974 m.find(); 2975 m.appendReplacement(result, "$1w\\$2w$3"); 2976 if (!result.toString().equals("zzzabw$2wef")) 2977 failCount++; 2978 2979 m.appendTail(result); 2980 if (!result.toString().equals("zzzabw$2wefzzz")) 2981 failCount++; 2982 2983 // Check to make sure a reference to nonexistent group causes error 2984 blah = "zzzabcdcdefzzz"; 2985 p = Pattern.compile("(ab)(cd)*(ef)"); 2986 m = p.matcher(blah); 2987 result = new StringBuffer(); 2988 m.find(); 2989 try { 2990 m.appendReplacement(result, "$1w$5w$3"); 2991 failCount++; 2992 } catch (IndexOutOfBoundsException ioobe) { 2993 // Correct result 2994 } 2995 2996 // Check double digit group references 2997 blah = "zzz123456789101112zzz"; 2998 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2999 m = p.matcher(blah); 3000 result = new StringBuffer(); 3001 m.find(); 3002 m.appendReplacement(result, "$1w$11w$3"); 3003 if (!result.toString().equals("zzz1w11w3")) 3004 failCount++; 3005 3006 // Check to make sure it backs off $15 to $1 if only three groups 3007 blah = "zzzabcdcdefzzz"; 3008 p = Pattern.compile("(ab)(cd)*(ef)"); 3009 m = p.matcher(blah); 3010 result = new StringBuffer(); 3011 m.find(); 3012 m.appendReplacement(result, "$1w$15w$3"); 3013 if (!result.toString().equals("zzzabwab5wef")) 3014 failCount++; 3015 3016 3017 // Supplementary character test 3018 // SB substitution with literal 3019 blah = toSupplementaries("zzzblahzzz"); 3020 p = 
Pattern.compile(toSupplementaries("blah")); 3021 m = p.matcher(blah); 3022 result = new StringBuffer(); 3023 try { 3024 m.appendReplacement(result, toSupplementaries("blech")); 3025 failCount++; 3026 } catch (IllegalStateException e) { 3027 } 3028 m.find(); 3029 m.appendReplacement(result, toSupplementaries("blech")); 3030 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3031 failCount++; 3032 3033 m.appendTail(result); 3034 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3035 failCount++; 3036 3037 // SB substitution with groups 3038 blah = toSupplementaries("zzzabcdzzz"); 3039 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3040 m = p.matcher(blah); 3041 result = new StringBuffer(); 3042 try { 3043 m.appendReplacement(result, "$1"); 3044 failCount++; 3045 } catch (IllegalStateException e) { 3046 } 3047 m.find(); 3048 m.appendReplacement(result, "$1"); 3049 if (!result.toString().equals(toSupplementaries("zzzab"))) 3050 failCount++; 3051 3052 m.appendTail(result); 3053 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3054 failCount++; 3055 3056 // SB substitution with 3 groups 3057 blah = toSupplementaries("zzzabcdcdefzzz"); 3058 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3059 m = p.matcher(blah); 3060 result = new StringBuffer(); 3061 try { 3062 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3063 failCount++; 3064 } catch (IllegalStateException e) { 3065 } 3066 m.find(); 3067 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3068 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3069 failCount++; 3070 3071 m.appendTail(result); 3072 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3073 failCount++; 3074 3075 // SB substitution with groups and three matches 3076 // skipping middle match 3077 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3078 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3079 m = p.matcher(blah); 3080 result = new 
StringBuffer(); 3081 try { 3082 m.appendReplacement(result, "$1"); 3083 failCount++; 3084 } catch (IllegalStateException e) { 3085 } 3086 m.find(); 3087 m.appendReplacement(result, "$1"); 3088 if (!result.toString().equals(toSupplementaries("zzzab"))) 3089 failCount++; 3090 3091 m.find(); 3092 m.find(); 3093 m.appendReplacement(result, "$2"); 3094 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3095 failCount++; 3096 3097 m.appendTail(result); 3098 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3099 failCount++; 3100 3101 // Check to make sure escaped $ is ignored 3102 blah = toSupplementaries("zzzabcdcdefzzz"); 3103 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3104 m = p.matcher(blah); 3105 result = new StringBuffer(); 3106 m.find(); 3107 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3108 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3109 failCount++; 3110 3111 m.appendTail(result); 3112 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3113 failCount++; 3114 3115 // Check to make sure a reference to nonexistent group causes error 3116 blah = toSupplementaries("zzzabcdcdefzzz"); 3117 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3118 m = p.matcher(blah); 3119 result = new StringBuffer(); 3120 m.find(); 3121 try { 3122 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3123 failCount++; 3124 } catch (IndexOutOfBoundsException ioobe) { 3125 // Correct result 3126 } 3127 3128 // Check double digit group references 3129 blah = toSupplementaries("zzz123456789101112zzz"); 3130 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3131 m = p.matcher(blah); 3132 result = new StringBuffer(); 3133 m.find(); 3134 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3135 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3136 failCount++; 3137 3138 // Check to make sure it backs off $15 to $1 if only three groups 
3139 blah = toSupplementaries("zzzabcdcdefzzz"); 3140 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3141 m = p.matcher(blah); 3142 result = new StringBuffer(); 3143 m.find(); 3144 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3145 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3146 failCount++; 3147 3148 // Check nothing has been appended into the output buffer if 3149 // the replacement string triggers IllegalArgumentException. 3150 p = Pattern.compile("(abc)"); 3151 m = p.matcher("abcd"); 3152 result = new StringBuffer(); 3153 m.find(); 3154 try { 3155 m.appendReplacement(result, ("xyz$g")); 3156 failCount++; 3157 } catch (IllegalArgumentException iae) { 3158 if (result.length() != 0) 3159 failCount++; 3160 } 3161 3162 report("SB Substitution"); 3163 } 3164 3165 /** 3166 * Tests the usage of Matcher.appendReplacement() with literal 3167 * and group substitutions. 3168 */ 3169 private static void stringbuilderSubstitute() throws Exception { 3170 // SB substitution with literal 3171 String blah = "zzzblahzzz"; 3172 Pattern p = Pattern.compile("blah"); 3173 Matcher m = p.matcher(blah); 3174 StringBuilder result = new StringBuilder(); 3175 try { 3176 m.appendReplacement(result, "blech"); 3177 failCount++; 3178 } catch (IllegalStateException e) { 3179 } 3180 m.find(); 3181 m.appendReplacement(result, "blech"); 3182 if (!result.toString().equals("zzzblech")) 3183 failCount++; 3184 3185 m.appendTail(result); 3186 if (!result.toString().equals("zzzblechzzz")) 3187 failCount++; 3188 3189 // SB substitution with groups 3190 blah = "zzzabcdzzz"; 3191 p = Pattern.compile("(ab)(cd)*"); 3192 m = p.matcher(blah); 3193 result = new StringBuilder(); 3194 try { 3195 m.appendReplacement(result, "$1"); 3196 failCount++; 3197 } catch (IllegalStateException e) { 3198 } 3199 m.find(); 3200 m.appendReplacement(result, "$1"); 3201 if (!result.toString().equals("zzzab")) 3202 failCount++; 3203 3204 m.appendTail(result); 3205 if 
(!result.toString().equals("zzzabzzz")) 3206 failCount++; 3207 3208 // SB substitution with 3 groups 3209 blah = "zzzabcdcdefzzz"; 3210 p = Pattern.compile("(ab)(cd)*(ef)"); 3211 m = p.matcher(blah); 3212 result = new StringBuilder(); 3213 try { 3214 m.appendReplacement(result, "$1w$2w$3"); 3215 failCount++; 3216 } catch (IllegalStateException e) { 3217 } 3218 m.find(); 3219 m.appendReplacement(result, "$1w$2w$3"); 3220 if (!result.toString().equals("zzzabwcdwef")) 3221 failCount++; 3222 3223 m.appendTail(result); 3224 if (!result.toString().equals("zzzabwcdwefzzz")) 3225 failCount++; 3226 3227 // SB substitution with groups and three matches 3228 // skipping middle match 3229 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3230 p = Pattern.compile("(ab)(cd*)"); 3231 m = p.matcher(blah); 3232 result = new StringBuilder(); 3233 try { 3234 m.appendReplacement(result, "$1"); 3235 failCount++; 3236 } catch (IllegalStateException e) { 3237 } 3238 m.find(); 3239 m.appendReplacement(result, "$1"); 3240 if (!result.toString().equals("zzzab")) 3241 failCount++; 3242 3243 m.find(); 3244 m.find(); 3245 m.appendReplacement(result, "$2"); 3246 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3247 failCount++; 3248 3249 m.appendTail(result); 3250 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3251 failCount++; 3252 3253 // Check to make sure escaped $ is ignored 3254 blah = "zzzabcdcdefzzz"; 3255 p = Pattern.compile("(ab)(cd)*(ef)"); 3256 m = p.matcher(blah); 3257 result = new StringBuilder(); 3258 m.find(); 3259 m.appendReplacement(result, "$1w\\$2w$3"); 3260 if (!result.toString().equals("zzzabw$2wef")) 3261 failCount++; 3262 3263 m.appendTail(result); 3264 if (!result.toString().equals("zzzabw$2wefzzz")) 3265 failCount++; 3266 3267 // Check to make sure a reference to nonexistent group causes error 3268 blah = "zzzabcdcdefzzz"; 3269 p = Pattern.compile("(ab)(cd)*(ef)"); 3270 m = p.matcher(blah); 3271 result = new StringBuilder(); 3272 m.find(); 3273 try { 3274 
m.appendReplacement(result, "$1w$5w$3"); 3275 failCount++; 3276 } catch (IndexOutOfBoundsException ioobe) { 3277 // Correct result 3278 } 3279 3280 // Check double digit group references 3281 blah = "zzz123456789101112zzz"; 3282 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3283 m = p.matcher(blah); 3284 result = new StringBuilder(); 3285 m.find(); 3286 m.appendReplacement(result, "$1w$11w$3"); 3287 if (!result.toString().equals("zzz1w11w3")) 3288 failCount++; 3289 3290 // Check to make sure it backs off $15 to $1 if only three groups 3291 blah = "zzzabcdcdefzzz"; 3292 p = Pattern.compile("(ab)(cd)*(ef)"); 3293 m = p.matcher(blah); 3294 result = new StringBuilder(); 3295 m.find(); 3296 m.appendReplacement(result, "$1w$15w$3"); 3297 if (!result.toString().equals("zzzabwab5wef")) 3298 failCount++; 3299 3300 3301 // Supplementary character test 3302 // SB substitution with literal 3303 blah = toSupplementaries("zzzblahzzz"); 3304 p = Pattern.compile(toSupplementaries("blah")); 3305 m = p.matcher(blah); 3306 result = new StringBuilder(); 3307 try { 3308 m.appendReplacement(result, toSupplementaries("blech")); 3309 failCount++; 3310 } catch (IllegalStateException e) { 3311 } 3312 m.find(); 3313 m.appendReplacement(result, toSupplementaries("blech")); 3314 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3315 failCount++; 3316 m.appendTail(result); 3317 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3318 failCount++; 3319 3320 // SB substitution with groups 3321 blah = toSupplementaries("zzzabcdzzz"); 3322 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3323 m = p.matcher(blah); 3324 result = new StringBuilder(); 3325 try { 3326 m.appendReplacement(result, "$1"); 3327 failCount++; 3328 } catch (IllegalStateException e) { 3329 } 3330 m.find(); 3331 m.appendReplacement(result, "$1"); 3332 if (!result.toString().equals(toSupplementaries("zzzab"))) 3333 failCount++; 3334 3335 m.appendTail(result); 3336 if 
(!result.toString().equals(toSupplementaries("zzzabzzz"))) 3337 failCount++; 3338 3339 // SB substitution with 3 groups 3340 blah = toSupplementaries("zzzabcdcdefzzz"); 3341 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3342 m = p.matcher(blah); 3343 result = new StringBuilder(); 3344 try { 3345 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3346 failCount++; 3347 } catch (IllegalStateException e) { 3348 } 3349 m.find(); 3350 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3351 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3352 failCount++; 3353 3354 m.appendTail(result); 3355 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3356 failCount++; 3357 3358 // SB substitution with groups and three matches 3359 // skipping middle match 3360 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3361 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3362 m = p.matcher(blah); 3363 result = new StringBuilder(); 3364 try { 3365 m.appendReplacement(result, "$1"); 3366 failCount++; 3367 } catch (IllegalStateException e) { 3368 } 3369 m.find(); 3370 m.appendReplacement(result, "$1"); 3371 if (!result.toString().equals(toSupplementaries("zzzab"))) 3372 failCount++; 3373 3374 m.find(); 3375 m.find(); 3376 m.appendReplacement(result, "$2"); 3377 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3378 failCount++; 3379 3380 m.appendTail(result); 3381 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3382 failCount++; 3383 3384 // Check to make sure escaped $ is ignored 3385 blah = toSupplementaries("zzzabcdcdefzzz"); 3386 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3387 m = p.matcher(blah); 3388 result = new StringBuilder(); 3389 m.find(); 3390 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3391 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3392 failCount++; 3393 3394 m.appendTail(result); 3395 if 
(!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3396 failCount++; 3397 3398 // Check to make sure a reference to nonexistent group causes error 3399 blah = toSupplementaries("zzzabcdcdefzzz"); 3400 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3401 m = p.matcher(blah); 3402 result = new StringBuilder(); 3403 m.find(); 3404 try { 3405 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3406 failCount++; 3407 } catch (IndexOutOfBoundsException ioobe) { 3408 // Correct result 3409 } 3410 // Check double digit group references 3411 blah = toSupplementaries("zzz123456789101112zzz"); 3412 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3413 m = p.matcher(blah); 3414 result = new StringBuilder(); 3415 m.find(); 3416 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3417 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3418 failCount++; 3419 3420 // Check to make sure it backs off $15 to $1 if only three groups 3421 blah = toSupplementaries("zzzabcdcdefzzz"); 3422 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3423 m = p.matcher(blah); 3424 result = new StringBuilder(); 3425 m.find(); 3426 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3427 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3428 failCount++; 3429 // Check nothing has been appended into the output buffer if 3430 // the replacement string triggers IllegalArgumentException. 3431 p = Pattern.compile("(abc)"); 3432 m = p.matcher("abcd"); 3433 result = new StringBuilder(); 3434 m.find(); 3435 try { 3436 m.appendReplacement(result, ("xyz$g")); 3437 failCount++; 3438 } catch (IllegalArgumentException iae) { 3439 if (result.length() != 0) 3440 failCount++; 3441 } 3442 report("SB Substitution 2"); 3443 } 3444 3445 /* 3446 * 5 groups of characters are created to make a substitution string. 
3447 * A base string will be created including random lead chars, the 3448 * substitution string, and random trailing chars. 3449 * A pattern containing the 5 groups is searched for and replaced with: 3450 * random group + random string + random group. 3451 * The results are checked for correctness. 3452 */ 3453 private static void substitutionBasher() { 3454 for (int runs = 0; runs<1000; runs++) { 3455 // Create a base string to work in 3456 int leadingChars = generator.nextInt(10); 3457 StringBuffer baseBuffer = new StringBuffer(100); 3458 String leadingString = getRandomAlphaString(leadingChars); 3459 baseBuffer.append(leadingString); 3460 3461 // Create 5 groups of random number of random chars 3462 // Create the string to substitute 3463 // Create the pattern string to search for 3464 StringBuffer bufferToSub = new StringBuffer(25); 3465 StringBuffer bufferToPat = new StringBuffer(50); 3466 String[] groups = new String[5]; 3467 for(int i=0; i<5; i++) { 3468 int aGroupSize = generator.nextInt(5)+1; 3469 groups[i] = getRandomAlphaString(aGroupSize); 3470 bufferToSub.append(groups[i]); 3471 bufferToPat.append('('); 3472 bufferToPat.append(groups[i]); 3473 bufferToPat.append(')'); 3474 } 3475 String stringToSub = bufferToSub.toString(); 3476 String pattern = bufferToPat.toString(); 3477 3478 // Place sub string into working string at random index 3479 baseBuffer.append(stringToSub); 3480 3481 // Append random chars to end 3482 int trailingChars = generator.nextInt(10); 3483 String trailingString = getRandomAlphaString(trailingChars); 3484 baseBuffer.append(trailingString); 3485 String baseString = baseBuffer.toString(); 3486 3487 // Create test pattern and matcher 3488 Pattern p = Pattern.compile(pattern); 3489 Matcher m = p.matcher(baseString); 3490 3491 // Reject candidate if pattern happens to start early 3492 m.find(); 3493 if (m.start() < leadingChars) 3494 continue; 3495 3496 // Reject candidate if more than one match 3497 if (m.find()) 3498 continue; 3499 
3500 // Construct a replacement string with : 3501 // random group + random string + random group 3502 StringBuffer bufferToRep = new StringBuffer(); 3503 int groupIndex1 = generator.nextInt(5); 3504 bufferToRep.append("$" + (groupIndex1 + 1)); 3505 String randomMidString = getRandomAlphaString(5); 3506 bufferToRep.append(randomMidString); 3507 int groupIndex2 = generator.nextInt(5); 3508 bufferToRep.append("$" + (groupIndex2 + 1)); 3509 String replacement = bufferToRep.toString(); 3510 3511 // Do the replacement 3512 String result = m.replaceAll(replacement); 3513 3514 // Construct expected result 3515 StringBuffer bufferToRes = new StringBuffer(); 3516 bufferToRes.append(leadingString); 3517 bufferToRes.append(groups[groupIndex1]); 3518 bufferToRes.append(randomMidString); 3519 bufferToRes.append(groups[groupIndex2]); 3520 bufferToRes.append(trailingString); 3521 String expectedResult = bufferToRes.toString(); 3522 3523 // Check results 3524 if (!result.equals(expectedResult)) 3525 failCount++; 3526 } 3527 3528 report("Substitution Basher"); 3529 } 3530 3531 /* 3532 * 5 groups of characters are created to make a substitution string. 3533 * A base string will be created including random lead chars, the 3534 * substitution string, and random trailing chars. 3535 * A pattern containing the 5 groups is searched for and replaced with: 3536 * random group + random string + random group. 3537 * The results are checked for correctness. 
3538 */ 3539 private static void substitutionBasher2() { 3540 for (int runs = 0; runs<1000; runs++) { 3541 // Create a base string to work in 3542 int leadingChars = generator.nextInt(10); 3543 StringBuilder baseBuffer = new StringBuilder(100); 3544 String leadingString = getRandomAlphaString(leadingChars); 3545 baseBuffer.append(leadingString); 3546 3547 // Create 5 groups of random number of random chars 3548 // Create the string to substitute 3549 // Create the pattern string to search for 3550 StringBuilder bufferToSub = new StringBuilder(25); 3551 StringBuilder bufferToPat = new StringBuilder(50); 3552 String[] groups = new String[5]; 3553 for(int i=0; i<5; i++) { 3554 int aGroupSize = generator.nextInt(5)+1; 3555 groups[i] = getRandomAlphaString(aGroupSize); 3556 bufferToSub.append(groups[i]); 3557 bufferToPat.append('('); 3558 bufferToPat.append(groups[i]); 3559 bufferToPat.append(')'); 3560 } 3561 String stringToSub = bufferToSub.toString(); 3562 String pattern = bufferToPat.toString(); 3563 3564 // Place sub string into working string at random index 3565 baseBuffer.append(stringToSub); 3566 3567 // Append random chars to end 3568 int trailingChars = generator.nextInt(10); 3569 String trailingString = getRandomAlphaString(trailingChars); 3570 baseBuffer.append(trailingString); 3571 String baseString = baseBuffer.toString(); 3572 3573 // Create test pattern and matcher 3574 Pattern p = Pattern.compile(pattern); 3575 Matcher m = p.matcher(baseString); 3576 3577 // Reject candidate if pattern happens to start early 3578 m.find(); 3579 if (m.start() < leadingChars) 3580 continue; 3581 3582 // Reject candidate if more than one match 3583 if (m.find()) 3584 continue; 3585 3586 // Construct a replacement string with : 3587 // random group + random string + random group 3588 StringBuilder bufferToRep = new StringBuilder(); 3589 int groupIndex1 = generator.nextInt(5); 3590 bufferToRep.append("$" + (groupIndex1 + 1)); 3591 String randomMidString = 
getRandomAlphaString(5); 3592 bufferToRep.append(randomMidString); 3593 int groupIndex2 = generator.nextInt(5); 3594 bufferToRep.append("$" + (groupIndex2 + 1)); 3595 String replacement = bufferToRep.toString(); 3596 3597 // Do the replacement 3598 String result = m.replaceAll(replacement); 3599 3600 // Construct expected result 3601 StringBuilder bufferToRes = new StringBuilder(); 3602 bufferToRes.append(leadingString); 3603 bufferToRes.append(groups[groupIndex1]); 3604 bufferToRes.append(randomMidString); 3605 bufferToRes.append(groups[groupIndex2]); 3606 bufferToRes.append(trailingString); 3607 String expectedResult = bufferToRes.toString(); 3608 3609 // Check results 3610 if (!result.equals(expectedResult)) { 3611 failCount++; 3612 } 3613 } 3614 3615 report("Substitution Basher 2"); 3616 } 3617 3618 /** 3619 * Checks the handling of some escape sequences that the Pattern 3620 * class should process instead of the java compiler. These are 3621 * not in the file because the escapes should be be processed 3622 * by the Pattern class when the regex is compiled. 3623 */ 3624 private static void escapes() throws Exception { 3625 Pattern p = Pattern.compile("\\043"); 3626 Matcher m = p.matcher("#"); 3627 if (!m.find()) 3628 failCount++; 3629 3630 p = Pattern.compile("\\x23"); 3631 m = p.matcher("#"); 3632 if (!m.find()) 3633 failCount++; 3634 3635 p = Pattern.compile("\\u0023"); 3636 m = p.matcher("#"); 3637 if (!m.find()) 3638 failCount++; 3639 3640 report("Escape sequences"); 3641 } 3642 3643 /** 3644 * Checks the handling of blank input situations. These 3645 * tests are incompatible with my test file format. 
3646 */ 3647 private static void blankInput() throws Exception { 3648 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3649 Matcher m = p.matcher(""); 3650 if (m.find()) 3651 failCount++; 3652 3653 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3654 m = p.matcher(""); 3655 if (!m.find()) 3656 failCount++; 3657 3658 p = Pattern.compile("abc"); 3659 m = p.matcher(""); 3660 if (m.find()) 3661 failCount++; 3662 3663 p = Pattern.compile("a*"); 3664 m = p.matcher(""); 3665 if (!m.find()) 3666 failCount++; 3667 3668 report("Blank input"); 3669 } 3670 3671 /** 3672 * Tests the Boyer-Moore pattern matching of a character sequence 3673 * on randomly generated patterns. 3674 */ 3675 private static void bm() throws Exception { 3676 doBnM('a'); 3677 report("Boyer Moore (ASCII)"); 3678 3679 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3680 report("Boyer Moore (Supplementary)"); 3681 } 3682 3683 private static void doBnM(int baseCharacter) throws Exception { 3684 int achar=0; 3685 3686 for (int i=0; i<100; i++) { 3687 // Create a short pattern to search for 3688 int patternLength = generator.nextInt(7) + 4; 3689 StringBuffer patternBuffer = new StringBuffer(patternLength); 3690 String pattern; 3691 retry: for (;;) { 3692 for (int x=0; x<patternLength; x++) { 3693 int ch = baseCharacter + generator.nextInt(26); 3694 if (Character.isSupplementaryCodePoint(ch)) { 3695 patternBuffer.append(Character.toChars(ch)); 3696 } else { 3697 patternBuffer.append((char)ch); 3698 } 3699 } 3700 pattern = patternBuffer.toString(); 3701 3702 // Avoid patterns that start and end with the same substring 3703 // See JDK-6854417 3704 for (int x=1; x < pattern.length(); x++) { 3705 if (pattern.startsWith(pattern.substring(x))) 3706 continue retry; 3707 } 3708 break; 3709 } 3710 Pattern p = Pattern.compile(pattern); 3711 3712 // Create a buffer with random ASCII chars that does 3713 // not match the sample 3714 String toSearch = null; 3715 StringBuffer s = null; 3716 Matcher m = 
p.matcher(""); 3717 do { 3718 s = new StringBuffer(100); 3719 for (int x=0; x<100; x++) { 3720 int ch = baseCharacter + generator.nextInt(26); 3721 if (Character.isSupplementaryCodePoint(ch)) { 3722 s.append(Character.toChars(ch)); 3723 } else { 3724 s.append((char)ch); 3725 } 3726 } 3727 toSearch = s.toString(); 3728 m.reset(toSearch); 3729 } while (m.find()); 3730 3731 // Insert the pattern at a random spot 3732 int insertIndex = generator.nextInt(99); 3733 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3734 insertIndex++; 3735 s = s.insert(insertIndex, pattern); 3736 toSearch = s.toString(); 3737 3738 // Make sure that the pattern is found 3739 m.reset(toSearch); 3740 if (!m.find()) 3741 failCount++; 3742 3743 // Make sure that the match text is the pattern 3744 if (!m.group().equals(pattern)) 3745 failCount++; 3746 3747 // Make sure match occured at insertion point 3748 if (m.start() != insertIndex) 3749 failCount++; 3750 } 3751 } 3752 3753 /** 3754 * Tests the matching of slices on randomly generated patterns. 3755 * The Boyer-Moore optimization is not done on these patterns 3756 * because it uses unicode case folding. 
3757 */ 3758 private static void slice() throws Exception { 3759 doSlice(Character.MAX_VALUE); 3760 report("Slice"); 3761 3762 doSlice(Character.MAX_CODE_POINT); 3763 report("Slice (Supplementary)"); 3764 } 3765 3766 private static void doSlice(int maxCharacter) throws Exception { 3767 Random generator = new Random(); 3768 int achar=0; 3769 3770 for (int i=0; i<100; i++) { 3771 // Create a short pattern to search for 3772 int patternLength = generator.nextInt(7) + 4; 3773 StringBuffer patternBuffer = new StringBuffer(patternLength); 3774 for (int x=0; x<patternLength; x++) { 3775 int randomChar = 0; 3776 while (!Character.isLetterOrDigit(randomChar)) 3777 randomChar = generator.nextInt(maxCharacter); 3778 if (Character.isSupplementaryCodePoint(randomChar)) { 3779 patternBuffer.append(Character.toChars(randomChar)); 3780 } else { 3781 patternBuffer.append((char) randomChar); 3782 } 3783 } 3784 String pattern = patternBuffer.toString(); 3785 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3786 3787 // Create a buffer with random chars that does not match the sample 3788 String toSearch = null; 3789 StringBuffer s = null; 3790 Matcher m = p.matcher(""); 3791 do { 3792 s = new StringBuffer(100); 3793 for (int x=0; x<100; x++) { 3794 int randomChar = 0; 3795 while (!Character.isLetterOrDigit(randomChar)) 3796 randomChar = generator.nextInt(maxCharacter); 3797 if (Character.isSupplementaryCodePoint(randomChar)) { 3798 s.append(Character.toChars(randomChar)); 3799 } else { 3800 s.append((char) randomChar); 3801 } 3802 } 3803 toSearch = s.toString(); 3804 m.reset(toSearch); 3805 } while (m.find()); 3806 3807 // Insert the pattern at a random spot 3808 int insertIndex = generator.nextInt(99); 3809 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3810 insertIndex++; 3811 s = s.insert(insertIndex, pattern); 3812 toSearch = s.toString(); 3813 3814 // Make sure that the pattern is found 3815 m.reset(toSearch); 3816 if (!m.find()) 3817 failCount++; 3818 3819 // 
Make sure that the match text is the pattern 3820 if (!m.group().equals(pattern)) 3821 failCount++; 3822 3823 // Make sure match occured at insertion point 3824 if (m.start() != insertIndex) 3825 failCount++; 3826 } 3827 } 3828 3829 private static void explainFailure(String pattern, String data, 3830 String expected, String actual) { 3831 System.err.println("----------------------------------------"); 3832 System.err.println("Pattern = "+pattern); 3833 System.err.println("Data = "+data); 3834 System.err.println("Expected = " + expected); 3835 System.err.println("Actual = " + actual); 3836 } 3837 3838 private static void explainFailure(String pattern, String data, 3839 Throwable t) { 3840 System.err.println("----------------------------------------"); 3841 System.err.println("Pattern = "+pattern); 3842 System.err.println("Data = "+data); 3843 t.printStackTrace(System.err); 3844 } 3845 3846 // Testing examples from a file 3847 3848 /** 3849 * Goes through the file "TestCases.txt" and creates many patterns 3850 * described in the file, matching the patterns against input lines in 3851 * the file, and comparing the results against the correct results 3852 * also found in the file. The file format is described in comments 3853 * at the head of the file. 3854 */ 3855 private static void processFile(String fileName) throws Exception { 3856 File testCases = new File(System.getProperty("test.src", "."), 3857 fileName); 3858 FileInputStream in = new FileInputStream(testCases); 3859 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3860 3861 // Process next test case. 
3862 String aLine; 3863 while((aLine = r.readLine()) != null) { 3864 // Read a line for pattern 3865 String patternString = grabLine(r); 3866 Pattern p = null; 3867 try { 3868 p = compileTestPattern(patternString); 3869 } catch (PatternSyntaxException e) { 3870 String dataString = grabLine(r); 3871 String expectedResult = grabLine(r); 3872 if (expectedResult.startsWith("error")) 3873 continue; 3874 explainFailure(patternString, dataString, e); 3875 failCount++; 3876 continue; 3877 } 3878 3879 // Read a line for input string 3880 String dataString = grabLine(r); 3881 Matcher m = p.matcher(dataString); 3882 StringBuffer result = new StringBuffer(); 3883 3884 // Check for IllegalStateExceptions before a match 3885 failCount += preMatchInvariants(m); 3886 3887 boolean found = m.find(); 3888 3889 if (found) 3890 failCount += postTrueMatchInvariants(m); 3891 else 3892 failCount += postFalseMatchInvariants(m); 3893 3894 if (found) { 3895 result.append("true "); 3896 result.append(m.group(0) + " "); 3897 } else { 3898 result.append("false "); 3899 } 3900 3901 result.append(m.groupCount()); 3902 3903 if (found) { 3904 for (int i=1; i<m.groupCount()+1; i++) 3905 if (m.group(i) != null) 3906 result.append(" " +m.group(i)); 3907 } 3908 3909 // Read a line for the expected result 3910 String expectedResult = grabLine(r); 3911 3912 if (!result.toString().equals(expectedResult)) { 3913 explainFailure(patternString, dataString, expectedResult, result.toString()); 3914 failCount++; 3915 } 3916 } 3917 3918 report(fileName); 3919 } 3920 3921 private static int preMatchInvariants(Matcher m) { 3922 int failCount = 0; 3923 try { 3924 m.start(); 3925 failCount++; 3926 } catch (IllegalStateException ise) {} 3927 try { 3928 m.end(); 3929 failCount++; 3930 } catch (IllegalStateException ise) {} 3931 try { 3932 m.group(); 3933 failCount++; 3934 } catch (IllegalStateException ise) {} 3935 return failCount; 3936 } 3937 3938 private static int postFalseMatchInvariants(Matcher m) { 3939 int 
failCount = 0; 3940 try { 3941 m.group(); 3942 failCount++; 3943 } catch (IllegalStateException ise) {} 3944 try { 3945 m.start(); 3946 failCount++; 3947 } catch (IllegalStateException ise) {} 3948 try { 3949 m.end(); 3950 failCount++; 3951 } catch (IllegalStateException ise) {} 3952 return failCount; 3953 } 3954 3955 private static int postTrueMatchInvariants(Matcher m) { 3956 int failCount = 0; 3957 //assert(m.start() = m.start(0); 3958 if (m.start() != m.start(0)) 3959 failCount++; 3960 //assert(m.end() = m.end(0); 3961 if (m.start() != m.start(0)) 3962 failCount++; 3963 //assert(m.group() = m.group(0); 3964 if (!m.group().equals(m.group(0))) 3965 failCount++; 3966 try { 3967 m.group(50); 3968 failCount++; 3969 } catch (IndexOutOfBoundsException ise) {} 3970 3971 return failCount; 3972 } 3973 3974 private static Pattern compileTestPattern(String patternString) { 3975 if (!patternString.startsWith("'")) { 3976 return Pattern.compile(patternString); 3977 } 3978 int break1 = patternString.lastIndexOf("'"); 3979 String flagString = patternString.substring( 3980 break1+1, patternString.length()); 3981 patternString = patternString.substring(1, break1); 3982 3983 if (flagString.equals("i")) 3984 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3985 3986 if (flagString.equals("m")) 3987 return Pattern.compile(patternString, Pattern.MULTILINE); 3988 3989 return Pattern.compile(patternString); 3990 } 3991 3992 /** 3993 * Reads a line from the input file. Keeps reading lines until a non 3994 * empty non comment line is read. If the line contains a \n then 3995 * these two characters are replaced by a newline char. If a \\uxxxx 3996 * sequence is read then the sequence is replaced by the unicode char. 
3997 */ 3998 private static String grabLine(BufferedReader r) throws Exception { 3999 int index = 0; 4000 String line = r.readLine(); 4001 while (line.startsWith("//") || line.length() < 1) 4002 line = r.readLine(); 4003 while ((index = line.indexOf("\\n")) != -1) { 4004 StringBuffer temp = new StringBuffer(line); 4005 temp.replace(index, index+2, "\n"); 4006 line = temp.toString(); 4007 } 4008 while ((index = line.indexOf("\\u")) != -1) { 4009 StringBuffer temp = new StringBuffer(line); 4010 String value = temp.substring(index+2, index+6); 4011 char aChar = (char)Integer.parseInt(value, 16); 4012 String unicodeChar = "" + aChar; 4013 temp.replace(index, index+6, unicodeChar); 4014 line = temp.toString(); 4015 } 4016 4017 return line; 4018 } 4019 4020 private static void check(Pattern p, String s, String g, String expected) { 4021 Matcher m = p.matcher(s); 4022 m.find(); 4023 if (!m.group(g).equals(expected) || 4024 s.charAt(m.start(g)) != expected.charAt(0) || 4025 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 4026 failCount++; 4027 } 4028 4029 private static void checkReplaceFirst(String p, String s, String r, String expected) 4030 { 4031 if (!expected.equals(Pattern.compile(p) 4032 .matcher(s) 4033 .replaceFirst(r))) 4034 failCount++; 4035 } 4036 4037 private static void checkReplaceAll(String p, String s, String r, String expected) 4038 { 4039 if (!expected.equals(Pattern.compile(p) 4040 .matcher(s) 4041 .replaceAll(r))) 4042 failCount++; 4043 } 4044 4045 private static void checkExpectedFail(String p) { 4046 try { 4047 Pattern.compile(p); 4048 } catch (PatternSyntaxException pse) { 4049 //pse.printStackTrace(); 4050 return; 4051 } 4052 failCount++; 4053 } 4054 4055 private static void checkExpectedIAE(Matcher m, String g) { 4056 m.find(); 4057 try { 4058 m.group(g); 4059 } catch (IllegalArgumentException x) { 4060 //iae.printStackTrace(); 4061 try { 4062 m.start(g); 4063 } catch (IllegalArgumentException xx) { 4064 try { 4065 m.start(g); 
4066 } catch (IllegalArgumentException xxx) { 4067 return; 4068 } 4069 } 4070 } 4071 failCount++; 4072 } 4073 4074 private static void checkExpectedNPE(Matcher m) { 4075 m.find(); 4076 try { 4077 m.group(null); 4078 } catch (NullPointerException x) { 4079 try { 4080 m.start(null); 4081 } catch (NullPointerException xx) { 4082 try { 4083 m.end(null); 4084 } catch (NullPointerException xxx) { 4085 return; 4086 } 4087 } 4088 } 4089 failCount++; 4090 } 4091 4092 private static void namedGroupCaptureTest() throws Exception { 4093 check(Pattern.compile("x+(?<gname>y+)z+"), 4094 "xxxyyyzzz", 4095 "gname", 4096 "yyy"); 4097 4098 check(Pattern.compile("x+(?<gname8>y+)z+"), 4099 "xxxyyyzzz", 4100 "gname8", 4101 "yyy"); 4102 4103 //backref 4104 Pattern pattern = Pattern.compile("(a*)bc\\1"); 4105 check(pattern, "zzzaabcazzz", true); // found "abca" 4106 4107 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 4108 "zzzaabcaazzz", true); 4109 4110 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 4111 "abcdefabc", true); 4112 4113 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 4114 "abcdefghijkk", true); 4115 4116 // Supplementary character tests 4117 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4118 toSupplementaries("zzzaabcazzz"), true); 4119 4120 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4121 toSupplementaries("zzzaabcaazzz"), true); 4122 4123 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 4124 toSupplementaries("abcdefabc"), true); 4125 4126 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 4127 "(?<gname>" + 4128 toSupplementaries("k)") + "\\k<gname>"), 4129 toSupplementaries("abcdefghijkk"), true); 4130 4131 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 4132 "xxxyyyzzzyyy", 4133 "gname", 4134 "yyy"); 4135 4136 //replaceFirst/All 4137 checkReplaceFirst("(?<gn>ab)(c*)", 4138 
"abccczzzabcczzzabccc", 4139 "${gn}", 4140 "abzzzabcczzzabccc"); 4141 4142 checkReplaceAll("(?<gn>ab)(c*)", 4143 "abccczzzabcczzzabccc", 4144 "${gn}", 4145 "abzzzabzzzab"); 4146 4147 4148 checkReplaceFirst("(?<gn>ab)(c*)", 4149 "zzzabccczzzabcczzzabccczzz", 4150 "${gn}", 4151 "zzzabzzzabcczzzabccczzz"); 4152 4153 checkReplaceAll("(?<gn>ab)(c*)", 4154 "zzzabccczzzabcczzzabccczzz", 4155 "${gn}", 4156 "zzzabzzzabzzzabzzz"); 4157 4158 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4159 "zzzabccczzzabcczzzabccczzz", 4160 "${gn2}", 4161 "zzzccczzzabcczzzabccczzz"); 4162 4163 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4164 "zzzabccczzzabcczzzabccczzz", 4165 "${gn2}", 4166 "zzzccczzzcczzzccczzz"); 4167 4168 //toSupplementaries("(ab)(c*)")); 4169 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4170 ")(?<gn2>" + toSupplementaries("c") + "*)", 4171 toSupplementaries("abccczzzabcczzzabccc"), 4172 "${gn1}", 4173 toSupplementaries("abzzzabcczzzabccc")); 4174 4175 4176 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4177 ")(?<gn2>" + toSupplementaries("c") + "*)", 4178 toSupplementaries("abccczzzabcczzzabccc"), 4179 "${gn1}", 4180 toSupplementaries("abzzzabzzzab")); 4181 4182 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4183 ")(?<gn2>" + toSupplementaries("c") + "*)", 4184 toSupplementaries("abccczzzabcczzzabccc"), 4185 "${gn2}", 4186 toSupplementaries("ccczzzabcczzzabccc")); 4187 4188 4189 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4190 ")(?<gn2>" + toSupplementaries("c") + "*)", 4191 toSupplementaries("abccczzzabcczzzabccc"), 4192 "${gn2}", 4193 toSupplementaries("ccczzzcczzzccc")); 4194 4195 checkReplaceFirst("(?<dog>Dog)AndCat", 4196 "zzzDogAndCatzzzDogAndCatzzz", 4197 "${dog}", 4198 "zzzDogzzzDogAndCatzzz"); 4199 4200 4201 checkReplaceAll("(?<dog>Dog)AndCat", 4202 "zzzDogAndCatzzzDogAndCatzzz", 4203 "${dog}", 4204 "zzzDogzzzDogzzz"); 4205 4206 // backref in Matcher & String 4207 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", 
"${gn}").equals("abefij") || 4208 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4209 failCount++; 4210 4211 // negative 4212 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4213 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4214 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4215 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4216 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4217 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4218 "gnameX"); 4219 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4220 report("NamedGroupCapture"); 4221 } 4222 4223 // This is for bug 6919132 4224 private static void nonBmpClassComplementTest() throws Exception { 4225 Pattern p = Pattern.compile("\\P{Lu}"); 4226 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4227 4228 if (m.find() && m.start() == 1) 4229 failCount++; 4230 4231 // from a unicode category 4232 p = Pattern.compile("\\P{Lu}"); 4233 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4234 if (m.find()) 4235 failCount++; 4236 if (!m.hitEnd()) 4237 failCount++; 4238 4239 // block 4240 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4241 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4242 if (m.find() && m.start() == 1) 4243 failCount++; 4244 4245 p = Pattern.compile("\\P{sc=GRANTHA}"); 4246 m = p.matcher(new String(new int[] {0x11350}, 0, 1)); 4247 if (m.find() && m.start() == 1) 4248 failCount++; 4249 4250 report("NonBmpClassComplement"); 4251 } 4252 4253 private static void unicodePropertiesTest() throws Exception { 4254 // different forms 4255 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4256 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4257 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4258 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4259 
!Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4260 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4261 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4262 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4263 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4264 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4265 failCount++; 4266 4267 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4268 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4269 Matcher lastSM = common; 4270 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4271 4272 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4273 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4274 Matcher lastBM = latin; 4275 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4276 4277 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4278 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4279 continue; // only pick couple code points, they are the same 4280 } 4281 4282 // Unicode Script 4283 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4284 Matcher m; 4285 String str = new String(Character.toChars(cp)); 4286 if (script == lastScript) { 4287 m = lastSM; 4288 m.reset(str); 4289 } else { 4290 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4291 } 4292 if (!m.matches()) { 4293 failCount++; 4294 } 4295 Matcher other = (script == Character.UnicodeScript.COMMON)? 
unknown : common; 4296 other.reset(str); 4297 if (other.matches()) { 4298 failCount++; 4299 } 4300 lastSM = m; 4301 lastScript = script; 4302 4303 // Unicode Block 4304 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4305 if (block == null) { 4306 //System.out.printf("Not a Block: cp=%x%n", cp); 4307 continue; 4308 } 4309 if (block == lastBlock) { 4310 m = lastBM; 4311 m.reset(str); 4312 } else { 4313 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4314 } 4315 if (!m.matches()) { 4316 failCount++; 4317 } 4318 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4319 other.reset(str); 4320 if (other.matches()) { 4321 failCount++; 4322 } 4323 lastBM = m; 4324 lastBlock = block; 4325 } 4326 report("unicodeProperties"); 4327 } 4328 4329 private static void unicodeHexNotationTest() throws Exception { 4330 4331 // negative 4332 checkExpectedFail("\\x{-23}"); 4333 checkExpectedFail("\\x{110000}"); 4334 checkExpectedFail("\\x{}"); 4335 checkExpectedFail("\\x{AB[ef]"); 4336 4337 // codepoint 4338 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4339 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4340 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4341 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4342 4343 // in class 4344 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4345 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4346 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4347 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4348 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4349 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4350 4351 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4352 String s = "A" + new String(Character.toChars(cp)) + "B"; 4353 String hexUTF16 = (cp <= 0xFFFF)? 
String.format("\\u%04x", cp) 4354 : String.format("\\u%04x\\u%04x", 4355 (int) Character.toChars(cp)[0], 4356 (int) Character.toChars(cp)[1]); 4357 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4358 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4359 failCount++; 4360 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4361 failCount++; 4362 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4363 failCount++; 4364 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4365 failCount++; 4366 } 4367 report("unicodeHexNotation"); 4368 } 4369 4370 private static void unicodeClassesTest() throws Exception { 4371 4372 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4373 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4374 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4375 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4376 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4377 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4378 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4379 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4380 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4381 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4382 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4383 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4384 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4385 Matcher bound = Pattern.compile("\\b").matcher(""); 4386 Matcher word = Pattern.compile("\\w++").matcher(""); 4387 // UNICODE_CHARACTER_CLASS 4388 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4389 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4390 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4391 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 
4392 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4393 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4394 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4395 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4396 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4397 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4398 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4399 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4400 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4401 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4402 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4403 // embedded flag (?U) 4404 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4405 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4406 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4407 4408 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4409 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4410 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4411 // properties 4412 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4413 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4414 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4415 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4416 Matcher alphaP = 
Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4417 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4418 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4419 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4420 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4421 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4422 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4423 // javaMethod 4424 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4425 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4426 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4427 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4428 // GC/C 4429 Matcher gcC = Pattern.compile("\\p{C}").matcher(""); 4430 4431 for (int cp = 1; cp < 0x30000; cp++) { 4432 String str = new String(Character.toChars(cp)); 4433 int type = Character.getType(cp); 4434 if (// lower 4435 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4436 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4437 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4438 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4439 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4440 // upper 4441 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4442 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4443 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4444 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4445 // alpha 4446 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4447 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4448 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4449 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4450 // digit 4451 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4452 
Character.isDigit(cp) != digitU.reset(str).matches() || 4453 // alnum 4454 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4455 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4456 // punct 4457 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4458 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4459 // graph 4460 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4461 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4462 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4463 // blank 4464 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4465 != blank.reset(str).matches() || 4466 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4467 // print 4468 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4469 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4470 // cntrl 4471 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4472 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4473 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4474 // hexdigit 4475 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4476 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4477 // space 4478 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4479 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4480 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4481 // word 4482 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4483 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4484 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4485 // bwordb 4486 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4487 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4488 // properties 4489 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4490 Character.isLetter(cp) != letterP.reset(str).matches()|| 4491 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4492 Character.isIdeographic(cp) != 
ideogJ.reset(str).matches() || 4493 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 4494 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4495 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() || 4496 // gc_C 4497 (Character.CONTROL == type || Character.FORMAT == type || 4498 Character.PRIVATE_USE == type || Character.SURROGATE == type || 4499 Character.UNASSIGNED == type) 4500 != gcC.reset(str).matches()) { 4501 failCount++; 4502 } 4503 } 4504 4505 // bounds/word align 4506 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4507 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4508 failCount++; 4509 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4510 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4511 failCount++; 4512 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4513 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4514 failCount++; 4515 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4516 failCount++; 4517 report("unicodePredefinedClasses"); 4518 } 4519 4520 private static void unicodeCharacterNameTest() throws Exception { 4521 4522 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { 4523 if (!Character.isValidCodePoint(cp) || 4524 Character.getType(cp) == Character.UNASSIGNED) 4525 continue; 4526 String str = new String(Character.toChars(cp)); 4527 // single 4528 String p = "\\N{" + Character.getName(cp) + "}"; 4529 if (!Pattern.compile(p).matcher(str).matches()) { 4530 failCount++; 4531 } 4532 // class[c] 4533 p = "[\\N{" + Character.getName(cp) + "}]"; 4534 if (!Pattern.compile(p).matcher(str).matches()) { 4535 failCount++; 4536 } 4537 } 4538 4539 // range 4540 for (int i = 0; i < 10; i++) { 4541 int start = generator.nextInt(20); 4542 int end = start + generator.nextInt(200); 4543 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; 4544 String str; 4545 for (int cp = start; cp < end; cp++) { 4546 str = new 
String(Character.toChars(cp)); 4547 if (!Pattern.compile(p).matcher(str).matches()) { 4548 failCount++; 4549 } 4550 } 4551 str = new String(Character.toChars(end + 10)); 4552 if (Pattern.compile(p).matcher(str).matches()) { 4553 failCount++; 4554 } 4555 } 4556 4557 // slice 4558 for (int i = 0; i < 10; i++) { 4559 int n = generator.nextInt(256); 4560 int[] buf = new int[n]; 4561 StringBuffer sb = new StringBuffer(1024); 4562 for (int j = 0; j < n; j++) { 4563 int cp = generator.nextInt(1000); 4564 if (!Character.isValidCodePoint(cp) || 4565 Character.getType(cp) == Character.UNASSIGNED) 4566 cp = 0x4e00; // just use 4e00 4567 sb.append("\\N{" + Character.getName(cp) + "}"); 4568 buf[j] = cp; 4569 } 4570 String p = sb.toString(); 4571 String str = new String(buf, 0, buf.length); 4572 if (!Pattern.compile(p).matcher(str).matches()) { 4573 failCount++; 4574 } 4575 } 4576 report("unicodeCharacterName"); 4577 } 4578 4579 private static void horizontalAndVerticalWSTest() throws Exception { 4580 String hws = new String (new char[] { 4581 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4582 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4583 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4584 0x202f, 0x205f, 0x3000 }); 4585 String vws = new String (new char[] { 4586 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4587 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4588 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4589 failCount++; 4590 if (Pattern.compile("\\H").matcher(hws).find() || 4591 Pattern.compile("[\\H]").matcher(hws).find()) 4592 failCount++; 4593 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4594 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4595 failCount++; 4596 if (Pattern.compile("\\V").matcher(vws).find() || 4597 Pattern.compile("[\\V]").matcher(vws).find()) 4598 failCount++; 4599 String prefix = "abcd"; 4600 String suffix = "efgh"; 4601 String ng = "A"; 4602 for (int i = 0; i < hws.length(); i++) { 4603 String c = String.valueOf(hws.charAt(i)); 
4604 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4605 if (!m.find() || !c.equals(m.group())) 4606 failCount++; 4607 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4608 if (!m.find() || !c.equals(m.group())) 4609 failCount++; 4610 4611 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4612 if (!m.find() || !ng.equals(m.group())) 4613 failCount++; 4614 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4615 if (!m.find() || !ng.equals(m.group())) 4616 failCount++; 4617 } 4618 for (int i = 0; i < vws.length(); i++) { 4619 String c = String.valueOf(vws.charAt(i)); 4620 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4621 if (!m.find() || !c.equals(m.group())) 4622 failCount++; 4623 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4624 if (!m.find() || !c.equals(m.group())) 4625 failCount++; 4626 4627 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4628 if (!m.find() || !ng.equals(m.group())) 4629 failCount++; 4630 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4631 if (!m.find() || !ng.equals(m.group())) 4632 failCount++; 4633 } 4634 // \v in range is interpreted as 0x0B. 
This is the undocumented behavior 4635 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4636 failCount++; 4637 report("horizontalAndVerticalWSTest"); 4638 } 4639 4640 private static void linebreakTest() throws Exception { 4641 String linebreaks = new String (new char[] { 4642 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4643 String crnl = "\r\n"; 4644 if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() && 4645 Pattern.compile("\\R").matcher(crnl).matches() && 4646 Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() && 4647 Pattern.compile("\\Rabc").matcher("\rabc").matches() && 4648 Pattern.compile("\\R\\R").matcher(crnl).matches() && // backtracking 4649 Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking 4650 !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029 4651 failCount++; 4652 } 4653 report("linebreakTest"); 4654 } 4655 4656 // #7189363 4657 private static void branchTest() throws Exception { 4658 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4659 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4660 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4661 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4662 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4663 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4664 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4665 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4666 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4667 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4668 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4669 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4670 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4671 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4672 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4673 
!Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4674 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4675 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4676 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4677 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4678 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 4679 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4680 failCount++; 4681 report("branchTest"); 4682 } 4683 4684 // This test is for 8007395 4685 private static void groupCurlyNotFoundSuppTest() throws Exception { 4686 String input = "test this as \ud83d\ude0d"; 4687 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4688 "test(.)*(@[a-zA-Z.]+)", 4689 "test([^B])+(@[a-zA-Z.]+)", 4690 "test([^B])*(@[a-zA-Z.]+)", 4691 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4692 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4693 }) { 4694 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4695 .matcher(input); 4696 try { 4697 if (m.find()) { 4698 failCount++; 4699 } 4700 } catch (Exception x) { 4701 failCount++; 4702 } 4703 } 4704 report("GroupCurly NotFoundSupp"); 4705 } 4706 4707 // This test is for 8023647 4708 private static void groupCurlyBackoffTest() throws Exception { 4709 if (!"abc1c".matches("(\\w)+1\\1") || 4710 "abc11".matches("(\\w)+1\\1")) { 4711 failCount++; 4712 } 4713 report("GroupCurly backoff"); 4714 } 4715 4716 // This test is for 8012646 4717 private static void patternAsPredicate() throws Exception { 4718 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4719 4720 if (p.test("")) { 4721 failCount++; 4722 } 4723 if (!p.test("word")) { 4724 failCount++; 4725 } 4726 if (p.test("1234")) { 4727 failCount++; 4728 } 4729 if (!p.test("word1234")) { 4730 failCount++; 4731 } 4732 report("Pattern.asPredicate"); 4733 } 4734 4735 // This test is for 8184692 4736 private static void patternAsMatchPredicate() throws Exception { 4737 Predicate<String> p = 
Pattern.compile("[a-z]+").asMatchPredicate(); 4738 4739 if (p.test("")) { 4740 failCount++; 4741 } 4742 if (!p.test("word")) { 4743 failCount++; 4744 } 4745 if (p.test("1234word")) { 4746 failCount++; 4747 } 4748 if (p.test("1234")) { 4749 failCount++; 4750 } 4751 report("Pattern.asMatchPredicate"); 4752 } 4753 4754 4755 // This test is for 8035975 4756 private static void invalidFlags() throws Exception { 4757 for (int flag = 1; flag != 0; flag <<= 1) { 4758 switch (flag) { 4759 case Pattern.CASE_INSENSITIVE: 4760 case Pattern.MULTILINE: 4761 case Pattern.DOTALL: 4762 case Pattern.UNICODE_CASE: 4763 case Pattern.CANON_EQ: 4764 case Pattern.UNIX_LINES: 4765 case Pattern.LITERAL: 4766 case Pattern.UNICODE_CHARACTER_CLASS: 4767 case Pattern.COMMENTS: 4768 // valid flag, continue 4769 break; 4770 default: 4771 try { 4772 Pattern.compile(".", flag); 4773 failCount++; 4774 } catch (IllegalArgumentException expected) { 4775 } 4776 } 4777 } 4778 report("Invalid compile flags"); 4779 } 4780 4781 // This test is for 8158482 4782 private static void embeddedFlags() throws Exception { 4783 try { 4784 Pattern.compile("(?i).(?-i)."); 4785 Pattern.compile("(?m).(?-m)."); 4786 Pattern.compile("(?s).(?-s)."); 4787 Pattern.compile("(?d).(?-d)."); 4788 Pattern.compile("(?u).(?-u)."); 4789 Pattern.compile("(?c).(?-c)."); 4790 Pattern.compile("(?x).(?-x)."); 4791 Pattern.compile("(?U).(?-U)."); 4792 Pattern.compile("(?imsducxU).(?-imsducxU)."); 4793 } catch (PatternSyntaxException x) { 4794 failCount++; 4795 } 4796 report("Embedded flags"); 4797 } 4798 4799 private static void grapheme() throws Exception { 4800 Stream.concat(Files.lines(UCDFiles.GRAPHEME_BREAK_TEST), 4801 Files.lines(Paths.get(System.getProperty("test.src", "."), "GraphemeTestCases.txt"))) 4802 .filter( ln -> ln.length() != 0 && !ln.startsWith("#") ) 4803 .forEach( ln -> { 4804 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); 4805 // System.out.println(str); 4806 String[] strs = 
ln.split("\u00f7|\u00d7"); 4807 StringBuilder src = new StringBuilder(); 4808 ArrayList<String> graphemes = new ArrayList<>(); 4809 StringBuilder buf = new StringBuilder(); 4810 int offBk = 0; 4811 for (String str : strs) { 4812 if (str.length() == 0) // first empty str 4813 continue; 4814 int cp = Integer.parseInt(str, 16); 4815 src.appendCodePoint(cp); 4816 buf.appendCodePoint(cp); 4817 offBk += (str.length() + 1); 4818 if (ln.charAt(offBk) == '\u00f7') { // DIV 4819 graphemes.add(buf.toString()); 4820 buf = new StringBuilder(); 4821 } 4822 } 4823 Pattern p = Pattern.compile("\\X"); 4824 Matcher m = p.matcher(src.toString()); 4825 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4826 for (String g : graphemes) { 4827 // System.out.printf(" grapheme:=[%s]%n", g); 4828 // (1) test \\X directly 4829 if (!m.find() || !m.group().equals(g)) { 4830 System.out.println("Failed \\X [" + ln + "] : " + g); 4831 failCount++; 4832 } 4833 // (2) test \\b{g} + \\X via Scanner 4834 boolean hasNext = s.hasNext(p); 4835 // if (!s.hasNext() || !s.next().equals(next)) { 4836 if (!s.hasNext(p) || !s.next(p).equals(g)) { 4837 System.out.println("Failed b{g} [" + ln + "] : " + g); 4838 failCount++; 4839 } 4840 } 4841 }); 4842 // some sanity checks 4843 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() || 4844 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() || 4845 !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches()) 4846 failCount++; 4847 // make sure "\b{n}" still works 4848 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches()) 4849 failCount++; 4850 report("Unicode extended grapheme cluster"); 4851 } 4852 4853 // hangup/timeout if go into exponential backtracking 4854 private static void expoBacktracking() throws Exception { 4855 4856 Object[][] patternMatchers = { 4857 // 6328855 4858 { "(.*\n*)*", 4859 "this little fine string 
lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)", 4860 false }, 4861 // 6192895 4862 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4863 "Hello World this is a test this is a test this is a test A", 4864 true }, 4865 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4866 "Hello World this is a test this is a test this is a test \u4e00 ", 4867 false }, 4868 { " *([a-z0-9]+ *)+", 4869 "hello world this is a test this is a test this is a test A", 4870 false }, 4871 // 4771934 [FIXED] #5013651? 4872 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4873 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com", 4874 true }, 4875 // 4866249 [FIXED] 4876 { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>", 4877 "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">", 4878 true }, 4879 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4880 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com", 4881 false }, 4882 // 6345469 4883 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4884 " < br/> < / p> <p> <html> <adfasfdasdf> </p>", 4885 true }, // --> matched 4886 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4887 " < br/> < / p> <p> <html> <adfasfdasdf> p </p>", 4888 false }, 4889 // 5026912 4890 { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$", 4891 "156580451111112225588087755221111111566969655555555", 4892 false}, 4893 // 6988218 4894 { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')", 4895 "'%)) order by ANGEBOT.ID", 4896 false}, // find 4897 // 6693451 4898 { "^(\\s*foo\\s*)*$", 4899 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo", 4900 true }, 4901 { "^(\\s*foo\\s*)*$", 4902 "foo foo foo 
foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo", 4903 false 4904 }, 4905 // 7006761 4906 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true}, 4907 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false}, 4908 // 8140212 4909 { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)", 4910 "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()", 4911 false 4912 }, 4913 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true}, 4914 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false}, 4915 4916 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true }, 4917 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4918 4919 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true}, 4920 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4921 4922 { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false}, 4923 4924 /* not fixed 4925 //8132141 ---> second level exponential backtracking 4926 { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*", 4927 "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" }, 4928 */ 4929 }; 4930 4931 for (Object[] pm : patternMatchers) { 4932 String p = (String)pm[0]; 4933 String s = (String)pm[1]; 4934 boolean r = (Boolean)pm[2]; 4935 if (r != Pattern.compile(p).matcher(s).matches()) { 4936 failCount++; 4937 } 4938 } 4939 } 4940 4941 private static void invalidGroupName() { 4942 // Invalid start of a group name 4943 for 
(String groupName : List.of("", ".", "0", "\u0040", "\u005b", 4944 "\u0060", "\u007b", "\u0416")) { 4945 for (String pat : List.of("(?<" + groupName + ">)", 4946 "\\k<" + groupName + ">")) { 4947 try { 4948 Pattern.compile(pat); 4949 failCount++; 4950 } catch (PatternSyntaxException e) { 4951 if (!e.getMessage().startsWith( 4952 "capturing group name does not start with a" 4953 + " Latin letter")) { 4954 failCount++; 4955 } 4956 } 4957 } 4958 } 4959 // Invalid char in a group name 4960 for (String groupName : List.of("a.", "b\u0040", "c\u005b", 4961 "d\u0060", "e\u007b", "f\u0416")) { 4962 for (String pat : List.of("(?<" + groupName + ">)", 4963 "\\k<" + groupName + ">")) { 4964 try { 4965 Pattern.compile(pat); 4966 failCount++; 4967 } catch (PatternSyntaxException e) { 4968 if (!e.getMessage().startsWith( 4969 "named capturing group is missing trailing '>'")) { 4970 failCount++; 4971 } 4972 } 4973 } 4974 } 4975 report("Invalid capturing group names"); 4976 } 4977 4978 private static void illegalRepetitionRange() { 4979 // huge integers > (2^31 - 1) 4980 String n = BigInteger.valueOf(1L << 32) 4981 .toString(); 4982 String m = BigInteger.valueOf(1L << 31) 4983 .add(new BigInteger(80, generator)) 4984 .toString(); 4985 for (String rep : List.of("", "x", ".", ",", "-1", "2,1", 4986 n, n + ",", "0," + n, n + "," + m, m, m + ",", "0," + m)) { 4987 String pat = ".{" + rep + "}"; 4988 try { 4989 Pattern.compile(pat); 4990 failCount++; 4991 System.out.println("Expected to fail. 
Pattern: " + pat); 4992 } catch (PatternSyntaxException e) { 4993 if (!e.getMessage().startsWith("Illegal repetition")) { 4994 failCount++; 4995 System.out.println("Unexpected error message: " + e.getMessage()); 4996 } 4997 } catch (Throwable t) { 4998 failCount++; 4999 System.out.println("Unexpected exception: " + t); 5000 } 5001 } 5002 report("illegalRepetitionRange"); 5003 } 5004 5005 private static void surrogatePairWithCanonEq() { 5006 try { 5007 Pattern.compile("\ud834\udd21", Pattern.CANON_EQ); 5008 } catch (Throwable t) { 5009 failCount++; 5010 System.out.println("Unexpected exception: " + t); 5011 } 5012 report("surrogatePairWithCanonEq"); 5013 } 5014 5015 // This test is for 8235812 5016 private static void lineBreakWithQuantifier() { 5017 // key: pattern 5018 // value: lengths of input that must match the pattern 5019 Map<String, List<Integer>> cases = Map.ofEntries( 5020 Map.entry("\\R?", List.of(0, 1)), 5021 Map.entry("\\R*", List.of(0, 1, 2, 3)), 5022 Map.entry("\\R+", List.of(1, 2, 3)), 5023 Map.entry("\\R{0}", List.of(0)), 5024 Map.entry("\\R{1}", List.of(1)), 5025 Map.entry("\\R{2}", List.of(2)), 5026 Map.entry("\\R{3}", List.of(3)), 5027 Map.entry("\\R{0,}", List.of(0, 1, 2, 3)), 5028 Map.entry("\\R{1,}", List.of(1, 2, 3)), 5029 Map.entry("\\R{2,}", List.of(2, 3)), 5030 Map.entry("\\R{3,}", List.of(3)), 5031 Map.entry("\\R{0,0}", List.of(0)), 5032 Map.entry("\\R{0,1}", List.of(0, 1)), 5033 Map.entry("\\R{0,2}", List.of(0, 1, 2)), 5034 Map.entry("\\R{0,3}", List.of(0, 1, 2, 3)), 5035 Map.entry("\\R{1,1}", List.of(1)), 5036 Map.entry("\\R{1,2}", List.of(1, 2)), 5037 Map.entry("\\R{1,3}", List.of(1, 2, 3)), 5038 Map.entry("\\R{2,2}", List.of(2)), 5039 Map.entry("\\R{2,3}", List.of(2, 3)), 5040 Map.entry("\\R{3,3}", List.of(3)), 5041 Map.entry("\\R", List.of(1)), 5042 Map.entry("\\R\\R", List.of(2)), 5043 Map.entry("\\R\\R\\R", List.of(3)) 5044 ); 5045 5046 // key: length of input 5047 // value: all possible inputs of given length 5048 Map<Integer, 
List<String>> inputs = new HashMap<>(); 5049 String[] Rs = { "\r\n", "\r", "\n", 5050 "\u000B", "\u000C", "\u0085", "\u2028", "\u2029" }; 5051 StringBuilder sb = new StringBuilder(); 5052 for (int len = 0; len <= 3; ++len) { 5053 int[] idx = new int[len + 1]; 5054 do { 5055 sb.setLength(0); 5056 for (int j = 0; j < len; ++j) 5057 sb.append(Rs[idx[j]]); 5058 inputs.computeIfAbsent(len, ArrayList::new).add(sb.toString()); 5059 idx[0]++; 5060 for (int j = 0; j < len; ++j) { 5061 if (idx[j] < Rs.length) 5062 break; 5063 idx[j] = 0; 5064 idx[j+1]++; 5065 } 5066 } while (idx[len] == 0); 5067 } 5068 5069 // exhaustive testing 5070 for (String patStr : cases.keySet()) { 5071 Pattern[] pats = patStr.endsWith("R") 5072 ? new Pattern[] { Pattern.compile(patStr) } // no quantifiers 5073 : new Pattern[] { Pattern.compile(patStr), // greedy 5074 Pattern.compile(patStr + "?") }; // reluctant 5075 Matcher m = pats[0].matcher(""); 5076 for (Pattern p : pats) { 5077 m.usePattern(p); 5078 for (int len : cases.get(patStr)) { 5079 for (String in : inputs.get(len)) { 5080 if (!m.reset(in).matches()) { 5081 failCount++; 5082 System.err.println("Expected to match '" + 5083 in + "' =~ /" + p + "/"); 5084 } 5085 } 5086 } 5087 } 5088 } 5089 report("lineBreakWithQuantifier"); 5090 } 5091 5092 // This test is for 8214245 5093 private static void caseInsensitivePMatch() { 5094 for (String input : List.of("abcd", "AbCd", "ABCD")) { 5095 for (String pattern : List.of("abcd", "aBcD", "[a-d]{4}", 5096 "(?:a|b|c|d){4}", "\\p{Lower}{4}", "\\p{Ll}{4}", 5097 "\\p{IsLl}{4}", "\\p{gc=Ll}{4}", 5098 "\\p{general_category=Ll}{4}", "\\p{IsLowercase}{4}", 5099 "\\p{javaLowerCase}{4}", "\\p{Upper}{4}", "\\p{Lu}{4}", 5100 "\\p{IsLu}{4}", "\\p{gc=Lu}{4}", "\\p{general_category=Lu}{4}", 5101 "\\p{IsUppercase}{4}", "\\p{javaUpperCase}{4}", 5102 "\\p{Lt}{4}", "\\p{IsLt}{4}", "\\p{gc=Lt}{4}", 5103 "\\p{general_category=Lt}{4}", "\\p{IsTitlecase}{4}", 5104 "\\p{javaTitleCase}{4}", "[\\p{Lower}]{4}", "[\\p{Ll}]{4}", 
5105 "[\\p{IsLl}]{4}", "[\\p{gc=Ll}]{4}", 5106 "[\\p{general_category=Ll}]{4}", "[\\p{IsLowercase}]{4}", 5107 "[\\p{javaLowerCase}]{4}", "[\\p{Upper}]{4}", "[\\p{Lu}]{4}", 5108 "[\\p{IsLu}]{4}", "[\\p{gc=Lu}]{4}", 5109 "[\\p{general_category=Lu}]{4}", "[\\p{IsUppercase}]{4}", 5110 "[\\p{javaUpperCase}]{4}", "[\\p{Lt}]{4}", "[\\p{IsLt}]{4}", 5111 "[\\p{gc=Lt}]{4}", "[\\p{general_category=Lt}]{4}", 5112 "[\\p{IsTitlecase}]{4}", "[\\p{javaTitleCase}]{4}")) 5113 { 5114 if (!Pattern.compile(pattern, Pattern.CASE_INSENSITIVE) 5115 .matcher(input) 5116 .matches()) 5117 { 5118 failCount++; 5119 System.err.println("Expected to match: " + 5120 "'" + input + "' =~ /" + pattern + "/"); 5121 } 5122 } 5123 } 5124 5125 for (String input : List.of("\u01c7", "\u01c8", "\u01c9")) { 5126 for (String pattern : List.of("\u01c7", "\u01c8", "\u01c9", 5127 "[\u01c7\u01c8]", "[\u01c7\u01c9]", "[\u01c8\u01c9]", 5128 "[\u01c7-\u01c8]", "[\u01c8-\u01c9]", "[\u01c7-\u01c9]", 5129 "\\p{Lower}", "\\p{Ll}", "\\p{IsLl}", "\\p{gc=Ll}", 5130 "\\p{general_category=Ll}", "\\p{IsLowercase}", 5131 "\\p{javaLowerCase}", "\\p{Upper}", "\\p{Lu}", 5132 "\\p{IsLu}", "\\p{gc=Lu}", "\\p{general_category=Lu}", 5133 "\\p{IsUppercase}", "\\p{javaUpperCase}", 5134 "\\p{Lt}", "\\p{IsLt}", "\\p{gc=Lt}", 5135 "\\p{general_category=Lt}", "\\p{IsTitlecase}", 5136 "\\p{javaTitleCase}", "[\\p{Lower}]", "[\\p{Ll}]", 5137 "[\\p{IsLl}]", "[\\p{gc=Ll}]", 5138 "[\\p{general_category=Ll}]", "[\\p{IsLowercase}]", 5139 "[\\p{javaLowerCase}]", "[\\p{Upper}]", "[\\p{Lu}]", 5140 "[\\p{IsLu}]", "[\\p{gc=Lu}]", 5141 "[\\p{general_category=Lu}]", "[\\p{IsUppercase}]", 5142 "[\\p{javaUpperCase}]", "[\\p{Lt}]", "[\\p{IsLt}]", 5143 "[\\p{gc=Lt}]", "[\\p{general_category=Lt}]", 5144 "[\\p{IsTitlecase}]", "[\\p{javaTitleCase}]")) 5145 { 5146 if (!Pattern.compile(pattern, Pattern.CASE_INSENSITIVE 5147 | Pattern.UNICODE_CHARACTER_CLASS) 5148 .matcher(input) 5149 .matches()) 5150 { 5151 failCount++; 5152 System.err.println("Expected to match: 
" + 5153 "'" + input + "' =~ /" + pattern + "/"); 5154 } 5155 } 5156 } 5157 report("caseInsensitivePMatch"); 5158 } 5159 5160 // This test is for 8237599 5161 private static void surrogatePairOverlapRegion() { 5162 String input = "\ud801\udc37"; 5163 5164 Pattern p = Pattern.compile(".+"); 5165 Matcher m = p.matcher(input); 5166 m.region(0, 1); 5167 5168 boolean ok = m.find(); 5169 if (!ok || !m.group(0).equals(input.substring(0, 1))) 5170 { 5171 failCount++; 5172 System.out.println("Input \"" + input + "\".substr(0, 1)" + 5173 " expected to match pattern \"" + p + "\""); 5174 if (ok) { 5175 System.out.println("group(0): \"" + m.group(0) + "\""); 5176 } 5177 } else if (!m.hitEnd()) { 5178 failCount++; 5179 System.out.println("Expected m.hitEnd() == true"); 5180 } 5181 5182 p = Pattern.compile(".*(.)"); 5183 m = p.matcher(input); 5184 m.region(1, 2); 5185 5186 ok = m.find(); 5187 if (!ok || !m.group(0).equals(input.substring(1, 2)) 5188 || !m.group(1).equals(input.substring(1, 2))) 5189 { 5190 failCount++; 5191 System.out.println("Input \"" + input + "\".substr(1, 2)" + 5192 " expected to match pattern \"" + p + "\""); 5193 if (ok) { 5194 System.out.println("group(0): \"" + m.group(0) + "\""); 5195 System.out.println("group(1): \"" + m.group(1) + "\""); 5196 } 5197 } 5198 report("surrogatePairOverlapRegion"); 5199 } 5200 }