1 /* 2 * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895 37 * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706 38 * 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 39 * 40 * @library /test/lib 41 * @library /lib/testlibrary/java/lang 42 * @build jdk.test.lib.RandomFactory 43 * @run main RegExTest 44 * @key randomness 45 */ 46 47 import java.io.BufferedReader; 48 import java.io.ByteArrayInputStream; 49 import java.io.ByteArrayOutputStream; 50 import java.io.File; 51 import java.io.FileInputStream; 52 import java.io.InputStreamReader; 53 import java.io.ObjectInputStream; 54 import java.io.ObjectOutputStream; 55 import java.math.BigInteger; 56 import java.nio.CharBuffer; 57 import java.nio.file.Files; 58 import java.util.ArrayList; 59 import java.util.Arrays; 60 import java.util.List; 61 import java.util.Random; 62 import java.util.Scanner; 63 import java.util.function.Function; 64 import java.util.function.Predicate; 65 import java.util.regex.Matcher; 66 import java.util.regex.MatchResult; 67 import java.util.regex.Pattern; 68 import java.util.regex.PatternSyntaxException; 69 import jdk.test.lib.RandomFactory; 70 71 /** 72 * This is a test class created to check the operation of 73 * 
the Pattern and Matcher classes. 74 */ 75 public class RegExTest { 76 77 private static Random generator = RandomFactory.getRandom(); 78 private static boolean failure = false; 79 private static int failCount = 0; 80 private static String firstFailure = null; 81 82 /** 83 * Main to interpret arguments and run several tests. 84 * 85 */ 86 public static void main(String[] args) throws Exception { 87 // Most of the tests are in a file 88 processFile("TestCases.txt"); 89 //processFile("PerlCases.txt"); 90 processFile("BMPTestCases.txt"); 91 processFile("SupplementaryTestCases.txt"); 92 93 // These test many randomly generated char patterns 94 bm(); 95 slice(); 96 97 // These are hard to put into the file 98 escapes(); 99 blankInput(); 100 101 // Substitition tests on randomly generated sequences 102 globalSubstitute(); 103 stringbufferSubstitute(); 104 stringbuilderSubstitute(); 105 106 substitutionBasher(); 107 substitutionBasher2(); 108 109 // Canonical Equivalence 110 ceTest(); 111 112 // Anchors 113 anchorTest(); 114 115 // boolean match calls 116 matchesTest(); 117 lookingAtTest(); 118 119 // Pattern API 120 patternMatchesTest(); 121 122 // Misc 123 lookbehindTest(); 124 nullArgumentTest(); 125 backRefTest(); 126 groupCaptureTest(); 127 caretTest(); 128 charClassTest(); 129 emptyPatternTest(); 130 findIntTest(); 131 group0Test(); 132 longPatternTest(); 133 octalTest(); 134 ampersandTest(); 135 negationTest(); 136 splitTest(); 137 appendTest(); 138 caseFoldingTest(); 139 commentsTest(); 140 unixLinesTest(); 141 replaceFirstTest(); 142 gTest(); 143 zTest(); 144 serializeTest(); 145 reluctantRepetitionTest(); 146 multilineDollarTest(); 147 dollarAtEndTest(); 148 caretBetweenTerminatorsTest(); 149 // This RFE rejected in Tiger numOccurrencesTest(); 150 javaCharClassTest(); 151 nonCaptureRepetitionTest(); 152 notCapturedGroupCurlyMatchTest(); 153 escapedSegmentTest(); 154 literalPatternTest(); 155 literalReplacementTest(); 156 regionTest(); 157 toStringTest(); 158 
negatedCharClassTest(); 159 findFromTest(); 160 boundsTest(); 161 unicodeWordBoundsTest(); 162 caretAtEndTest(); 163 wordSearchTest(); 164 hitEndTest(); 165 toMatchResultTest(); 166 toMatchResultTest2(); 167 surrogatesInClassTest(); 168 removeQEQuotingTest(); 169 namedGroupCaptureTest(); 170 nonBmpClassComplementTest(); 171 unicodePropertiesTest(); 172 unicodeHexNotationTest(); 173 unicodeClassesTest(); 174 unicodeCharacterNameTest(); 175 horizontalAndVerticalWSTest(); 176 linebreakTest(); 177 branchTest(); 178 groupCurlyNotFoundSuppTest(); 179 groupCurlyBackoffTest(); 180 patternAsPredicate(); 181 patternAsMatchPredicate(); 182 invalidFlags(); 183 embeddedFlags(); 184 grapheme(); 185 expoBacktracking(); 186 invalidGroupName(); 187 illegalRepetitionRange(); 188 surrogatePairWithCanonEq(); 189 190 if (failure) { 191 throw new 192 RuntimeException("RegExTest failed, 1st failure: " + 193 firstFailure); 194 } else { 195 System.err.println("OKAY: All tests passed."); 196 } 197 } 198 199 // Utility functions 200 201 private static String getRandomAlphaString(int length) { 202 StringBuffer buf = new StringBuffer(length); 203 for (int i=0; i<length; i++) { 204 char randChar = (char)(97 + generator.nextInt(26)); 205 buf.append(randChar); 206 } 207 return buf.toString(); 208 } 209 210 private static void check(Matcher m, String expected) { 211 m.find(); 212 if (!m.group().equals(expected)) 213 failCount++; 214 } 215 216 private static void check(Matcher m, String result, boolean expected) { 217 m.find(); 218 if (m.group().equals(result) != expected) 219 failCount++; 220 } 221 222 private static void check(Pattern p, String s, boolean expected) { 223 if (p.matcher(s).find() != expected) 224 failCount++; 225 } 226 227 private static void check(String p, String s, boolean expected) { 228 Matcher matcher = Pattern.compile(p).matcher(s); 229 if (matcher.find() != expected) 230 failCount++; 231 } 232 233 private static void check(String p, char c, boolean expected) { 234 String 
propertyPattern = expected ? "\\p" + p : "\\P" + p; 235 Pattern pattern = Pattern.compile(propertyPattern); 236 char[] ca = new char[1]; ca[0] = c; 237 Matcher matcher = pattern.matcher(new String(ca)); 238 if (!matcher.find()) 239 failCount++; 240 } 241 242 private static void check(String p, int codePoint, boolean expected) { 243 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 244 Pattern pattern = Pattern.compile(propertyPattern); 245 char[] ca = Character.toChars(codePoint); 246 Matcher matcher = pattern.matcher(new String(ca)); 247 if (!matcher.find()) 248 failCount++; 249 } 250 251 private static void check(String p, int flag, String input, String s, 252 boolean expected) 253 { 254 Pattern pattern = Pattern.compile(p, flag); 255 Matcher matcher = pattern.matcher(input); 256 if (expected) 257 check(matcher, s, expected); 258 else 259 check(pattern, input, false); 260 } 261 262 private static void report(String testName) { 263 int spacesToAdd = 30 - testName.length(); 264 StringBuffer paddedNameBuffer = new StringBuffer(testName); 265 for (int i=0; i<spacesToAdd; i++) 266 paddedNameBuffer.append(" "); 267 String paddedName = paddedNameBuffer.toString(); 268 System.err.println(paddedName + ": " + 269 (failCount==0 ? "Passed":"Failed("+failCount+")")); 270 if (failCount > 0) { 271 failure = true; 272 273 if (firstFailure == null) { 274 firstFailure = testName; 275 } 276 } 277 278 failCount = 0; 279 } 280 281 /** 282 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 283 * supplementary characters. This method does NOT fully take care 284 * of the regex syntax. 
285 */ 286 private static String toSupplementaries(String s) { 287 int length = s.length(); 288 StringBuffer sb = new StringBuffer(length * 2); 289 290 for (int i = 0; i < length; ) { 291 char c = s.charAt(i++); 292 if (c == '\\') { 293 sb.append(c); 294 if (i < length) { 295 c = s.charAt(i++); 296 sb.append(c); 297 if (c == 'u') { 298 // assume no syntax error 299 sb.append(s.charAt(i++)); 300 sb.append(s.charAt(i++)); 301 sb.append(s.charAt(i++)); 302 sb.append(s.charAt(i++)); 303 } 304 } 305 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 306 sb.append('\ud800').append((char)('\udc00'+c)); 307 } else { 308 sb.append(c); 309 } 310 } 311 return sb.toString(); 312 } 313 314 // Regular expression tests 315 316 // This is for bug 6178785 317 // Test if an expected NPE gets thrown when passing in a null argument 318 private static boolean check(Runnable test) { 319 try { 320 test.run(); 321 failCount++; 322 return false; 323 } catch (NullPointerException npe) { 324 return true; 325 } 326 } 327 328 private static void nullArgumentTest() { 329 check(() -> Pattern.compile(null)); 330 check(() -> Pattern.matches(null, null)); 331 check(() -> Pattern.matches("xyz", null)); 332 check(() -> Pattern.quote(null)); 333 check(() -> Pattern.compile("xyz").split(null)); 334 check(() -> Pattern.compile("xyz").matcher(null)); 335 336 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 337 m.matches(); 338 check(() -> m.appendTail((StringBuffer) null)); 339 check(() -> m.appendTail((StringBuilder)null)); 340 check(() -> m.replaceAll((String) null)); 341 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 342 check(() -> m.replaceFirst((String)null)); 343 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 344 check(() -> m.appendReplacement((StringBuffer)null, null)); 345 check(() -> m.appendReplacement((StringBuilder)null, null)); 346 check(() -> m.reset(null)); 347 check(() -> Matcher.quoteReplacement(null)); 348 //check(() -> 
m.usePattern(null)); 349 350 report("Null Argument"); 351 } 352 353 // This is for bug6635133 354 // Test if surrogate pair in Unicode escapes can be handled correctly. 355 private static void surrogatesInClassTest() throws Exception { 356 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 357 Matcher matcher = pattern.matcher("\ud834\udd22"); 358 if (!matcher.find()) 359 failCount++; 360 361 report("Surrogate pair in Unicode escape"); 362 } 363 364 // This is for bug6990617 365 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 366 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 367 // char is an octal digit. 368 private static void removeQEQuotingTest() throws Exception { 369 Pattern pattern = 370 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 371 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 372 if (!matcher.find()) 373 failCount++; 374 375 report("Remove Q/E Quoting"); 376 } 377 378 // This is for bug 4988891 379 // Test toMatchResult to see that it is a copy of the Matcher 380 // that is not affected by subsequent operations on the original 381 private static void toMatchResultTest() throws Exception { 382 Pattern pattern = Pattern.compile("squid"); 383 Matcher matcher = pattern.matcher( 384 "agiantsquidofdestinyasmallsquidoffate"); 385 matcher.find(); 386 int matcherStart1 = matcher.start(); 387 MatchResult mr = matcher.toMatchResult(); 388 if (mr == matcher) 389 failCount++; 390 int resultStart1 = mr.start(); 391 if (matcherStart1 != resultStart1) 392 failCount++; 393 matcher.find(); 394 int matcherStart2 = matcher.start(); 395 int resultStart2 = mr.start(); 396 if (matcherStart2 == resultStart2) 397 failCount++; 398 if (resultStart1 != resultStart2) 399 failCount++; 400 MatchResult mr2 = matcher.toMatchResult(); 401 if (mr == mr2) 402 failCount++; 403 if (mr2.start() != matcherStart2) 404 failCount++; 405 report("toMatchResult is a copy"); 406 } 407 408 private 
static void checkExpectedISE(Runnable test) { 409 try { 410 test.run(); 411 failCount++; 412 } catch (IllegalStateException x) { 413 } catch (IndexOutOfBoundsException xx) { 414 failCount++; 415 } 416 } 417 418 private static void checkExpectedIOOE(Runnable test) { 419 try { 420 test.run(); 421 failCount++; 422 } catch (IndexOutOfBoundsException x) {} 423 } 424 425 // This is for bug 8074678 426 // Test the result of toMatchResult throws ISE if no match is availble 427 private static void toMatchResultTest2() throws Exception { 428 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 429 matcher.find(); 430 MatchResult mr = matcher.toMatchResult(); 431 432 checkExpectedISE(() -> mr.start()); 433 checkExpectedISE(() -> mr.start(2)); 434 checkExpectedISE(() -> mr.end()); 435 checkExpectedISE(() -> mr.end(2)); 436 checkExpectedISE(() -> mr.group()); 437 checkExpectedISE(() -> mr.group(2)); 438 439 matcher = Pattern.compile("(match)").matcher("there is a match"); 440 matcher.find(); 441 MatchResult mr2 = matcher.toMatchResult(); 442 checkExpectedIOOE(() -> mr2.start(2)); 443 checkExpectedIOOE(() -> mr2.end(2)); 444 checkExpectedIOOE(() -> mr2.group(2)); 445 446 report("toMatchResult2 appropriate exceptions"); 447 } 448 449 // This is for bug 5013885 450 // Must test a slice to see if it reports hitEnd correctly 451 private static void hitEndTest() throws Exception { 452 // Basic test of Slice node 453 Pattern p = Pattern.compile("^squidattack"); 454 Matcher m = p.matcher("squack"); 455 m.find(); 456 if (m.hitEnd()) 457 failCount++; 458 m.reset("squid"); 459 m.find(); 460 if (!m.hitEnd()) 461 failCount++; 462 463 // Test Slice, SliceA and SliceU nodes 464 for (int i=0; i<3; i++) { 465 int flags = 0; 466 if (i==1) flags = Pattern.CASE_INSENSITIVE; 467 if (i==2) flags = Pattern.UNICODE_CASE; 468 p = Pattern.compile("^abc", flags); 469 m = p.matcher("ad"); 470 m.find(); 471 if (m.hitEnd()) 472 failCount++; 473 m.reset("ab"); 474 m.find(); 475 if 
(!m.hitEnd()) 476 failCount++; 477 } 478 479 // Test Boyer-Moore node 480 p = Pattern.compile("catattack"); 481 m = p.matcher("attack"); 482 m.find(); 483 if (!m.hitEnd()) 484 failCount++; 485 486 p = Pattern.compile("catattack"); 487 m = p.matcher("attackattackattackcatatta"); 488 m.find(); 489 if (!m.hitEnd()) 490 failCount++; 491 492 // 8184706: Matching u+0d at EOL against \R should hit-end 493 p = Pattern.compile("...\\R"); 494 m = p.matcher("cat" + (char)0x0a); 495 m.find(); 496 if (m.hitEnd()) 497 failCount++; 498 499 m = p.matcher("cat" + (char)0x0d); 500 m.find(); 501 if (!m.hitEnd()) 502 failCount++; 503 504 m = p.matcher("cat" + (char)0x0d + (char)0x0a); 505 m.find(); 506 if (m.hitEnd()) 507 failCount++; 508 509 report("hitEnd"); 510 } 511 512 // This is for bug 4997476 513 // It is weird code submitted by customer demonstrating a regression 514 private static void wordSearchTest() throws Exception { 515 String testString = new String("word1 word2 word3"); 516 Pattern p = Pattern.compile("\\b"); 517 Matcher m = p.matcher(testString); 518 int position = 0; 519 int start = 0; 520 while (m.find(position)) { 521 start = m.start(); 522 if (start == testString.length()) 523 break; 524 if (m.find(start+1)) { 525 position = m.start(); 526 } else { 527 position = testString.length(); 528 } 529 if (testString.substring(start, position).equals(" ")) 530 continue; 531 if (!testString.substring(start, position-1).startsWith("word")) 532 failCount++; 533 } 534 report("Customer word search"); 535 } 536 537 // This is for bug 4994840 538 private static void caretAtEndTest() throws Exception { 539 // Problem only occurs with multiline patterns 540 // containing a beginning-of-line caret "^" followed 541 // by an expression that also matches the empty string. 
542 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 543 Matcher matcher = pattern.matcher("\r"); 544 matcher.find(); 545 matcher.find(); 546 report("Caret at end"); 547 } 548 549 // This test is for 4979006 550 // Check to see if word boundary construct properly handles unicode 551 // non spacing marks 552 private static void unicodeWordBoundsTest() throws Exception { 553 String spaces = " "; 554 String wordChar = "a"; 555 String nsm = "\u030a"; 556 557 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 558 559 Pattern pattern = Pattern.compile("\\b"); 560 Matcher matcher = pattern.matcher(""); 561 // S=other B=word character N=non spacing mark .=word boundary 562 // SS.BB.SS 563 String input = spaces + wordChar + wordChar + spaces; 564 twoFindIndexes(input, matcher, 2, 4); 565 // SS.BBN.SS 566 input = spaces + wordChar +wordChar + nsm + spaces; 567 twoFindIndexes(input, matcher, 2, 5); 568 // SS.BN.SS 569 input = spaces + wordChar + nsm + spaces; 570 twoFindIndexes(input, matcher, 2, 4); 571 // SS.BNN.SS 572 input = spaces + wordChar + nsm + nsm + spaces; 573 twoFindIndexes(input, matcher, 2, 5); 574 // SSN.BB.SS 575 input = spaces + nsm + wordChar + wordChar + spaces; 576 twoFindIndexes(input, matcher, 3, 5); 577 // SS.BNB.SS 578 input = spaces + wordChar + nsm + wordChar + spaces; 579 twoFindIndexes(input, matcher, 2, 5); 580 // SSNNSS 581 input = spaces + nsm + nsm + spaces; 582 matcher.reset(input); 583 if (matcher.find()) 584 failCount++; 585 // SSN.BBN.SS 586 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 587 twoFindIndexes(input, matcher, 3, 6); 588 589 report("Unicode word boundary"); 590 } 591 592 private static void twoFindIndexes(String input, Matcher matcher, int a, 593 int b) throws Exception 594 { 595 matcher.reset(input); 596 matcher.find(); 597 if (matcher.start() != a) 598 failCount++; 599 matcher.find(); 600 if (matcher.start() != b) 601 failCount++; 602 } 603 604 // This test is for 6284152 605 static 
void check(String regex, String input, String[] expected) { 606 List<String> result = new ArrayList<String>(); 607 Pattern p = Pattern.compile(regex); 608 Matcher m = p.matcher(input); 609 while (m.find()) { 610 result.add(m.group()); 611 } 612 if (!Arrays.asList(expected).equals(result)) 613 failCount++; 614 } 615 616 private static void lookbehindTest() throws Exception { 617 //Positive 618 check("(?<=%.{0,5})foo\\d", 619 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 620 new String[]{"foo1", "foo2", "foo3"}); 621 622 //boundary at end of the lookbehind sub-regex should work consistently 623 //with the boundary just after the lookbehind sub-regex 624 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 625 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 626 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 627 check("(?<!abc \\b)foo", "abc foo", new String[0]); 628 629 //Negative 630 check("(?<!%.{0,5})foo\\d", 631 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 632 new String[] {"foo4", "foo5"}); 633 634 //Positive greedy 635 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 636 637 //Positive reluctant 638 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 639 640 //supplementary 641 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 642 new String[] {"fo\ud800\udc00o"}); 643 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 644 new String[] {"fo\ud800\udc00o"}); 645 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 646 new String[] {"fo\ud800\udc00o"}); 647 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 648 new String[] {"fo\ud800\udc00o"}); 649 report("Lookbehind"); 650 } 651 652 // This test is for 4938995 653 // Check to see if weak region boundaries are transparent to 654 // lookahead and lookbehind constructs 655 private static void boundsTest() throws Exception { 656 String fullMessage = "catdogcat"; 657 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 658 
Matcher matcher = pattern.matcher("catdogca");
        matcher.useTransparentBounds(true);
        // Truncated on the right: the lookahead cannot be satisfied.
        if (matcher.find()) {
            failCount++;
        }
        // Truncated on the left: the lookbehind cannot be satisfied.
        matcher.reset("atdogcat");
        if (matcher.find()) {
            failCount++;
        }
        matcher.reset(fullMessage);
        if (!matcher.find()) {
            failCount++;
        }
        // With transparent bounds the lookarounds may see text outside
        // the region, so each of the following regions must match.
        matcher.reset(fullMessage);
        matcher.region(0,9);
        if (!matcher.find()) {
            failCount++;
        }
        matcher.reset(fullMessage);
        matcher.region(0,6);
        if (!matcher.find()) {
            failCount++;
        }
        matcher.reset(fullMessage);
        matcher.region(3,6);
        if (!matcher.find()) {
            failCount++;
        }
        matcher.useTransparentBounds(false);
        if (matcher.find()) {
            failCount++;
        }

        // Negative lookahead/lookbehind
        pattern = Pattern.compile("(?<!cat)dog(?!cat)");
        matcher = pattern.matcher("dogcat");
        matcher.useTransparentBounds(true);
        matcher.region(0,3);
        if (matcher.find()) {
            failCount++;
        }
        matcher.reset("catdog");
        matcher.region(3,6);
        if (matcher.find()) {
            failCount++;
        }
        matcher.useTransparentBounds(false);
        matcher.reset("dogcat");
        matcher.region(0,3);
        if (!matcher.find()) {
            failCount++;
        }
        matcher.reset("catdog");
        matcher.region(3,6);
        if (!matcher.find()) {
            failCount++;
        }

        report("Region bounds transparency");
    }

    // This test is for 4945394
    // The literal "$0" occurs once, so only the first find may succeed.
    private static void findFromTest() throws Exception {
        final String text = "This is 40 $0 message.";
        final Matcher finder = Pattern.compile("\\$0").matcher(text);
        if (!finder.find()) {
            failCount++;
        }
        if (finder.find()) {
            failCount++;
        }
        if (finder.find()) {
            failCount++;
        }
        report("Check for alternating find");
    }

    // This test is for 4872664 and 4892980
    private static void negatedCharClassTest() throws Exception {
        Pattern pattern = Pattern.compile("[^>]");
        Matcher matcher = pattern.matcher("\u203A");
        if (!matcher.matches()) {
            failCount++;
        }
        pattern = Pattern.compile("[^fr]");
        matcher = pattern.matcher("a");
730 if (!matcher.find()) 731 failCount++; 732 matcher.reset("\u203A"); 733 if (!matcher.find()) 734 failCount++; 735 String s = "for"; 736 String result[] = s.split("[^fr]"); 737 if (!result[0].equals("f")) 738 failCount++; 739 if (!result[1].equals("r")) 740 failCount++; 741 s = "f\u203Ar"; 742 result = s.split("[^fr]"); 743 if (!result[0].equals("f")) 744 failCount++; 745 if (!result[1].equals("r")) 746 failCount++; 747 748 // Test adding to bits, subtracting a node, then adding to bits again 749 pattern = Pattern.compile("[^f\u203Ar]"); 750 matcher = pattern.matcher("a"); 751 if (!matcher.find()) 752 failCount++; 753 matcher.reset("f"); 754 if (matcher.find()) 755 failCount++; 756 matcher.reset("\u203A"); 757 if (matcher.find()) 758 failCount++; 759 matcher.reset("r"); 760 if (matcher.find()) 761 failCount++; 762 matcher.reset("\u203B"); 763 if (!matcher.find()) 764 failCount++; 765 766 // Test subtracting a node, adding to bits, subtracting again 767 pattern = Pattern.compile("[^\u203Ar\u203B]"); 768 matcher = pattern.matcher("a"); 769 if (!matcher.find()) 770 failCount++; 771 matcher.reset("\u203A"); 772 if (matcher.find()) 773 failCount++; 774 matcher.reset("r"); 775 if (matcher.find()) 776 failCount++; 777 matcher.reset("\u203B"); 778 if (matcher.find()) 779 failCount++; 780 matcher.reset("\u203C"); 781 if (!matcher.find()) 782 failCount++; 783 784 report("Negated Character Class"); 785 } 786 787 // This test is for 4628291 788 private static void toStringTest() throws Exception { 789 Pattern pattern = Pattern.compile("b+"); 790 if (pattern.toString() != "b+") 791 failCount++; 792 Matcher matcher = pattern.matcher("aaabbbccc"); 793 String matcherString = matcher.toString(); // unspecified 794 matcher.find(); 795 matcherString = matcher.toString(); // unspecified 796 matcher.region(0,3); 797 matcherString = matcher.toString(); // unspecified 798 matcher.reset(); 799 matcherString = matcher.toString(); // unspecified 800 report("toString"); 801 } 802 803 // 
This test is for 4808962 804 private static void literalPatternTest() throws Exception { 805 int flags = Pattern.LITERAL; 806 807 Pattern pattern = Pattern.compile("abc\\t$^", flags); 808 check(pattern, "abc\\t$^", true); 809 810 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 811 check(pattern, "abc\\t$^", true); 812 813 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 814 check(pattern, "\\Qa^$bcabc\\E", true); 815 check(pattern, "a^$bcabc", false); 816 817 pattern = Pattern.compile("\\\\Q\\\\E"); 818 check(pattern, "\\Q\\E", true); 819 820 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 821 check(pattern, "abcefg\\Q\\Ehij", true); 822 823 pattern = Pattern.compile("\\\\\\Q\\\\E"); 824 check(pattern, "\\\\\\\\", true); 825 826 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 827 check(pattern, "\\Qa^$bcabc\\E", true); 828 check(pattern, "a^$bcabc", false); 829 830 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 831 check(pattern, "\\Qabc\\Edef", true); 832 check(pattern, "abcdef", false); 833 834 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 835 check(pattern, "abc\\Edef", true); 836 check(pattern, "abcdef", false); 837 838 pattern = Pattern.compile(Pattern.quote("\\E")); 839 check(pattern, "\\E", true); 840 841 pattern = Pattern.compile("((((abc.+?:)", flags); 842 check(pattern, "((((abc.+?:)", true); 843 844 flags |= Pattern.MULTILINE; 845 846 pattern = Pattern.compile("^cat$", flags); 847 check(pattern, "abc^cat$def", true); 848 check(pattern, "cat", false); 849 850 flags |= Pattern.CASE_INSENSITIVE; 851 852 pattern = Pattern.compile("abcdef", flags); 853 check(pattern, "ABCDEF", true); 854 check(pattern, "AbCdEf", true); 855 856 flags |= Pattern.DOTALL; 857 858 pattern = Pattern.compile("a...b", flags); 859 check(pattern, "A...b", true); 860 check(pattern, "Axxxb", false); 861 862 flags |= Pattern.CANON_EQ; 863 864 Pattern p = Pattern.compile("testa\u030a", flags); 865 check(pattern, "testa\u030a", false); 866 
check(pattern, "test\u00e5", false); 867 868 // Supplementary character test 869 flags = Pattern.LITERAL; 870 871 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 872 check(pattern, toSupplementaries("abc\\t$^"), true); 873 874 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 875 check(pattern, toSupplementaries("abc\\t$^"), true); 876 877 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 878 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 879 check(pattern, toSupplementaries("a^$bcabc"), false); 880 881 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 882 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 883 check(pattern, toSupplementaries("a^$bcabc"), false); 884 885 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 886 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 887 check(pattern, toSupplementaries("abcdef"), false); 888 889 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 890 check(pattern, toSupplementaries("abc\\Edef"), true); 891 check(pattern, toSupplementaries("abcdef"), false); 892 893 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 894 check(pattern, toSupplementaries("((((abc.+?:)"), true); 895 896 flags |= Pattern.MULTILINE; 897 898 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 899 check(pattern, toSupplementaries("abc^cat$def"), true); 900 check(pattern, toSupplementaries("cat"), false); 901 902 flags |= Pattern.DOTALL; 903 904 // note: this is case-sensitive. 
905 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 906 check(pattern, toSupplementaries("a...b"), true); 907 check(pattern, toSupplementaries("axxxb"), false); 908 909 flags |= Pattern.CANON_EQ; 910 911 String t = toSupplementaries("test"); 912 p = Pattern.compile(t + "a\u030a", flags); 913 check(pattern, t + "a\u030a", false); 914 check(pattern, t + "\u00e5", false); 915 916 report("Literal pattern"); 917 } 918 919 // This test is for 4803179 920 // This test is also for 4808962, replacement parts 921 private static void literalReplacementTest() throws Exception { 922 int flags = Pattern.LITERAL; 923 924 Pattern pattern = Pattern.compile("abc", flags); 925 Matcher matcher = pattern.matcher("zzzabczzz"); 926 String replaceTest = "$0"; 927 String result = matcher.replaceAll(replaceTest); 928 if (!result.equals("zzzabczzz")) 929 failCount++; 930 931 matcher.reset(); 932 String literalReplacement = matcher.quoteReplacement(replaceTest); 933 result = matcher.replaceAll(literalReplacement); 934 if (!result.equals("zzz$0zzz")) 935 failCount++; 936 937 matcher.reset(); 938 replaceTest = "\\t$\\$"; 939 literalReplacement = matcher.quoteReplacement(replaceTest); 940 result = matcher.replaceAll(literalReplacement); 941 if (!result.equals("zzz\\t$\\$zzz")) 942 failCount++; 943 944 // Supplementary character test 945 pattern = Pattern.compile(toSupplementaries("abc"), flags); 946 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 947 replaceTest = "$0"; 948 result = matcher.replaceAll(replaceTest); 949 if (!result.equals(toSupplementaries("zzzabczzz"))) 950 failCount++; 951 952 matcher.reset(); 953 literalReplacement = matcher.quoteReplacement(replaceTest); 954 result = matcher.replaceAll(literalReplacement); 955 if (!result.equals(toSupplementaries("zzz$0zzz"))) 956 failCount++; 957 958 matcher.reset(); 959 replaceTest = "\\t$\\$"; 960 literalReplacement = matcher.quoteReplacement(replaceTest); 961 result = matcher.replaceAll(literalReplacement); 962 if 
(!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 963 failCount++; 964 965 // IAE should be thrown if backslash or '$' is the last character 966 // in replacement string 967 try { 968 "\uac00".replaceAll("\uac00", "$"); 969 failCount++; 970 } catch (IllegalArgumentException iie) { 971 } catch (Exception e) { 972 failCount++; 973 } 974 try { 975 "\uac00".replaceAll("\uac00", "\\"); 976 failCount++; 977 } catch (IllegalArgumentException iie) { 978 } catch (Exception e) { 979 failCount++; 980 } 981 report("Literal replacement"); 982 } 983 984 // This test is for 4757029 985 private static void regionTest() throws Exception { 986 Pattern pattern = Pattern.compile("abc"); 987 Matcher matcher = pattern.matcher("abcdefabc"); 988 989 matcher.region(0,9); 990 if (!matcher.find()) 991 failCount++; 992 if (!matcher.find()) 993 failCount++; 994 matcher.region(0,3); 995 if (!matcher.find()) 996 failCount++; 997 matcher.region(3,6); 998 if (matcher.find()) 999 failCount++; 1000 matcher.region(0,2); 1001 if (matcher.find()) 1002 failCount++; 1003 1004 expectRegionFail(matcher, 1, -1); 1005 expectRegionFail(matcher, -1, -1); 1006 expectRegionFail(matcher, -1, 1); 1007 expectRegionFail(matcher, 5, 3); 1008 expectRegionFail(matcher, 5, 12); 1009 expectRegionFail(matcher, 12, 12); 1010 1011 pattern = Pattern.compile("^abc$"); 1012 matcher = pattern.matcher("zzzabczzz"); 1013 matcher.region(0,9); 1014 if (matcher.find()) 1015 failCount++; 1016 matcher.region(3,6); 1017 if (!matcher.find()) 1018 failCount++; 1019 matcher.region(3,6); 1020 matcher.useAnchoringBounds(false); 1021 if (matcher.find()) 1022 failCount++; 1023 1024 // Supplementary character test 1025 pattern = Pattern.compile(toSupplementaries("abc")); 1026 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 1027 matcher.region(0,9*2); 1028 if (!matcher.find()) 1029 failCount++; 1030 if (!matcher.find()) 1031 failCount++; 1032 matcher.region(0,3*2); 1033 if (!matcher.find()) 1034 failCount++; 1035 
matcher.region(1,3*2); 1036 if (matcher.find()) 1037 failCount++; 1038 matcher.region(3*2,6*2); 1039 if (matcher.find()) 1040 failCount++; 1041 matcher.region(0,2*2); 1042 if (matcher.find()) 1043 failCount++; 1044 matcher.region(0,2*2+1); 1045 if (matcher.find()) 1046 failCount++; 1047 1048 expectRegionFail(matcher, 1*2, -1); 1049 expectRegionFail(matcher, -1, -1); 1050 expectRegionFail(matcher, -1, 1*2); 1051 expectRegionFail(matcher, 5*2, 3*2); 1052 expectRegionFail(matcher, 5*2, 12*2); 1053 expectRegionFail(matcher, 12*2, 12*2); 1054 1055 pattern = Pattern.compile(toSupplementaries("^abc$")); 1056 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1057 matcher.region(0,9*2); 1058 if (matcher.find()) 1059 failCount++; 1060 matcher.region(3*2,6*2); 1061 if (!matcher.find()) 1062 failCount++; 1063 matcher.region(3*2+1,6*2); 1064 if (matcher.find()) 1065 failCount++; 1066 matcher.region(3*2,6*2-1); 1067 if (matcher.find()) 1068 failCount++; 1069 matcher.region(3*2,6*2); 1070 matcher.useAnchoringBounds(false); 1071 if (matcher.find()) 1072 failCount++; 1073 1074 // JDK-8230829 1075 pattern = Pattern.compile("\\ud800\\udc61"); 1076 matcher = pattern.matcher("\ud800\udc61"); 1077 matcher.region(0, 1); 1078 if (matcher.find()) { 1079 failCount++; 1080 System.out.println("Matched a surrogate pair" + 1081 " that crosses border of region"); 1082 } 1083 if (!matcher.hitEnd()) { 1084 failCount++; 1085 System.out.println("Expected to hit the end when" + 1086 " matching a surrogate pair crossing region"); 1087 } 1088 1089 report("Regions"); 1090 } 1091 1092 private static void expectRegionFail(Matcher matcher, int index1, 1093 int index2) 1094 { 1095 try { 1096 matcher.region(index1, index2); 1097 failCount++; 1098 } catch (IndexOutOfBoundsException ioobe) { 1099 // Correct result 1100 } catch (IllegalStateException ise) { 1101 // Correct result 1102 } 1103 } 1104 1105 // This test is for 4803197 1106 private static void escapedSegmentTest() throws Exception { 1107 
1108 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1109 check(pattern, "dir1\\dir2", true); 1110 1111 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1112 check(pattern, "dir1\\dir2\\", true); 1113 1114 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1115 check(pattern, "dir1\\dir2\\", true); 1116 1117 // Supplementary character test 1118 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1119 check(pattern, toSupplementaries("dir1\\dir2"), true); 1120 1121 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1122 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1123 1124 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1125 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1126 1127 report("Escaped segment"); 1128 } 1129 1130 // This test is for 4792284 1131 private static void nonCaptureRepetitionTest() throws Exception { 1132 String input = "abcdefgh;"; 1133 1134 String[] patterns = new String[] { 1135 "(?:\\w{4})+;", 1136 "(?:\\w{8})*;", 1137 "(?:\\w{2}){2,4};", 1138 "(?:\\w{4}){2,};", // only matches the 1139 ".*?(?:\\w{5})+;", // specified minimum 1140 ".*?(?:\\w{9})*;", // number of reps - OK 1141 "(?:\\w{4})+?;", // lazy repetition - OK 1142 "(?:\\w{4})++;", // possessive repetition - OK 1143 "(?:\\w{2,}?)+;", // non-deterministic - OK 1144 "(\\w{4})+;", // capturing group - OK 1145 }; 1146 1147 for (int i = 0; i < patterns.length; i++) { 1148 // Check find() 1149 check(patterns[i], 0, input, input, true); 1150 // Check matches() 1151 Pattern p = Pattern.compile(patterns[i]); 1152 Matcher m = p.matcher(input); 1153 1154 if (m.matches()) { 1155 if (!m.group(0).equals(input)) 1156 failCount++; 1157 } else { 1158 failCount++; 1159 } 1160 } 1161 1162 report("Non capturing repetition"); 1163 } 1164 1165 // This test is for 6358731 1166 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1167 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1168 Matcher 
matcher = pattern.matcher("abcd"); 1169 if (!matcher.matches() || 1170 matcher.group(1) != null || 1171 !matcher.group(2).equals("abcd")) { 1172 failCount++; 1173 } 1174 report("Not captured GroupCurly"); 1175 } 1176 1177 // This test is for 4706545 1178 private static void javaCharClassTest() throws Exception { 1179 for (int i=0; i<1000; i++) { 1180 char c = (char)generator.nextInt(); 1181 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1182 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1183 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1184 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1185 check("{javaDigit}", c, Character.isDigit(c)); 1186 check("{javaDefined}", c, Character.isDefined(c)); 1187 check("{javaLetter}", c, Character.isLetter(c)); 1188 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1189 check("{javaJavaIdentifierStart}", c, 1190 Character.isJavaIdentifierStart(c)); 1191 check("{javaJavaIdentifierPart}", c, 1192 Character.isJavaIdentifierPart(c)); 1193 check("{javaUnicodeIdentifierStart}", c, 1194 Character.isUnicodeIdentifierStart(c)); 1195 check("{javaUnicodeIdentifierPart}", c, 1196 Character.isUnicodeIdentifierPart(c)); 1197 check("{javaIdentifierIgnorable}", c, 1198 Character.isIdentifierIgnorable(c)); 1199 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1200 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1201 check("{javaISOControl}", c, Character.isISOControl(c)); 1202 check("{javaMirrored}", c, Character.isMirrored(c)); 1203 1204 } 1205 1206 // Supplementary character test 1207 for (int i=0; i<1000; i++) { 1208 int c = generator.nextInt(Character.MAX_CODE_POINT 1209 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1210 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1211 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1212 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1213 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1214 check("{javaTitleCase}", c, 
Character.isTitleCase(c)); 1215 check("{javaDigit}", c, Character.isDigit(c)); 1216 check("{javaDefined}", c, Character.isDefined(c)); 1217 check("{javaLetter}", c, Character.isLetter(c)); 1218 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1219 check("{javaJavaIdentifierStart}", c, 1220 Character.isJavaIdentifierStart(c)); 1221 check("{javaJavaIdentifierPart}", c, 1222 Character.isJavaIdentifierPart(c)); 1223 check("{javaUnicodeIdentifierStart}", c, 1224 Character.isUnicodeIdentifierStart(c)); 1225 check("{javaUnicodeIdentifierPart}", c, 1226 Character.isUnicodeIdentifierPart(c)); 1227 check("{javaIdentifierIgnorable}", c, 1228 Character.isIdentifierIgnorable(c)); 1229 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1230 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1231 check("{javaISOControl}", c, Character.isISOControl(c)); 1232 check("{javaMirrored}", c, Character.isMirrored(c)); 1233 } 1234 1235 report("Java character classes"); 1236 } 1237 1238 // This test is for 4523620 1239 /* 1240 private static void numOccurrencesTest() throws Exception { 1241 Pattern pattern = Pattern.compile("aaa"); 1242 1243 if (pattern.numOccurrences("aaaaaa", false) != 2) 1244 failCount++; 1245 if (pattern.numOccurrences("aaaaaa", true) != 4) 1246 failCount++; 1247 1248 pattern = Pattern.compile("^"); 1249 if (pattern.numOccurrences("aaaaaa", false) != 1) 1250 failCount++; 1251 if (pattern.numOccurrences("aaaaaa", true) != 1) 1252 failCount++; 1253 1254 report("Number of Occurrences"); 1255 } 1256 */ 1257 1258 // This test is for 4776374 1259 private static void caretBetweenTerminatorsTest() throws Exception { 1260 int flags1 = Pattern.DOTALL; 1261 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1262 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1263 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1264 1265 check("^....", flags1, "test\ntest", "test", true); 1266 check(".....^", flags1, "test\ntest", "test", false); 1267 
check(".....^", flags1, "test\n", "test", false); 1268 check("....^", flags1, "test\r\n", "test", false); 1269 1270 check("^....", flags2, "test\ntest", "test", true); 1271 check("....^", flags2, "test\ntest", "test", false); 1272 check(".....^", flags2, "test\n", "test", false); 1273 check("....^", flags2, "test\r\n", "test", false); 1274 1275 check("^....", flags3, "test\ntest", "test", true); 1276 check(".....^", flags3, "test\ntest", "test\n", true); 1277 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1278 check(".....^", flags3, "test\n", "test", false); 1279 check(".....^", flags3, "test\r\n", "test", false); 1280 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1281 1282 check("^....", flags4, "test\ntest", "test", true); 1283 check(".....^", flags3, "test\ntest", "test\n", true); 1284 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1285 check(".....^", flags4, "test\n", "test\n", false); 1286 check(".....^", flags4, "test\r\n", "test\r", false); 1287 1288 // Supplementary character test 1289 String t = toSupplementaries("test"); 1290 check("^....", flags1, t+"\n"+t, t, true); 1291 check(".....^", flags1, t+"\n"+t, t, false); 1292 check(".....^", flags1, t+"\n", t, false); 1293 check("....^", flags1, t+"\r\n", t, false); 1294 1295 check("^....", flags2, t+"\n"+t, t, true); 1296 check("....^", flags2, t+"\n"+t, t, false); 1297 check(".....^", flags2, t+"\n", t, false); 1298 check("....^", flags2, t+"\r\n", t, false); 1299 1300 check("^....", flags3, t+"\n"+t, t, true); 1301 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1302 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1303 check(".....^", flags3, t+"\n", t, false); 1304 check(".....^", flags3, t+"\r\n", t, false); 1305 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1306 1307 check("^....", flags4, t+"\n"+t, t, true); 1308 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1309 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1310 
check(".....^", flags4, t+"\n", t+"\n", false); 1311 check(".....^", flags4, t+"\r\n", t+"\r", false); 1312 1313 report("Caret between terminators"); 1314 } 1315 1316 // This test is for 4727935 1317 private static void dollarAtEndTest() throws Exception { 1318 int flags1 = Pattern.DOTALL; 1319 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1320 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1321 1322 check("....$", flags1, "test\n", "test", true); 1323 check("....$", flags1, "test\r\n", "test", true); 1324 check(".....$", flags1, "test\n", "test\n", true); 1325 check(".....$", flags1, "test\u0085", "test\u0085", true); 1326 check("....$", flags1, "test\u0085", "test", true); 1327 1328 check("....$", flags2, "test\n", "test", true); 1329 check(".....$", flags2, "test\n", "test\n", true); 1330 check(".....$", flags2, "test\u0085", "test\u0085", true); 1331 check("....$", flags2, "test\u0085", "est\u0085", true); 1332 1333 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1334 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1335 check("....$blah", flags3, "test\nblah", "!!!!", false); 1336 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1337 1338 // Supplementary character test 1339 String t = toSupplementaries("test"); 1340 String b = toSupplementaries("blah"); 1341 check("....$", flags1, t+"\n", t, true); 1342 check("....$", flags1, t+"\r\n", t, true); 1343 check(".....$", flags1, t+"\n", t+"\n", true); 1344 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1345 check("....$", flags1, t+"\u0085", t, true); 1346 1347 check("....$", flags2, t+"\n", t, true); 1348 check(".....$", flags2, t+"\n", t+"\n", true); 1349 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1350 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1351 1352 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1353 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1354 check("....$"+b, flags3, t+"\n"+b, 
"!!!!", false); 1355 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1356 1357 report("Dollar at End"); 1358 } 1359 1360 // This test is for 4711773 1361 private static void multilineDollarTest() throws Exception { 1362 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1363 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1364 matcher.find(); 1365 if (matcher.start(0) != 9) 1366 failCount++; 1367 matcher.find(); 1368 if (matcher.start(0) != 20) 1369 failCount++; 1370 1371 // Supplementary character test 1372 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1373 matcher.find(); 1374 if (matcher.start(0) != 9*2) 1375 failCount++; 1376 matcher.find(); 1377 if (matcher.start(0) != 20*2) 1378 failCount++; 1379 1380 report("Multiline Dollar"); 1381 } 1382 1383 private static void reluctantRepetitionTest() throws Exception { 1384 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1385 check(p, "1 word word word 2", true); 1386 check(p, "1 wor wo w 2", true); 1387 check(p, "1 word word 2", true); 1388 check(p, "1 word 2", true); 1389 check(p, "1 wo w w 2", true); 1390 check(p, "1 wo w 2", true); 1391 check(p, "1 wor w 2", true); 1392 1393 p = Pattern.compile("([a-z])+?c"); 1394 Matcher m = p.matcher("ababcdefdec"); 1395 check(m, "ababc"); 1396 1397 // Supplementary character test 1398 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1399 m = p.matcher(toSupplementaries("ababcdefdec")); 1400 check(m, toSupplementaries("ababc")); 1401 1402 report("Reluctant Repetition"); 1403 } 1404 1405 private static Pattern serializedPattern(Pattern p) throws Exception { 1406 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1407 ObjectOutputStream oos = new ObjectOutputStream(baos); 1408 oos.writeObject(p); 1409 oos.close(); 1410 try (ObjectInputStream ois = new ObjectInputStream( 1411 new ByteArrayInputStream(baos.toByteArray()))) { 1412 return (Pattern)ois.readObject(); 1413 } 1414 } 1415 1416 private 
static void serializeTest() throws Exception { 1417 String patternStr = "(b)"; 1418 String matchStr = "b"; 1419 Pattern pattern = Pattern.compile(patternStr); 1420 Pattern serializedPattern = serializedPattern(pattern); 1421 Matcher matcher = serializedPattern.matcher(matchStr); 1422 if (!matcher.matches()) 1423 failCount++; 1424 if (matcher.groupCount() != 1) 1425 failCount++; 1426 1427 pattern = Pattern.compile("a(?-i)b", Pattern.CASE_INSENSITIVE); 1428 serializedPattern = serializedPattern(pattern); 1429 if (!serializedPattern.matcher("Ab").matches()) 1430 failCount++; 1431 if (serializedPattern.matcher("AB").matches()) 1432 failCount++; 1433 1434 report("Serialization"); 1435 } 1436 1437 private static void gTest() { 1438 Pattern pattern = Pattern.compile("\\G\\w"); 1439 Matcher matcher = pattern.matcher("abc#x#x"); 1440 matcher.find(); 1441 matcher.find(); 1442 matcher.find(); 1443 if (matcher.find()) 1444 failCount++; 1445 1446 pattern = Pattern.compile("\\GA*"); 1447 matcher = pattern.matcher("1A2AA3"); 1448 matcher.find(); 1449 if (matcher.find()) 1450 failCount++; 1451 1452 pattern = Pattern.compile("\\GA*"); 1453 matcher = pattern.matcher("1A2AA3"); 1454 if (!matcher.find(1)) 1455 failCount++; 1456 matcher.find(); 1457 if (matcher.find()) 1458 failCount++; 1459 1460 report("\\G"); 1461 } 1462 1463 private static void zTest() { 1464 Pattern pattern = Pattern.compile("foo\\Z"); 1465 // Positives 1466 check(pattern, "foo\u0085", true); 1467 check(pattern, "foo\u2028", true); 1468 check(pattern, "foo\u2029", true); 1469 check(pattern, "foo\n", true); 1470 check(pattern, "foo\r", true); 1471 check(pattern, "foo\r\n", true); 1472 // Negatives 1473 check(pattern, "fooo", false); 1474 check(pattern, "foo\n\r", false); 1475 1476 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1477 // Positives 1478 check(pattern, "foo", true); 1479 check(pattern, "foo\n", true); 1480 // Negatives 1481 check(pattern, "foo\r", false); 1482 check(pattern, "foo\u0085", 
false); 1483 check(pattern, "foo\u2028", false); 1484 check(pattern, "foo\u2029", false); 1485 1486 report("\\Z"); 1487 } 1488 1489 private static void replaceFirstTest() { 1490 Pattern pattern = Pattern.compile("(ab)(c*)"); 1491 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1492 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1493 failCount++; 1494 1495 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1496 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1497 failCount++; 1498 1499 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1500 String result = matcher.replaceFirst("$1"); 1501 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1502 failCount++; 1503 1504 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1505 result = matcher.replaceFirst("$2"); 1506 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1507 failCount++; 1508 1509 pattern = Pattern.compile("a*"); 1510 matcher = pattern.matcher("aaaaaaaaaa"); 1511 if (!matcher.replaceFirst("test").equals("test")) 1512 failCount++; 1513 1514 pattern = Pattern.compile("a+"); 1515 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1516 if (!matcher.replaceFirst("test").equals("zzztest")) 1517 failCount++; 1518 1519 // Supplementary character test 1520 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1521 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1522 if (!matcher.replaceFirst(toSupplementaries("test")) 1523 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1524 failCount++; 1525 1526 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1527 if (!matcher.replaceFirst(toSupplementaries("test")). 
1528 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1529 failCount++; 1530 1531 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1532 result = matcher.replaceFirst("$1"); 1533 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1534 failCount++; 1535 1536 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1537 result = matcher.replaceFirst("$2"); 1538 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1539 failCount++; 1540 1541 pattern = Pattern.compile(toSupplementaries("a*")); 1542 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1543 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1544 failCount++; 1545 1546 pattern = Pattern.compile(toSupplementaries("a+")); 1547 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1548 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1549 failCount++; 1550 1551 report("Replace First"); 1552 } 1553 1554 private static void unixLinesTest() { 1555 Pattern pattern = Pattern.compile(".*"); 1556 Matcher matcher = pattern.matcher("aa\u2028blah"); 1557 matcher.find(); 1558 if (!matcher.group(0).equals("aa")) 1559 failCount++; 1560 1561 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1562 matcher = pattern.matcher("aa\u2028blah"); 1563 matcher.find(); 1564 if (!matcher.group(0).equals("aa\u2028blah")) 1565 failCount++; 1566 1567 pattern = Pattern.compile("[az]$", 1568 Pattern.MULTILINE | Pattern.UNIX_LINES); 1569 matcher = pattern.matcher("aa\u2028zz"); 1570 check(matcher, "a\u2028", false); 1571 1572 // Supplementary character test 1573 pattern = Pattern.compile(".*"); 1574 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1575 matcher.find(); 1576 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1577 failCount++; 1578 1579 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1580 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 
1581 matcher.find(); 1582 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1583 failCount++; 1584 1585 pattern = Pattern.compile(toSupplementaries("[az]$"), 1586 Pattern.MULTILINE | Pattern.UNIX_LINES); 1587 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1588 check(matcher, toSupplementaries("a\u2028"), false); 1589 1590 report("Unix Lines"); 1591 } 1592 1593 private static void commentsTest() { 1594 int flags = Pattern.COMMENTS; 1595 1596 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1597 Matcher matcher = pattern.matcher("aa#aa"); 1598 if (!matcher.matches()) 1599 failCount++; 1600 1601 pattern = Pattern.compile("aa # blah", flags); 1602 matcher = pattern.matcher("aa"); 1603 if (!matcher.matches()) 1604 failCount++; 1605 1606 pattern = Pattern.compile("aa blah", flags); 1607 matcher = pattern.matcher("aablah"); 1608 if (!matcher.matches()) 1609 failCount++; 1610 1611 pattern = Pattern.compile("aa # blah blech ", flags); 1612 matcher = pattern.matcher("aa"); 1613 if (!matcher.matches()) 1614 failCount++; 1615 1616 pattern = Pattern.compile("aa # blah\n ", flags); 1617 matcher = pattern.matcher("aa"); 1618 if (!matcher.matches()) 1619 failCount++; 1620 1621 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1622 matcher = pattern.matcher("aabc"); 1623 if (!matcher.matches()) 1624 failCount++; 1625 1626 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1627 matcher = pattern.matcher("aabc"); 1628 if (!matcher.matches()) 1629 failCount++; 1630 1631 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1632 matcher = pattern.matcher("aabc#blech"); 1633 if (!matcher.matches()) 1634 failCount++; 1635 1636 // Supplementary character test 1637 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1638 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1639 if (!matcher.matches()) 1640 failCount++; 1641 1642 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1643 matcher = 
pattern.matcher(toSupplementaries("aa")); 1644 if (!matcher.matches()) 1645 failCount++; 1646 1647 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1648 matcher = pattern.matcher(toSupplementaries("aablah")); 1649 if (!matcher.matches()) 1650 failCount++; 1651 1652 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1653 matcher = pattern.matcher(toSupplementaries("aa")); 1654 if (!matcher.matches()) 1655 failCount++; 1656 1657 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1658 matcher = pattern.matcher(toSupplementaries("aa")); 1659 if (!matcher.matches()) 1660 failCount++; 1661 1662 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1663 matcher = pattern.matcher(toSupplementaries("aabc")); 1664 if (!matcher.matches()) 1665 failCount++; 1666 1667 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1668 matcher = pattern.matcher(toSupplementaries("aabc")); 1669 if (!matcher.matches()) 1670 failCount++; 1671 1672 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1673 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1674 if (!matcher.matches()) 1675 failCount++; 1676 1677 report("Comments"); 1678 } 1679 1680 private static void caseFoldingTest() { // bug 4504687 1681 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1682 Pattern pattern = Pattern.compile("aa", flags); 1683 Matcher matcher = pattern.matcher("ab"); 1684 if (matcher.matches()) 1685 failCount++; 1686 1687 pattern = Pattern.compile("aA", flags); 1688 matcher = pattern.matcher("ab"); 1689 if (matcher.matches()) 1690 failCount++; 1691 1692 pattern = Pattern.compile("aa", flags); 1693 matcher = pattern.matcher("aB"); 1694 if (matcher.matches()) 1695 failCount++; 1696 matcher = pattern.matcher("Ab"); 1697 if (matcher.matches()) 1698 failCount++; 1699 1700 // ASCII "a" 1701 // Latin-1 Supplement "a" + grave 1702 // Cyrillic "a" 1703 String[] patterns 
= new String[] { 1704 //single 1705 "a", "\u00e0", "\u0430", 1706 //slice 1707 "ab", "\u00e0\u00e1", "\u0430\u0431", 1708 //class single 1709 "[a]", "[\u00e0]", "[\u0430]", 1710 //class range 1711 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1712 //back reference 1713 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1714 }; 1715 1716 String[] texts = new String[] { 1717 "A", "\u00c0", "\u0410", 1718 "AB", "\u00c0\u00c1", "\u0410\u0411", 1719 "A", "\u00c0", "\u0410", 1720 "B", "\u00c2", "\u0411", 1721 "aA", "\u00e0\u00c0", "\u0430\u0410" 1722 }; 1723 1724 boolean[] expected = new boolean[] { 1725 true, false, false, 1726 true, false, false, 1727 true, false, false, 1728 true, false, false, 1729 true, false, false 1730 }; 1731 1732 flags = Pattern.CASE_INSENSITIVE; 1733 for (int i = 0; i < patterns.length; i++) { 1734 pattern = Pattern.compile(patterns[i], flags); 1735 matcher = pattern.matcher(texts[i]); 1736 if (matcher.matches() != expected[i]) { 1737 System.out.println("<1> Failed at " + i); 1738 failCount++; 1739 } 1740 } 1741 1742 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1743 for (int i = 0; i < patterns.length; i++) { 1744 pattern = Pattern.compile(patterns[i], flags); 1745 matcher = pattern.matcher(texts[i]); 1746 if (!matcher.matches()) { 1747 System.out.println("<2> Failed at " + i); 1748 failCount++; 1749 } 1750 } 1751 // flag unicode_case alone should do nothing 1752 flags = Pattern.UNICODE_CASE; 1753 for (int i = 0; i < patterns.length; i++) { 1754 pattern = Pattern.compile(patterns[i], flags); 1755 matcher = pattern.matcher(texts[i]); 1756 if (matcher.matches()) { 1757 System.out.println("<3> Failed at " + i); 1758 failCount++; 1759 } 1760 } 1761 1762 // Special cases: i, I, u+0131 and u+0130 1763 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1764 pattern = Pattern.compile("[h-j]+", flags); 1765 if (!pattern.matcher("\u0131\u0130").matches()) 1766 failCount++; 1767 report("Case Folding"); 1768 } 1769 1770 private static void 
appendTest() { 1771 Pattern pattern = Pattern.compile("(ab)(cd)"); 1772 Matcher matcher = pattern.matcher("abcd"); 1773 String result = matcher.replaceAll("$2$1"); 1774 if (!result.equals("cdab")) 1775 failCount++; 1776 1777 String s1 = "Swap all: first = 123, second = 456"; 1778 String s2 = "Swap one: first = 123, second = 456"; 1779 String r = "$3$2$1"; 1780 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1781 matcher = pattern.matcher(s1); 1782 1783 result = matcher.replaceAll(r); 1784 if (!result.equals("Swap all: 123 = first, 456 = second")) 1785 failCount++; 1786 1787 matcher = pattern.matcher(s2); 1788 1789 if (matcher.find()) { 1790 StringBuffer sb = new StringBuffer(); 1791 matcher.appendReplacement(sb, r); 1792 matcher.appendTail(sb); 1793 result = sb.toString(); 1794 if (!result.equals("Swap one: 123 = first, second = 456")) 1795 failCount++; 1796 } 1797 1798 // Supplementary character test 1799 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1800 matcher = pattern.matcher(toSupplementaries("abcd")); 1801 result = matcher.replaceAll("$2$1"); 1802 if (!result.equals(toSupplementaries("cdab"))) 1803 failCount++; 1804 1805 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1806 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1807 r = toSupplementaries("$3$2$1"); 1808 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1809 matcher = pattern.matcher(s1); 1810 1811 result = matcher.replaceAll(r); 1812 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1813 failCount++; 1814 1815 matcher = pattern.matcher(s2); 1816 1817 if (matcher.find()) { 1818 StringBuffer sb = new StringBuffer(); 1819 matcher.appendReplacement(sb, r); 1820 matcher.appendTail(sb); 1821 result = sb.toString(); 1822 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1823 failCount++; 1824 } 1825 report("Append"); 1826 } 1827 1828 private static void splitTest() { 1829 Pattern 
pattern = Pattern.compile(":"); 1830 String[] result = pattern.split("foo:and:boo", 2); 1831 if (!result[0].equals("foo")) 1832 failCount++; 1833 if (!result[1].equals("and:boo")) 1834 failCount++; 1835 // Supplementary character test 1836 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1837 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1838 if (!result[0].equals(toSupplementaries("foo"))) 1839 failCount++; 1840 if (!result[1].equals(toSupplementaries("andXboo"))) 1841 failCount++; 1842 1843 CharBuffer cb = CharBuffer.allocate(100); 1844 cb.put("foo:and:boo"); 1845 cb.flip(); 1846 result = pattern.split(cb); 1847 if (!result[0].equals("foo")) 1848 failCount++; 1849 if (!result[1].equals("and")) 1850 failCount++; 1851 if (!result[2].equals("boo")) 1852 failCount++; 1853 1854 // Supplementary character test 1855 CharBuffer cbs = CharBuffer.allocate(100); 1856 cbs.put(toSupplementaries("fooXandXboo")); 1857 cbs.flip(); 1858 result = patternX.split(cbs); 1859 if (!result[0].equals(toSupplementaries("foo"))) 1860 failCount++; 1861 if (!result[1].equals(toSupplementaries("and"))) 1862 failCount++; 1863 if (!result[2].equals(toSupplementaries("boo"))) 1864 failCount++; 1865 1866 String source = "0123456789"; 1867 for (int limit=-2; limit<3; limit++) { 1868 for (int x=0; x<10; x++) { 1869 result = source.split(Integer.toString(x), limit); 1870 int expectedLength = limit < 1 ? 
2 : limit; 1871 1872 if ((limit == 0) && (x == 9)) { 1873 // expected dropping of "" 1874 if (result.length != 1) 1875 failCount++; 1876 if (!result[0].equals("012345678")) { 1877 failCount++; 1878 } 1879 } else { 1880 if (result.length != expectedLength) { 1881 failCount++; 1882 } 1883 if (!result[0].equals(source.substring(0,x))) { 1884 if (limit != 1) { 1885 failCount++; 1886 } else { 1887 if (!result[0].equals(source.substring(0,10))) { 1888 failCount++; 1889 } 1890 } 1891 } 1892 if (expectedLength > 1) { // Check segment 2 1893 if (!result[1].equals(source.substring(x+1,10))) 1894 failCount++; 1895 } 1896 } 1897 } 1898 } 1899 // Check the case for no match found 1900 for (int limit=-2; limit<3; limit++) { 1901 result = source.split("e", limit); 1902 if (result.length != 1) 1903 failCount++; 1904 if (!result[0].equals(source)) 1905 failCount++; 1906 } 1907 // Check the case for limit == 0, source = ""; 1908 // split() now returns 0-length for empty source "" see #6559590 1909 source = ""; 1910 result = source.split("e", 0); 1911 if (result.length != 1) 1912 failCount++; 1913 if (!result[0].equals(source)) 1914 failCount++; 1915 1916 // Check both split() and splitAsStraem(), especially for zero-lenth 1917 // input and zero-lenth match cases 1918 String[][] input = new String[][] { 1919 { " ", "Abc Efg Hij" }, // normal non-zero-match 1920 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1921 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1922 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1923 { "(?=\\p{Lu})", "AbcEfg" }, 1924 { "(?=\\p{Lu})", "Abc" }, 1925 { " ", "" }, // zero-length input 1926 { ".*", "" }, 1927 1928 // some tests from PatternStreamTest.java 1929 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1930 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1931 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1932 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1933 { "\u56da", 
"1\u56da23\u56da456\u56da7890" }, 1934 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1935 { "\u56da", "" }, 1936 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1937 { "o", "boo:and:foo" }, 1938 { "o", "booooo:and:fooooo" }, 1939 { "o", "fooooo:" }, 1940 }; 1941 1942 String[][] expected = new String[][] { 1943 { "Abc", "Efg", "Hij" }, 1944 { "", "Abc", "Efg", "Hij" }, 1945 { "Abc", "", "Efg", "Hij" }, 1946 { "Abc", "Efg", "Hij" }, 1947 { "Abc", "Efg" }, 1948 { "Abc" }, 1949 { "" }, 1950 { "" }, 1951 1952 { "awgqwefg1fefw", "vssv1vvv1" }, 1953 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1954 { "awgqwefg", "fefw4vssv", "vvv" }, 1955 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1956 { "1", "23", "456", "7890" }, 1957 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1958 { "" }, 1959 { "This", "is", "testing", "", "with", "different", "separators" }, 1960 { "b", "", ":and:f" }, 1961 { "b", "", "", "", "", ":and:f" }, 1962 { "f", "", "", "", "", ":" }, 1963 }; 1964 for (int i = 0; i < input.length; i++) { 1965 pattern = Pattern.compile(input[i][0]); 1966 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1967 failCount++; 1968 } 1969 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1970 // array for zero-length input for now 1971 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1972 expected[i])) { 1973 failCount++; 1974 } 1975 } 1976 report("Split"); 1977 } 1978 1979 private static void negationTest() { 1980 Pattern pattern = Pattern.compile("[\\[@^]+"); 1981 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1982 if (!matcher.find()) 1983 failCount++; 1984 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1985 failCount++; 1986 pattern = Pattern.compile("[@\\[^]+"); 1987 matcher = pattern.matcher("@@@@[[[[^^^^"); 1988 if (!matcher.find()) 1989 failCount++; 1990 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1991 
failCount++; 1992 pattern = Pattern.compile("[@\\[^@]+"); 1993 matcher = pattern.matcher("@@@@[[[[^^^^"); 1994 if (!matcher.find()) 1995 failCount++; 1996 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1997 failCount++; 1998 1999 pattern = Pattern.compile("\\)"); 2000 matcher = pattern.matcher("xxx)xxx"); 2001 if (!matcher.find()) 2002 failCount++; 2003 2004 report("Negation"); 2005 } 2006 2007 private static void ampersandTest() { 2008 Pattern pattern = Pattern.compile("[&@]+"); 2009 check(pattern, "@@@@&&&&", true); 2010 2011 pattern = Pattern.compile("[@&]+"); 2012 check(pattern, "@@@@&&&&", true); 2013 2014 pattern = Pattern.compile("[@\\&]+"); 2015 check(pattern, "@@@@&&&&", true); 2016 2017 report("Ampersand"); 2018 } 2019 2020 private static void octalTest() throws Exception { 2021 Pattern pattern = Pattern.compile("\\u0007"); 2022 Matcher matcher = pattern.matcher("\u0007"); 2023 if (!matcher.matches()) 2024 failCount++; 2025 pattern = Pattern.compile("\\07"); 2026 matcher = pattern.matcher("\u0007"); 2027 if (!matcher.matches()) 2028 failCount++; 2029 pattern = Pattern.compile("\\007"); 2030 matcher = pattern.matcher("\u0007"); 2031 if (!matcher.matches()) 2032 failCount++; 2033 pattern = Pattern.compile("\\0007"); 2034 matcher = pattern.matcher("\u0007"); 2035 if (!matcher.matches()) 2036 failCount++; 2037 pattern = Pattern.compile("\\040"); 2038 matcher = pattern.matcher("\u0020"); 2039 if (!matcher.matches()) 2040 failCount++; 2041 pattern = Pattern.compile("\\0403"); 2042 matcher = pattern.matcher("\u00203"); 2043 if (!matcher.matches()) 2044 failCount++; 2045 pattern = Pattern.compile("\\0103"); 2046 matcher = pattern.matcher("\u0043"); 2047 if (!matcher.matches()) 2048 failCount++; 2049 2050 report("Octal"); 2051 } 2052 2053 private static void longPatternTest() throws Exception { 2054 try { 2055 Pattern pattern = Pattern.compile( 2056 "a 32-character-long pattern xxxx"); 2057 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 2058 
pattern = Pattern.compile("a thirty four character long regex"); 2059 StringBuffer patternToBe = new StringBuffer(101); 2060 for (int i=0; i<100; i++) 2061 patternToBe.append((char)(97 + i%26)); 2062 pattern = Pattern.compile(patternToBe.toString()); 2063 } catch (PatternSyntaxException e) { 2064 failCount++; 2065 } 2066 2067 // Supplementary character test 2068 try { 2069 Pattern pattern = Pattern.compile( 2070 toSupplementaries("a 32-character-long pattern xxxx")); 2071 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 2072 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 2073 StringBuffer patternToBe = new StringBuffer(101*2); 2074 for (int i=0; i<100; i++) 2075 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 2076 + 97 + i%26)); 2077 pattern = Pattern.compile(patternToBe.toString()); 2078 } catch (PatternSyntaxException e) { 2079 failCount++; 2080 } 2081 report("LongPattern"); 2082 } 2083 2084 private static void group0Test() throws Exception { 2085 Pattern pattern = Pattern.compile("(tes)ting"); 2086 Matcher matcher = pattern.matcher("testing"); 2087 check(matcher, "testing"); 2088 2089 matcher.reset("testing"); 2090 if (matcher.lookingAt()) { 2091 if (!matcher.group(0).equals("testing")) 2092 failCount++; 2093 } else { 2094 failCount++; 2095 } 2096 2097 matcher.reset("testing"); 2098 if (matcher.matches()) { 2099 if (!matcher.group(0).equals("testing")) 2100 failCount++; 2101 } else { 2102 failCount++; 2103 } 2104 2105 pattern = Pattern.compile("(tes)ting"); 2106 matcher = pattern.matcher("testing"); 2107 if (matcher.lookingAt()) { 2108 if (!matcher.group(0).equals("testing")) 2109 failCount++; 2110 } else { 2111 failCount++; 2112 } 2113 2114 pattern = Pattern.compile("^(tes)ting"); 2115 matcher = pattern.matcher("testing"); 2116 if (matcher.matches()) { 2117 if (!matcher.group(0).equals("testing")) 2118 failCount++; 2119 } else { 2120 failCount++; 2121 } 2122 2123 
// Supplementary character test 2124 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2125 matcher = pattern.matcher(toSupplementaries("testing")); 2126 check(matcher, toSupplementaries("testing")); 2127 2128 matcher.reset(toSupplementaries("testing")); 2129 if (matcher.lookingAt()) { 2130 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2131 failCount++; 2132 } else { 2133 failCount++; 2134 } 2135 2136 matcher.reset(toSupplementaries("testing")); 2137 if (matcher.matches()) { 2138 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2139 failCount++; 2140 } else { 2141 failCount++; 2142 } 2143 2144 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2145 matcher = pattern.matcher(toSupplementaries("testing")); 2146 if (matcher.lookingAt()) { 2147 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2148 failCount++; 2149 } else { 2150 failCount++; 2151 } 2152 2153 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2154 matcher = pattern.matcher(toSupplementaries("testing")); 2155 if (matcher.matches()) { 2156 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2157 failCount++; 2158 } else { 2159 failCount++; 2160 } 2161 2162 report("Group0"); 2163 } 2164 2165 private static void findIntTest() throws Exception { 2166 Pattern p = Pattern.compile("blah"); 2167 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2168 boolean result = m.find(2); 2169 if (!result) 2170 failCount++; 2171 2172 p = Pattern.compile("$"); 2173 m = p.matcher("1234567890"); 2174 result = m.find(10); 2175 if (!result) 2176 failCount++; 2177 try { 2178 result = m.find(11); 2179 failCount++; 2180 } catch (IndexOutOfBoundsException e) { 2181 // correct result 2182 } 2183 2184 // Supplementary character test 2185 p = Pattern.compile(toSupplementaries("blah")); 2186 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2187 result = m.find(2); 2188 if (!result) 2189 failCount++; 2190 2191 report("FindInt"); 2192 } 2193 2194 private static void 
emptyPatternTest() throws Exception { 2195 Pattern p = Pattern.compile(""); 2196 Matcher m = p.matcher("foo"); 2197 2198 // Should find empty pattern at beginning of input 2199 boolean result = m.find(); 2200 if (result != true) 2201 failCount++; 2202 if (m.start() != 0) 2203 failCount++; 2204 2205 // Should not match entire input if input is not empty 2206 m.reset(); 2207 result = m.matches(); 2208 if (result == true) 2209 failCount++; 2210 2211 try { 2212 m.start(0); 2213 failCount++; 2214 } catch (IllegalStateException e) { 2215 // Correct result 2216 } 2217 2218 // Should match entire input if input is empty 2219 m.reset(""); 2220 result = m.matches(); 2221 if (result != true) 2222 failCount++; 2223 2224 result = Pattern.matches("", ""); 2225 if (result != true) 2226 failCount++; 2227 2228 result = Pattern.matches("", "foo"); 2229 if (result == true) 2230 failCount++; 2231 report("EmptyPattern"); 2232 } 2233 2234 private static void charClassTest() throws Exception { 2235 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2236 check(pattern, "blahb]blech", true); 2237 2238 pattern = Pattern.compile("[abc[def]]"); 2239 check(pattern, "b", true); 2240 2241 // Supplementary character tests 2242 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2243 check(pattern, toSupplementaries("blahb]blech"), true); 2244 2245 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2246 check(pattern, toSupplementaries("b"), true); 2247 2248 try { 2249 // u00ff when UNICODE_CASE 2250 pattern = Pattern.compile("[ab\u00ffcd]", 2251 Pattern.CASE_INSENSITIVE| 2252 Pattern.UNICODE_CASE); 2253 check(pattern, "ab\u00ffcd", true); 2254 check(pattern, "Ab\u0178Cd", true); 2255 2256 // u00b5 when UNICODE_CASE 2257 pattern = Pattern.compile("[ab\u00b5cd]", 2258 Pattern.CASE_INSENSITIVE| 2259 Pattern.UNICODE_CASE); 2260 check(pattern, "ab\u00b5cd", true); 2261 check(pattern, "Ab\u039cCd", true); 2262 } catch (Exception e) { failCount++; } 2263 2264 /* Special cases 2265 
(1)LatinSmallLetterLongS u+017f 2266 (2)LatinSmallLetterDotlessI u+0131 2267 (3)LatineCapitalLetterIWithDotAbove u+0130 2268 (4)KelvinSign u+212a 2269 (5)AngstromSign u+212b 2270 */ 2271 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2272 pattern = Pattern.compile("[sik\u00c5]+", flags); 2273 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2274 failCount++; 2275 2276 report("CharClass"); 2277 } 2278 2279 private static void caretTest() throws Exception { 2280 Pattern pattern = Pattern.compile("\\w*"); 2281 Matcher matcher = pattern.matcher("a#bc#def##g"); 2282 check(matcher, "a"); 2283 check(matcher, ""); 2284 check(matcher, "bc"); 2285 check(matcher, ""); 2286 check(matcher, "def"); 2287 check(matcher, ""); 2288 check(matcher, ""); 2289 check(matcher, "g"); 2290 check(matcher, ""); 2291 if (matcher.find()) 2292 failCount++; 2293 2294 pattern = Pattern.compile("^\\w*"); 2295 matcher = pattern.matcher("a#bc#def##g"); 2296 check(matcher, "a"); 2297 if (matcher.find()) 2298 failCount++; 2299 2300 pattern = Pattern.compile("\\w"); 2301 matcher = pattern.matcher("abc##x"); 2302 check(matcher, "a"); 2303 check(matcher, "b"); 2304 check(matcher, "c"); 2305 check(matcher, "x"); 2306 if (matcher.find()) 2307 failCount++; 2308 2309 pattern = Pattern.compile("^\\w"); 2310 matcher = pattern.matcher("abc##x"); 2311 check(matcher, "a"); 2312 if (matcher.find()) 2313 failCount++; 2314 2315 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2316 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2317 check(matcher, "abc"); 2318 if (matcher.find()) 2319 failCount++; 2320 2321 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2322 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2323 check(matcher, "abc"); 2324 check(matcher, "jkl"); 2325 if (matcher.find()) 2326 failCount++; 2327 2328 pattern = Pattern.compile("^", Pattern.MULTILINE); 2329 matcher = pattern.matcher("this is some text"); 2330 String result = matcher.replaceAll("X"); 2331 if 
(!result.equals("Xthis is some text")) 2332 failCount++; 2333 2334 pattern = Pattern.compile("^"); 2335 matcher = pattern.matcher("this is some text"); 2336 result = matcher.replaceAll("X"); 2337 if (!result.equals("Xthis is some text")) 2338 failCount++; 2339 2340 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2341 matcher = pattern.matcher("this is some text\n"); 2342 result = matcher.replaceAll("X"); 2343 if (!result.equals("Xthis is some text\n")) 2344 failCount++; 2345 2346 report("Caret"); 2347 } 2348 2349 private static void groupCaptureTest() throws Exception { 2350 // Independent group 2351 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2352 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2353 matcher.find(); 2354 try { 2355 String blah = matcher.group(1); 2356 failCount++; 2357 } catch (IndexOutOfBoundsException ioobe) { 2358 // Good result 2359 } 2360 // Pure group 2361 pattern = Pattern.compile("x+(?:y+)z+"); 2362 matcher = pattern.matcher("xxxyyyzzz"); 2363 matcher.find(); 2364 try { 2365 String blah = matcher.group(1); 2366 failCount++; 2367 } catch (IndexOutOfBoundsException ioobe) { 2368 // Good result 2369 } 2370 2371 // Supplementary character tests 2372 // Independent group 2373 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2374 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2375 matcher.find(); 2376 try { 2377 String blah = matcher.group(1); 2378 failCount++; 2379 } catch (IndexOutOfBoundsException ioobe) { 2380 // Good result 2381 } 2382 // Pure group 2383 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2384 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2385 matcher.find(); 2386 try { 2387 String blah = matcher.group(1); 2388 failCount++; 2389 } catch (IndexOutOfBoundsException ioobe) { 2390 // Good result 2391 } 2392 2393 report("GroupCapture"); 2394 } 2395 2396 private static void backRefTest() throws Exception { 2397 Pattern pattern = Pattern.compile("(a*)bc\\1"); 
2398 check(pattern, "zzzaabcazzz", true); 2399 2400 pattern = Pattern.compile("(a*)bc\\1"); 2401 check(pattern, "zzzaabcaazzz", true); 2402 2403 pattern = Pattern.compile("(abc)(def)\\1"); 2404 check(pattern, "abcdefabc", true); 2405 2406 pattern = Pattern.compile("(abc)(def)\\3"); 2407 check(pattern, "abcdefabc", false); 2408 2409 try { 2410 for (int i = 1; i < 10; i++) { 2411 // Make sure backref 1-9 are always accepted 2412 pattern = Pattern.compile("abcdef\\" + i); 2413 // and fail to match if the target group does not exit 2414 check(pattern, "abcdef", false); 2415 } 2416 } catch(PatternSyntaxException e) { 2417 failCount++; 2418 } 2419 2420 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2421 check(pattern, "abcdefghija", false); 2422 check(pattern, "abcdefghija1", true); 2423 2424 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2425 check(pattern, "abcdefghijkk", true); 2426 2427 pattern = Pattern.compile("(a)bcdefghij\\11"); 2428 check(pattern, "abcdefghija1", true); 2429 2430 // Supplementary character tests 2431 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2432 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2433 2434 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2435 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2436 2437 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2438 check(pattern, toSupplementaries("abcdefabc"), true); 2439 2440 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2441 check(pattern, toSupplementaries("abcdefabc"), false); 2442 2443 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2444 check(pattern, toSupplementaries("abcdefghija"), false); 2445 check(pattern, toSupplementaries("abcdefghija1"), true); 2446 2447 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2448 check(pattern, toSupplementaries("abcdefghijkk"), true); 2449 2450 report("BackRef"); 
2451 } 2452 2453 /** 2454 * Unicode Technical Report #18, section 2.6 End of Line 2455 * There is no empty line to be matched in the sequence \u000D\u000A 2456 * but there is an empty line in the sequence \u000A\u000D. 2457 */ 2458 private static void anchorTest() throws Exception { 2459 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2460 Matcher m = p.matcher("blah1\r\nblah2"); 2461 m.find(); 2462 m.find(); 2463 if (!m.group().equals("blah2")) 2464 failCount++; 2465 2466 m.reset("blah1\n\rblah2"); 2467 m.find(); 2468 m.find(); 2469 m.find(); 2470 if (!m.group().equals("blah2")) 2471 failCount++; 2472 2473 // Test behavior of $ with \r\n at end of input 2474 p = Pattern.compile(".+$"); 2475 m = p.matcher("blah1\r\n"); 2476 if (!m.find()) 2477 failCount++; 2478 if (!m.group().equals("blah1")) 2479 failCount++; 2480 if (m.find()) 2481 failCount++; 2482 2483 // Test behavior of $ with \r\n at end of input in multiline 2484 p = Pattern.compile(".+$", Pattern.MULTILINE); 2485 m = p.matcher("blah1\r\n"); 2486 if (!m.find()) 2487 failCount++; 2488 if (m.find()) 2489 failCount++; 2490 2491 // Test for $ recognition of \u0085 for bug 4527731 2492 p = Pattern.compile(".+$", Pattern.MULTILINE); 2493 m = p.matcher("blah1\u0085"); 2494 if (!m.find()) 2495 failCount++; 2496 2497 // Supplementary character test 2498 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2499 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2500 m.find(); 2501 m.find(); 2502 if (!m.group().equals(toSupplementaries("blah2"))) 2503 failCount++; 2504 2505 m.reset(toSupplementaries("blah1\n\rblah2")); 2506 m.find(); 2507 m.find(); 2508 m.find(); 2509 if (!m.group().equals(toSupplementaries("blah2"))) 2510 failCount++; 2511 2512 // Test behavior of $ with \r\n at end of input 2513 p = Pattern.compile(".+$"); 2514 m = p.matcher(toSupplementaries("blah1\r\n")); 2515 if (!m.find()) 2516 failCount++; 2517 if (!m.group().equals(toSupplementaries("blah1"))) 2518 failCount++; 2519 if (m.find()) 2520 
failCount++; 2521 2522 // Test behavior of $ with \r\n at end of input in multiline 2523 p = Pattern.compile(".+$", Pattern.MULTILINE); 2524 m = p.matcher(toSupplementaries("blah1\r\n")); 2525 if (!m.find()) 2526 failCount++; 2527 if (m.find()) 2528 failCount++; 2529 2530 // Test for $ recognition of \u0085 for bug 4527731 2531 p = Pattern.compile(".+$", Pattern.MULTILINE); 2532 m = p.matcher(toSupplementaries("blah1\u0085")); 2533 if (!m.find()) 2534 failCount++; 2535 2536 report("Anchors"); 2537 } 2538 2539 /** 2540 * A basic sanity test of Matcher.lookingAt(). 2541 */ 2542 private static void lookingAtTest() throws Exception { 2543 Pattern p = Pattern.compile("(ab)(c*)"); 2544 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2545 2546 if (!m.lookingAt()) 2547 failCount++; 2548 2549 if (!m.group().equals(m.group(0))) 2550 failCount++; 2551 2552 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2553 if (m.lookingAt()) 2554 failCount++; 2555 2556 // Supplementary character test 2557 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2558 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2559 2560 if (!m.lookingAt()) 2561 failCount++; 2562 2563 if (!m.group().equals(m.group(0))) 2564 failCount++; 2565 2566 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2567 if (m.lookingAt()) 2568 failCount++; 2569 2570 report("Looking At"); 2571 } 2572 2573 /** 2574 * A basic sanity test of Matcher.matches(). 
2575 */ 2576 private static void matchesTest() throws Exception { 2577 // matches() 2578 Pattern p = Pattern.compile("ulb(c*)"); 2579 Matcher m = p.matcher("ulbcccccc"); 2580 if (!m.matches()) 2581 failCount++; 2582 2583 // find() but not matches() 2584 m.reset("zzzulbcccccc"); 2585 if (m.matches()) 2586 failCount++; 2587 2588 // lookingAt() but not matches() 2589 m.reset("ulbccccccdef"); 2590 if (m.matches()) 2591 failCount++; 2592 2593 // matches() 2594 p = Pattern.compile("a|ad"); 2595 m = p.matcher("ad"); 2596 if (!m.matches()) 2597 failCount++; 2598 2599 // Supplementary character test 2600 // matches() 2601 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2602 m = p.matcher(toSupplementaries("ulbcccccc")); 2603 if (!m.matches()) 2604 failCount++; 2605 2606 // find() but not matches() 2607 m.reset(toSupplementaries("zzzulbcccccc")); 2608 if (m.matches()) 2609 failCount++; 2610 2611 // lookingAt() but not matches() 2612 m.reset(toSupplementaries("ulbccccccdef")); 2613 if (m.matches()) 2614 failCount++; 2615 2616 // matches() 2617 p = Pattern.compile(toSupplementaries("a|ad")); 2618 m = p.matcher(toSupplementaries("ad")); 2619 if (!m.matches()) 2620 failCount++; 2621 2622 report("Matches"); 2623 } 2624 2625 /** 2626 * A basic sanity test of Pattern.matches(). 
2627 */ 2628 private static void patternMatchesTest() throws Exception { 2629 // matches() 2630 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2631 toSupplementaries("ulbcccccc"))) 2632 failCount++; 2633 2634 // find() but not matches() 2635 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2636 toSupplementaries("zzzulbcccccc"))) 2637 failCount++; 2638 2639 // lookingAt() but not matches() 2640 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2641 toSupplementaries("ulbccccccdef"))) 2642 failCount++; 2643 2644 // Supplementary character test 2645 // matches() 2646 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2647 toSupplementaries("ulbcccccc"))) 2648 failCount++; 2649 2650 // find() but not matches() 2651 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2652 toSupplementaries("zzzulbcccccc"))) 2653 failCount++; 2654 2655 // lookingAt() but not matches() 2656 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2657 toSupplementaries("ulbccccccdef"))) 2658 failCount++; 2659 2660 report("Pattern Matches"); 2661 } 2662 2663 /** 2664 * Canonical equivalence testing. Tests the ability of the engine 2665 * to match sequences that are not explicitly specified in the 2666 * pattern when they are considered equivalent by the Unicode Standard. 
2667 */ 2668 private static void ceTest() throws Exception { 2669 // Decomposed char outside char classes 2670 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2671 Matcher m = p.matcher("test\u00e5"); 2672 if (!m.matches()) 2673 failCount++; 2674 2675 m.reset("testa\u030a"); 2676 if (!m.matches()) 2677 failCount++; 2678 2679 // Composed char outside char classes 2680 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2681 m = p.matcher("test\u00e5"); 2682 if (!m.matches()) 2683 failCount++; 2684 2685 m.reset("testa\u030a"); 2686 if (!m.find()) 2687 failCount++; 2688 2689 // Decomposed char inside a char class 2690 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2691 m = p.matcher("test\u00e5"); 2692 if (!m.find()) 2693 failCount++; 2694 2695 m.reset("testa\u030a"); 2696 if (!m.find()) 2697 failCount++; 2698 2699 // Composed char inside a char class 2700 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2701 m = p.matcher("test\u00e5"); 2702 if (!m.find()) 2703 failCount++; 2704 2705 m.reset("testa\u0300"); 2706 if (!m.find()) 2707 failCount++; 2708 2709 m.reset("testa\u030a"); 2710 if (!m.find()) 2711 failCount++; 2712 2713 // Marks that cannot legally change order and be equivalent 2714 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2715 check(p, "testa\u0308\u0300", true); 2716 check(p, "testa\u0300\u0308", false); 2717 2718 // Marks that can legally change order and be equivalent 2719 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2720 check(p, "testa\u0308\u0323", true); 2721 check(p, "testa\u0323\u0308", true); 2722 2723 // Test all equivalences of the sequence a\u0308\u0323\u0300 2724 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2725 check(p, "testa\u0308\u0323\u0300", true); 2726 check(p, "testa\u0323\u0308\u0300", true); 2727 check(p, "testa\u0308\u0300\u0323", true); 2728 check(p, "test\u00e4\u0323\u0300", true); 2729 check(p, "test\u00e4\u0300\u0323", true); 2730 
2731 Object[][] data = new Object[][] { 2732 2733 // JDK-4867170 2734 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true }, 2735 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true }, 2736 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true }, 2737 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true }, 2738 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true }, 2739 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true }, 2740 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true }, 2741 2742 { "\\p{IsGreek}", "ab\u1f80cd", "f", true }, 2743 { "\\p{IsGreek}", "ab\u1f81cd", "f", true }, 2744 { "\\p{IsGreek}", "ab\u1f82cd", "f", true }, 2745 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true }, 2746 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true }, 2747 2748 // backtracking, force to match "\u1f80", instead of \u1f82" 2749 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true }, 2750 2751 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true }, 2752 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true }, 2753 2754 { "[^\u1f80-\u1f82]","\u1f81", "m", false }, 2755 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false }, 2756 { "[^\u1f01\u0345]", "\u1f81", "f", false }, 2757 2758 { "[^\u1f81]+", "\u1f80\u1f82", "f", true }, 2759 { "[\u1f80]", "ab\u1f80cd", "f", true }, 2760 { "\u1f80", "ab\u1f80cd", "f", true }, 2761 { "\u1f00\u0345\u0300", "\u1f82", "m", true }, 2762 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true }, 2763 { "\u1f82", "\u1f00\u0345\u0300", "m", true }, 2764 { "\u1f82", "\u1f80\u0300", "m", true }, 2765 2766 // JDK-7080302 # compile failed 2767 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true}, 2768 2769 // JDK-6728861, same cause as above one 2770 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true}, 2771 2772 // JDK-6995635 2773 { "(\u00e9)", "e\u0301", "m", true }, 2774 2775 // JDK-6736245 2776 // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc 2777 { "\u2ADC", "\u2ADC", "m", true}, // NFC 2778 { "\u2ADC", "\u2ADD\u0338", "m", 
true}, // NFD 2779 2780 // 4916384. 2781 // Decomposed hangul (jamos) works inside clazz 2782 { "[\u1100\u1161]", "\u1100\u1161", "m", true}, 2783 { "[\u1100\u1161]", "\uac00", "m", true}, 2784 2785 { "[\uac00]", "\u1100\u1161", "m", true}, 2786 { "[\uac00]", "\uac00", "m", true}, 2787 2788 // Decomposed hangul (jamos) 2789 { "\u1100\u1161", "\u1100\u1161", "m", true}, 2790 { "\u1100\u1161", "\uac00", "m", true}, 2791 2792 // Composed hangul 2793 { "\uac00", "\u1100\u1161", "m", true }, 2794 { "\uac00", "\uac00", "m", true }, 2795 2796 /* Need a NFDSlice to nfd the source to solve this issue 2797 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2798 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165> 2799 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2800 2801 // Decomposed supplementary outside char classes 2802 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2803 // Composed supplementary outside char classes 2804 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2805 */ 2806 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2807 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2808 2809 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true }, 2810 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2811 }; 2812 2813 int failCount = 0; 2814 for (Object[] d : data) { 2815 String pn = (String)d[0]; 2816 String tt = (String)d[1]; 2817 boolean isFind = "f".equals(((String)d[2])); 2818 boolean expected = (boolean)d[3]; 2819 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find() 2820 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches(); 2821 if (ret != expected) { 2822 failCount++; 2823 continue; 2824 } 2825 } 2826 report("Canonical Equivalence"); 2827 } 2828 2829 /** 2830 * A basic sanity test of Matcher.replaceAll(). 
2831 */ 2832 private static void globalSubstitute() throws Exception { 2833 // Global substitution with a literal 2834 Pattern p = Pattern.compile("(ab)(c*)"); 2835 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2836 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2837 failCount++; 2838 2839 m.reset("zzzabccczzzabcczzzabccczzz"); 2840 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2841 failCount++; 2842 2843 // Global substitution with groups 2844 m.reset("zzzabccczzzabcczzzabccczzz"); 2845 String result = m.replaceAll("$1"); 2846 if (!result.equals("zzzabzzzabzzzabzzz")) 2847 failCount++; 2848 2849 // Supplementary character test 2850 // Global substitution with a literal 2851 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2852 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2853 if (!m.replaceAll(toSupplementaries("test")). 2854 equals(toSupplementaries("testzzztestzzztest"))) 2855 failCount++; 2856 2857 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2858 if (!m.replaceAll(toSupplementaries("test")). 2859 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2860 failCount++; 2861 2862 // Global substitution with groups 2863 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2864 result = m.replaceAll("$1"); 2865 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2866 failCount++; 2867 2868 report("Global Substitution"); 2869 } 2870 2871 /** 2872 * Tests the usage of Matcher.appendReplacement() with literal 2873 * and group substitutions. 
2874 */ 2875 private static void stringbufferSubstitute() throws Exception { 2876 // SB substitution with literal 2877 String blah = "zzzblahzzz"; 2878 Pattern p = Pattern.compile("blah"); 2879 Matcher m = p.matcher(blah); 2880 StringBuffer result = new StringBuffer(); 2881 try { 2882 m.appendReplacement(result, "blech"); 2883 failCount++; 2884 } catch (IllegalStateException e) { 2885 } 2886 m.find(); 2887 m.appendReplacement(result, "blech"); 2888 if (!result.toString().equals("zzzblech")) 2889 failCount++; 2890 2891 m.appendTail(result); 2892 if (!result.toString().equals("zzzblechzzz")) 2893 failCount++; 2894 2895 // SB substitution with groups 2896 blah = "zzzabcdzzz"; 2897 p = Pattern.compile("(ab)(cd)*"); 2898 m = p.matcher(blah); 2899 result = new StringBuffer(); 2900 try { 2901 m.appendReplacement(result, "$1"); 2902 failCount++; 2903 } catch (IllegalStateException e) { 2904 } 2905 m.find(); 2906 m.appendReplacement(result, "$1"); 2907 if (!result.toString().equals("zzzab")) 2908 failCount++; 2909 2910 m.appendTail(result); 2911 if (!result.toString().equals("zzzabzzz")) 2912 failCount++; 2913 2914 // SB substitution with 3 groups 2915 blah = "zzzabcdcdefzzz"; 2916 p = Pattern.compile("(ab)(cd)*(ef)"); 2917 m = p.matcher(blah); 2918 result = new StringBuffer(); 2919 try { 2920 m.appendReplacement(result, "$1w$2w$3"); 2921 failCount++; 2922 } catch (IllegalStateException e) { 2923 } 2924 m.find(); 2925 m.appendReplacement(result, "$1w$2w$3"); 2926 if (!result.toString().equals("zzzabwcdwef")) 2927 failCount++; 2928 2929 m.appendTail(result); 2930 if (!result.toString().equals("zzzabwcdwefzzz")) 2931 failCount++; 2932 2933 // SB substitution with groups and three matches 2934 // skipping middle match 2935 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2936 p = Pattern.compile("(ab)(cd*)"); 2937 m = p.matcher(blah); 2938 result = new StringBuffer(); 2939 try { 2940 m.appendReplacement(result, "$1"); 2941 failCount++; 2942 } catch (IllegalStateException e) { 2943 } 2944 
m.find(); 2945 m.appendReplacement(result, "$1"); 2946 if (!result.toString().equals("zzzab")) 2947 failCount++; 2948 2949 m.find(); 2950 m.find(); 2951 m.appendReplacement(result, "$2"); 2952 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2953 failCount++; 2954 2955 m.appendTail(result); 2956 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2957 failCount++; 2958 2959 // Check to make sure escaped $ is ignored 2960 blah = "zzzabcdcdefzzz"; 2961 p = Pattern.compile("(ab)(cd)*(ef)"); 2962 m = p.matcher(blah); 2963 result = new StringBuffer(); 2964 m.find(); 2965 m.appendReplacement(result, "$1w\\$2w$3"); 2966 if (!result.toString().equals("zzzabw$2wef")) 2967 failCount++; 2968 2969 m.appendTail(result); 2970 if (!result.toString().equals("zzzabw$2wefzzz")) 2971 failCount++; 2972 2973 // Check to make sure a reference to nonexistent group causes error 2974 blah = "zzzabcdcdefzzz"; 2975 p = Pattern.compile("(ab)(cd)*(ef)"); 2976 m = p.matcher(blah); 2977 result = new StringBuffer(); 2978 m.find(); 2979 try { 2980 m.appendReplacement(result, "$1w$5w$3"); 2981 failCount++; 2982 } catch (IndexOutOfBoundsException ioobe) { 2983 // Correct result 2984 } 2985 2986 // Check double digit group references 2987 blah = "zzz123456789101112zzz"; 2988 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2989 m = p.matcher(blah); 2990 result = new StringBuffer(); 2991 m.find(); 2992 m.appendReplacement(result, "$1w$11w$3"); 2993 if (!result.toString().equals("zzz1w11w3")) 2994 failCount++; 2995 2996 // Check to make sure it backs off $15 to $1 if only three groups 2997 blah = "zzzabcdcdefzzz"; 2998 p = Pattern.compile("(ab)(cd)*(ef)"); 2999 m = p.matcher(blah); 3000 result = new StringBuffer(); 3001 m.find(); 3002 m.appendReplacement(result, "$1w$15w$3"); 3003 if (!result.toString().equals("zzzabwab5wef")) 3004 failCount++; 3005 3006 3007 // Supplementary character test 3008 // SB substitution with literal 3009 blah = toSupplementaries("zzzblahzzz"); 3010 p = 
Pattern.compile(toSupplementaries("blah")); 3011 m = p.matcher(blah); 3012 result = new StringBuffer(); 3013 try { 3014 m.appendReplacement(result, toSupplementaries("blech")); 3015 failCount++; 3016 } catch (IllegalStateException e) { 3017 } 3018 m.find(); 3019 m.appendReplacement(result, toSupplementaries("blech")); 3020 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3021 failCount++; 3022 3023 m.appendTail(result); 3024 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3025 failCount++; 3026 3027 // SB substitution with groups 3028 blah = toSupplementaries("zzzabcdzzz"); 3029 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3030 m = p.matcher(blah); 3031 result = new StringBuffer(); 3032 try { 3033 m.appendReplacement(result, "$1"); 3034 failCount++; 3035 } catch (IllegalStateException e) { 3036 } 3037 m.find(); 3038 m.appendReplacement(result, "$1"); 3039 if (!result.toString().equals(toSupplementaries("zzzab"))) 3040 failCount++; 3041 3042 m.appendTail(result); 3043 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3044 failCount++; 3045 3046 // SB substitution with 3 groups 3047 blah = toSupplementaries("zzzabcdcdefzzz"); 3048 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3049 m = p.matcher(blah); 3050 result = new StringBuffer(); 3051 try { 3052 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3053 failCount++; 3054 } catch (IllegalStateException e) { 3055 } 3056 m.find(); 3057 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3058 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3059 failCount++; 3060 3061 m.appendTail(result); 3062 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3063 failCount++; 3064 3065 // SB substitution with groups and three matches 3066 // skipping middle match 3067 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3068 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3069 m = p.matcher(blah); 3070 result = new 
StringBuffer(); 3071 try { 3072 m.appendReplacement(result, "$1"); 3073 failCount++; 3074 } catch (IllegalStateException e) { 3075 } 3076 m.find(); 3077 m.appendReplacement(result, "$1"); 3078 if (!result.toString().equals(toSupplementaries("zzzab"))) 3079 failCount++; 3080 3081 m.find(); 3082 m.find(); 3083 m.appendReplacement(result, "$2"); 3084 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3085 failCount++; 3086 3087 m.appendTail(result); 3088 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3089 failCount++; 3090 3091 // Check to make sure escaped $ is ignored 3092 blah = toSupplementaries("zzzabcdcdefzzz"); 3093 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3094 m = p.matcher(blah); 3095 result = new StringBuffer(); 3096 m.find(); 3097 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3098 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3099 failCount++; 3100 3101 m.appendTail(result); 3102 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3103 failCount++; 3104 3105 // Check to make sure a reference to nonexistent group causes error 3106 blah = toSupplementaries("zzzabcdcdefzzz"); 3107 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3108 m = p.matcher(blah); 3109 result = new StringBuffer(); 3110 m.find(); 3111 try { 3112 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3113 failCount++; 3114 } catch (IndexOutOfBoundsException ioobe) { 3115 // Correct result 3116 } 3117 3118 // Check double digit group references 3119 blah = toSupplementaries("zzz123456789101112zzz"); 3120 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3121 m = p.matcher(blah); 3122 result = new StringBuffer(); 3123 m.find(); 3124 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3125 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3126 failCount++; 3127 3128 // Check to make sure it backs off $15 to $1 if only three groups 
3129 blah = toSupplementaries("zzzabcdcdefzzz"); 3130 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3131 m = p.matcher(blah); 3132 result = new StringBuffer(); 3133 m.find(); 3134 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3135 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3136 failCount++; 3137 3138 // Check nothing has been appended into the output buffer if 3139 // the replacement string triggers IllegalArgumentException. 3140 p = Pattern.compile("(abc)"); 3141 m = p.matcher("abcd"); 3142 result = new StringBuffer(); 3143 m.find(); 3144 try { 3145 m.appendReplacement(result, ("xyz$g")); 3146 failCount++; 3147 } catch (IllegalArgumentException iae) { 3148 if (result.length() != 0) 3149 failCount++; 3150 } 3151 3152 report("SB Substitution"); 3153 } 3154 3155 /** 3156 * Tests the usage of Matcher.appendReplacement() with literal 3157 * and group substitutions. 3158 */ 3159 private static void stringbuilderSubstitute() throws Exception { 3160 // SB substitution with literal 3161 String blah = "zzzblahzzz"; 3162 Pattern p = Pattern.compile("blah"); 3163 Matcher m = p.matcher(blah); 3164 StringBuilder result = new StringBuilder(); 3165 try { 3166 m.appendReplacement(result, "blech"); 3167 failCount++; 3168 } catch (IllegalStateException e) { 3169 } 3170 m.find(); 3171 m.appendReplacement(result, "blech"); 3172 if (!result.toString().equals("zzzblech")) 3173 failCount++; 3174 3175 m.appendTail(result); 3176 if (!result.toString().equals("zzzblechzzz")) 3177 failCount++; 3178 3179 // SB substitution with groups 3180 blah = "zzzabcdzzz"; 3181 p = Pattern.compile("(ab)(cd)*"); 3182 m = p.matcher(blah); 3183 result = new StringBuilder(); 3184 try { 3185 m.appendReplacement(result, "$1"); 3186 failCount++; 3187 } catch (IllegalStateException e) { 3188 } 3189 m.find(); 3190 m.appendReplacement(result, "$1"); 3191 if (!result.toString().equals("zzzab")) 3192 failCount++; 3193 3194 m.appendTail(result); 3195 if 
(!result.toString().equals("zzzabzzz")) 3196 failCount++; 3197 3198 // SB substitution with 3 groups 3199 blah = "zzzabcdcdefzzz"; 3200 p = Pattern.compile("(ab)(cd)*(ef)"); 3201 m = p.matcher(blah); 3202 result = new StringBuilder(); 3203 try { 3204 m.appendReplacement(result, "$1w$2w$3"); 3205 failCount++; 3206 } catch (IllegalStateException e) { 3207 } 3208 m.find(); 3209 m.appendReplacement(result, "$1w$2w$3"); 3210 if (!result.toString().equals("zzzabwcdwef")) 3211 failCount++; 3212 3213 m.appendTail(result); 3214 if (!result.toString().equals("zzzabwcdwefzzz")) 3215 failCount++; 3216 3217 // SB substitution with groups and three matches 3218 // skipping middle match 3219 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3220 p = Pattern.compile("(ab)(cd*)"); 3221 m = p.matcher(blah); 3222 result = new StringBuilder(); 3223 try { 3224 m.appendReplacement(result, "$1"); 3225 failCount++; 3226 } catch (IllegalStateException e) { 3227 } 3228 m.find(); 3229 m.appendReplacement(result, "$1"); 3230 if (!result.toString().equals("zzzab")) 3231 failCount++; 3232 3233 m.find(); 3234 m.find(); 3235 m.appendReplacement(result, "$2"); 3236 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3237 failCount++; 3238 3239 m.appendTail(result); 3240 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3241 failCount++; 3242 3243 // Check to make sure escaped $ is ignored 3244 blah = "zzzabcdcdefzzz"; 3245 p = Pattern.compile("(ab)(cd)*(ef)"); 3246 m = p.matcher(blah); 3247 result = new StringBuilder(); 3248 m.find(); 3249 m.appendReplacement(result, "$1w\\$2w$3"); 3250 if (!result.toString().equals("zzzabw$2wef")) 3251 failCount++; 3252 3253 m.appendTail(result); 3254 if (!result.toString().equals("zzzabw$2wefzzz")) 3255 failCount++; 3256 3257 // Check to make sure a reference to nonexistent group causes error 3258 blah = "zzzabcdcdefzzz"; 3259 p = Pattern.compile("(ab)(cd)*(ef)"); 3260 m = p.matcher(blah); 3261 result = new StringBuilder(); 3262 m.find(); 3263 try { 3264 
m.appendReplacement(result, "$1w$5w$3"); 3265 failCount++; 3266 } catch (IndexOutOfBoundsException ioobe) { 3267 // Correct result 3268 } 3269 3270 // Check double digit group references 3271 blah = "zzz123456789101112zzz"; 3272 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3273 m = p.matcher(blah); 3274 result = new StringBuilder(); 3275 m.find(); 3276 m.appendReplacement(result, "$1w$11w$3"); 3277 if (!result.toString().equals("zzz1w11w3")) 3278 failCount++; 3279 3280 // Check to make sure it backs off $15 to $1 if only three groups 3281 blah = "zzzabcdcdefzzz"; 3282 p = Pattern.compile("(ab)(cd)*(ef)"); 3283 m = p.matcher(blah); 3284 result = new StringBuilder(); 3285 m.find(); 3286 m.appendReplacement(result, "$1w$15w$3"); 3287 if (!result.toString().equals("zzzabwab5wef")) 3288 failCount++; 3289 3290 3291 // Supplementary character test 3292 // SB substitution with literal 3293 blah = toSupplementaries("zzzblahzzz"); 3294 p = Pattern.compile(toSupplementaries("blah")); 3295 m = p.matcher(blah); 3296 result = new StringBuilder(); 3297 try { 3298 m.appendReplacement(result, toSupplementaries("blech")); 3299 failCount++; 3300 } catch (IllegalStateException e) { 3301 } 3302 m.find(); 3303 m.appendReplacement(result, toSupplementaries("blech")); 3304 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3305 failCount++; 3306 m.appendTail(result); 3307 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3308 failCount++; 3309 3310 // SB substitution with groups 3311 blah = toSupplementaries("zzzabcdzzz"); 3312 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3313 m = p.matcher(blah); 3314 result = new StringBuilder(); 3315 try { 3316 m.appendReplacement(result, "$1"); 3317 failCount++; 3318 } catch (IllegalStateException e) { 3319 } 3320 m.find(); 3321 m.appendReplacement(result, "$1"); 3322 if (!result.toString().equals(toSupplementaries("zzzab"))) 3323 failCount++; 3324 3325 m.appendTail(result); 3326 if 
(!result.toString().equals(toSupplementaries("zzzabzzz"))) 3327 failCount++; 3328 3329 // SB substitution with 3 groups 3330 blah = toSupplementaries("zzzabcdcdefzzz"); 3331 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3332 m = p.matcher(blah); 3333 result = new StringBuilder(); 3334 try { 3335 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3336 failCount++; 3337 } catch (IllegalStateException e) { 3338 } 3339 m.find(); 3340 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3341 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3342 failCount++; 3343 3344 m.appendTail(result); 3345 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3346 failCount++; 3347 3348 // SB substitution with groups and three matches 3349 // skipping middle match 3350 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3351 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3352 m = p.matcher(blah); 3353 result = new StringBuilder(); 3354 try { 3355 m.appendReplacement(result, "$1"); 3356 failCount++; 3357 } catch (IllegalStateException e) { 3358 } 3359 m.find(); 3360 m.appendReplacement(result, "$1"); 3361 if (!result.toString().equals(toSupplementaries("zzzab"))) 3362 failCount++; 3363 3364 m.find(); 3365 m.find(); 3366 m.appendReplacement(result, "$2"); 3367 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3368 failCount++; 3369 3370 m.appendTail(result); 3371 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3372 failCount++; 3373 3374 // Check to make sure escaped $ is ignored 3375 blah = toSupplementaries("zzzabcdcdefzzz"); 3376 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3377 m = p.matcher(blah); 3378 result = new StringBuilder(); 3379 m.find(); 3380 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3381 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3382 failCount++; 3383 3384 m.appendTail(result); 3385 if 
(!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3386 failCount++; 3387 3388 // Check to make sure a reference to nonexistent group causes error 3389 blah = toSupplementaries("zzzabcdcdefzzz"); 3390 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3391 m = p.matcher(blah); 3392 result = new StringBuilder(); 3393 m.find(); 3394 try { 3395 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3396 failCount++; 3397 } catch (IndexOutOfBoundsException ioobe) { 3398 // Correct result 3399 } 3400 // Check double digit group references 3401 blah = toSupplementaries("zzz123456789101112zzz"); 3402 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3403 m = p.matcher(blah); 3404 result = new StringBuilder(); 3405 m.find(); 3406 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3407 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3408 failCount++; 3409 3410 // Check to make sure it backs off $15 to $1 if only three groups 3411 blah = toSupplementaries("zzzabcdcdefzzz"); 3412 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3413 m = p.matcher(blah); 3414 result = new StringBuilder(); 3415 m.find(); 3416 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3417 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3418 failCount++; 3419 // Check nothing has been appended into the output buffer if 3420 // the replacement string triggers IllegalArgumentException. 3421 p = Pattern.compile("(abc)"); 3422 m = p.matcher("abcd"); 3423 result = new StringBuilder(); 3424 m.find(); 3425 try { 3426 m.appendReplacement(result, ("xyz$g")); 3427 failCount++; 3428 } catch (IllegalArgumentException iae) { 3429 if (result.length() != 0) 3430 failCount++; 3431 } 3432 report("SB Substitution 2"); 3433 } 3434 3435 /* 3436 * 5 groups of characters are created to make a substitution string. 
3437 * A base string will be created including random lead chars, the 3438 * substitution string, and random trailing chars. 3439 * A pattern containing the 5 groups is searched for and replaced with: 3440 * random group + random string + random group. 3441 * The results are checked for correctness. 3442 */ 3443 private static void substitutionBasher() { 3444 for (int runs = 0; runs<1000; runs++) { 3445 // Create a base string to work in 3446 int leadingChars = generator.nextInt(10); 3447 StringBuffer baseBuffer = new StringBuffer(100); 3448 String leadingString = getRandomAlphaString(leadingChars); 3449 baseBuffer.append(leadingString); 3450 3451 // Create 5 groups of random number of random chars 3452 // Create the string to substitute 3453 // Create the pattern string to search for 3454 StringBuffer bufferToSub = new StringBuffer(25); 3455 StringBuffer bufferToPat = new StringBuffer(50); 3456 String[] groups = new String[5]; 3457 for(int i=0; i<5; i++) { 3458 int aGroupSize = generator.nextInt(5)+1; 3459 groups[i] = getRandomAlphaString(aGroupSize); 3460 bufferToSub.append(groups[i]); 3461 bufferToPat.append('('); 3462 bufferToPat.append(groups[i]); 3463 bufferToPat.append(')'); 3464 } 3465 String stringToSub = bufferToSub.toString(); 3466 String pattern = bufferToPat.toString(); 3467 3468 // Place sub string into working string at random index 3469 baseBuffer.append(stringToSub); 3470 3471 // Append random chars to end 3472 int trailingChars = generator.nextInt(10); 3473 String trailingString = getRandomAlphaString(trailingChars); 3474 baseBuffer.append(trailingString); 3475 String baseString = baseBuffer.toString(); 3476 3477 // Create test pattern and matcher 3478 Pattern p = Pattern.compile(pattern); 3479 Matcher m = p.matcher(baseString); 3480 3481 // Reject candidate if pattern happens to start early 3482 m.find(); 3483 if (m.start() < leadingChars) 3484 continue; 3485 3486 // Reject candidate if more than one match 3487 if (m.find()) 3488 continue; 3489 
3490 // Construct a replacement string with : 3491 // random group + random string + random group 3492 StringBuffer bufferToRep = new StringBuffer(); 3493 int groupIndex1 = generator.nextInt(5); 3494 bufferToRep.append("$" + (groupIndex1 + 1)); 3495 String randomMidString = getRandomAlphaString(5); 3496 bufferToRep.append(randomMidString); 3497 int groupIndex2 = generator.nextInt(5); 3498 bufferToRep.append("$" + (groupIndex2 + 1)); 3499 String replacement = bufferToRep.toString(); 3500 3501 // Do the replacement 3502 String result = m.replaceAll(replacement); 3503 3504 // Construct expected result 3505 StringBuffer bufferToRes = new StringBuffer(); 3506 bufferToRes.append(leadingString); 3507 bufferToRes.append(groups[groupIndex1]); 3508 bufferToRes.append(randomMidString); 3509 bufferToRes.append(groups[groupIndex2]); 3510 bufferToRes.append(trailingString); 3511 String expectedResult = bufferToRes.toString(); 3512 3513 // Check results 3514 if (!result.equals(expectedResult)) 3515 failCount++; 3516 } 3517 3518 report("Substitution Basher"); 3519 } 3520 3521 /* 3522 * 5 groups of characters are created to make a substitution string. 3523 * A base string will be created including random lead chars, the 3524 * substitution string, and random trailing chars. 3525 * A pattern containing the 5 groups is searched for and replaced with: 3526 * random group + random string + random group. 3527 * The results are checked for correctness. 
3528 */ 3529 private static void substitutionBasher2() { 3530 for (int runs = 0; runs<1000; runs++) { 3531 // Create a base string to work in 3532 int leadingChars = generator.nextInt(10); 3533 StringBuilder baseBuffer = new StringBuilder(100); 3534 String leadingString = getRandomAlphaString(leadingChars); 3535 baseBuffer.append(leadingString); 3536 3537 // Create 5 groups of random number of random chars 3538 // Create the string to substitute 3539 // Create the pattern string to search for 3540 StringBuilder bufferToSub = new StringBuilder(25); 3541 StringBuilder bufferToPat = new StringBuilder(50); 3542 String[] groups = new String[5]; 3543 for(int i=0; i<5; i++) { 3544 int aGroupSize = generator.nextInt(5)+1; 3545 groups[i] = getRandomAlphaString(aGroupSize); 3546 bufferToSub.append(groups[i]); 3547 bufferToPat.append('('); 3548 bufferToPat.append(groups[i]); 3549 bufferToPat.append(')'); 3550 } 3551 String stringToSub = bufferToSub.toString(); 3552 String pattern = bufferToPat.toString(); 3553 3554 // Place sub string into working string at random index 3555 baseBuffer.append(stringToSub); 3556 3557 // Append random chars to end 3558 int trailingChars = generator.nextInt(10); 3559 String trailingString = getRandomAlphaString(trailingChars); 3560 baseBuffer.append(trailingString); 3561 String baseString = baseBuffer.toString(); 3562 3563 // Create test pattern and matcher 3564 Pattern p = Pattern.compile(pattern); 3565 Matcher m = p.matcher(baseString); 3566 3567 // Reject candidate if pattern happens to start early 3568 m.find(); 3569 if (m.start() < leadingChars) 3570 continue; 3571 3572 // Reject candidate if more than one match 3573 if (m.find()) 3574 continue; 3575 3576 // Construct a replacement string with : 3577 // random group + random string + random group 3578 StringBuilder bufferToRep = new StringBuilder(); 3579 int groupIndex1 = generator.nextInt(5); 3580 bufferToRep.append("$" + (groupIndex1 + 1)); 3581 String randomMidString = 
getRandomAlphaString(5); 3582 bufferToRep.append(randomMidString); 3583 int groupIndex2 = generator.nextInt(5); 3584 bufferToRep.append("$" + (groupIndex2 + 1)); 3585 String replacement = bufferToRep.toString(); 3586 3587 // Do the replacement 3588 String result = m.replaceAll(replacement); 3589 3590 // Construct expected result 3591 StringBuilder bufferToRes = new StringBuilder(); 3592 bufferToRes.append(leadingString); 3593 bufferToRes.append(groups[groupIndex1]); 3594 bufferToRes.append(randomMidString); 3595 bufferToRes.append(groups[groupIndex2]); 3596 bufferToRes.append(trailingString); 3597 String expectedResult = bufferToRes.toString(); 3598 3599 // Check results 3600 if (!result.equals(expectedResult)) { 3601 failCount++; 3602 } 3603 } 3604 3605 report("Substitution Basher 2"); 3606 } 3607 3608 /** 3609 * Checks the handling of some escape sequences that the Pattern 3610 * class should process instead of the java compiler. These are 3611 * not in the file because the escapes should be be processed 3612 * by the Pattern class when the regex is compiled. 3613 */ 3614 private static void escapes() throws Exception { 3615 Pattern p = Pattern.compile("\\043"); 3616 Matcher m = p.matcher("#"); 3617 if (!m.find()) 3618 failCount++; 3619 3620 p = Pattern.compile("\\x23"); 3621 m = p.matcher("#"); 3622 if (!m.find()) 3623 failCount++; 3624 3625 p = Pattern.compile("\\u0023"); 3626 m = p.matcher("#"); 3627 if (!m.find()) 3628 failCount++; 3629 3630 report("Escape sequences"); 3631 } 3632 3633 /** 3634 * Checks the handling of blank input situations. These 3635 * tests are incompatible with my test file format. 
3636 */ 3637 private static void blankInput() throws Exception { 3638 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3639 Matcher m = p.matcher(""); 3640 if (m.find()) 3641 failCount++; 3642 3643 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3644 m = p.matcher(""); 3645 if (!m.find()) 3646 failCount++; 3647 3648 p = Pattern.compile("abc"); 3649 m = p.matcher(""); 3650 if (m.find()) 3651 failCount++; 3652 3653 p = Pattern.compile("a*"); 3654 m = p.matcher(""); 3655 if (!m.find()) 3656 failCount++; 3657 3658 report("Blank input"); 3659 } 3660 3661 /** 3662 * Tests the Boyer-Moore pattern matching of a character sequence 3663 * on randomly generated patterns. 3664 */ 3665 private static void bm() throws Exception { 3666 doBnM('a'); 3667 report("Boyer Moore (ASCII)"); 3668 3669 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3670 report("Boyer Moore (Supplementary)"); 3671 } 3672 3673 private static void doBnM(int baseCharacter) throws Exception { 3674 int achar=0; 3675 3676 for (int i=0; i<100; i++) { 3677 // Create a short pattern to search for 3678 int patternLength = generator.nextInt(7) + 4; 3679 StringBuffer patternBuffer = new StringBuffer(patternLength); 3680 String pattern; 3681 retry: for (;;) { 3682 for (int x=0; x<patternLength; x++) { 3683 int ch = baseCharacter + generator.nextInt(26); 3684 if (Character.isSupplementaryCodePoint(ch)) { 3685 patternBuffer.append(Character.toChars(ch)); 3686 } else { 3687 patternBuffer.append((char)ch); 3688 } 3689 } 3690 pattern = patternBuffer.toString(); 3691 3692 // Avoid patterns that start and end with the same substring 3693 // See JDK-6854417 3694 for (int x=1; x < pattern.length(); x++) { 3695 if (pattern.startsWith(pattern.substring(x))) 3696 continue retry; 3697 } 3698 break; 3699 } 3700 Pattern p = Pattern.compile(pattern); 3701 3702 // Create a buffer with random ASCII chars that does 3703 // not match the sample 3704 String toSearch = null; 3705 StringBuffer s = null; 3706 Matcher m = 
p.matcher(""); 3707 do { 3708 s = new StringBuffer(100); 3709 for (int x=0; x<100; x++) { 3710 int ch = baseCharacter + generator.nextInt(26); 3711 if (Character.isSupplementaryCodePoint(ch)) { 3712 s.append(Character.toChars(ch)); 3713 } else { 3714 s.append((char)ch); 3715 } 3716 } 3717 toSearch = s.toString(); 3718 m.reset(toSearch); 3719 } while (m.find()); 3720 3721 // Insert the pattern at a random spot 3722 int insertIndex = generator.nextInt(99); 3723 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3724 insertIndex++; 3725 s = s.insert(insertIndex, pattern); 3726 toSearch = s.toString(); 3727 3728 // Make sure that the pattern is found 3729 m.reset(toSearch); 3730 if (!m.find()) 3731 failCount++; 3732 3733 // Make sure that the match text is the pattern 3734 if (!m.group().equals(pattern)) 3735 failCount++; 3736 3737 // Make sure match occured at insertion point 3738 if (m.start() != insertIndex) 3739 failCount++; 3740 } 3741 } 3742 3743 /** 3744 * Tests the matching of slices on randomly generated patterns. 3745 * The Boyer-Moore optimization is not done on these patterns 3746 * because it uses unicode case folding. 
3747 */ 3748 private static void slice() throws Exception { 3749 doSlice(Character.MAX_VALUE); 3750 report("Slice"); 3751 3752 doSlice(Character.MAX_CODE_POINT); 3753 report("Slice (Supplementary)"); 3754 } 3755 3756 private static void doSlice(int maxCharacter) throws Exception { 3757 Random generator = new Random(); 3758 int achar=0; 3759 3760 for (int i=0; i<100; i++) { 3761 // Create a short pattern to search for 3762 int patternLength = generator.nextInt(7) + 4; 3763 StringBuffer patternBuffer = new StringBuffer(patternLength); 3764 for (int x=0; x<patternLength; x++) { 3765 int randomChar = 0; 3766 while (!Character.isLetterOrDigit(randomChar)) 3767 randomChar = generator.nextInt(maxCharacter); 3768 if (Character.isSupplementaryCodePoint(randomChar)) { 3769 patternBuffer.append(Character.toChars(randomChar)); 3770 } else { 3771 patternBuffer.append((char) randomChar); 3772 } 3773 } 3774 String pattern = patternBuffer.toString(); 3775 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3776 3777 // Create a buffer with random chars that does not match the sample 3778 String toSearch = null; 3779 StringBuffer s = null; 3780 Matcher m = p.matcher(""); 3781 do { 3782 s = new StringBuffer(100); 3783 for (int x=0; x<100; x++) { 3784 int randomChar = 0; 3785 while (!Character.isLetterOrDigit(randomChar)) 3786 randomChar = generator.nextInt(maxCharacter); 3787 if (Character.isSupplementaryCodePoint(randomChar)) { 3788 s.append(Character.toChars(randomChar)); 3789 } else { 3790 s.append((char) randomChar); 3791 } 3792 } 3793 toSearch = s.toString(); 3794 m.reset(toSearch); 3795 } while (m.find()); 3796 3797 // Insert the pattern at a random spot 3798 int insertIndex = generator.nextInt(99); 3799 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3800 insertIndex++; 3801 s = s.insert(insertIndex, pattern); 3802 toSearch = s.toString(); 3803 3804 // Make sure that the pattern is found 3805 m.reset(toSearch); 3806 if (!m.find()) 3807 failCount++; 3808 3809 // 
Make sure that the match text is the pattern 3810 if (!m.group().equals(pattern)) 3811 failCount++; 3812 3813 // Make sure match occured at insertion point 3814 if (m.start() != insertIndex) 3815 failCount++; 3816 } 3817 } 3818 3819 private static void explainFailure(String pattern, String data, 3820 String expected, String actual) { 3821 System.err.println("----------------------------------------"); 3822 System.err.println("Pattern = "+pattern); 3823 System.err.println("Data = "+data); 3824 System.err.println("Expected = " + expected); 3825 System.err.println("Actual = " + actual); 3826 } 3827 3828 private static void explainFailure(String pattern, String data, 3829 Throwable t) { 3830 System.err.println("----------------------------------------"); 3831 System.err.println("Pattern = "+pattern); 3832 System.err.println("Data = "+data); 3833 t.printStackTrace(System.err); 3834 } 3835 3836 // Testing examples from a file 3837 3838 /** 3839 * Goes through the file "TestCases.txt" and creates many patterns 3840 * described in the file, matching the patterns against input lines in 3841 * the file, and comparing the results against the correct results 3842 * also found in the file. The file format is described in comments 3843 * at the head of the file. 3844 */ 3845 private static void processFile(String fileName) throws Exception { 3846 File testCases = new File(System.getProperty("test.src", "."), 3847 fileName); 3848 FileInputStream in = new FileInputStream(testCases); 3849 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3850 3851 // Process next test case. 
3852 String aLine; 3853 while((aLine = r.readLine()) != null) { 3854 // Read a line for pattern 3855 String patternString = grabLine(r); 3856 Pattern p = null; 3857 try { 3858 p = compileTestPattern(patternString); 3859 } catch (PatternSyntaxException e) { 3860 String dataString = grabLine(r); 3861 String expectedResult = grabLine(r); 3862 if (expectedResult.startsWith("error")) 3863 continue; 3864 explainFailure(patternString, dataString, e); 3865 failCount++; 3866 continue; 3867 } 3868 3869 // Read a line for input string 3870 String dataString = grabLine(r); 3871 Matcher m = p.matcher(dataString); 3872 StringBuffer result = new StringBuffer(); 3873 3874 // Check for IllegalStateExceptions before a match 3875 failCount += preMatchInvariants(m); 3876 3877 boolean found = m.find(); 3878 3879 if (found) 3880 failCount += postTrueMatchInvariants(m); 3881 else 3882 failCount += postFalseMatchInvariants(m); 3883 3884 if (found) { 3885 result.append("true "); 3886 result.append(m.group(0) + " "); 3887 } else { 3888 result.append("false "); 3889 } 3890 3891 result.append(m.groupCount()); 3892 3893 if (found) { 3894 for (int i=1; i<m.groupCount()+1; i++) 3895 if (m.group(i) != null) 3896 result.append(" " +m.group(i)); 3897 } 3898 3899 // Read a line for the expected result 3900 String expectedResult = grabLine(r); 3901 3902 if (!result.toString().equals(expectedResult)) { 3903 explainFailure(patternString, dataString, expectedResult, result.toString()); 3904 failCount++; 3905 } 3906 } 3907 3908 report(fileName); 3909 } 3910 3911 private static int preMatchInvariants(Matcher m) { 3912 int failCount = 0; 3913 try { 3914 m.start(); 3915 failCount++; 3916 } catch (IllegalStateException ise) {} 3917 try { 3918 m.end(); 3919 failCount++; 3920 } catch (IllegalStateException ise) {} 3921 try { 3922 m.group(); 3923 failCount++; 3924 } catch (IllegalStateException ise) {} 3925 return failCount; 3926 } 3927 3928 private static int postFalseMatchInvariants(Matcher m) { 3929 int 
failCount = 0; 3930 try { 3931 m.group(); 3932 failCount++; 3933 } catch (IllegalStateException ise) {} 3934 try { 3935 m.start(); 3936 failCount++; 3937 } catch (IllegalStateException ise) {} 3938 try { 3939 m.end(); 3940 failCount++; 3941 } catch (IllegalStateException ise) {} 3942 return failCount; 3943 } 3944 3945 private static int postTrueMatchInvariants(Matcher m) { 3946 int failCount = 0; 3947 //assert(m.start() = m.start(0); 3948 if (m.start() != m.start(0)) 3949 failCount++; 3950 //assert(m.end() = m.end(0); 3951 if (m.start() != m.start(0)) 3952 failCount++; 3953 //assert(m.group() = m.group(0); 3954 if (!m.group().equals(m.group(0))) 3955 failCount++; 3956 try { 3957 m.group(50); 3958 failCount++; 3959 } catch (IndexOutOfBoundsException ise) {} 3960 3961 return failCount; 3962 } 3963 3964 private static Pattern compileTestPattern(String patternString) { 3965 if (!patternString.startsWith("'")) { 3966 return Pattern.compile(patternString); 3967 } 3968 int break1 = patternString.lastIndexOf("'"); 3969 String flagString = patternString.substring( 3970 break1+1, patternString.length()); 3971 patternString = patternString.substring(1, break1); 3972 3973 if (flagString.equals("i")) 3974 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3975 3976 if (flagString.equals("m")) 3977 return Pattern.compile(patternString, Pattern.MULTILINE); 3978 3979 return Pattern.compile(patternString); 3980 } 3981 3982 /** 3983 * Reads a line from the input file. Keeps reading lines until a non 3984 * empty non comment line is read. If the line contains a \n then 3985 * these two characters are replaced by a newline char. If a \\uxxxx 3986 * sequence is read then the sequence is replaced by the unicode char. 
3987 */ 3988 private static String grabLine(BufferedReader r) throws Exception { 3989 int index = 0; 3990 String line = r.readLine(); 3991 while (line.startsWith("//") || line.length() < 1) 3992 line = r.readLine(); 3993 while ((index = line.indexOf("\\n")) != -1) { 3994 StringBuffer temp = new StringBuffer(line); 3995 temp.replace(index, index+2, "\n"); 3996 line = temp.toString(); 3997 } 3998 while ((index = line.indexOf("\\u")) != -1) { 3999 StringBuffer temp = new StringBuffer(line); 4000 String value = temp.substring(index+2, index+6); 4001 char aChar = (char)Integer.parseInt(value, 16); 4002 String unicodeChar = "" + aChar; 4003 temp.replace(index, index+6, unicodeChar); 4004 line = temp.toString(); 4005 } 4006 4007 return line; 4008 } 4009 4010 private static void check(Pattern p, String s, String g, String expected) { 4011 Matcher m = p.matcher(s); 4012 m.find(); 4013 if (!m.group(g).equals(expected) || 4014 s.charAt(m.start(g)) != expected.charAt(0) || 4015 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 4016 failCount++; 4017 } 4018 4019 private static void checkReplaceFirst(String p, String s, String r, String expected) 4020 { 4021 if (!expected.equals(Pattern.compile(p) 4022 .matcher(s) 4023 .replaceFirst(r))) 4024 failCount++; 4025 } 4026 4027 private static void checkReplaceAll(String p, String s, String r, String expected) 4028 { 4029 if (!expected.equals(Pattern.compile(p) 4030 .matcher(s) 4031 .replaceAll(r))) 4032 failCount++; 4033 } 4034 4035 private static void checkExpectedFail(String p) { 4036 try { 4037 Pattern.compile(p); 4038 } catch (PatternSyntaxException pse) { 4039 //pse.printStackTrace(); 4040 return; 4041 } 4042 failCount++; 4043 } 4044 4045 private static void checkExpectedIAE(Matcher m, String g) { 4046 m.find(); 4047 try { 4048 m.group(g); 4049 } catch (IllegalArgumentException x) { 4050 //iae.printStackTrace(); 4051 try { 4052 m.start(g); 4053 } catch (IllegalArgumentException xx) { 4054 try { 4055 m.start(g); 
4056 } catch (IllegalArgumentException xxx) { 4057 return; 4058 } 4059 } 4060 } 4061 failCount++; 4062 } 4063 4064 private static void checkExpectedNPE(Matcher m) { 4065 m.find(); 4066 try { 4067 m.group(null); 4068 } catch (NullPointerException x) { 4069 try { 4070 m.start(null); 4071 } catch (NullPointerException xx) { 4072 try { 4073 m.end(null); 4074 } catch (NullPointerException xxx) { 4075 return; 4076 } 4077 } 4078 } 4079 failCount++; 4080 } 4081 4082 private static void namedGroupCaptureTest() throws Exception { 4083 check(Pattern.compile("x+(?<gname>y+)z+"), 4084 "xxxyyyzzz", 4085 "gname", 4086 "yyy"); 4087 4088 check(Pattern.compile("x+(?<gname8>y+)z+"), 4089 "xxxyyyzzz", 4090 "gname8", 4091 "yyy"); 4092 4093 //backref 4094 Pattern pattern = Pattern.compile("(a*)bc\\1"); 4095 check(pattern, "zzzaabcazzz", true); // found "abca" 4096 4097 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 4098 "zzzaabcaazzz", true); 4099 4100 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 4101 "abcdefabc", true); 4102 4103 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 4104 "abcdefghijkk", true); 4105 4106 // Supplementary character tests 4107 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4108 toSupplementaries("zzzaabcazzz"), true); 4109 4110 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4111 toSupplementaries("zzzaabcaazzz"), true); 4112 4113 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 4114 toSupplementaries("abcdefabc"), true); 4115 4116 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 4117 "(?<gname>" + 4118 toSupplementaries("k)") + "\\k<gname>"), 4119 toSupplementaries("abcdefghijkk"), true); 4120 4121 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 4122 "xxxyyyzzzyyy", 4123 "gname", 4124 "yyy"); 4125 4126 //replaceFirst/All 4127 checkReplaceFirst("(?<gn>ab)(c*)", 4128 
"abccczzzabcczzzabccc", 4129 "${gn}", 4130 "abzzzabcczzzabccc"); 4131 4132 checkReplaceAll("(?<gn>ab)(c*)", 4133 "abccczzzabcczzzabccc", 4134 "${gn}", 4135 "abzzzabzzzab"); 4136 4137 4138 checkReplaceFirst("(?<gn>ab)(c*)", 4139 "zzzabccczzzabcczzzabccczzz", 4140 "${gn}", 4141 "zzzabzzzabcczzzabccczzz"); 4142 4143 checkReplaceAll("(?<gn>ab)(c*)", 4144 "zzzabccczzzabcczzzabccczzz", 4145 "${gn}", 4146 "zzzabzzzabzzzabzzz"); 4147 4148 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4149 "zzzabccczzzabcczzzabccczzz", 4150 "${gn2}", 4151 "zzzccczzzabcczzzabccczzz"); 4152 4153 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4154 "zzzabccczzzabcczzzabccczzz", 4155 "${gn2}", 4156 "zzzccczzzcczzzccczzz"); 4157 4158 //toSupplementaries("(ab)(c*)")); 4159 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4160 ")(?<gn2>" + toSupplementaries("c") + "*)", 4161 toSupplementaries("abccczzzabcczzzabccc"), 4162 "${gn1}", 4163 toSupplementaries("abzzzabcczzzabccc")); 4164 4165 4166 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4167 ")(?<gn2>" + toSupplementaries("c") + "*)", 4168 toSupplementaries("abccczzzabcczzzabccc"), 4169 "${gn1}", 4170 toSupplementaries("abzzzabzzzab")); 4171 4172 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4173 ")(?<gn2>" + toSupplementaries("c") + "*)", 4174 toSupplementaries("abccczzzabcczzzabccc"), 4175 "${gn2}", 4176 toSupplementaries("ccczzzabcczzzabccc")); 4177 4178 4179 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4180 ")(?<gn2>" + toSupplementaries("c") + "*)", 4181 toSupplementaries("abccczzzabcczzzabccc"), 4182 "${gn2}", 4183 toSupplementaries("ccczzzcczzzccc")); 4184 4185 checkReplaceFirst("(?<dog>Dog)AndCat", 4186 "zzzDogAndCatzzzDogAndCatzzz", 4187 "${dog}", 4188 "zzzDogzzzDogAndCatzzz"); 4189 4190 4191 checkReplaceAll("(?<dog>Dog)AndCat", 4192 "zzzDogAndCatzzzDogAndCatzzz", 4193 "${dog}", 4194 "zzzDogzzzDogzzz"); 4195 4196 // backref in Matcher & String 4197 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", 
"${gn}").equals("abefij") || 4198 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4199 failCount++; 4200 4201 // negative 4202 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4203 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4204 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4205 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4206 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4207 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4208 "gnameX"); 4209 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4210 report("NamedGroupCapture"); 4211 } 4212 4213 // This is for bug 6919132 4214 private static void nonBmpClassComplementTest() throws Exception { 4215 Pattern p = Pattern.compile("\\P{Lu}"); 4216 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4217 4218 if (m.find() && m.start() == 1) 4219 failCount++; 4220 4221 // from a unicode category 4222 p = Pattern.compile("\\P{Lu}"); 4223 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4224 if (m.find()) 4225 failCount++; 4226 if (!m.hitEnd()) 4227 failCount++; 4228 4229 // block 4230 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4231 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4232 if (m.find() && m.start() == 1) 4233 failCount++; 4234 4235 p = Pattern.compile("\\P{sc=GRANTHA}"); 4236 m = p.matcher(new String(new int[] {0x11350}, 0, 1)); 4237 if (m.find() && m.start() == 1) 4238 failCount++; 4239 4240 report("NonBmpClassComplement"); 4241 } 4242 4243 private static void unicodePropertiesTest() throws Exception { 4244 // different forms 4245 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4246 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4247 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4248 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4249 
!Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4250 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4251 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4252 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4253 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4254 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4255 failCount++; 4256 4257 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4258 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4259 Matcher lastSM = common; 4260 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4261 4262 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4263 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4264 Matcher lastBM = latin; 4265 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4266 4267 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4268 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4269 continue; // only pick couple code points, they are the same 4270 } 4271 4272 // Unicode Script 4273 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4274 Matcher m; 4275 String str = new String(Character.toChars(cp)); 4276 if (script == lastScript) { 4277 m = lastSM; 4278 m.reset(str); 4279 } else { 4280 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4281 } 4282 if (!m.matches()) { 4283 failCount++; 4284 } 4285 Matcher other = (script == Character.UnicodeScript.COMMON)? 
unknown : common; 4286 other.reset(str); 4287 if (other.matches()) { 4288 failCount++; 4289 } 4290 lastSM = m; 4291 lastScript = script; 4292 4293 // Unicode Block 4294 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4295 if (block == null) { 4296 //System.out.printf("Not a Block: cp=%x%n", cp); 4297 continue; 4298 } 4299 if (block == lastBlock) { 4300 m = lastBM; 4301 m.reset(str); 4302 } else { 4303 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4304 } 4305 if (!m.matches()) { 4306 failCount++; 4307 } 4308 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4309 other.reset(str); 4310 if (other.matches()) { 4311 failCount++; 4312 } 4313 lastBM = m; 4314 lastBlock = block; 4315 } 4316 report("unicodeProperties"); 4317 } 4318 4319 private static void unicodeHexNotationTest() throws Exception { 4320 4321 // negative 4322 checkExpectedFail("\\x{-23}"); 4323 checkExpectedFail("\\x{110000}"); 4324 checkExpectedFail("\\x{}"); 4325 checkExpectedFail("\\x{AB[ef]"); 4326 4327 // codepoint 4328 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4329 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4330 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4331 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4332 4333 // in class 4334 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4335 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4336 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4337 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4338 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4339 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4340 4341 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4342 String s = "A" + new String(Character.toChars(cp)) + "B"; 4343 String hexUTF16 = (cp <= 0xFFFF)? 
String.format("\\u%04x", cp) 4344 : String.format("\\u%04x\\u%04x", 4345 (int) Character.toChars(cp)[0], 4346 (int) Character.toChars(cp)[1]); 4347 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4348 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4349 failCount++; 4350 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4351 failCount++; 4352 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4353 failCount++; 4354 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4355 failCount++; 4356 } 4357 report("unicodeHexNotation"); 4358 } 4359 4360 private static void unicodeClassesTest() throws Exception { 4361 4362 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4363 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4364 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4365 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4366 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4367 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4368 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4369 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4370 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4371 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4372 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4373 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4374 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4375 Matcher bound = Pattern.compile("\\b").matcher(""); 4376 Matcher word = Pattern.compile("\\w++").matcher(""); 4377 // UNICODE_CHARACTER_CLASS 4378 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4379 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4380 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4381 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 
4382 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4383 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4384 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4385 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4386 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4387 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4388 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4389 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4390 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4391 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4392 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4393 // embedded flag (?U) 4394 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4395 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4396 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4397 4398 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4399 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4400 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4401 // properties 4402 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4403 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4404 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4405 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4406 Matcher alphaP = 
Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4407 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4408 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4409 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4410 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4411 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4412 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4413 // javaMethod 4414 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4415 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4416 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4417 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4418 // GC/C 4419 Matcher gcC = Pattern.compile("\\p{C}").matcher(""); 4420 4421 for (int cp = 1; cp < 0x30000; cp++) { 4422 String str = new String(Character.toChars(cp)); 4423 int type = Character.getType(cp); 4424 if (// lower 4425 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4426 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4427 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4428 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4429 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4430 // upper 4431 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4432 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4433 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4434 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4435 // alpha 4436 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4437 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4438 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4439 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4440 // digit 4441 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4442 
Character.isDigit(cp) != digitU.reset(str).matches() || 4443 // alnum 4444 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4445 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4446 // punct 4447 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4448 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4449 // graph 4450 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4451 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4452 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4453 // blank 4454 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4455 != blank.reset(str).matches() || 4456 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4457 // print 4458 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4459 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4460 // cntrl 4461 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4462 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4463 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4464 // hexdigit 4465 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4466 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4467 // space 4468 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4469 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4470 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4471 // word 4472 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4473 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4474 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4475 // bwordb 4476 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4477 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4478 // properties 4479 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4480 Character.isLetter(cp) != letterP.reset(str).matches()|| 4481 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4482 Character.isIdeographic(cp) != 
ideogJ.reset(str).matches() || 4483 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 4484 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4485 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() || 4486 // gc_C 4487 (Character.CONTROL == type || Character.FORMAT == type || 4488 Character.PRIVATE_USE == type || Character.SURROGATE == type || 4489 Character.UNASSIGNED == type) 4490 != gcC.reset(str).matches()) { 4491 failCount++; 4492 } 4493 } 4494 4495 // bounds/word align 4496 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4497 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4498 failCount++; 4499 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4500 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4501 failCount++; 4502 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4503 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4504 failCount++; 4505 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4506 failCount++; 4507 report("unicodePredefinedClasses"); 4508 } 4509 4510 private static void unicodeCharacterNameTest() throws Exception { 4511 4512 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { 4513 if (!Character.isValidCodePoint(cp) || 4514 Character.getType(cp) == Character.UNASSIGNED) 4515 continue; 4516 String str = new String(Character.toChars(cp)); 4517 // single 4518 String p = "\\N{" + Character.getName(cp) + "}"; 4519 if (!Pattern.compile(p).matcher(str).matches()) { 4520 failCount++; 4521 } 4522 // class[c] 4523 p = "[\\N{" + Character.getName(cp) + "}]"; 4524 if (!Pattern.compile(p).matcher(str).matches()) { 4525 failCount++; 4526 } 4527 } 4528 4529 // range 4530 for (int i = 0; i < 10; i++) { 4531 int start = generator.nextInt(20); 4532 int end = start + generator.nextInt(200); 4533 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; 4534 String str; 4535 for (int cp = start; cp < end; cp++) { 4536 str = new 
String(Character.toChars(cp)); 4537 if (!Pattern.compile(p).matcher(str).matches()) { 4538 failCount++; 4539 } 4540 } 4541 str = new String(Character.toChars(end + 10)); 4542 if (Pattern.compile(p).matcher(str).matches()) { 4543 failCount++; 4544 } 4545 } 4546 4547 // slice 4548 for (int i = 0; i < 10; i++) { 4549 int n = generator.nextInt(256); 4550 int[] buf = new int[n]; 4551 StringBuffer sb = new StringBuffer(1024); 4552 for (int j = 0; j < n; j++) { 4553 int cp = generator.nextInt(1000); 4554 if (!Character.isValidCodePoint(cp) || 4555 Character.getType(cp) == Character.UNASSIGNED) 4556 cp = 0x4e00; // just use 4e00 4557 sb.append("\\N{" + Character.getName(cp) + "}"); 4558 buf[j] = cp; 4559 } 4560 String p = sb.toString(); 4561 String str = new String(buf, 0, buf.length); 4562 if (!Pattern.compile(p).matcher(str).matches()) { 4563 failCount++; 4564 } 4565 } 4566 report("unicodeCharacterName"); 4567 } 4568 4569 private static void horizontalAndVerticalWSTest() throws Exception { 4570 String hws = new String (new char[] { 4571 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4572 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4573 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4574 0x202f, 0x205f, 0x3000 }); 4575 String vws = new String (new char[] { 4576 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4577 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4578 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4579 failCount++; 4580 if (Pattern.compile("\\H").matcher(hws).find() || 4581 Pattern.compile("[\\H]").matcher(hws).find()) 4582 failCount++; 4583 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4584 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4585 failCount++; 4586 if (Pattern.compile("\\V").matcher(vws).find() || 4587 Pattern.compile("[\\V]").matcher(vws).find()) 4588 failCount++; 4589 String prefix = "abcd"; 4590 String suffix = "efgh"; 4591 String ng = "A"; 4592 for (int i = 0; i < hws.length(); i++) { 4593 String c = String.valueOf(hws.charAt(i)); 
4594 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4595 if (!m.find() || !c.equals(m.group())) 4596 failCount++; 4597 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4598 if (!m.find() || !c.equals(m.group())) 4599 failCount++; 4600 4601 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4602 if (!m.find() || !ng.equals(m.group())) 4603 failCount++; 4604 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4605 if (!m.find() || !ng.equals(m.group())) 4606 failCount++; 4607 } 4608 for (int i = 0; i < vws.length(); i++) { 4609 String c = String.valueOf(vws.charAt(i)); 4610 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4611 if (!m.find() || !c.equals(m.group())) 4612 failCount++; 4613 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4614 if (!m.find() || !c.equals(m.group())) 4615 failCount++; 4616 4617 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4618 if (!m.find() || !ng.equals(m.group())) 4619 failCount++; 4620 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4621 if (!m.find() || !ng.equals(m.group())) 4622 failCount++; 4623 } 4624 // \v in range is interpreted as 0x0B. 
This is the undocumented behavior 4625 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4626 failCount++; 4627 report("horizontalAndVerticalWSTest"); 4628 } 4629 4630 private static void linebreakTest() throws Exception { 4631 String linebreaks = new String (new char[] { 4632 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4633 String crnl = "\r\n"; 4634 if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() && 4635 Pattern.compile("\\R").matcher(crnl).matches() && 4636 Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() && 4637 Pattern.compile("\\Rabc").matcher("\rabc").matches() && 4638 Pattern.compile("\\R\\R").matcher(crnl).matches() && // backtracking 4639 Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking 4640 !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029 4641 failCount++; 4642 } 4643 report("linebreakTest"); 4644 } 4645 4646 // #7189363 4647 private static void branchTest() throws Exception { 4648 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4649 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4650 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4651 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4652 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4653 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4654 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4655 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4656 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4657 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4658 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4659 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4660 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4661 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4662 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4663 
!Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4664 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4665 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4666 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4667 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4668 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 4669 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4670 failCount++; 4671 report("branchTest"); 4672 } 4673 4674 // This test is for 8007395 4675 private static void groupCurlyNotFoundSuppTest() throws Exception { 4676 String input = "test this as \ud83d\ude0d"; 4677 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4678 "test(.)*(@[a-zA-Z.]+)", 4679 "test([^B])+(@[a-zA-Z.]+)", 4680 "test([^B])*(@[a-zA-Z.]+)", 4681 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4682 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4683 }) { 4684 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4685 .matcher(input); 4686 try { 4687 if (m.find()) { 4688 failCount++; 4689 } 4690 } catch (Exception x) { 4691 failCount++; 4692 } 4693 } 4694 report("GroupCurly NotFoundSupp"); 4695 } 4696 4697 // This test is for 8023647 4698 private static void groupCurlyBackoffTest() throws Exception { 4699 if (!"abc1c".matches("(\\w)+1\\1") || 4700 "abc11".matches("(\\w)+1\\1")) { 4701 failCount++; 4702 } 4703 report("GroupCurly backoff"); 4704 } 4705 4706 // This test is for 8012646 4707 private static void patternAsPredicate() throws Exception { 4708 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4709 4710 if (p.test("")) { 4711 failCount++; 4712 } 4713 if (!p.test("word")) { 4714 failCount++; 4715 } 4716 if (p.test("1234")) { 4717 failCount++; 4718 } 4719 if (!p.test("word1234")) { 4720 failCount++; 4721 } 4722 report("Pattern.asPredicate"); 4723 } 4724 4725 // This test is for 8184692 4726 private static void patternAsMatchPredicate() throws Exception { 4727 Predicate<String> p = 
Pattern.compile("[a-z]+").asMatchPredicate(); 4728 4729 if (p.test("")) { 4730 failCount++; 4731 } 4732 if (!p.test("word")) { 4733 failCount++; 4734 } 4735 if (p.test("1234word")) { 4736 failCount++; 4737 } 4738 if (p.test("1234")) { 4739 failCount++; 4740 } 4741 report("Pattern.asMatchPredicate"); 4742 } 4743 4744 4745 // This test is for 8035975 4746 private static void invalidFlags() throws Exception { 4747 for (int flag = 1; flag != 0; flag <<= 1) { 4748 switch (flag) { 4749 case Pattern.CASE_INSENSITIVE: 4750 case Pattern.MULTILINE: 4751 case Pattern.DOTALL: 4752 case Pattern.UNICODE_CASE: 4753 case Pattern.CANON_EQ: 4754 case Pattern.UNIX_LINES: 4755 case Pattern.LITERAL: 4756 case Pattern.UNICODE_CHARACTER_CLASS: 4757 case Pattern.COMMENTS: 4758 // valid flag, continue 4759 break; 4760 default: 4761 try { 4762 Pattern.compile(".", flag); 4763 failCount++; 4764 } catch (IllegalArgumentException expected) { 4765 } 4766 } 4767 } 4768 report("Invalid compile flags"); 4769 } 4770 4771 // This test is for 8158482 4772 private static void embeddedFlags() throws Exception { 4773 try { 4774 Pattern.compile("(?i).(?-i)."); 4775 Pattern.compile("(?m).(?-m)."); 4776 Pattern.compile("(?s).(?-s)."); 4777 Pattern.compile("(?d).(?-d)."); 4778 Pattern.compile("(?u).(?-u)."); 4779 Pattern.compile("(?c).(?-c)."); 4780 Pattern.compile("(?x).(?-x)."); 4781 Pattern.compile("(?U).(?-U)."); 4782 Pattern.compile("(?imsducxU).(?-imsducxU)."); 4783 } catch (PatternSyntaxException x) { 4784 failCount++; 4785 } 4786 report("Embedded flags"); 4787 } 4788 4789 private static void grapheme() throws Exception { 4790 Files.lines(UCDFiles.GRAPHEME_BREAK_TEST) 4791 .filter( ln -> ln.length() != 0 && !ln.startsWith("#") ) 4792 .forEach( ln -> { 4793 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); 4794 // System.out.println(str); 4795 String[] strs = ln.split("\u00f7|\u00d7"); 4796 StringBuilder src = new StringBuilder(); 4797 ArrayList<String> graphemes = new 
ArrayList<>(); 4798 StringBuilder buf = new StringBuilder(); 4799 int offBk = 0; 4800 for (String str : strs) { 4801 if (str.length() == 0) // first empty str 4802 continue; 4803 int cp = Integer.parseInt(str, 16); 4804 src.appendCodePoint(cp); 4805 buf.appendCodePoint(cp); 4806 offBk += (str.length() + 1); 4807 if (ln.charAt(offBk) == '\u00f7') { // DIV 4808 graphemes.add(buf.toString()); 4809 buf = new StringBuilder(); 4810 } 4811 } 4812 Pattern p = Pattern.compile("\\X"); 4813 Matcher m = p.matcher(src.toString()); 4814 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4815 for (String g : graphemes) { 4816 // System.out.printf(" grapheme:=[%s]%n", g); 4817 // (1) test \\X directly 4818 if (!m.find() || !m.group().equals(g)) { 4819 System.out.println("Failed \\X [" + ln + "] : " + g); 4820 failCount++; 4821 } 4822 // (2) test \\b{g} + \\X via Scanner 4823 boolean hasNext = s.hasNext(p); 4824 // if (!s.hasNext() || !s.next().equals(next)) { 4825 if (!s.hasNext(p) || !s.next(p).equals(g)) { 4826 System.out.println("Failed b{g} [" + ln + "] : " + g); 4827 failCount++; 4828 } 4829 } 4830 }); 4831 // some sanity checks 4832 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() || 4833 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() || 4834 !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches()) 4835 failCount++; 4836 // make sure "\b{n}" still works 4837 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches()) 4838 failCount++; 4839 report("Unicode extended grapheme cluster"); 4840 } 4841 4842 // hangup/timeout if go into exponential backtracking 4843 private static void expoBacktracking() throws Exception { 4844 4845 Object[][] patternMatchers = { 4846 // 6328855 4847 { "(.*\n*)*", 4848 "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)", 4849 false }, 4850 // 
6192895 4851 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4852 "Hello World this is a test this is a test this is a test A", 4853 true }, 4854 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4855 "Hello World this is a test this is a test this is a test \u4e00 ", 4856 false }, 4857 { " *([a-z0-9]+ *)+", 4858 "hello world this is a test this is a test this is a test A", 4859 false }, 4860 // 4771934 [FIXED] #5013651? 4861 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4862 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com", 4863 true }, 4864 // 4866249 [FIXED] 4865 { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>", 4866 "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">", 4867 true }, 4868 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4869 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com", 4870 false }, 4871 // 6345469 4872 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4873 " < br/> < / p> <p> <html> <adfasfdasdf> </p>", 4874 true }, // --> matched 4875 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4876 " < br/> < / p> <p> <html> <adfasfdasdf> p </p>", 4877 false }, 4878 // 5026912 4879 { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$", 4880 "156580451111112225588087755221111111566969655555555", 4881 false}, 4882 // 6988218 4883 { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')", 4884 "'%)) order by ANGEBOT.ID", 4885 false}, // find 4886 // 6693451 4887 { "^(\\s*foo\\s*)*$", 4888 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo", 4889 true }, 4890 { "^(\\s*foo\\s*)*$", 4891 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo 
foo foo fo", 4892 false 4893 }, 4894 // 7006761 4895 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true}, 4896 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false}, 4897 // 8140212 4898 { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)", 4899 "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()", 4900 false 4901 }, 4902 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true}, 4903 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false}, 4904 4905 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true }, 4906 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4907 4908 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true}, 4909 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4910 4911 { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false}, 4912 4913 /* not fixed 4914 //8132141 ---> second level exponential backtracking 4915 { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*", 4916 "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" }, 4917 */ 4918 }; 4919 4920 for (Object[] pm : patternMatchers) { 4921 String p = (String)pm[0]; 4922 String s = (String)pm[1]; 4923 boolean r = (Boolean)pm[2]; 4924 if (r != Pattern.compile(p).matcher(s).matches()) { 4925 failCount++; 4926 } 4927 } 4928 } 4929 4930 private static void invalidGroupName() { 4931 // Invalid start of a group name 4932 for (String groupName : List.of("", ".", "0", "\u0040", "\u005b", 4933 "\u0060", "\u007b", "\u0416")) { 4934 for (String pat : List.of("(?<" + 
groupName + ">)", 4935 "\\k<" + groupName + ">")) { 4936 try { 4937 Pattern.compile(pat); 4938 failCount++; 4939 } catch (PatternSyntaxException e) { 4940 if (!e.getMessage().startsWith( 4941 "capturing group name does not start with a" 4942 + " Latin letter")) { 4943 failCount++; 4944 } 4945 } 4946 } 4947 } 4948 // Invalid char in a group name 4949 for (String groupName : List.of("a.", "b\u0040", "c\u005b", 4950 "d\u0060", "e\u007b", "f\u0416")) { 4951 for (String pat : List.of("(?<" + groupName + ">)", 4952 "\\k<" + groupName + ">")) { 4953 try { 4954 Pattern.compile(pat); 4955 failCount++; 4956 } catch (PatternSyntaxException e) { 4957 if (!e.getMessage().startsWith( 4958 "named capturing group is missing trailing '>'")) { 4959 failCount++; 4960 } 4961 } 4962 } 4963 } 4964 report("Invalid capturing group names"); 4965 } 4966 4967 private static void illegalRepetitionRange() { 4968 // huge integers > (2^31 - 1) 4969 String n = BigInteger.valueOf(1L << 32) 4970 .toString(); 4971 String m = BigInteger.valueOf(1L << 31) 4972 .add(new BigInteger(80, generator)) 4973 .toString(); 4974 for (String rep : List.of("", "x", ".", ",", "-1", "2,1", 4975 n, n + ",", "0," + n, n + "," + m, m, m + ",", "0," + m)) { 4976 String pat = ".{" + rep + "}"; 4977 try { 4978 Pattern.compile(pat); 4979 failCount++; 4980 System.out.println("Expected to fail. 
Pattern: " + pat); 4981 } catch (PatternSyntaxException e) { 4982 if (!e.getMessage().startsWith("Illegal repetition")) { 4983 failCount++; 4984 System.out.println("Unexpected error message: " + e.getMessage()); 4985 } 4986 } catch (Throwable t) { 4987 failCount++; 4988 System.out.println("Unexpected exception: " + t); 4989 } 4990 } 4991 report("illegalRepetitionRange"); 4992 } 4993 4994 private static void surrogatePairWithCanonEq() { 4995 try { 4996 Pattern.compile("\ud834\udd21", Pattern.CANON_EQ); 4997 } catch (Throwable t) { 4998 failCount++; 4999 System.out.println("Unexpected exception: " + t); 5000 } 5001 report("surrogatePairWithCanonEq"); 5002 } 5003 }