1 /* 2 * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895 37 * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706 38 * 8194667 8197462 8184692 8221431 8224789 8228352 8230365 39 * 40 * @library /test/lib 41 * @library /lib/testlibrary/java/lang 42 * @build jdk.test.lib.RandomFactory 43 * @run main RegExTest 44 * @key randomness 45 */ 46 47 import java.io.BufferedReader; 48 import java.io.ByteArrayInputStream; 49 import java.io.ByteArrayOutputStream; 50 import java.io.File; 51 import java.io.FileInputStream; 52 import java.io.InputStreamReader; 53 import java.io.ObjectInputStream; 54 import java.io.ObjectOutputStream; 55 import java.math.BigInteger; 56 import java.nio.CharBuffer; 57 import java.nio.file.Files; 58 import java.util.ArrayList; 59 import java.util.Arrays; 60 import java.util.HashMap; 61 import java.util.List; 62 import java.util.Random; 63 import java.util.Scanner; 64 import java.util.function.Function; 65 import java.util.function.Predicate; 66 import java.util.regex.Matcher; 67 import java.util.regex.MatchResult; 68 import java.util.regex.Pattern; 69 import java.util.regex.PatternSyntaxException; 70 import jdk.test.lib.RandomFactory; 71 72 /** 73 * This is a test class created to check the 
operation of 74 * the Pattern and Matcher classes. 75 */ 76 public class RegExTest { 77 78 private static Random generator = RandomFactory.getRandom(); 79 private static boolean failure = false; 80 private static int failCount = 0; 81 private static String firstFailure = null; 82 83 /** 84 * Main to interpret arguments and run several tests. 85 * 86 */ 87 public static void main(String[] args) throws Exception { 88 // Most of the tests are in a file 89 processFile("TestCases.txt"); 90 //processFile("PerlCases.txt"); 91 processFile("BMPTestCases.txt"); 92 processFile("SupplementaryTestCases.txt"); 93 94 // These test many randomly generated char patterns 95 bm(); 96 slice(); 97 98 // These are hard to put into the file 99 escapes(); 100 blankInput(); 101 102 // Substitition tests on randomly generated sequences 103 globalSubstitute(); 104 stringbufferSubstitute(); 105 stringbuilderSubstitute(); 106 107 substitutionBasher(); 108 substitutionBasher2(); 109 110 // Canonical Equivalence 111 ceTest(); 112 113 // Anchors 114 anchorTest(); 115 116 // boolean match calls 117 matchesTest(); 118 lookingAtTest(); 119 120 // Pattern API 121 patternMatchesTest(); 122 123 // Misc 124 lookbehindTest(); 125 nullArgumentTest(); 126 backRefTest(); 127 groupCaptureTest(); 128 caretTest(); 129 charClassTest(); 130 emptyPatternTest(); 131 findIntTest(); 132 group0Test(); 133 longPatternTest(); 134 octalTest(); 135 ampersandTest(); 136 negationTest(); 137 splitTest(); 138 appendTest(); 139 caseFoldingTest(); 140 commentsTest(); 141 unixLinesTest(); 142 replaceFirstTest(); 143 gTest(); 144 zTest(); 145 serializeTest(); 146 reluctantRepetitionTest(); 147 multilineDollarTest(); 148 dollarAtEndTest(); 149 caretBetweenTerminatorsTest(); 150 // This RFE rejected in Tiger numOccurrencesTest(); 151 javaCharClassTest(); 152 nonCaptureRepetitionTest(); 153 notCapturedGroupCurlyMatchTest(); 154 escapedSegmentTest(); 155 literalPatternTest(); 156 literalReplacementTest(); 157 regionTest(); 158 
toStringTest(); 159 negatedCharClassTest(); 160 findFromTest(); 161 boundsTest(); 162 unicodeWordBoundsTest(); 163 caretAtEndTest(); 164 wordSearchTest(); 165 hitEndTest(); 166 toMatchResultTest(); 167 toMatchResultTest2(); 168 surrogatesInClassTest(); 169 removeQEQuotingTest(); 170 namedGroupCaptureTest(); 171 nonBmpClassComplementTest(); 172 unicodePropertiesTest(); 173 unicodeHexNotationTest(); 174 unicodeClassesTest(); 175 unicodeCharacterNameTest(); 176 horizontalAndVerticalWSTest(); 177 linebreakTest(); 178 branchTest(); 179 groupCurlyNotFoundSuppTest(); 180 groupCurlyBackoffTest(); 181 patternAsPredicate(); 182 patternAsMatchPredicate(); 183 invalidFlags(); 184 embeddedFlags(); 185 grapheme(); 186 expoBacktracking(); 187 invalidGroupName(); 188 illegalRepetitionRange(); 189 surrogatePairWithCanonEq(); 190 controlCharacters(); 191 192 if (failure) { 193 throw new 194 RuntimeException("RegExTest failed, 1st failure: " + 195 firstFailure); 196 } else { 197 System.err.println("OKAY: All tests passed."); 198 } 199 } 200 201 // Utility functions 202 203 private static String getRandomAlphaString(int length) { 204 StringBuffer buf = new StringBuffer(length); 205 for (int i=0; i<length; i++) { 206 char randChar = (char)(97 + generator.nextInt(26)); 207 buf.append(randChar); 208 } 209 return buf.toString(); 210 } 211 212 private static void check(Matcher m, String expected) { 213 m.find(); 214 if (!m.group().equals(expected)) 215 failCount++; 216 } 217 218 private static void check(Matcher m, String result, boolean expected) { 219 m.find(); 220 if (m.group().equals(result) != expected) 221 failCount++; 222 } 223 224 private static void check(Pattern p, String s, boolean expected) { 225 if (p.matcher(s).find() != expected) 226 failCount++; 227 } 228 229 private static void check(String p, String s, boolean expected) { 230 Matcher matcher = Pattern.compile(p).matcher(s); 231 if (matcher.find() != expected) 232 failCount++; 233 } 234 235 private static void check(String 
p, char c, boolean expected) { 236 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 237 Pattern pattern = Pattern.compile(propertyPattern); 238 char[] ca = new char[1]; ca[0] = c; 239 Matcher matcher = pattern.matcher(new String(ca)); 240 if (!matcher.find()) 241 failCount++; 242 } 243 244 private static void check(String p, int codePoint, boolean expected) { 245 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 246 Pattern pattern = Pattern.compile(propertyPattern); 247 char[] ca = Character.toChars(codePoint); 248 Matcher matcher = pattern.matcher(new String(ca)); 249 if (!matcher.find()) 250 failCount++; 251 } 252 253 private static void check(String p, int flag, String input, String s, 254 boolean expected) 255 { 256 Pattern pattern = Pattern.compile(p, flag); 257 Matcher matcher = pattern.matcher(input); 258 if (expected) 259 check(matcher, s, expected); 260 else 261 check(pattern, input, false); 262 } 263 264 private static void report(String testName) { 265 int spacesToAdd = 30 - testName.length(); 266 StringBuffer paddedNameBuffer = new StringBuffer(testName); 267 for (int i=0; i<spacesToAdd; i++) 268 paddedNameBuffer.append(" "); 269 String paddedName = paddedNameBuffer.toString(); 270 System.err.println(paddedName + ": " + 271 (failCount==0 ? "Passed":"Failed("+failCount+")")); 272 if (failCount > 0) { 273 failure = true; 274 275 if (firstFailure == null) { 276 firstFailure = testName; 277 } 278 } 279 280 failCount = 0; 281 } 282 283 /** 284 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 285 * supplementary characters. This method does NOT fully take care 286 * of the regex syntax. 
287 */ 288 private static String toSupplementaries(String s) { 289 int length = s.length(); 290 StringBuffer sb = new StringBuffer(length * 2); 291 292 for (int i = 0; i < length; ) { 293 char c = s.charAt(i++); 294 if (c == '\\') { 295 sb.append(c); 296 if (i < length) { 297 c = s.charAt(i++); 298 sb.append(c); 299 if (c == 'u') { 300 // assume no syntax error 301 sb.append(s.charAt(i++)); 302 sb.append(s.charAt(i++)); 303 sb.append(s.charAt(i++)); 304 sb.append(s.charAt(i++)); 305 } 306 } 307 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 308 sb.append('\ud800').append((char)('\udc00'+c)); 309 } else { 310 sb.append(c); 311 } 312 } 313 return sb.toString(); 314 } 315 316 // Regular expression tests 317 318 // This is for bug 6178785 319 // Test if an expected NPE gets thrown when passing in a null argument 320 private static boolean check(Runnable test) { 321 try { 322 test.run(); 323 failCount++; 324 return false; 325 } catch (NullPointerException npe) { 326 return true; 327 } 328 } 329 330 private static void nullArgumentTest() { 331 check(() -> Pattern.compile(null)); 332 check(() -> Pattern.matches(null, null)); 333 check(() -> Pattern.matches("xyz", null)); 334 check(() -> Pattern.quote(null)); 335 check(() -> Pattern.compile("xyz").split(null)); 336 check(() -> Pattern.compile("xyz").matcher(null)); 337 338 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 339 m.matches(); 340 check(() -> m.appendTail((StringBuffer) null)); 341 check(() -> m.appendTail((StringBuilder)null)); 342 check(() -> m.replaceAll((String) null)); 343 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 344 check(() -> m.replaceFirst((String)null)); 345 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 346 check(() -> m.appendReplacement((StringBuffer)null, null)); 347 check(() -> m.appendReplacement((StringBuilder)null, null)); 348 check(() -> m.reset(null)); 349 check(() -> Matcher.quoteReplacement(null)); 350 //check(() -> 
m.usePattern(null)); 351 352 report("Null Argument"); 353 } 354 355 // This is for bug6635133 356 // Test if surrogate pair in Unicode escapes can be handled correctly. 357 private static void surrogatesInClassTest() throws Exception { 358 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 359 Matcher matcher = pattern.matcher("\ud834\udd22"); 360 if (!matcher.find()) 361 failCount++; 362 363 report("Surrogate pair in Unicode escape"); 364 } 365 366 // This is for bug6990617 367 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 368 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 369 // char is an octal digit. 370 private static void removeQEQuotingTest() throws Exception { 371 Pattern pattern = 372 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 373 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 374 if (!matcher.find()) 375 failCount++; 376 377 report("Remove Q/E Quoting"); 378 } 379 380 // This is for bug 4988891 381 // Test toMatchResult to see that it is a copy of the Matcher 382 // that is not affected by subsequent operations on the original 383 private static void toMatchResultTest() throws Exception { 384 Pattern pattern = Pattern.compile("squid"); 385 Matcher matcher = pattern.matcher( 386 "agiantsquidofdestinyasmallsquidoffate"); 387 matcher.find(); 388 int matcherStart1 = matcher.start(); 389 MatchResult mr = matcher.toMatchResult(); 390 if (mr == matcher) 391 failCount++; 392 int resultStart1 = mr.start(); 393 if (matcherStart1 != resultStart1) 394 failCount++; 395 matcher.find(); 396 int matcherStart2 = matcher.start(); 397 int resultStart2 = mr.start(); 398 if (matcherStart2 == resultStart2) 399 failCount++; 400 if (resultStart1 != resultStart2) 401 failCount++; 402 MatchResult mr2 = matcher.toMatchResult(); 403 if (mr == mr2) 404 failCount++; 405 if (mr2.start() != matcherStart2) 406 failCount++; 407 report("toMatchResult is a copy"); 408 } 409 410 private 
static void checkExpectedISE(Runnable test) { 411 try { 412 test.run(); 413 failCount++; 414 } catch (IllegalStateException x) { 415 } catch (IndexOutOfBoundsException xx) { 416 failCount++; 417 } 418 } 419 420 private static void checkExpectedIOOE(Runnable test) { 421 try { 422 test.run(); 423 failCount++; 424 } catch (IndexOutOfBoundsException x) {} 425 } 426 427 // This is for bug 8074678 428 // Test the result of toMatchResult throws ISE if no match is availble 429 private static void toMatchResultTest2() throws Exception { 430 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 431 matcher.find(); 432 MatchResult mr = matcher.toMatchResult(); 433 434 checkExpectedISE(() -> mr.start()); 435 checkExpectedISE(() -> mr.start(2)); 436 checkExpectedISE(() -> mr.end()); 437 checkExpectedISE(() -> mr.end(2)); 438 checkExpectedISE(() -> mr.group()); 439 checkExpectedISE(() -> mr.group(2)); 440 441 matcher = Pattern.compile("(match)").matcher("there is a match"); 442 matcher.find(); 443 MatchResult mr2 = matcher.toMatchResult(); 444 checkExpectedIOOE(() -> mr2.start(2)); 445 checkExpectedIOOE(() -> mr2.end(2)); 446 checkExpectedIOOE(() -> mr2.group(2)); 447 448 report("toMatchResult2 appropriate exceptions"); 449 } 450 451 // This is for bug 5013885 452 // Must test a slice to see if it reports hitEnd correctly 453 private static void hitEndTest() throws Exception { 454 // Basic test of Slice node 455 Pattern p = Pattern.compile("^squidattack"); 456 Matcher m = p.matcher("squack"); 457 m.find(); 458 if (m.hitEnd()) 459 failCount++; 460 m.reset("squid"); 461 m.find(); 462 if (!m.hitEnd()) 463 failCount++; 464 465 // Test Slice, SliceA and SliceU nodes 466 for (int i=0; i<3; i++) { 467 int flags = 0; 468 if (i==1) flags = Pattern.CASE_INSENSITIVE; 469 if (i==2) flags = Pattern.UNICODE_CASE; 470 p = Pattern.compile("^abc", flags); 471 m = p.matcher("ad"); 472 m.find(); 473 if (m.hitEnd()) 474 failCount++; 475 m.reset("ab"); 476 m.find(); 477 if 
(!m.hitEnd()) 478 failCount++; 479 } 480 481 // Test Boyer-Moore node 482 p = Pattern.compile("catattack"); 483 m = p.matcher("attack"); 484 m.find(); 485 if (!m.hitEnd()) 486 failCount++; 487 488 p = Pattern.compile("catattack"); 489 m = p.matcher("attackattackattackcatatta"); 490 m.find(); 491 if (!m.hitEnd()) 492 failCount++; 493 494 // 8184706: Matching u+0d at EOL against \R should hit-end 495 p = Pattern.compile("...\\R"); 496 m = p.matcher("cat" + (char)0x0a); 497 m.find(); 498 if (m.hitEnd()) 499 failCount++; 500 501 m = p.matcher("cat" + (char)0x0d); 502 m.find(); 503 if (!m.hitEnd()) 504 failCount++; 505 506 m = p.matcher("cat" + (char)0x0d + (char)0x0a); 507 m.find(); 508 if (m.hitEnd()) 509 failCount++; 510 511 report("hitEnd"); 512 } 513 514 // This is for bug 4997476 515 // It is weird code submitted by customer demonstrating a regression 516 private static void wordSearchTest() throws Exception { 517 String testString = new String("word1 word2 word3"); 518 Pattern p = Pattern.compile("\\b"); 519 Matcher m = p.matcher(testString); 520 int position = 0; 521 int start = 0; 522 while (m.find(position)) { 523 start = m.start(); 524 if (start == testString.length()) 525 break; 526 if (m.find(start+1)) { 527 position = m.start(); 528 } else { 529 position = testString.length(); 530 } 531 if (testString.substring(start, position).equals(" ")) 532 continue; 533 if (!testString.substring(start, position-1).startsWith("word")) 534 failCount++; 535 } 536 report("Customer word search"); 537 } 538 539 // This is for bug 4994840 540 private static void caretAtEndTest() throws Exception { 541 // Problem only occurs with multiline patterns 542 // containing a beginning-of-line caret "^" followed 543 // by an expression that also matches the empty string. 
544 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 545 Matcher matcher = pattern.matcher("\r"); 546 matcher.find(); 547 matcher.find(); 548 report("Caret at end"); 549 } 550 551 // This test is for 4979006 552 // Check to see if word boundary construct properly handles unicode 553 // non spacing marks 554 private static void unicodeWordBoundsTest() throws Exception { 555 String spaces = " "; 556 String wordChar = "a"; 557 String nsm = "\u030a"; 558 559 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 560 561 Pattern pattern = Pattern.compile("\\b"); 562 Matcher matcher = pattern.matcher(""); 563 // S=other B=word character N=non spacing mark .=word boundary 564 // SS.BB.SS 565 String input = spaces + wordChar + wordChar + spaces; 566 twoFindIndexes(input, matcher, 2, 4); 567 // SS.BBN.SS 568 input = spaces + wordChar +wordChar + nsm + spaces; 569 twoFindIndexes(input, matcher, 2, 5); 570 // SS.BN.SS 571 input = spaces + wordChar + nsm + spaces; 572 twoFindIndexes(input, matcher, 2, 4); 573 // SS.BNN.SS 574 input = spaces + wordChar + nsm + nsm + spaces; 575 twoFindIndexes(input, matcher, 2, 5); 576 // SSN.BB.SS 577 input = spaces + nsm + wordChar + wordChar + spaces; 578 twoFindIndexes(input, matcher, 3, 5); 579 // SS.BNB.SS 580 input = spaces + wordChar + nsm + wordChar + spaces; 581 twoFindIndexes(input, matcher, 2, 5); 582 // SSNNSS 583 input = spaces + nsm + nsm + spaces; 584 matcher.reset(input); 585 if (matcher.find()) 586 failCount++; 587 // SSN.BBN.SS 588 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 589 twoFindIndexes(input, matcher, 3, 6); 590 591 report("Unicode word boundary"); 592 } 593 594 private static void twoFindIndexes(String input, Matcher matcher, int a, 595 int b) throws Exception 596 { 597 matcher.reset(input); 598 matcher.find(); 599 if (matcher.start() != a) 600 failCount++; 601 matcher.find(); 602 if (matcher.start() != b) 603 failCount++; 604 } 605 606 // This test is for 6284152 607 static 
void check(String regex, String input, String[] expected) { 608 List<String> result = new ArrayList<String>(); 609 Pattern p = Pattern.compile(regex); 610 Matcher m = p.matcher(input); 611 while (m.find()) { 612 result.add(m.group()); 613 } 614 if (!Arrays.asList(expected).equals(result)) 615 failCount++; 616 } 617 618 private static void lookbehindTest() throws Exception { 619 //Positive 620 check("(?<=%.{0,5})foo\\d", 621 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 622 new String[]{"foo1", "foo2", "foo3"}); 623 624 //boundary at end of the lookbehind sub-regex should work consistently 625 //with the boundary just after the lookbehind sub-regex 626 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 627 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 628 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 629 check("(?<!abc \\b)foo", "abc foo", new String[0]); 630 631 //Negative 632 check("(?<!%.{0,5})foo\\d", 633 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 634 new String[] {"foo4", "foo5"}); 635 636 //Positive greedy 637 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 638 639 //Positive reluctant 640 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 641 642 //supplementary 643 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 644 new String[] {"fo\ud800\udc00o"}); 645 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 646 new String[] {"fo\ud800\udc00o"}); 647 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 648 new String[] {"fo\ud800\udc00o"}); 649 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 650 new String[] {"fo\ud800\udc00o"}); 651 report("Lookbehind"); 652 } 653 654 // This test is for 4938995 655 // Check to see if weak region boundaries are transparent to 656 // lookahead and lookbehind constructs 657 private static void boundsTest() throws Exception { 658 String fullMessage = "catdogcat"; 659 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 660 
Matcher matcher = pattern.matcher("catdogca"); 661 matcher.useTransparentBounds(true); 662 if (matcher.find()) 663 failCount++; 664 matcher.reset("atdogcat"); 665 if (matcher.find()) 666 failCount++; 667 matcher.reset(fullMessage); 668 if (!matcher.find()) 669 failCount++; 670 matcher.reset(fullMessage); 671 matcher.region(0,9); 672 if (!matcher.find()) 673 failCount++; 674 matcher.reset(fullMessage); 675 matcher.region(0,6); 676 if (!matcher.find()) 677 failCount++; 678 matcher.reset(fullMessage); 679 matcher.region(3,6); 680 if (!matcher.find()) 681 failCount++; 682 matcher.useTransparentBounds(false); 683 if (matcher.find()) 684 failCount++; 685 686 // Negative lookahead/lookbehind 687 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 688 matcher = pattern.matcher("dogcat"); 689 matcher.useTransparentBounds(true); 690 matcher.region(0,3); 691 if (matcher.find()) 692 failCount++; 693 matcher.reset("catdog"); 694 matcher.region(3,6); 695 if (matcher.find()) 696 failCount++; 697 matcher.useTransparentBounds(false); 698 matcher.reset("dogcat"); 699 matcher.region(0,3); 700 if (!matcher.find()) 701 failCount++; 702 matcher.reset("catdog"); 703 matcher.region(3,6); 704 if (!matcher.find()) 705 failCount++; 706 707 report("Region bounds transparency"); 708 } 709 710 // This test is for 4945394 711 private static void findFromTest() throws Exception { 712 String message = "This is 40 $0 message."; 713 Pattern pat = Pattern.compile("\\$0"); 714 Matcher match = pat.matcher(message); 715 if (!match.find()) 716 failCount++; 717 if (match.find()) 718 failCount++; 719 if (match.find()) 720 failCount++; 721 report("Check for alternating find"); 722 } 723 724 // This test is for 4872664 and 4892980 725 private static void negatedCharClassTest() throws Exception { 726 Pattern pattern = Pattern.compile("[^>]"); 727 Matcher matcher = pattern.matcher("\u203A"); 728 if (!matcher.matches()) 729 failCount++; 730 pattern = Pattern.compile("[^fr]"); 731 matcher = pattern.matcher("a"); 
732 if (!matcher.find()) 733 failCount++; 734 matcher.reset("\u203A"); 735 if (!matcher.find()) 736 failCount++; 737 String s = "for"; 738 String result[] = s.split("[^fr]"); 739 if (!result[0].equals("f")) 740 failCount++; 741 if (!result[1].equals("r")) 742 failCount++; 743 s = "f\u203Ar"; 744 result = s.split("[^fr]"); 745 if (!result[0].equals("f")) 746 failCount++; 747 if (!result[1].equals("r")) 748 failCount++; 749 750 // Test adding to bits, subtracting a node, then adding to bits again 751 pattern = Pattern.compile("[^f\u203Ar]"); 752 matcher = pattern.matcher("a"); 753 if (!matcher.find()) 754 failCount++; 755 matcher.reset("f"); 756 if (matcher.find()) 757 failCount++; 758 matcher.reset("\u203A"); 759 if (matcher.find()) 760 failCount++; 761 matcher.reset("r"); 762 if (matcher.find()) 763 failCount++; 764 matcher.reset("\u203B"); 765 if (!matcher.find()) 766 failCount++; 767 768 // Test subtracting a node, adding to bits, subtracting again 769 pattern = Pattern.compile("[^\u203Ar\u203B]"); 770 matcher = pattern.matcher("a"); 771 if (!matcher.find()) 772 failCount++; 773 matcher.reset("\u203A"); 774 if (matcher.find()) 775 failCount++; 776 matcher.reset("r"); 777 if (matcher.find()) 778 failCount++; 779 matcher.reset("\u203B"); 780 if (matcher.find()) 781 failCount++; 782 matcher.reset("\u203C"); 783 if (!matcher.find()) 784 failCount++; 785 786 report("Negated Character Class"); 787 } 788 789 // This test is for 4628291 790 private static void toStringTest() throws Exception { 791 Pattern pattern = Pattern.compile("b+"); 792 if (pattern.toString() != "b+") 793 failCount++; 794 Matcher matcher = pattern.matcher("aaabbbccc"); 795 String matcherString = matcher.toString(); // unspecified 796 matcher.find(); 797 matcherString = matcher.toString(); // unspecified 798 matcher.region(0,3); 799 matcherString = matcher.toString(); // unspecified 800 matcher.reset(); 801 matcherString = matcher.toString(); // unspecified 802 report("toString"); 803 } 804 805 // 
This test is for 4808962 806 private static void literalPatternTest() throws Exception { 807 int flags = Pattern.LITERAL; 808 809 Pattern pattern = Pattern.compile("abc\\t$^", flags); 810 check(pattern, "abc\\t$^", true); 811 812 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 813 check(pattern, "abc\\t$^", true); 814 815 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 816 check(pattern, "\\Qa^$bcabc\\E", true); 817 check(pattern, "a^$bcabc", false); 818 819 pattern = Pattern.compile("\\\\Q\\\\E"); 820 check(pattern, "\\Q\\E", true); 821 822 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 823 check(pattern, "abcefg\\Q\\Ehij", true); 824 825 pattern = Pattern.compile("\\\\\\Q\\\\E"); 826 check(pattern, "\\\\\\\\", true); 827 828 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 829 check(pattern, "\\Qa^$bcabc\\E", true); 830 check(pattern, "a^$bcabc", false); 831 832 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 833 check(pattern, "\\Qabc\\Edef", true); 834 check(pattern, "abcdef", false); 835 836 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 837 check(pattern, "abc\\Edef", true); 838 check(pattern, "abcdef", false); 839 840 pattern = Pattern.compile(Pattern.quote("\\E")); 841 check(pattern, "\\E", true); 842 843 pattern = Pattern.compile("((((abc.+?:)", flags); 844 check(pattern, "((((abc.+?:)", true); 845 846 flags |= Pattern.MULTILINE; 847 848 pattern = Pattern.compile("^cat$", flags); 849 check(pattern, "abc^cat$def", true); 850 check(pattern, "cat", false); 851 852 flags |= Pattern.CASE_INSENSITIVE; 853 854 pattern = Pattern.compile("abcdef", flags); 855 check(pattern, "ABCDEF", true); 856 check(pattern, "AbCdEf", true); 857 858 flags |= Pattern.DOTALL; 859 860 pattern = Pattern.compile("a...b", flags); 861 check(pattern, "A...b", true); 862 check(pattern, "Axxxb", false); 863 864 flags |= Pattern.CANON_EQ; 865 866 Pattern p = Pattern.compile("testa\u030a", flags); 867 check(pattern, "testa\u030a", false); 868 
check(pattern, "test\u00e5", false); 869 870 // Supplementary character test 871 flags = Pattern.LITERAL; 872 873 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 874 check(pattern, toSupplementaries("abc\\t$^"), true); 875 876 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 877 check(pattern, toSupplementaries("abc\\t$^"), true); 878 879 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 880 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 881 check(pattern, toSupplementaries("a^$bcabc"), false); 882 883 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 884 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 885 check(pattern, toSupplementaries("a^$bcabc"), false); 886 887 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 888 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 889 check(pattern, toSupplementaries("abcdef"), false); 890 891 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 892 check(pattern, toSupplementaries("abc\\Edef"), true); 893 check(pattern, toSupplementaries("abcdef"), false); 894 895 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 896 check(pattern, toSupplementaries("((((abc.+?:)"), true); 897 898 flags |= Pattern.MULTILINE; 899 900 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 901 check(pattern, toSupplementaries("abc^cat$def"), true); 902 check(pattern, toSupplementaries("cat"), false); 903 904 flags |= Pattern.DOTALL; 905 906 // note: this is case-sensitive. 
907 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 908 check(pattern, toSupplementaries("a...b"), true); 909 check(pattern, toSupplementaries("axxxb"), false); 910 911 flags |= Pattern.CANON_EQ; 912 913 String t = toSupplementaries("test"); 914 p = Pattern.compile(t + "a\u030a", flags); 915 check(pattern, t + "a\u030a", false); 916 check(pattern, t + "\u00e5", false); 917 918 report("Literal pattern"); 919 } 920 921 // This test is for 4803179 922 // This test is also for 4808962, replacement parts 923 private static void literalReplacementTest() throws Exception { 924 int flags = Pattern.LITERAL; 925 926 Pattern pattern = Pattern.compile("abc", flags); 927 Matcher matcher = pattern.matcher("zzzabczzz"); 928 String replaceTest = "$0"; 929 String result = matcher.replaceAll(replaceTest); 930 if (!result.equals("zzzabczzz")) 931 failCount++; 932 933 matcher.reset(); 934 String literalReplacement = matcher.quoteReplacement(replaceTest); 935 result = matcher.replaceAll(literalReplacement); 936 if (!result.equals("zzz$0zzz")) 937 failCount++; 938 939 matcher.reset(); 940 replaceTest = "\\t$\\$"; 941 literalReplacement = matcher.quoteReplacement(replaceTest); 942 result = matcher.replaceAll(literalReplacement); 943 if (!result.equals("zzz\\t$\\$zzz")) 944 failCount++; 945 946 // Supplementary character test 947 pattern = Pattern.compile(toSupplementaries("abc"), flags); 948 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 949 replaceTest = "$0"; 950 result = matcher.replaceAll(replaceTest); 951 if (!result.equals(toSupplementaries("zzzabczzz"))) 952 failCount++; 953 954 matcher.reset(); 955 literalReplacement = matcher.quoteReplacement(replaceTest); 956 result = matcher.replaceAll(literalReplacement); 957 if (!result.equals(toSupplementaries("zzz$0zzz"))) 958 failCount++; 959 960 matcher.reset(); 961 replaceTest = "\\t$\\$"; 962 literalReplacement = matcher.quoteReplacement(replaceTest); 963 result = matcher.replaceAll(literalReplacement); 964 if 
(!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 965 failCount++; 966 967 // IAE should be thrown if backslash or '$' is the last character 968 // in replacement string 969 try { 970 "\uac00".replaceAll("\uac00", "$"); 971 failCount++; 972 } catch (IllegalArgumentException iie) { 973 } catch (Exception e) { 974 failCount++; 975 } 976 try { 977 "\uac00".replaceAll("\uac00", "\\"); 978 failCount++; 979 } catch (IllegalArgumentException iie) { 980 } catch (Exception e) { 981 failCount++; 982 } 983 report("Literal replacement"); 984 } 985 986 // This test is for 4757029 987 private static void regionTest() throws Exception { 988 Pattern pattern = Pattern.compile("abc"); 989 Matcher matcher = pattern.matcher("abcdefabc"); 990 991 matcher.region(0,9); 992 if (!matcher.find()) 993 failCount++; 994 if (!matcher.find()) 995 failCount++; 996 matcher.region(0,3); 997 if (!matcher.find()) 998 failCount++; 999 matcher.region(3,6); 1000 if (matcher.find()) 1001 failCount++; 1002 matcher.region(0,2); 1003 if (matcher.find()) 1004 failCount++; 1005 1006 expectRegionFail(matcher, 1, -1); 1007 expectRegionFail(matcher, -1, -1); 1008 expectRegionFail(matcher, -1, 1); 1009 expectRegionFail(matcher, 5, 3); 1010 expectRegionFail(matcher, 5, 12); 1011 expectRegionFail(matcher, 12, 12); 1012 1013 pattern = Pattern.compile("^abc$"); 1014 matcher = pattern.matcher("zzzabczzz"); 1015 matcher.region(0,9); 1016 if (matcher.find()) 1017 failCount++; 1018 matcher.region(3,6); 1019 if (!matcher.find()) 1020 failCount++; 1021 matcher.region(3,6); 1022 matcher.useAnchoringBounds(false); 1023 if (matcher.find()) 1024 failCount++; 1025 1026 // Supplementary character test 1027 pattern = Pattern.compile(toSupplementaries("abc")); 1028 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 1029 matcher.region(0,9*2); 1030 if (!matcher.find()) 1031 failCount++; 1032 if (!matcher.find()) 1033 failCount++; 1034 matcher.region(0,3*2); 1035 if (!matcher.find()) 1036 failCount++; 1037 
matcher.region(1,3*2); 1038 if (matcher.find()) 1039 failCount++; 1040 matcher.region(3*2,6*2); 1041 if (matcher.find()) 1042 failCount++; 1043 matcher.region(0,2*2); 1044 if (matcher.find()) 1045 failCount++; 1046 matcher.region(0,2*2+1); 1047 if (matcher.find()) 1048 failCount++; 1049 1050 expectRegionFail(matcher, 1*2, -1); 1051 expectRegionFail(matcher, -1, -1); 1052 expectRegionFail(matcher, -1, 1*2); 1053 expectRegionFail(matcher, 5*2, 3*2); 1054 expectRegionFail(matcher, 5*2, 12*2); 1055 expectRegionFail(matcher, 12*2, 12*2); 1056 1057 pattern = Pattern.compile(toSupplementaries("^abc$")); 1058 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1059 matcher.region(0,9*2); 1060 if (matcher.find()) 1061 failCount++; 1062 matcher.region(3*2,6*2); 1063 if (!matcher.find()) 1064 failCount++; 1065 matcher.region(3*2+1,6*2); 1066 if (matcher.find()) 1067 failCount++; 1068 matcher.region(3*2,6*2-1); 1069 if (matcher.find()) 1070 failCount++; 1071 matcher.region(3*2,6*2); 1072 matcher.useAnchoringBounds(false); 1073 if (matcher.find()) 1074 failCount++; 1075 report("Regions"); 1076 } 1077 1078 private static void expectRegionFail(Matcher matcher, int index1, 1079 int index2) 1080 { 1081 try { 1082 matcher.region(index1, index2); 1083 failCount++; 1084 } catch (IndexOutOfBoundsException ioobe) { 1085 // Correct result 1086 } catch (IllegalStateException ise) { 1087 // Correct result 1088 } 1089 } 1090 1091 // This test is for 4803197 1092 private static void escapedSegmentTest() throws Exception { 1093 1094 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1095 check(pattern, "dir1\\dir2", true); 1096 1097 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1098 check(pattern, "dir1\\dir2\\", true); 1099 1100 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1101 check(pattern, "dir1\\dir2\\", true); 1102 1103 // Supplementary character test 1104 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1105 check(pattern, 
toSupplementaries("dir1\\dir2"), true); 1106 1107 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1108 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1109 1110 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1111 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1112 1113 report("Escaped segment"); 1114 } 1115 1116 // This test is for 4792284 1117 private static void nonCaptureRepetitionTest() throws Exception { 1118 String input = "abcdefgh;"; 1119 1120 String[] patterns = new String[] { 1121 "(?:\\w{4})+;", 1122 "(?:\\w{8})*;", 1123 "(?:\\w{2}){2,4};", 1124 "(?:\\w{4}){2,};", // only matches the 1125 ".*?(?:\\w{5})+;", // specified minimum 1126 ".*?(?:\\w{9})*;", // number of reps - OK 1127 "(?:\\w{4})+?;", // lazy repetition - OK 1128 "(?:\\w{4})++;", // possessive repetition - OK 1129 "(?:\\w{2,}?)+;", // non-deterministic - OK 1130 "(\\w{4})+;", // capturing group - OK 1131 }; 1132 1133 for (int i = 0; i < patterns.length; i++) { 1134 // Check find() 1135 check(patterns[i], 0, input, input, true); 1136 // Check matches() 1137 Pattern p = Pattern.compile(patterns[i]); 1138 Matcher m = p.matcher(input); 1139 1140 if (m.matches()) { 1141 if (!m.group(0).equals(input)) 1142 failCount++; 1143 } else { 1144 failCount++; 1145 } 1146 } 1147 1148 report("Non capturing repetition"); 1149 } 1150 1151 // This test is for 6358731 1152 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1153 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1154 Matcher matcher = pattern.matcher("abcd"); 1155 if (!matcher.matches() || 1156 matcher.group(1) != null || 1157 !matcher.group(2).equals("abcd")) { 1158 failCount++; 1159 } 1160 report("Not captured GroupCurly"); 1161 } 1162 1163 // This test is for 4706545 1164 private static void javaCharClassTest() throws Exception { 1165 for (int i=0; i<1000; i++) { 1166 char c = (char)generator.nextInt(); 1167 check("{javaLowerCase}", c, 
Character.isLowerCase(c)); 1168 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1169 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1170 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1171 check("{javaDigit}", c, Character.isDigit(c)); 1172 check("{javaDefined}", c, Character.isDefined(c)); 1173 check("{javaLetter}", c, Character.isLetter(c)); 1174 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1175 check("{javaJavaIdentifierStart}", c, 1176 Character.isJavaIdentifierStart(c)); 1177 check("{javaJavaIdentifierPart}", c, 1178 Character.isJavaIdentifierPart(c)); 1179 check("{javaUnicodeIdentifierStart}", c, 1180 Character.isUnicodeIdentifierStart(c)); 1181 check("{javaUnicodeIdentifierPart}", c, 1182 Character.isUnicodeIdentifierPart(c)); 1183 check("{javaIdentifierIgnorable}", c, 1184 Character.isIdentifierIgnorable(c)); 1185 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1186 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1187 check("{javaISOControl}", c, Character.isISOControl(c)); 1188 check("{javaMirrored}", c, Character.isMirrored(c)); 1189 1190 } 1191 1192 // Supplementary character test 1193 for (int i=0; i<1000; i++) { 1194 int c = generator.nextInt(Character.MAX_CODE_POINT 1195 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1196 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1197 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1198 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1199 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1200 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1201 check("{javaDigit}", c, Character.isDigit(c)); 1202 check("{javaDefined}", c, Character.isDefined(c)); 1203 check("{javaLetter}", c, Character.isLetter(c)); 1204 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1205 check("{javaJavaIdentifierStart}", c, 1206 Character.isJavaIdentifierStart(c)); 1207 check("{javaJavaIdentifierPart}", c, 1208 Character.isJavaIdentifierPart(c)); 1209 
check("{javaUnicodeIdentifierStart}", c, 1210 Character.isUnicodeIdentifierStart(c)); 1211 check("{javaUnicodeIdentifierPart}", c, 1212 Character.isUnicodeIdentifierPart(c)); 1213 check("{javaIdentifierIgnorable}", c, 1214 Character.isIdentifierIgnorable(c)); 1215 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1216 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1217 check("{javaISOControl}", c, Character.isISOControl(c)); 1218 check("{javaMirrored}", c, Character.isMirrored(c)); 1219 } 1220 1221 report("Java character classes"); 1222 } 1223 1224 // This test is for 4523620 1225 /* 1226 private static void numOccurrencesTest() throws Exception { 1227 Pattern pattern = Pattern.compile("aaa"); 1228 1229 if (pattern.numOccurrences("aaaaaa", false) != 2) 1230 failCount++; 1231 if (pattern.numOccurrences("aaaaaa", true) != 4) 1232 failCount++; 1233 1234 pattern = Pattern.compile("^"); 1235 if (pattern.numOccurrences("aaaaaa", false) != 1) 1236 failCount++; 1237 if (pattern.numOccurrences("aaaaaa", true) != 1) 1238 failCount++; 1239 1240 report("Number of Occurrences"); 1241 } 1242 */ 1243 1244 // This test is for 4776374 1245 private static void caretBetweenTerminatorsTest() throws Exception { 1246 int flags1 = Pattern.DOTALL; 1247 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1248 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1249 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1250 1251 check("^....", flags1, "test\ntest", "test", true); 1252 check(".....^", flags1, "test\ntest", "test", false); 1253 check(".....^", flags1, "test\n", "test", false); 1254 check("....^", flags1, "test\r\n", "test", false); 1255 1256 check("^....", flags2, "test\ntest", "test", true); 1257 check("....^", flags2, "test\ntest", "test", false); 1258 check(".....^", flags2, "test\n", "test", false); 1259 check("....^", flags2, "test\r\n", "test", false); 1260 1261 check("^....", flags3, "test\ntest", "test", true); 1262 check(".....^", flags3, 
"test\ntest", "test\n", true); 1263 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1264 check(".....^", flags3, "test\n", "test", false); 1265 check(".....^", flags3, "test\r\n", "test", false); 1266 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1267 1268 check("^....", flags4, "test\ntest", "test", true); 1269 check(".....^", flags3, "test\ntest", "test\n", true); 1270 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1271 check(".....^", flags4, "test\n", "test\n", false); 1272 check(".....^", flags4, "test\r\n", "test\r", false); 1273 1274 // Supplementary character test 1275 String t = toSupplementaries("test"); 1276 check("^....", flags1, t+"\n"+t, t, true); 1277 check(".....^", flags1, t+"\n"+t, t, false); 1278 check(".....^", flags1, t+"\n", t, false); 1279 check("....^", flags1, t+"\r\n", t, false); 1280 1281 check("^....", flags2, t+"\n"+t, t, true); 1282 check("....^", flags2, t+"\n"+t, t, false); 1283 check(".....^", flags2, t+"\n", t, false); 1284 check("....^", flags2, t+"\r\n", t, false); 1285 1286 check("^....", flags3, t+"\n"+t, t, true); 1287 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1288 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1289 check(".....^", flags3, t+"\n", t, false); 1290 check(".....^", flags3, t+"\r\n", t, false); 1291 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1292 1293 check("^....", flags4, t+"\n"+t, t, true); 1294 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1295 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1296 check(".....^", flags4, t+"\n", t+"\n", false); 1297 check(".....^", flags4, t+"\r\n", t+"\r", false); 1298 1299 report("Caret between terminators"); 1300 } 1301 1302 // This test is for 4727935 1303 private static void dollarAtEndTest() throws Exception { 1304 int flags1 = Pattern.DOTALL; 1305 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1306 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1307 1308 check("....$", flags1, 
"test\n", "test", true); 1309 check("....$", flags1, "test\r\n", "test", true); 1310 check(".....$", flags1, "test\n", "test\n", true); 1311 check(".....$", flags1, "test\u0085", "test\u0085", true); 1312 check("....$", flags1, "test\u0085", "test", true); 1313 1314 check("....$", flags2, "test\n", "test", true); 1315 check(".....$", flags2, "test\n", "test\n", true); 1316 check(".....$", flags2, "test\u0085", "test\u0085", true); 1317 check("....$", flags2, "test\u0085", "est\u0085", true); 1318 1319 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1320 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1321 check("....$blah", flags3, "test\nblah", "!!!!", false); 1322 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1323 1324 // Supplementary character test 1325 String t = toSupplementaries("test"); 1326 String b = toSupplementaries("blah"); 1327 check("....$", flags1, t+"\n", t, true); 1328 check("....$", flags1, t+"\r\n", t, true); 1329 check(".....$", flags1, t+"\n", t+"\n", true); 1330 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1331 check("....$", flags1, t+"\u0085", t, true); 1332 1333 check("....$", flags2, t+"\n", t, true); 1334 check(".....$", flags2, t+"\n", t+"\n", true); 1335 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1336 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1337 1338 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1339 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1340 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1341 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1342 1343 report("Dollar at End"); 1344 } 1345 1346 // This test is for 4711773 1347 private static void multilineDollarTest() throws Exception { 1348 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1349 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1350 matcher.find(); 1351 if (matcher.start(0) != 9) 1352 failCount++; 1353 matcher.find(); 
1354 if (matcher.start(0) != 20) 1355 failCount++; 1356 1357 // Supplementary character test 1358 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1359 matcher.find(); 1360 if (matcher.start(0) != 9*2) 1361 failCount++; 1362 matcher.find(); 1363 if (matcher.start(0) != 20*2) 1364 failCount++; 1365 1366 report("Multiline Dollar"); 1367 } 1368 1369 private static void reluctantRepetitionTest() throws Exception { 1370 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1371 check(p, "1 word word word 2", true); 1372 check(p, "1 wor wo w 2", true); 1373 check(p, "1 word word 2", true); 1374 check(p, "1 word 2", true); 1375 check(p, "1 wo w w 2", true); 1376 check(p, "1 wo w 2", true); 1377 check(p, "1 wor w 2", true); 1378 1379 p = Pattern.compile("([a-z])+?c"); 1380 Matcher m = p.matcher("ababcdefdec"); 1381 check(m, "ababc"); 1382 1383 // Supplementary character test 1384 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1385 m = p.matcher(toSupplementaries("ababcdefdec")); 1386 check(m, toSupplementaries("ababc")); 1387 1388 report("Reluctant Repetition"); 1389 } 1390 1391 private static Pattern serializedPattern(Pattern p) throws Exception { 1392 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1393 ObjectOutputStream oos = new ObjectOutputStream(baos); 1394 oos.writeObject(p); 1395 oos.close(); 1396 try (ObjectInputStream ois = new ObjectInputStream( 1397 new ByteArrayInputStream(baos.toByteArray()))) { 1398 return (Pattern)ois.readObject(); 1399 } 1400 } 1401 1402 private static void serializeTest() throws Exception { 1403 String patternStr = "(b)"; 1404 String matchStr = "b"; 1405 Pattern pattern = Pattern.compile(patternStr); 1406 Pattern serializedPattern = serializedPattern(pattern); 1407 Matcher matcher = serializedPattern.matcher(matchStr); 1408 if (!matcher.matches()) 1409 failCount++; 1410 if (matcher.groupCount() != 1) 1411 failCount++; 1412 1413 pattern = Pattern.compile("a(?-i)b", 
Pattern.CASE_INSENSITIVE); 1414 serializedPattern = serializedPattern(pattern); 1415 if (!serializedPattern.matcher("Ab").matches()) 1416 failCount++; 1417 if (serializedPattern.matcher("AB").matches()) 1418 failCount++; 1419 1420 report("Serialization"); 1421 } 1422 1423 private static void gTest() { 1424 Pattern pattern = Pattern.compile("\\G\\w"); 1425 Matcher matcher = pattern.matcher("abc#x#x"); 1426 matcher.find(); 1427 matcher.find(); 1428 matcher.find(); 1429 if (matcher.find()) 1430 failCount++; 1431 1432 pattern = Pattern.compile("\\GA*"); 1433 matcher = pattern.matcher("1A2AA3"); 1434 matcher.find(); 1435 if (matcher.find()) 1436 failCount++; 1437 1438 pattern = Pattern.compile("\\GA*"); 1439 matcher = pattern.matcher("1A2AA3"); 1440 if (!matcher.find(1)) 1441 failCount++; 1442 matcher.find(); 1443 if (matcher.find()) 1444 failCount++; 1445 1446 report("\\G"); 1447 } 1448 1449 private static void zTest() { 1450 Pattern pattern = Pattern.compile("foo\\Z"); 1451 // Positives 1452 check(pattern, "foo\u0085", true); 1453 check(pattern, "foo\u2028", true); 1454 check(pattern, "foo\u2029", true); 1455 check(pattern, "foo\n", true); 1456 check(pattern, "foo\r", true); 1457 check(pattern, "foo\r\n", true); 1458 // Negatives 1459 check(pattern, "fooo", false); 1460 check(pattern, "foo\n\r", false); 1461 1462 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1463 // Positives 1464 check(pattern, "foo", true); 1465 check(pattern, "foo\n", true); 1466 // Negatives 1467 check(pattern, "foo\r", false); 1468 check(pattern, "foo\u0085", false); 1469 check(pattern, "foo\u2028", false); 1470 check(pattern, "foo\u2029", false); 1471 1472 report("\\Z"); 1473 } 1474 1475 private static void replaceFirstTest() { 1476 Pattern pattern = Pattern.compile("(ab)(c*)"); 1477 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1478 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1479 failCount++; 1480 1481 matcher.reset("zzzabccczzzabcczzzabccczzz"); 
1482 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1483 failCount++; 1484 1485 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1486 String result = matcher.replaceFirst("$1"); 1487 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1488 failCount++; 1489 1490 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1491 result = matcher.replaceFirst("$2"); 1492 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1493 failCount++; 1494 1495 pattern = Pattern.compile("a*"); 1496 matcher = pattern.matcher("aaaaaaaaaa"); 1497 if (!matcher.replaceFirst("test").equals("test")) 1498 failCount++; 1499 1500 pattern = Pattern.compile("a+"); 1501 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1502 if (!matcher.replaceFirst("test").equals("zzztest")) 1503 failCount++; 1504 1505 // Supplementary character test 1506 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1507 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1508 if (!matcher.replaceFirst(toSupplementaries("test")) 1509 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1510 failCount++; 1511 1512 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1513 if (!matcher.replaceFirst(toSupplementaries("test")). 
1514 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1515 failCount++; 1516 1517 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1518 result = matcher.replaceFirst("$1"); 1519 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1520 failCount++; 1521 1522 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1523 result = matcher.replaceFirst("$2"); 1524 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1525 failCount++; 1526 1527 pattern = Pattern.compile(toSupplementaries("a*")); 1528 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1529 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1530 failCount++; 1531 1532 pattern = Pattern.compile(toSupplementaries("a+")); 1533 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1534 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1535 failCount++; 1536 1537 report("Replace First"); 1538 } 1539 1540 private static void unixLinesTest() { 1541 Pattern pattern = Pattern.compile(".*"); 1542 Matcher matcher = pattern.matcher("aa\u2028blah"); 1543 matcher.find(); 1544 if (!matcher.group(0).equals("aa")) 1545 failCount++; 1546 1547 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1548 matcher = pattern.matcher("aa\u2028blah"); 1549 matcher.find(); 1550 if (!matcher.group(0).equals("aa\u2028blah")) 1551 failCount++; 1552 1553 pattern = Pattern.compile("[az]$", 1554 Pattern.MULTILINE | Pattern.UNIX_LINES); 1555 matcher = pattern.matcher("aa\u2028zz"); 1556 check(matcher, "a\u2028", false); 1557 1558 // Supplementary character test 1559 pattern = Pattern.compile(".*"); 1560 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1561 matcher.find(); 1562 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1563 failCount++; 1564 1565 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1566 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 
1567 matcher.find(); 1568 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1569 failCount++; 1570 1571 pattern = Pattern.compile(toSupplementaries("[az]$"), 1572 Pattern.MULTILINE | Pattern.UNIX_LINES); 1573 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1574 check(matcher, toSupplementaries("a\u2028"), false); 1575 1576 report("Unix Lines"); 1577 } 1578 1579 private static void commentsTest() { 1580 int flags = Pattern.COMMENTS; 1581 1582 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1583 Matcher matcher = pattern.matcher("aa#aa"); 1584 if (!matcher.matches()) 1585 failCount++; 1586 1587 pattern = Pattern.compile("aa # blah", flags); 1588 matcher = pattern.matcher("aa"); 1589 if (!matcher.matches()) 1590 failCount++; 1591 1592 pattern = Pattern.compile("aa blah", flags); 1593 matcher = pattern.matcher("aablah"); 1594 if (!matcher.matches()) 1595 failCount++; 1596 1597 pattern = Pattern.compile("aa # blah blech ", flags); 1598 matcher = pattern.matcher("aa"); 1599 if (!matcher.matches()) 1600 failCount++; 1601 1602 pattern = Pattern.compile("aa # blah\n ", flags); 1603 matcher = pattern.matcher("aa"); 1604 if (!matcher.matches()) 1605 failCount++; 1606 1607 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1608 matcher = pattern.matcher("aabc"); 1609 if (!matcher.matches()) 1610 failCount++; 1611 1612 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1613 matcher = pattern.matcher("aabc"); 1614 if (!matcher.matches()) 1615 failCount++; 1616 1617 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1618 matcher = pattern.matcher("aabc#blech"); 1619 if (!matcher.matches()) 1620 failCount++; 1621 1622 // Supplementary character test 1623 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1624 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1625 if (!matcher.matches()) 1626 failCount++; 1627 1628 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1629 matcher = 
pattern.matcher(toSupplementaries("aa")); 1630 if (!matcher.matches()) 1631 failCount++; 1632 1633 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1634 matcher = pattern.matcher(toSupplementaries("aablah")); 1635 if (!matcher.matches()) 1636 failCount++; 1637 1638 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1639 matcher = pattern.matcher(toSupplementaries("aa")); 1640 if (!matcher.matches()) 1641 failCount++; 1642 1643 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1644 matcher = pattern.matcher(toSupplementaries("aa")); 1645 if (!matcher.matches()) 1646 failCount++; 1647 1648 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1649 matcher = pattern.matcher(toSupplementaries("aabc")); 1650 if (!matcher.matches()) 1651 failCount++; 1652 1653 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1654 matcher = pattern.matcher(toSupplementaries("aabc")); 1655 if (!matcher.matches()) 1656 failCount++; 1657 1658 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1659 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1660 if (!matcher.matches()) 1661 failCount++; 1662 1663 report("Comments"); 1664 } 1665 1666 private static void caseFoldingTest() { // bug 4504687 1667 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1668 Pattern pattern = Pattern.compile("aa", flags); 1669 Matcher matcher = pattern.matcher("ab"); 1670 if (matcher.matches()) 1671 failCount++; 1672 1673 pattern = Pattern.compile("aA", flags); 1674 matcher = pattern.matcher("ab"); 1675 if (matcher.matches()) 1676 failCount++; 1677 1678 pattern = Pattern.compile("aa", flags); 1679 matcher = pattern.matcher("aB"); 1680 if (matcher.matches()) 1681 failCount++; 1682 matcher = pattern.matcher("Ab"); 1683 if (matcher.matches()) 1684 failCount++; 1685 1686 // ASCII "a" 1687 // Latin-1 Supplement "a" + grave 1688 // Cyrillic "a" 1689 String[] patterns 
= new String[] { 1690 //single 1691 "a", "\u00e0", "\u0430", 1692 //slice 1693 "ab", "\u00e0\u00e1", "\u0430\u0431", 1694 //class single 1695 "[a]", "[\u00e0]", "[\u0430]", 1696 //class range 1697 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1698 //back reference 1699 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1700 }; 1701 1702 String[] texts = new String[] { 1703 "A", "\u00c0", "\u0410", 1704 "AB", "\u00c0\u00c1", "\u0410\u0411", 1705 "A", "\u00c0", "\u0410", 1706 "B", "\u00c2", "\u0411", 1707 "aA", "\u00e0\u00c0", "\u0430\u0410" 1708 }; 1709 1710 boolean[] expected = new boolean[] { 1711 true, false, false, 1712 true, false, false, 1713 true, false, false, 1714 true, false, false, 1715 true, false, false 1716 }; 1717 1718 flags = Pattern.CASE_INSENSITIVE; 1719 for (int i = 0; i < patterns.length; i++) { 1720 pattern = Pattern.compile(patterns[i], flags); 1721 matcher = pattern.matcher(texts[i]); 1722 if (matcher.matches() != expected[i]) { 1723 System.out.println("<1> Failed at " + i); 1724 failCount++; 1725 } 1726 } 1727 1728 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1729 for (int i = 0; i < patterns.length; i++) { 1730 pattern = Pattern.compile(patterns[i], flags); 1731 matcher = pattern.matcher(texts[i]); 1732 if (!matcher.matches()) { 1733 System.out.println("<2> Failed at " + i); 1734 failCount++; 1735 } 1736 } 1737 // flag unicode_case alone should do nothing 1738 flags = Pattern.UNICODE_CASE; 1739 for (int i = 0; i < patterns.length; i++) { 1740 pattern = Pattern.compile(patterns[i], flags); 1741 matcher = pattern.matcher(texts[i]); 1742 if (matcher.matches()) { 1743 System.out.println("<3> Failed at " + i); 1744 failCount++; 1745 } 1746 } 1747 1748 // Special cases: i, I, u+0131 and u+0130 1749 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1750 pattern = Pattern.compile("[h-j]+", flags); 1751 if (!pattern.matcher("\u0131\u0130").matches()) 1752 failCount++; 1753 report("Case Folding"); 1754 } 1755 1756 private static void 
appendTest() { 1757 Pattern pattern = Pattern.compile("(ab)(cd)"); 1758 Matcher matcher = pattern.matcher("abcd"); 1759 String result = matcher.replaceAll("$2$1"); 1760 if (!result.equals("cdab")) 1761 failCount++; 1762 1763 String s1 = "Swap all: first = 123, second = 456"; 1764 String s2 = "Swap one: first = 123, second = 456"; 1765 String r = "$3$2$1"; 1766 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1767 matcher = pattern.matcher(s1); 1768 1769 result = matcher.replaceAll(r); 1770 if (!result.equals("Swap all: 123 = first, 456 = second")) 1771 failCount++; 1772 1773 matcher = pattern.matcher(s2); 1774 1775 if (matcher.find()) { 1776 StringBuffer sb = new StringBuffer(); 1777 matcher.appendReplacement(sb, r); 1778 matcher.appendTail(sb); 1779 result = sb.toString(); 1780 if (!result.equals("Swap one: 123 = first, second = 456")) 1781 failCount++; 1782 } 1783 1784 // Supplementary character test 1785 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1786 matcher = pattern.matcher(toSupplementaries("abcd")); 1787 result = matcher.replaceAll("$2$1"); 1788 if (!result.equals(toSupplementaries("cdab"))) 1789 failCount++; 1790 1791 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1792 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1793 r = toSupplementaries("$3$2$1"); 1794 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1795 matcher = pattern.matcher(s1); 1796 1797 result = matcher.replaceAll(r); 1798 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1799 failCount++; 1800 1801 matcher = pattern.matcher(s2); 1802 1803 if (matcher.find()) { 1804 StringBuffer sb = new StringBuffer(); 1805 matcher.appendReplacement(sb, r); 1806 matcher.appendTail(sb); 1807 result = sb.toString(); 1808 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1809 failCount++; 1810 } 1811 report("Append"); 1812 } 1813 1814 private static void splitTest() { 1815 Pattern 
pattern = Pattern.compile(":"); 1816 String[] result = pattern.split("foo:and:boo", 2); 1817 if (!result[0].equals("foo")) 1818 failCount++; 1819 if (!result[1].equals("and:boo")) 1820 failCount++; 1821 // Supplementary character test 1822 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1823 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1824 if (!result[0].equals(toSupplementaries("foo"))) 1825 failCount++; 1826 if (!result[1].equals(toSupplementaries("andXboo"))) 1827 failCount++; 1828 1829 CharBuffer cb = CharBuffer.allocate(100); 1830 cb.put("foo:and:boo"); 1831 cb.flip(); 1832 result = pattern.split(cb); 1833 if (!result[0].equals("foo")) 1834 failCount++; 1835 if (!result[1].equals("and")) 1836 failCount++; 1837 if (!result[2].equals("boo")) 1838 failCount++; 1839 1840 // Supplementary character test 1841 CharBuffer cbs = CharBuffer.allocate(100); 1842 cbs.put(toSupplementaries("fooXandXboo")); 1843 cbs.flip(); 1844 result = patternX.split(cbs); 1845 if (!result[0].equals(toSupplementaries("foo"))) 1846 failCount++; 1847 if (!result[1].equals(toSupplementaries("and"))) 1848 failCount++; 1849 if (!result[2].equals(toSupplementaries("boo"))) 1850 failCount++; 1851 1852 String source = "0123456789"; 1853 for (int limit=-2; limit<3; limit++) { 1854 for (int x=0; x<10; x++) { 1855 result = source.split(Integer.toString(x), limit); 1856 int expectedLength = limit < 1 ? 
2 : limit; 1857 1858 if ((limit == 0) && (x == 9)) { 1859 // expected dropping of "" 1860 if (result.length != 1) 1861 failCount++; 1862 if (!result[0].equals("012345678")) { 1863 failCount++; 1864 } 1865 } else { 1866 if (result.length != expectedLength) { 1867 failCount++; 1868 } 1869 if (!result[0].equals(source.substring(0,x))) { 1870 if (limit != 1) { 1871 failCount++; 1872 } else { 1873 if (!result[0].equals(source.substring(0,10))) { 1874 failCount++; 1875 } 1876 } 1877 } 1878 if (expectedLength > 1) { // Check segment 2 1879 if (!result[1].equals(source.substring(x+1,10))) 1880 failCount++; 1881 } 1882 } 1883 } 1884 } 1885 // Check the case for no match found 1886 for (int limit=-2; limit<3; limit++) { 1887 result = source.split("e", limit); 1888 if (result.length != 1) 1889 failCount++; 1890 if (!result[0].equals(source)) 1891 failCount++; 1892 } 1893 // Check the case for limit == 0, source = ""; 1894 // split() now returns 0-length for empty source "" see #6559590 1895 source = ""; 1896 result = source.split("e", 0); 1897 if (result.length != 1) 1898 failCount++; 1899 if (!result[0].equals(source)) 1900 failCount++; 1901 1902 // Check both split() and splitAsStraem(), especially for zero-lenth 1903 // input and zero-lenth match cases 1904 String[][] input = new String[][] { 1905 { " ", "Abc Efg Hij" }, // normal non-zero-match 1906 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1907 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1908 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1909 { "(?=\\p{Lu})", "AbcEfg" }, 1910 { "(?=\\p{Lu})", "Abc" }, 1911 { " ", "" }, // zero-length input 1912 { ".*", "" }, 1913 1914 // some tests from PatternStreamTest.java 1915 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1916 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1917 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1918 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1919 { "\u56da", 
"1\u56da23\u56da456\u56da7890" }, 1920 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1921 { "\u56da", "" }, 1922 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1923 { "o", "boo:and:foo" }, 1924 { "o", "booooo:and:fooooo" }, 1925 { "o", "fooooo:" }, 1926 }; 1927 1928 String[][] expected = new String[][] { 1929 { "Abc", "Efg", "Hij" }, 1930 { "", "Abc", "Efg", "Hij" }, 1931 { "Abc", "", "Efg", "Hij" }, 1932 { "Abc", "Efg", "Hij" }, 1933 { "Abc", "Efg" }, 1934 { "Abc" }, 1935 { "" }, 1936 { "" }, 1937 1938 { "awgqwefg1fefw", "vssv1vvv1" }, 1939 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1940 { "awgqwefg", "fefw4vssv", "vvv" }, 1941 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1942 { "1", "23", "456", "7890" }, 1943 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1944 { "" }, 1945 { "This", "is", "testing", "", "with", "different", "separators" }, 1946 { "b", "", ":and:f" }, 1947 { "b", "", "", "", "", ":and:f" }, 1948 { "f", "", "", "", "", ":" }, 1949 }; 1950 for (int i = 0; i < input.length; i++) { 1951 pattern = Pattern.compile(input[i][0]); 1952 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1953 failCount++; 1954 } 1955 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1956 // array for zero-length input for now 1957 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1958 expected[i])) { 1959 failCount++; 1960 } 1961 } 1962 report("Split"); 1963 } 1964 1965 private static void negationTest() { 1966 Pattern pattern = Pattern.compile("[\\[@^]+"); 1967 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1968 if (!matcher.find()) 1969 failCount++; 1970 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1971 failCount++; 1972 pattern = Pattern.compile("[@\\[^]+"); 1973 matcher = pattern.matcher("@@@@[[[[^^^^"); 1974 if (!matcher.find()) 1975 failCount++; 1976 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1977 
failCount++; 1978 pattern = Pattern.compile("[@\\[^@]+"); 1979 matcher = pattern.matcher("@@@@[[[[^^^^"); 1980 if (!matcher.find()) 1981 failCount++; 1982 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1983 failCount++; 1984 1985 pattern = Pattern.compile("\\)"); 1986 matcher = pattern.matcher("xxx)xxx"); 1987 if (!matcher.find()) 1988 failCount++; 1989 1990 report("Negation"); 1991 } 1992 1993 private static void ampersandTest() { 1994 Pattern pattern = Pattern.compile("[&@]+"); 1995 check(pattern, "@@@@&&&&", true); 1996 1997 pattern = Pattern.compile("[@&]+"); 1998 check(pattern, "@@@@&&&&", true); 1999 2000 pattern = Pattern.compile("[@\\&]+"); 2001 check(pattern, "@@@@&&&&", true); 2002 2003 report("Ampersand"); 2004 } 2005 2006 private static void octalTest() throws Exception { 2007 Pattern pattern = Pattern.compile("\\u0007"); 2008 Matcher matcher = pattern.matcher("\u0007"); 2009 if (!matcher.matches()) 2010 failCount++; 2011 pattern = Pattern.compile("\\07"); 2012 matcher = pattern.matcher("\u0007"); 2013 if (!matcher.matches()) 2014 failCount++; 2015 pattern = Pattern.compile("\\007"); 2016 matcher = pattern.matcher("\u0007"); 2017 if (!matcher.matches()) 2018 failCount++; 2019 pattern = Pattern.compile("\\0007"); 2020 matcher = pattern.matcher("\u0007"); 2021 if (!matcher.matches()) 2022 failCount++; 2023 pattern = Pattern.compile("\\040"); 2024 matcher = pattern.matcher("\u0020"); 2025 if (!matcher.matches()) 2026 failCount++; 2027 pattern = Pattern.compile("\\0403"); 2028 matcher = pattern.matcher("\u00203"); 2029 if (!matcher.matches()) 2030 failCount++; 2031 pattern = Pattern.compile("\\0103"); 2032 matcher = pattern.matcher("\u0043"); 2033 if (!matcher.matches()) 2034 failCount++; 2035 2036 report("Octal"); 2037 } 2038 2039 private static void longPatternTest() throws Exception { 2040 try { 2041 Pattern pattern = Pattern.compile( 2042 "a 32-character-long pattern xxxx"); 2043 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 2044 
pattern = Pattern.compile("a thirty four character long regex"); 2045 StringBuffer patternToBe = new StringBuffer(101); 2046 for (int i=0; i<100; i++) 2047 patternToBe.append((char)(97 + i%26)); 2048 pattern = Pattern.compile(patternToBe.toString()); 2049 } catch (PatternSyntaxException e) { 2050 failCount++; 2051 } 2052 2053 // Supplementary character test 2054 try { 2055 Pattern pattern = Pattern.compile( 2056 toSupplementaries("a 32-character-long pattern xxxx")); 2057 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 2058 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 2059 StringBuffer patternToBe = new StringBuffer(101*2); 2060 for (int i=0; i<100; i++) 2061 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 2062 + 97 + i%26)); 2063 pattern = Pattern.compile(patternToBe.toString()); 2064 } catch (PatternSyntaxException e) { 2065 failCount++; 2066 } 2067 report("LongPattern"); 2068 } 2069 2070 private static void group0Test() throws Exception { 2071 Pattern pattern = Pattern.compile("(tes)ting"); 2072 Matcher matcher = pattern.matcher("testing"); 2073 check(matcher, "testing"); 2074 2075 matcher.reset("testing"); 2076 if (matcher.lookingAt()) { 2077 if (!matcher.group(0).equals("testing")) 2078 failCount++; 2079 } else { 2080 failCount++; 2081 } 2082 2083 matcher.reset("testing"); 2084 if (matcher.matches()) { 2085 if (!matcher.group(0).equals("testing")) 2086 failCount++; 2087 } else { 2088 failCount++; 2089 } 2090 2091 pattern = Pattern.compile("(tes)ting"); 2092 matcher = pattern.matcher("testing"); 2093 if (matcher.lookingAt()) { 2094 if (!matcher.group(0).equals("testing")) 2095 failCount++; 2096 } else { 2097 failCount++; 2098 } 2099 2100 pattern = Pattern.compile("^(tes)ting"); 2101 matcher = pattern.matcher("testing"); 2102 if (matcher.matches()) { 2103 if (!matcher.group(0).equals("testing")) 2104 failCount++; 2105 } else { 2106 failCount++; 2107 } 2108 2109 
// Supplementary character test 2110 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2111 matcher = pattern.matcher(toSupplementaries("testing")); 2112 check(matcher, toSupplementaries("testing")); 2113 2114 matcher.reset(toSupplementaries("testing")); 2115 if (matcher.lookingAt()) { 2116 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2117 failCount++; 2118 } else { 2119 failCount++; 2120 } 2121 2122 matcher.reset(toSupplementaries("testing")); 2123 if (matcher.matches()) { 2124 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2125 failCount++; 2126 } else { 2127 failCount++; 2128 } 2129 2130 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2131 matcher = pattern.matcher(toSupplementaries("testing")); 2132 if (matcher.lookingAt()) { 2133 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2134 failCount++; 2135 } else { 2136 failCount++; 2137 } 2138 2139 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2140 matcher = pattern.matcher(toSupplementaries("testing")); 2141 if (matcher.matches()) { 2142 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2143 failCount++; 2144 } else { 2145 failCount++; 2146 } 2147 2148 report("Group0"); 2149 } 2150 2151 private static void findIntTest() throws Exception { 2152 Pattern p = Pattern.compile("blah"); 2153 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2154 boolean result = m.find(2); 2155 if (!result) 2156 failCount++; 2157 2158 p = Pattern.compile("$"); 2159 m = p.matcher("1234567890"); 2160 result = m.find(10); 2161 if (!result) 2162 failCount++; 2163 try { 2164 result = m.find(11); 2165 failCount++; 2166 } catch (IndexOutOfBoundsException e) { 2167 // correct result 2168 } 2169 2170 // Supplementary character test 2171 p = Pattern.compile(toSupplementaries("blah")); 2172 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2173 result = m.find(2); 2174 if (!result) 2175 failCount++; 2176 2177 report("FindInt"); 2178 } 2179 2180 private static void 
emptyPatternTest() throws Exception { 2181 Pattern p = Pattern.compile(""); 2182 Matcher m = p.matcher("foo"); 2183 2184 // Should find empty pattern at beginning of input 2185 boolean result = m.find(); 2186 if (result != true) 2187 failCount++; 2188 if (m.start() != 0) 2189 failCount++; 2190 2191 // Should not match entire input if input is not empty 2192 m.reset(); 2193 result = m.matches(); 2194 if (result == true) 2195 failCount++; 2196 2197 try { 2198 m.start(0); 2199 failCount++; 2200 } catch (IllegalStateException e) { 2201 // Correct result 2202 } 2203 2204 // Should match entire input if input is empty 2205 m.reset(""); 2206 result = m.matches(); 2207 if (result != true) 2208 failCount++; 2209 2210 result = Pattern.matches("", ""); 2211 if (result != true) 2212 failCount++; 2213 2214 result = Pattern.matches("", "foo"); 2215 if (result == true) 2216 failCount++; 2217 report("EmptyPattern"); 2218 } 2219 2220 private static void charClassTest() throws Exception { 2221 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2222 check(pattern, "blahb]blech", true); 2223 2224 pattern = Pattern.compile("[abc[def]]"); 2225 check(pattern, "b", true); 2226 2227 // Supplementary character tests 2228 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2229 check(pattern, toSupplementaries("blahb]blech"), true); 2230 2231 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2232 check(pattern, toSupplementaries("b"), true); 2233 2234 try { 2235 // u00ff when UNICODE_CASE 2236 pattern = Pattern.compile("[ab\u00ffcd]", 2237 Pattern.CASE_INSENSITIVE| 2238 Pattern.UNICODE_CASE); 2239 check(pattern, "ab\u00ffcd", true); 2240 check(pattern, "Ab\u0178Cd", true); 2241 2242 // u00b5 when UNICODE_CASE 2243 pattern = Pattern.compile("[ab\u00b5cd]", 2244 Pattern.CASE_INSENSITIVE| 2245 Pattern.UNICODE_CASE); 2246 check(pattern, "ab\u00b5cd", true); 2247 check(pattern, "Ab\u039cCd", true); 2248 } catch (Exception e) { failCount++; } 2249 2250 /* Special cases 2251 
(1)LatinSmallLetterLongS u+017f 2252 (2)LatinSmallLetterDotlessI u+0131 2253 (3)LatineCapitalLetterIWithDotAbove u+0130 2254 (4)KelvinSign u+212a 2255 (5)AngstromSign u+212b 2256 */ 2257 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2258 pattern = Pattern.compile("[sik\u00c5]+", flags); 2259 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2260 failCount++; 2261 2262 report("CharClass"); 2263 } 2264 2265 private static void caretTest() throws Exception { 2266 Pattern pattern = Pattern.compile("\\w*"); 2267 Matcher matcher = pattern.matcher("a#bc#def##g"); 2268 check(matcher, "a"); 2269 check(matcher, ""); 2270 check(matcher, "bc"); 2271 check(matcher, ""); 2272 check(matcher, "def"); 2273 check(matcher, ""); 2274 check(matcher, ""); 2275 check(matcher, "g"); 2276 check(matcher, ""); 2277 if (matcher.find()) 2278 failCount++; 2279 2280 pattern = Pattern.compile("^\\w*"); 2281 matcher = pattern.matcher("a#bc#def##g"); 2282 check(matcher, "a"); 2283 if (matcher.find()) 2284 failCount++; 2285 2286 pattern = Pattern.compile("\\w"); 2287 matcher = pattern.matcher("abc##x"); 2288 check(matcher, "a"); 2289 check(matcher, "b"); 2290 check(matcher, "c"); 2291 check(matcher, "x"); 2292 if (matcher.find()) 2293 failCount++; 2294 2295 pattern = Pattern.compile("^\\w"); 2296 matcher = pattern.matcher("abc##x"); 2297 check(matcher, "a"); 2298 if (matcher.find()) 2299 failCount++; 2300 2301 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2302 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2303 check(matcher, "abc"); 2304 if (matcher.find()) 2305 failCount++; 2306 2307 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2308 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2309 check(matcher, "abc"); 2310 check(matcher, "jkl"); 2311 if (matcher.find()) 2312 failCount++; 2313 2314 pattern = Pattern.compile("^", Pattern.MULTILINE); 2315 matcher = pattern.matcher("this is some text"); 2316 String result = matcher.replaceAll("X"); 2317 if 
(!result.equals("Xthis is some text")) 2318 failCount++; 2319 2320 pattern = Pattern.compile("^"); 2321 matcher = pattern.matcher("this is some text"); 2322 result = matcher.replaceAll("X"); 2323 if (!result.equals("Xthis is some text")) 2324 failCount++; 2325 2326 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2327 matcher = pattern.matcher("this is some text\n"); 2328 result = matcher.replaceAll("X"); 2329 if (!result.equals("Xthis is some text\n")) 2330 failCount++; 2331 2332 report("Caret"); 2333 } 2334 2335 private static void groupCaptureTest() throws Exception { 2336 // Independent group 2337 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2338 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2339 matcher.find(); 2340 try { 2341 String blah = matcher.group(1); 2342 failCount++; 2343 } catch (IndexOutOfBoundsException ioobe) { 2344 // Good result 2345 } 2346 // Pure group 2347 pattern = Pattern.compile("x+(?:y+)z+"); 2348 matcher = pattern.matcher("xxxyyyzzz"); 2349 matcher.find(); 2350 try { 2351 String blah = matcher.group(1); 2352 failCount++; 2353 } catch (IndexOutOfBoundsException ioobe) { 2354 // Good result 2355 } 2356 2357 // Supplementary character tests 2358 // Independent group 2359 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2360 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2361 matcher.find(); 2362 try { 2363 String blah = matcher.group(1); 2364 failCount++; 2365 } catch (IndexOutOfBoundsException ioobe) { 2366 // Good result 2367 } 2368 // Pure group 2369 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2370 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2371 matcher.find(); 2372 try { 2373 String blah = matcher.group(1); 2374 failCount++; 2375 } catch (IndexOutOfBoundsException ioobe) { 2376 // Good result 2377 } 2378 2379 report("GroupCapture"); 2380 } 2381 2382 private static void backRefTest() throws Exception { 2383 Pattern pattern = Pattern.compile("(a*)bc\\1"); 
2384 check(pattern, "zzzaabcazzz", true); 2385 2386 pattern = Pattern.compile("(a*)bc\\1"); 2387 check(pattern, "zzzaabcaazzz", true); 2388 2389 pattern = Pattern.compile("(abc)(def)\\1"); 2390 check(pattern, "abcdefabc", true); 2391 2392 pattern = Pattern.compile("(abc)(def)\\3"); 2393 check(pattern, "abcdefabc", false); 2394 2395 try { 2396 for (int i = 1; i < 10; i++) { 2397 // Make sure backref 1-9 are always accepted 2398 pattern = Pattern.compile("abcdef\\" + i); 2399 // and fail to match if the target group does not exit 2400 check(pattern, "abcdef", false); 2401 } 2402 } catch(PatternSyntaxException e) { 2403 failCount++; 2404 } 2405 2406 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2407 check(pattern, "abcdefghija", false); 2408 check(pattern, "abcdefghija1", true); 2409 2410 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2411 check(pattern, "abcdefghijkk", true); 2412 2413 pattern = Pattern.compile("(a)bcdefghij\\11"); 2414 check(pattern, "abcdefghija1", true); 2415 2416 // Supplementary character tests 2417 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2418 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2419 2420 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2421 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2422 2423 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2424 check(pattern, toSupplementaries("abcdefabc"), true); 2425 2426 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2427 check(pattern, toSupplementaries("abcdefabc"), false); 2428 2429 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2430 check(pattern, toSupplementaries("abcdefghija"), false); 2431 check(pattern, toSupplementaries("abcdefghija1"), true); 2432 2433 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2434 check(pattern, toSupplementaries("abcdefghijkk"), true); 2435 2436 report("BackRef"); 
2437 } 2438 2439 /** 2440 * Unicode Technical Report #18, section 2.6 End of Line 2441 * There is no empty line to be matched in the sequence \u000D\u000A 2442 * but there is an empty line in the sequence \u000A\u000D. 2443 */ 2444 private static void anchorTest() throws Exception { 2445 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2446 Matcher m = p.matcher("blah1\r\nblah2"); 2447 m.find(); 2448 m.find(); 2449 if (!m.group().equals("blah2")) 2450 failCount++; 2451 2452 m.reset("blah1\n\rblah2"); 2453 m.find(); 2454 m.find(); 2455 m.find(); 2456 if (!m.group().equals("blah2")) 2457 failCount++; 2458 2459 // Test behavior of $ with \r\n at end of input 2460 p = Pattern.compile(".+$"); 2461 m = p.matcher("blah1\r\n"); 2462 if (!m.find()) 2463 failCount++; 2464 if (!m.group().equals("blah1")) 2465 failCount++; 2466 if (m.find()) 2467 failCount++; 2468 2469 // Test behavior of $ with \r\n at end of input in multiline 2470 p = Pattern.compile(".+$", Pattern.MULTILINE); 2471 m = p.matcher("blah1\r\n"); 2472 if (!m.find()) 2473 failCount++; 2474 if (m.find()) 2475 failCount++; 2476 2477 // Test for $ recognition of \u0085 for bug 4527731 2478 p = Pattern.compile(".+$", Pattern.MULTILINE); 2479 m = p.matcher("blah1\u0085"); 2480 if (!m.find()) 2481 failCount++; 2482 2483 // Supplementary character test 2484 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2485 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2486 m.find(); 2487 m.find(); 2488 if (!m.group().equals(toSupplementaries("blah2"))) 2489 failCount++; 2490 2491 m.reset(toSupplementaries("blah1\n\rblah2")); 2492 m.find(); 2493 m.find(); 2494 m.find(); 2495 if (!m.group().equals(toSupplementaries("blah2"))) 2496 failCount++; 2497 2498 // Test behavior of $ with \r\n at end of input 2499 p = Pattern.compile(".+$"); 2500 m = p.matcher(toSupplementaries("blah1\r\n")); 2501 if (!m.find()) 2502 failCount++; 2503 if (!m.group().equals(toSupplementaries("blah1"))) 2504 failCount++; 2505 if (m.find()) 2506 
failCount++; 2507 2508 // Test behavior of $ with \r\n at end of input in multiline 2509 p = Pattern.compile(".+$", Pattern.MULTILINE); 2510 m = p.matcher(toSupplementaries("blah1\r\n")); 2511 if (!m.find()) 2512 failCount++; 2513 if (m.find()) 2514 failCount++; 2515 2516 // Test for $ recognition of \u0085 for bug 4527731 2517 p = Pattern.compile(".+$", Pattern.MULTILINE); 2518 m = p.matcher(toSupplementaries("blah1\u0085")); 2519 if (!m.find()) 2520 failCount++; 2521 2522 report("Anchors"); 2523 } 2524 2525 /** 2526 * A basic sanity test of Matcher.lookingAt(). 2527 */ 2528 private static void lookingAtTest() throws Exception { 2529 Pattern p = Pattern.compile("(ab)(c*)"); 2530 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2531 2532 if (!m.lookingAt()) 2533 failCount++; 2534 2535 if (!m.group().equals(m.group(0))) 2536 failCount++; 2537 2538 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2539 if (m.lookingAt()) 2540 failCount++; 2541 2542 // Supplementary character test 2543 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2544 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2545 2546 if (!m.lookingAt()) 2547 failCount++; 2548 2549 if (!m.group().equals(m.group(0))) 2550 failCount++; 2551 2552 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2553 if (m.lookingAt()) 2554 failCount++; 2555 2556 report("Looking At"); 2557 } 2558 2559 /** 2560 * A basic sanity test of Matcher.matches(). 
2561 */ 2562 private static void matchesTest() throws Exception { 2563 // matches() 2564 Pattern p = Pattern.compile("ulb(c*)"); 2565 Matcher m = p.matcher("ulbcccccc"); 2566 if (!m.matches()) 2567 failCount++; 2568 2569 // find() but not matches() 2570 m.reset("zzzulbcccccc"); 2571 if (m.matches()) 2572 failCount++; 2573 2574 // lookingAt() but not matches() 2575 m.reset("ulbccccccdef"); 2576 if (m.matches()) 2577 failCount++; 2578 2579 // matches() 2580 p = Pattern.compile("a|ad"); 2581 m = p.matcher("ad"); 2582 if (!m.matches()) 2583 failCount++; 2584 2585 // Supplementary character test 2586 // matches() 2587 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2588 m = p.matcher(toSupplementaries("ulbcccccc")); 2589 if (!m.matches()) 2590 failCount++; 2591 2592 // find() but not matches() 2593 m.reset(toSupplementaries("zzzulbcccccc")); 2594 if (m.matches()) 2595 failCount++; 2596 2597 // lookingAt() but not matches() 2598 m.reset(toSupplementaries("ulbccccccdef")); 2599 if (m.matches()) 2600 failCount++; 2601 2602 // matches() 2603 p = Pattern.compile(toSupplementaries("a|ad")); 2604 m = p.matcher(toSupplementaries("ad")); 2605 if (!m.matches()) 2606 failCount++; 2607 2608 report("Matches"); 2609 } 2610 2611 /** 2612 * A basic sanity test of Pattern.matches(). 
2613 */ 2614 private static void patternMatchesTest() throws Exception { 2615 // matches() 2616 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2617 toSupplementaries("ulbcccccc"))) 2618 failCount++; 2619 2620 // find() but not matches() 2621 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2622 toSupplementaries("zzzulbcccccc"))) 2623 failCount++; 2624 2625 // lookingAt() but not matches() 2626 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2627 toSupplementaries("ulbccccccdef"))) 2628 failCount++; 2629 2630 // Supplementary character test 2631 // matches() 2632 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2633 toSupplementaries("ulbcccccc"))) 2634 failCount++; 2635 2636 // find() but not matches() 2637 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2638 toSupplementaries("zzzulbcccccc"))) 2639 failCount++; 2640 2641 // lookingAt() but not matches() 2642 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2643 toSupplementaries("ulbccccccdef"))) 2644 failCount++; 2645 2646 report("Pattern Matches"); 2647 } 2648 2649 /** 2650 * Canonical equivalence testing. Tests the ability of the engine 2651 * to match sequences that are not explicitly specified in the 2652 * pattern when they are considered equivalent by the Unicode Standard. 
2653 */ 2654 private static void ceTest() throws Exception { 2655 // Decomposed char outside char classes 2656 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2657 Matcher m = p.matcher("test\u00e5"); 2658 if (!m.matches()) 2659 failCount++; 2660 2661 m.reset("testa\u030a"); 2662 if (!m.matches()) 2663 failCount++; 2664 2665 // Composed char outside char classes 2666 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2667 m = p.matcher("test\u00e5"); 2668 if (!m.matches()) 2669 failCount++; 2670 2671 m.reset("testa\u030a"); 2672 if (!m.find()) 2673 failCount++; 2674 2675 // Decomposed char inside a char class 2676 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2677 m = p.matcher("test\u00e5"); 2678 if (!m.find()) 2679 failCount++; 2680 2681 m.reset("testa\u030a"); 2682 if (!m.find()) 2683 failCount++; 2684 2685 // Composed char inside a char class 2686 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2687 m = p.matcher("test\u00e5"); 2688 if (!m.find()) 2689 failCount++; 2690 2691 m.reset("testa\u0300"); 2692 if (!m.find()) 2693 failCount++; 2694 2695 m.reset("testa\u030a"); 2696 if (!m.find()) 2697 failCount++; 2698 2699 // Marks that cannot legally change order and be equivalent 2700 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2701 check(p, "testa\u0308\u0300", true); 2702 check(p, "testa\u0300\u0308", false); 2703 2704 // Marks that can legally change order and be equivalent 2705 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2706 check(p, "testa\u0308\u0323", true); 2707 check(p, "testa\u0323\u0308", true); 2708 2709 // Test all equivalences of the sequence a\u0308\u0323\u0300 2710 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2711 check(p, "testa\u0308\u0323\u0300", true); 2712 check(p, "testa\u0323\u0308\u0300", true); 2713 check(p, "testa\u0308\u0300\u0323", true); 2714 check(p, "test\u00e4\u0323\u0300", true); 2715 check(p, "test\u00e4\u0300\u0323", true); 2716 
2717 Object[][] data = new Object[][] { 2718 2719 // JDK-4867170 2720 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true }, 2721 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true }, 2722 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true }, 2723 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true }, 2724 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true }, 2725 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true }, 2726 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true }, 2727 2728 { "\\p{IsGreek}", "ab\u1f80cd", "f", true }, 2729 { "\\p{IsGreek}", "ab\u1f81cd", "f", true }, 2730 { "\\p{IsGreek}", "ab\u1f82cd", "f", true }, 2731 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true }, 2732 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true }, 2733 2734 // backtracking, force to match "\u1f80", instead of \u1f82" 2735 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true }, 2736 2737 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true }, 2738 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true }, 2739 2740 { "[^\u1f80-\u1f82]","\u1f81", "m", false }, 2741 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false }, 2742 { "[^\u1f01\u0345]", "\u1f81", "f", false }, 2743 2744 { "[^\u1f81]+", "\u1f80\u1f82", "f", true }, 2745 { "[\u1f80]", "ab\u1f80cd", "f", true }, 2746 { "\u1f80", "ab\u1f80cd", "f", true }, 2747 { "\u1f00\u0345\u0300", "\u1f82", "m", true }, 2748 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true }, 2749 { "\u1f82", "\u1f00\u0345\u0300", "m", true }, 2750 { "\u1f82", "\u1f80\u0300", "m", true }, 2751 2752 // JDK-7080302 # compile failed 2753 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true}, 2754 2755 // JDK-6728861, same cause as above one 2756 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true}, 2757 2758 // JDK-6995635 2759 { "(\u00e9)", "e\u0301", "m", true }, 2760 2761 // JDK-6736245 2762 // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc 2763 { "\u2ADC", "\u2ADC", "m", true}, // NFC 2764 { "\u2ADC", "\u2ADD\u0338", "m", 
true}, // NFD 2765 2766 // 4916384. 2767 // Decomposed hangul (jamos) works inside clazz 2768 { "[\u1100\u1161]", "\u1100\u1161", "m", true}, 2769 { "[\u1100\u1161]", "\uac00", "m", true}, 2770 2771 { "[\uac00]", "\u1100\u1161", "m", true}, 2772 { "[\uac00]", "\uac00", "m", true}, 2773 2774 // Decomposed hangul (jamos) 2775 { "\u1100\u1161", "\u1100\u1161", "m", true}, 2776 { "\u1100\u1161", "\uac00", "m", true}, 2777 2778 // Composed hangul 2779 { "\uac00", "\u1100\u1161", "m", true }, 2780 { "\uac00", "\uac00", "m", true }, 2781 2782 /* Need a NFDSlice to nfd the source to solve this issue 2783 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2784 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165> 2785 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2786 2787 // Decomposed supplementary outside char classes 2788 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2789 // Composed supplementary outside char classes 2790 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2791 */ 2792 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2793 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2794 2795 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true }, 2796 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2797 }; 2798 2799 int failCount = 0; 2800 for (Object[] d : data) { 2801 String pn = (String)d[0]; 2802 String tt = (String)d[1]; 2803 boolean isFind = "f".equals(((String)d[2])); 2804 boolean expected = (boolean)d[3]; 2805 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find() 2806 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches(); 2807 if (ret != expected) { 2808 failCount++; 2809 continue; 2810 } 2811 } 2812 report("Canonical Equivalence"); 2813 } 2814 2815 /** 2816 * A basic sanity test of Matcher.replaceAll(). 
2817 */ 2818 private static void globalSubstitute() throws Exception { 2819 // Global substitution with a literal 2820 Pattern p = Pattern.compile("(ab)(c*)"); 2821 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2822 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2823 failCount++; 2824 2825 m.reset("zzzabccczzzabcczzzabccczzz"); 2826 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2827 failCount++; 2828 2829 // Global substitution with groups 2830 m.reset("zzzabccczzzabcczzzabccczzz"); 2831 String result = m.replaceAll("$1"); 2832 if (!result.equals("zzzabzzzabzzzabzzz")) 2833 failCount++; 2834 2835 // Supplementary character test 2836 // Global substitution with a literal 2837 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2838 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2839 if (!m.replaceAll(toSupplementaries("test")). 2840 equals(toSupplementaries("testzzztestzzztest"))) 2841 failCount++; 2842 2843 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2844 if (!m.replaceAll(toSupplementaries("test")). 2845 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2846 failCount++; 2847 2848 // Global substitution with groups 2849 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2850 result = m.replaceAll("$1"); 2851 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2852 failCount++; 2853 2854 report("Global Substitution"); 2855 } 2856 2857 /** 2858 * Tests the usage of Matcher.appendReplacement() with literal 2859 * and group substitutions. 
2860 */ 2861 private static void stringbufferSubstitute() throws Exception { 2862 // SB substitution with literal 2863 String blah = "zzzblahzzz"; 2864 Pattern p = Pattern.compile("blah"); 2865 Matcher m = p.matcher(blah); 2866 StringBuffer result = new StringBuffer(); 2867 try { 2868 m.appendReplacement(result, "blech"); 2869 failCount++; 2870 } catch (IllegalStateException e) { 2871 } 2872 m.find(); 2873 m.appendReplacement(result, "blech"); 2874 if (!result.toString().equals("zzzblech")) 2875 failCount++; 2876 2877 m.appendTail(result); 2878 if (!result.toString().equals("zzzblechzzz")) 2879 failCount++; 2880 2881 // SB substitution with groups 2882 blah = "zzzabcdzzz"; 2883 p = Pattern.compile("(ab)(cd)*"); 2884 m = p.matcher(blah); 2885 result = new StringBuffer(); 2886 try { 2887 m.appendReplacement(result, "$1"); 2888 failCount++; 2889 } catch (IllegalStateException e) { 2890 } 2891 m.find(); 2892 m.appendReplacement(result, "$1"); 2893 if (!result.toString().equals("zzzab")) 2894 failCount++; 2895 2896 m.appendTail(result); 2897 if (!result.toString().equals("zzzabzzz")) 2898 failCount++; 2899 2900 // SB substitution with 3 groups 2901 blah = "zzzabcdcdefzzz"; 2902 p = Pattern.compile("(ab)(cd)*(ef)"); 2903 m = p.matcher(blah); 2904 result = new StringBuffer(); 2905 try { 2906 m.appendReplacement(result, "$1w$2w$3"); 2907 failCount++; 2908 } catch (IllegalStateException e) { 2909 } 2910 m.find(); 2911 m.appendReplacement(result, "$1w$2w$3"); 2912 if (!result.toString().equals("zzzabwcdwef")) 2913 failCount++; 2914 2915 m.appendTail(result); 2916 if (!result.toString().equals("zzzabwcdwefzzz")) 2917 failCount++; 2918 2919 // SB substitution with groups and three matches 2920 // skipping middle match 2921 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2922 p = Pattern.compile("(ab)(cd*)"); 2923 m = p.matcher(blah); 2924 result = new StringBuffer(); 2925 try { 2926 m.appendReplacement(result, "$1"); 2927 failCount++; 2928 } catch (IllegalStateException e) { 2929 } 2930 
m.find(); 2931 m.appendReplacement(result, "$1"); 2932 if (!result.toString().equals("zzzab")) 2933 failCount++; 2934 2935 m.find(); 2936 m.find(); 2937 m.appendReplacement(result, "$2"); 2938 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2939 failCount++; 2940 2941 m.appendTail(result); 2942 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2943 failCount++; 2944 2945 // Check to make sure escaped $ is ignored 2946 blah = "zzzabcdcdefzzz"; 2947 p = Pattern.compile("(ab)(cd)*(ef)"); 2948 m = p.matcher(blah); 2949 result = new StringBuffer(); 2950 m.find(); 2951 m.appendReplacement(result, "$1w\\$2w$3"); 2952 if (!result.toString().equals("zzzabw$2wef")) 2953 failCount++; 2954 2955 m.appendTail(result); 2956 if (!result.toString().equals("zzzabw$2wefzzz")) 2957 failCount++; 2958 2959 // Check to make sure a reference to nonexistent group causes error 2960 blah = "zzzabcdcdefzzz"; 2961 p = Pattern.compile("(ab)(cd)*(ef)"); 2962 m = p.matcher(blah); 2963 result = new StringBuffer(); 2964 m.find(); 2965 try { 2966 m.appendReplacement(result, "$1w$5w$3"); 2967 failCount++; 2968 } catch (IndexOutOfBoundsException ioobe) { 2969 // Correct result 2970 } 2971 2972 // Check double digit group references 2973 blah = "zzz123456789101112zzz"; 2974 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2975 m = p.matcher(blah); 2976 result = new StringBuffer(); 2977 m.find(); 2978 m.appendReplacement(result, "$1w$11w$3"); 2979 if (!result.toString().equals("zzz1w11w3")) 2980 failCount++; 2981 2982 // Check to make sure it backs off $15 to $1 if only three groups 2983 blah = "zzzabcdcdefzzz"; 2984 p = Pattern.compile("(ab)(cd)*(ef)"); 2985 m = p.matcher(blah); 2986 result = new StringBuffer(); 2987 m.find(); 2988 m.appendReplacement(result, "$1w$15w$3"); 2989 if (!result.toString().equals("zzzabwab5wef")) 2990 failCount++; 2991 2992 2993 // Supplementary character test 2994 // SB substitution with literal 2995 blah = toSupplementaries("zzzblahzzz"); 2996 p = 
Pattern.compile(toSupplementaries("blah")); 2997 m = p.matcher(blah); 2998 result = new StringBuffer(); 2999 try { 3000 m.appendReplacement(result, toSupplementaries("blech")); 3001 failCount++; 3002 } catch (IllegalStateException e) { 3003 } 3004 m.find(); 3005 m.appendReplacement(result, toSupplementaries("blech")); 3006 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3007 failCount++; 3008 3009 m.appendTail(result); 3010 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3011 failCount++; 3012 3013 // SB substitution with groups 3014 blah = toSupplementaries("zzzabcdzzz"); 3015 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3016 m = p.matcher(blah); 3017 result = new StringBuffer(); 3018 try { 3019 m.appendReplacement(result, "$1"); 3020 failCount++; 3021 } catch (IllegalStateException e) { 3022 } 3023 m.find(); 3024 m.appendReplacement(result, "$1"); 3025 if (!result.toString().equals(toSupplementaries("zzzab"))) 3026 failCount++; 3027 3028 m.appendTail(result); 3029 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3030 failCount++; 3031 3032 // SB substitution with 3 groups 3033 blah = toSupplementaries("zzzabcdcdefzzz"); 3034 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3035 m = p.matcher(blah); 3036 result = new StringBuffer(); 3037 try { 3038 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3039 failCount++; 3040 } catch (IllegalStateException e) { 3041 } 3042 m.find(); 3043 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3044 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3045 failCount++; 3046 3047 m.appendTail(result); 3048 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3049 failCount++; 3050 3051 // SB substitution with groups and three matches 3052 // skipping middle match 3053 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3054 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3055 m = p.matcher(blah); 3056 result = new 
StringBuffer(); 3057 try { 3058 m.appendReplacement(result, "$1"); 3059 failCount++; 3060 } catch (IllegalStateException e) { 3061 } 3062 m.find(); 3063 m.appendReplacement(result, "$1"); 3064 if (!result.toString().equals(toSupplementaries("zzzab"))) 3065 failCount++; 3066 3067 m.find(); 3068 m.find(); 3069 m.appendReplacement(result, "$2"); 3070 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3071 failCount++; 3072 3073 m.appendTail(result); 3074 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3075 failCount++; 3076 3077 // Check to make sure escaped $ is ignored 3078 blah = toSupplementaries("zzzabcdcdefzzz"); 3079 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3080 m = p.matcher(blah); 3081 result = new StringBuffer(); 3082 m.find(); 3083 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3084 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3085 failCount++; 3086 3087 m.appendTail(result); 3088 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3089 failCount++; 3090 3091 // Check to make sure a reference to nonexistent group causes error 3092 blah = toSupplementaries("zzzabcdcdefzzz"); 3093 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3094 m = p.matcher(blah); 3095 result = new StringBuffer(); 3096 m.find(); 3097 try { 3098 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3099 failCount++; 3100 } catch (IndexOutOfBoundsException ioobe) { 3101 // Correct result 3102 } 3103 3104 // Check double digit group references 3105 blah = toSupplementaries("zzz123456789101112zzz"); 3106 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3107 m = p.matcher(blah); 3108 result = new StringBuffer(); 3109 m.find(); 3110 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3111 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3112 failCount++; 3113 3114 // Check to make sure it backs off $15 to $1 if only three groups 
3115 blah = toSupplementaries("zzzabcdcdefzzz"); 3116 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3117 m = p.matcher(blah); 3118 result = new StringBuffer(); 3119 m.find(); 3120 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3121 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3122 failCount++; 3123 3124 // Check nothing has been appended into the output buffer if 3125 // the replacement string triggers IllegalArgumentException. 3126 p = Pattern.compile("(abc)"); 3127 m = p.matcher("abcd"); 3128 result = new StringBuffer(); 3129 m.find(); 3130 try { 3131 m.appendReplacement(result, ("xyz$g")); 3132 failCount++; 3133 } catch (IllegalArgumentException iae) { 3134 if (result.length() != 0) 3135 failCount++; 3136 } 3137 3138 report("SB Substitution"); 3139 } 3140 3141 /** 3142 * Tests the usage of Matcher.appendReplacement() with literal 3143 * and group substitutions. 3144 */ 3145 private static void stringbuilderSubstitute() throws Exception { 3146 // SB substitution with literal 3147 String blah = "zzzblahzzz"; 3148 Pattern p = Pattern.compile("blah"); 3149 Matcher m = p.matcher(blah); 3150 StringBuilder result = new StringBuilder(); 3151 try { 3152 m.appendReplacement(result, "blech"); 3153 failCount++; 3154 } catch (IllegalStateException e) { 3155 } 3156 m.find(); 3157 m.appendReplacement(result, "blech"); 3158 if (!result.toString().equals("zzzblech")) 3159 failCount++; 3160 3161 m.appendTail(result); 3162 if (!result.toString().equals("zzzblechzzz")) 3163 failCount++; 3164 3165 // SB substitution with groups 3166 blah = "zzzabcdzzz"; 3167 p = Pattern.compile("(ab)(cd)*"); 3168 m = p.matcher(blah); 3169 result = new StringBuilder(); 3170 try { 3171 m.appendReplacement(result, "$1"); 3172 failCount++; 3173 } catch (IllegalStateException e) { 3174 } 3175 m.find(); 3176 m.appendReplacement(result, "$1"); 3177 if (!result.toString().equals("zzzab")) 3178 failCount++; 3179 3180 m.appendTail(result); 3181 if 
(!result.toString().equals("zzzabzzz")) 3182 failCount++; 3183 3184 // SB substitution with 3 groups 3185 blah = "zzzabcdcdefzzz"; 3186 p = Pattern.compile("(ab)(cd)*(ef)"); 3187 m = p.matcher(blah); 3188 result = new StringBuilder(); 3189 try { 3190 m.appendReplacement(result, "$1w$2w$3"); 3191 failCount++; 3192 } catch (IllegalStateException e) { 3193 } 3194 m.find(); 3195 m.appendReplacement(result, "$1w$2w$3"); 3196 if (!result.toString().equals("zzzabwcdwef")) 3197 failCount++; 3198 3199 m.appendTail(result); 3200 if (!result.toString().equals("zzzabwcdwefzzz")) 3201 failCount++; 3202 3203 // SB substitution with groups and three matches 3204 // skipping middle match 3205 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3206 p = Pattern.compile("(ab)(cd*)"); 3207 m = p.matcher(blah); 3208 result = new StringBuilder(); 3209 try { 3210 m.appendReplacement(result, "$1"); 3211 failCount++; 3212 } catch (IllegalStateException e) { 3213 } 3214 m.find(); 3215 m.appendReplacement(result, "$1"); 3216 if (!result.toString().equals("zzzab")) 3217 failCount++; 3218 3219 m.find(); 3220 m.find(); 3221 m.appendReplacement(result, "$2"); 3222 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3223 failCount++; 3224 3225 m.appendTail(result); 3226 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3227 failCount++; 3228 3229 // Check to make sure escaped $ is ignored 3230 blah = "zzzabcdcdefzzz"; 3231 p = Pattern.compile("(ab)(cd)*(ef)"); 3232 m = p.matcher(blah); 3233 result = new StringBuilder(); 3234 m.find(); 3235 m.appendReplacement(result, "$1w\\$2w$3"); 3236 if (!result.toString().equals("zzzabw$2wef")) 3237 failCount++; 3238 3239 m.appendTail(result); 3240 if (!result.toString().equals("zzzabw$2wefzzz")) 3241 failCount++; 3242 3243 // Check to make sure a reference to nonexistent group causes error 3244 blah = "zzzabcdcdefzzz"; 3245 p = Pattern.compile("(ab)(cd)*(ef)"); 3246 m = p.matcher(blah); 3247 result = new StringBuilder(); 3248 m.find(); 3249 try { 3250 
m.appendReplacement(result, "$1w$5w$3"); 3251 failCount++; 3252 } catch (IndexOutOfBoundsException ioobe) { 3253 // Correct result 3254 } 3255 3256 // Check double digit group references 3257 blah = "zzz123456789101112zzz"; 3258 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3259 m = p.matcher(blah); 3260 result = new StringBuilder(); 3261 m.find(); 3262 m.appendReplacement(result, "$1w$11w$3"); 3263 if (!result.toString().equals("zzz1w11w3")) 3264 failCount++; 3265 3266 // Check to make sure it backs off $15 to $1 if only three groups 3267 blah = "zzzabcdcdefzzz"; 3268 p = Pattern.compile("(ab)(cd)*(ef)"); 3269 m = p.matcher(blah); 3270 result = new StringBuilder(); 3271 m.find(); 3272 m.appendReplacement(result, "$1w$15w$3"); 3273 if (!result.toString().equals("zzzabwab5wef")) 3274 failCount++; 3275 3276 3277 // Supplementary character test 3278 // SB substitution with literal 3279 blah = toSupplementaries("zzzblahzzz"); 3280 p = Pattern.compile(toSupplementaries("blah")); 3281 m = p.matcher(blah); 3282 result = new StringBuilder(); 3283 try { 3284 m.appendReplacement(result, toSupplementaries("blech")); 3285 failCount++; 3286 } catch (IllegalStateException e) { 3287 } 3288 m.find(); 3289 m.appendReplacement(result, toSupplementaries("blech")); 3290 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3291 failCount++; 3292 m.appendTail(result); 3293 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3294 failCount++; 3295 3296 // SB substitution with groups 3297 blah = toSupplementaries("zzzabcdzzz"); 3298 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3299 m = p.matcher(blah); 3300 result = new StringBuilder(); 3301 try { 3302 m.appendReplacement(result, "$1"); 3303 failCount++; 3304 } catch (IllegalStateException e) { 3305 } 3306 m.find(); 3307 m.appendReplacement(result, "$1"); 3308 if (!result.toString().equals(toSupplementaries("zzzab"))) 3309 failCount++; 3310 3311 m.appendTail(result); 3312 if 
(!result.toString().equals(toSupplementaries("zzzabzzz"))) 3313 failCount++; 3314 3315 // SB substitution with 3 groups 3316 blah = toSupplementaries("zzzabcdcdefzzz"); 3317 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3318 m = p.matcher(blah); 3319 result = new StringBuilder(); 3320 try { 3321 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3322 failCount++; 3323 } catch (IllegalStateException e) { 3324 } 3325 m.find(); 3326 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3327 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3328 failCount++; 3329 3330 m.appendTail(result); 3331 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3332 failCount++; 3333 3334 // SB substitution with groups and three matches 3335 // skipping middle match 3336 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3337 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3338 m = p.matcher(blah); 3339 result = new StringBuilder(); 3340 try { 3341 m.appendReplacement(result, "$1"); 3342 failCount++; 3343 } catch (IllegalStateException e) { 3344 } 3345 m.find(); 3346 m.appendReplacement(result, "$1"); 3347 if (!result.toString().equals(toSupplementaries("zzzab"))) 3348 failCount++; 3349 3350 m.find(); 3351 m.find(); 3352 m.appendReplacement(result, "$2"); 3353 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3354 failCount++; 3355 3356 m.appendTail(result); 3357 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3358 failCount++; 3359 3360 // Check to make sure escaped $ is ignored 3361 blah = toSupplementaries("zzzabcdcdefzzz"); 3362 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3363 m = p.matcher(blah); 3364 result = new StringBuilder(); 3365 m.find(); 3366 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3367 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3368 failCount++; 3369 3370 m.appendTail(result); 3371 if 
(!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3372 failCount++; 3373 3374 // Check to make sure a reference to nonexistent group causes error 3375 blah = toSupplementaries("zzzabcdcdefzzz"); 3376 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3377 m = p.matcher(blah); 3378 result = new StringBuilder(); 3379 m.find(); 3380 try { 3381 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3382 failCount++; 3383 } catch (IndexOutOfBoundsException ioobe) { 3384 // Correct result 3385 } 3386 // Check double digit group references 3387 blah = toSupplementaries("zzz123456789101112zzz"); 3388 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3389 m = p.matcher(blah); 3390 result = new StringBuilder(); 3391 m.find(); 3392 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3393 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3394 failCount++; 3395 3396 // Check to make sure it backs off $15 to $1 if only three groups 3397 blah = toSupplementaries("zzzabcdcdefzzz"); 3398 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3399 m = p.matcher(blah); 3400 result = new StringBuilder(); 3401 m.find(); 3402 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3403 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3404 failCount++; 3405 // Check nothing has been appended into the output buffer if 3406 // the replacement string triggers IllegalArgumentException. 3407 p = Pattern.compile("(abc)"); 3408 m = p.matcher("abcd"); 3409 result = new StringBuilder(); 3410 m.find(); 3411 try { 3412 m.appendReplacement(result, ("xyz$g")); 3413 failCount++; 3414 } catch (IllegalArgumentException iae) { 3415 if (result.length() != 0) 3416 failCount++; 3417 } 3418 report("SB Substitution 2"); 3419 } 3420 3421 /* 3422 * 5 groups of characters are created to make a substitution string. 
3423 * A base string will be created including random lead chars, the 3424 * substitution string, and random trailing chars. 3425 * A pattern containing the 5 groups is searched for and replaced with: 3426 * random group + random string + random group. 3427 * The results are checked for correctness. 3428 */ 3429 private static void substitutionBasher() { 3430 for (int runs = 0; runs<1000; runs++) { 3431 // Create a base string to work in 3432 int leadingChars = generator.nextInt(10); 3433 StringBuffer baseBuffer = new StringBuffer(100); 3434 String leadingString = getRandomAlphaString(leadingChars); 3435 baseBuffer.append(leadingString); 3436 3437 // Create 5 groups of random number of random chars 3438 // Create the string to substitute 3439 // Create the pattern string to search for 3440 StringBuffer bufferToSub = new StringBuffer(25); 3441 StringBuffer bufferToPat = new StringBuffer(50); 3442 String[] groups = new String[5]; 3443 for(int i=0; i<5; i++) { 3444 int aGroupSize = generator.nextInt(5)+1; 3445 groups[i] = getRandomAlphaString(aGroupSize); 3446 bufferToSub.append(groups[i]); 3447 bufferToPat.append('('); 3448 bufferToPat.append(groups[i]); 3449 bufferToPat.append(')'); 3450 } 3451 String stringToSub = bufferToSub.toString(); 3452 String pattern = bufferToPat.toString(); 3453 3454 // Place sub string into working string at random index 3455 baseBuffer.append(stringToSub); 3456 3457 // Append random chars to end 3458 int trailingChars = generator.nextInt(10); 3459 String trailingString = getRandomAlphaString(trailingChars); 3460 baseBuffer.append(trailingString); 3461 String baseString = baseBuffer.toString(); 3462 3463 // Create test pattern and matcher 3464 Pattern p = Pattern.compile(pattern); 3465 Matcher m = p.matcher(baseString); 3466 3467 // Reject candidate if pattern happens to start early 3468 m.find(); 3469 if (m.start() < leadingChars) 3470 continue; 3471 3472 // Reject candidate if more than one match 3473 if (m.find()) 3474 continue; 3475 
3476 // Construct a replacement string with : 3477 // random group + random string + random group 3478 StringBuffer bufferToRep = new StringBuffer(); 3479 int groupIndex1 = generator.nextInt(5); 3480 bufferToRep.append("$" + (groupIndex1 + 1)); 3481 String randomMidString = getRandomAlphaString(5); 3482 bufferToRep.append(randomMidString); 3483 int groupIndex2 = generator.nextInt(5); 3484 bufferToRep.append("$" + (groupIndex2 + 1)); 3485 String replacement = bufferToRep.toString(); 3486 3487 // Do the replacement 3488 String result = m.replaceAll(replacement); 3489 3490 // Construct expected result 3491 StringBuffer bufferToRes = new StringBuffer(); 3492 bufferToRes.append(leadingString); 3493 bufferToRes.append(groups[groupIndex1]); 3494 bufferToRes.append(randomMidString); 3495 bufferToRes.append(groups[groupIndex2]); 3496 bufferToRes.append(trailingString); 3497 String expectedResult = bufferToRes.toString(); 3498 3499 // Check results 3500 if (!result.equals(expectedResult)) 3501 failCount++; 3502 } 3503 3504 report("Substitution Basher"); 3505 } 3506 3507 /* 3508 * 5 groups of characters are created to make a substitution string. 3509 * A base string will be created including random lead chars, the 3510 * substitution string, and random trailing chars. 3511 * A pattern containing the 5 groups is searched for and replaced with: 3512 * random group + random string + random group. 3513 * The results are checked for correctness. 
3514 */ 3515 private static void substitutionBasher2() { 3516 for (int runs = 0; runs<1000; runs++) { 3517 // Create a base string to work in 3518 int leadingChars = generator.nextInt(10); 3519 StringBuilder baseBuffer = new StringBuilder(100); 3520 String leadingString = getRandomAlphaString(leadingChars); 3521 baseBuffer.append(leadingString); 3522 3523 // Create 5 groups of random number of random chars 3524 // Create the string to substitute 3525 // Create the pattern string to search for 3526 StringBuilder bufferToSub = new StringBuilder(25); 3527 StringBuilder bufferToPat = new StringBuilder(50); 3528 String[] groups = new String[5]; 3529 for(int i=0; i<5; i++) { 3530 int aGroupSize = generator.nextInt(5)+1; 3531 groups[i] = getRandomAlphaString(aGroupSize); 3532 bufferToSub.append(groups[i]); 3533 bufferToPat.append('('); 3534 bufferToPat.append(groups[i]); 3535 bufferToPat.append(')'); 3536 } 3537 String stringToSub = bufferToSub.toString(); 3538 String pattern = bufferToPat.toString(); 3539 3540 // Place sub string into working string at random index 3541 baseBuffer.append(stringToSub); 3542 3543 // Append random chars to end 3544 int trailingChars = generator.nextInt(10); 3545 String trailingString = getRandomAlphaString(trailingChars); 3546 baseBuffer.append(trailingString); 3547 String baseString = baseBuffer.toString(); 3548 3549 // Create test pattern and matcher 3550 Pattern p = Pattern.compile(pattern); 3551 Matcher m = p.matcher(baseString); 3552 3553 // Reject candidate if pattern happens to start early 3554 m.find(); 3555 if (m.start() < leadingChars) 3556 continue; 3557 3558 // Reject candidate if more than one match 3559 if (m.find()) 3560 continue; 3561 3562 // Construct a replacement string with : 3563 // random group + random string + random group 3564 StringBuilder bufferToRep = new StringBuilder(); 3565 int groupIndex1 = generator.nextInt(5); 3566 bufferToRep.append("$" + (groupIndex1 + 1)); 3567 String randomMidString = 
getRandomAlphaString(5); 3568 bufferToRep.append(randomMidString); 3569 int groupIndex2 = generator.nextInt(5); 3570 bufferToRep.append("$" + (groupIndex2 + 1)); 3571 String replacement = bufferToRep.toString(); 3572 3573 // Do the replacement 3574 String result = m.replaceAll(replacement); 3575 3576 // Construct expected result 3577 StringBuilder bufferToRes = new StringBuilder(); 3578 bufferToRes.append(leadingString); 3579 bufferToRes.append(groups[groupIndex1]); 3580 bufferToRes.append(randomMidString); 3581 bufferToRes.append(groups[groupIndex2]); 3582 bufferToRes.append(trailingString); 3583 String expectedResult = bufferToRes.toString(); 3584 3585 // Check results 3586 if (!result.equals(expectedResult)) { 3587 failCount++; 3588 } 3589 } 3590 3591 report("Substitution Basher 2"); 3592 } 3593 3594 /** 3595 * Checks the handling of some escape sequences that the Pattern 3596 * class should process instead of the java compiler. These are 3597 * not in the file because the escapes should be be processed 3598 * by the Pattern class when the regex is compiled. 3599 */ 3600 private static void escapes() throws Exception { 3601 Pattern p = Pattern.compile("\\043"); 3602 Matcher m = p.matcher("#"); 3603 if (!m.find()) 3604 failCount++; 3605 3606 p = Pattern.compile("\\x23"); 3607 m = p.matcher("#"); 3608 if (!m.find()) 3609 failCount++; 3610 3611 p = Pattern.compile("\\u0023"); 3612 m = p.matcher("#"); 3613 if (!m.find()) 3614 failCount++; 3615 3616 report("Escape sequences"); 3617 } 3618 3619 /** 3620 * Checks the handling of blank input situations. These 3621 * tests are incompatible with my test file format. 
3622 */ 3623 private static void blankInput() throws Exception { 3624 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3625 Matcher m = p.matcher(""); 3626 if (m.find()) 3627 failCount++; 3628 3629 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3630 m = p.matcher(""); 3631 if (!m.find()) 3632 failCount++; 3633 3634 p = Pattern.compile("abc"); 3635 m = p.matcher(""); 3636 if (m.find()) 3637 failCount++; 3638 3639 p = Pattern.compile("a*"); 3640 m = p.matcher(""); 3641 if (!m.find()) 3642 failCount++; 3643 3644 report("Blank input"); 3645 } 3646 3647 /** 3648 * Tests the Boyer-Moore pattern matching of a character sequence 3649 * on randomly generated patterns. 3650 */ 3651 private static void bm() throws Exception { 3652 doBnM('a'); 3653 report("Boyer Moore (ASCII)"); 3654 3655 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3656 report("Boyer Moore (Supplementary)"); 3657 } 3658 3659 private static void doBnM(int baseCharacter) throws Exception { 3660 int achar=0; 3661 3662 for (int i=0; i<100; i++) { 3663 // Create a short pattern to search for 3664 int patternLength = generator.nextInt(7) + 4; 3665 StringBuffer patternBuffer = new StringBuffer(patternLength); 3666 String pattern; 3667 retry: for (;;) { 3668 for (int x=0; x<patternLength; x++) { 3669 int ch = baseCharacter + generator.nextInt(26); 3670 if (Character.isSupplementaryCodePoint(ch)) { 3671 patternBuffer.append(Character.toChars(ch)); 3672 } else { 3673 patternBuffer.append((char)ch); 3674 } 3675 } 3676 pattern = patternBuffer.toString(); 3677 3678 // Avoid patterns that start and end with the same substring 3679 // See JDK-6854417 3680 for (int x=1; x < pattern.length(); x++) { 3681 if (pattern.startsWith(pattern.substring(x))) 3682 continue retry; 3683 } 3684 break; 3685 } 3686 Pattern p = Pattern.compile(pattern); 3687 3688 // Create a buffer with random ASCII chars that does 3689 // not match the sample 3690 String toSearch = null; 3691 StringBuffer s = null; 3692 Matcher m = 
p.matcher(""); 3693 do { 3694 s = new StringBuffer(100); 3695 for (int x=0; x<100; x++) { 3696 int ch = baseCharacter + generator.nextInt(26); 3697 if (Character.isSupplementaryCodePoint(ch)) { 3698 s.append(Character.toChars(ch)); 3699 } else { 3700 s.append((char)ch); 3701 } 3702 } 3703 toSearch = s.toString(); 3704 m.reset(toSearch); 3705 } while (m.find()); 3706 3707 // Insert the pattern at a random spot 3708 int insertIndex = generator.nextInt(99); 3709 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3710 insertIndex++; 3711 s = s.insert(insertIndex, pattern); 3712 toSearch = s.toString(); 3713 3714 // Make sure that the pattern is found 3715 m.reset(toSearch); 3716 if (!m.find()) 3717 failCount++; 3718 3719 // Make sure that the match text is the pattern 3720 if (!m.group().equals(pattern)) 3721 failCount++; 3722 3723 // Make sure match occured at insertion point 3724 if (m.start() != insertIndex) 3725 failCount++; 3726 } 3727 } 3728 3729 /** 3730 * Tests the matching of slices on randomly generated patterns. 3731 * The Boyer-Moore optimization is not done on these patterns 3732 * because it uses unicode case folding. 
3733 */ 3734 private static void slice() throws Exception { 3735 doSlice(Character.MAX_VALUE); 3736 report("Slice"); 3737 3738 doSlice(Character.MAX_CODE_POINT); 3739 report("Slice (Supplementary)"); 3740 } 3741 3742 private static void doSlice(int maxCharacter) throws Exception { 3743 Random generator = new Random(); 3744 int achar=0; 3745 3746 for (int i=0; i<100; i++) { 3747 // Create a short pattern to search for 3748 int patternLength = generator.nextInt(7) + 4; 3749 StringBuffer patternBuffer = new StringBuffer(patternLength); 3750 for (int x=0; x<patternLength; x++) { 3751 int randomChar = 0; 3752 while (!Character.isLetterOrDigit(randomChar)) 3753 randomChar = generator.nextInt(maxCharacter); 3754 if (Character.isSupplementaryCodePoint(randomChar)) { 3755 patternBuffer.append(Character.toChars(randomChar)); 3756 } else { 3757 patternBuffer.append((char) randomChar); 3758 } 3759 } 3760 String pattern = patternBuffer.toString(); 3761 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3762 3763 // Create a buffer with random chars that does not match the sample 3764 String toSearch = null; 3765 StringBuffer s = null; 3766 Matcher m = p.matcher(""); 3767 do { 3768 s = new StringBuffer(100); 3769 for (int x=0; x<100; x++) { 3770 int randomChar = 0; 3771 while (!Character.isLetterOrDigit(randomChar)) 3772 randomChar = generator.nextInt(maxCharacter); 3773 if (Character.isSupplementaryCodePoint(randomChar)) { 3774 s.append(Character.toChars(randomChar)); 3775 } else { 3776 s.append((char) randomChar); 3777 } 3778 } 3779 toSearch = s.toString(); 3780 m.reset(toSearch); 3781 } while (m.find()); 3782 3783 // Insert the pattern at a random spot 3784 int insertIndex = generator.nextInt(99); 3785 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3786 insertIndex++; 3787 s = s.insert(insertIndex, pattern); 3788 toSearch = s.toString(); 3789 3790 // Make sure that the pattern is found 3791 m.reset(toSearch); 3792 if (!m.find()) 3793 failCount++; 3794 3795 // 
Make sure that the match text is the pattern 3796 if (!m.group().equals(pattern)) 3797 failCount++; 3798 3799 // Make sure match occured at insertion point 3800 if (m.start() != insertIndex) 3801 failCount++; 3802 } 3803 } 3804 3805 private static void explainFailure(String pattern, String data, 3806 String expected, String actual) { 3807 System.err.println("----------------------------------------"); 3808 System.err.println("Pattern = "+pattern); 3809 System.err.println("Data = "+data); 3810 System.err.println("Expected = " + expected); 3811 System.err.println("Actual = " + actual); 3812 } 3813 3814 private static void explainFailure(String pattern, String data, 3815 Throwable t) { 3816 System.err.println("----------------------------------------"); 3817 System.err.println("Pattern = "+pattern); 3818 System.err.println("Data = "+data); 3819 t.printStackTrace(System.err); 3820 } 3821 3822 // Testing examples from a file 3823 3824 /** 3825 * Goes through the file "TestCases.txt" and creates many patterns 3826 * described in the file, matching the patterns against input lines in 3827 * the file, and comparing the results against the correct results 3828 * also found in the file. The file format is described in comments 3829 * at the head of the file. 3830 */ 3831 private static void processFile(String fileName) throws Exception { 3832 File testCases = new File(System.getProperty("test.src", "."), 3833 fileName); 3834 FileInputStream in = new FileInputStream(testCases); 3835 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3836 3837 // Process next test case. 
3838 String aLine; 3839 while((aLine = r.readLine()) != null) { 3840 // Read a line for pattern 3841 String patternString = grabLine(r); 3842 Pattern p = null; 3843 try { 3844 p = compileTestPattern(patternString); 3845 } catch (PatternSyntaxException e) { 3846 String dataString = grabLine(r); 3847 String expectedResult = grabLine(r); 3848 if (expectedResult.startsWith("error")) 3849 continue; 3850 explainFailure(patternString, dataString, e); 3851 failCount++; 3852 continue; 3853 } 3854 3855 // Read a line for input string 3856 String dataString = grabLine(r); 3857 Matcher m = p.matcher(dataString); 3858 StringBuffer result = new StringBuffer(); 3859 3860 // Check for IllegalStateExceptions before a match 3861 failCount += preMatchInvariants(m); 3862 3863 boolean found = m.find(); 3864 3865 if (found) 3866 failCount += postTrueMatchInvariants(m); 3867 else 3868 failCount += postFalseMatchInvariants(m); 3869 3870 if (found) { 3871 result.append("true "); 3872 result.append(m.group(0) + " "); 3873 } else { 3874 result.append("false "); 3875 } 3876 3877 result.append(m.groupCount()); 3878 3879 if (found) { 3880 for (int i=1; i<m.groupCount()+1; i++) 3881 if (m.group(i) != null) 3882 result.append(" " +m.group(i)); 3883 } 3884 3885 // Read a line for the expected result 3886 String expectedResult = grabLine(r); 3887 3888 if (!result.toString().equals(expectedResult)) { 3889 explainFailure(patternString, dataString, expectedResult, result.toString()); 3890 failCount++; 3891 } 3892 } 3893 3894 report(fileName); 3895 } 3896 3897 private static int preMatchInvariants(Matcher m) { 3898 int failCount = 0; 3899 try { 3900 m.start(); 3901 failCount++; 3902 } catch (IllegalStateException ise) {} 3903 try { 3904 m.end(); 3905 failCount++; 3906 } catch (IllegalStateException ise) {} 3907 try { 3908 m.group(); 3909 failCount++; 3910 } catch (IllegalStateException ise) {} 3911 return failCount; 3912 } 3913 3914 private static int postFalseMatchInvariants(Matcher m) { 3915 int 
failCount = 0; 3916 try { 3917 m.group(); 3918 failCount++; 3919 } catch (IllegalStateException ise) {} 3920 try { 3921 m.start(); 3922 failCount++; 3923 } catch (IllegalStateException ise) {} 3924 try { 3925 m.end(); 3926 failCount++; 3927 } catch (IllegalStateException ise) {} 3928 return failCount; 3929 } 3930 3931 private static int postTrueMatchInvariants(Matcher m) { 3932 int failCount = 0; 3933 //assert(m.start() = m.start(0); 3934 if (m.start() != m.start(0)) 3935 failCount++; 3936 //assert(m.end() = m.end(0); 3937 if (m.start() != m.start(0)) 3938 failCount++; 3939 //assert(m.group() = m.group(0); 3940 if (!m.group().equals(m.group(0))) 3941 failCount++; 3942 try { 3943 m.group(50); 3944 failCount++; 3945 } catch (IndexOutOfBoundsException ise) {} 3946 3947 return failCount; 3948 } 3949 3950 private static Pattern compileTestPattern(String patternString) { 3951 if (!patternString.startsWith("'")) { 3952 return Pattern.compile(patternString); 3953 } 3954 int break1 = patternString.lastIndexOf("'"); 3955 String flagString = patternString.substring( 3956 break1+1, patternString.length()); 3957 patternString = patternString.substring(1, break1); 3958 3959 if (flagString.equals("i")) 3960 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3961 3962 if (flagString.equals("m")) 3963 return Pattern.compile(patternString, Pattern.MULTILINE); 3964 3965 return Pattern.compile(patternString); 3966 } 3967 3968 /** 3969 * Reads a line from the input file. Keeps reading lines until a non 3970 * empty non comment line is read. If the line contains a \n then 3971 * these two characters are replaced by a newline char. If a \\uxxxx 3972 * sequence is read then the sequence is replaced by the unicode char. 
3973 */ 3974 private static String grabLine(BufferedReader r) throws Exception { 3975 int index = 0; 3976 String line = r.readLine(); 3977 while (line.startsWith("//") || line.length() < 1) 3978 line = r.readLine(); 3979 while ((index = line.indexOf("\\n")) != -1) { 3980 StringBuffer temp = new StringBuffer(line); 3981 temp.replace(index, index+2, "\n"); 3982 line = temp.toString(); 3983 } 3984 while ((index = line.indexOf("\\u")) != -1) { 3985 StringBuffer temp = new StringBuffer(line); 3986 String value = temp.substring(index+2, index+6); 3987 char aChar = (char)Integer.parseInt(value, 16); 3988 String unicodeChar = "" + aChar; 3989 temp.replace(index, index+6, unicodeChar); 3990 line = temp.toString(); 3991 } 3992 3993 return line; 3994 } 3995 3996 private static void check(Pattern p, String s, String g, String expected) { 3997 Matcher m = p.matcher(s); 3998 m.find(); 3999 if (!m.group(g).equals(expected) || 4000 s.charAt(m.start(g)) != expected.charAt(0) || 4001 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 4002 failCount++; 4003 } 4004 4005 private static void checkReplaceFirst(String p, String s, String r, String expected) 4006 { 4007 if (!expected.equals(Pattern.compile(p) 4008 .matcher(s) 4009 .replaceFirst(r))) 4010 failCount++; 4011 } 4012 4013 private static void checkReplaceAll(String p, String s, String r, String expected) 4014 { 4015 if (!expected.equals(Pattern.compile(p) 4016 .matcher(s) 4017 .replaceAll(r))) 4018 failCount++; 4019 } 4020 4021 private static void checkExpectedFail(String p) { 4022 try { 4023 Pattern.compile(p); 4024 } catch (PatternSyntaxException pse) { 4025 //pse.printStackTrace(); 4026 return; 4027 } 4028 failCount++; 4029 } 4030 4031 private static void checkExpectedIAE(Matcher m, String g) { 4032 m.find(); 4033 try { 4034 m.group(g); 4035 } catch (IllegalArgumentException x) { 4036 //iae.printStackTrace(); 4037 try { 4038 m.start(g); 4039 } catch (IllegalArgumentException xx) { 4040 try { 4041 m.start(g); 
4042 } catch (IllegalArgumentException xxx) { 4043 return; 4044 } 4045 } 4046 } 4047 failCount++; 4048 } 4049 4050 private static void checkExpectedNPE(Matcher m) { 4051 m.find(); 4052 try { 4053 m.group(null); 4054 } catch (NullPointerException x) { 4055 try { 4056 m.start(null); 4057 } catch (NullPointerException xx) { 4058 try { 4059 m.end(null); 4060 } catch (NullPointerException xxx) { 4061 return; 4062 } 4063 } 4064 } 4065 failCount++; 4066 } 4067 4068 private static void namedGroupCaptureTest() throws Exception { 4069 check(Pattern.compile("x+(?<gname>y+)z+"), 4070 "xxxyyyzzz", 4071 "gname", 4072 "yyy"); 4073 4074 check(Pattern.compile("x+(?<gname8>y+)z+"), 4075 "xxxyyyzzz", 4076 "gname8", 4077 "yyy"); 4078 4079 //backref 4080 Pattern pattern = Pattern.compile("(a*)bc\\1"); 4081 check(pattern, "zzzaabcazzz", true); // found "abca" 4082 4083 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 4084 "zzzaabcaazzz", true); 4085 4086 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 4087 "abcdefabc", true); 4088 4089 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 4090 "abcdefghijkk", true); 4091 4092 // Supplementary character tests 4093 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4094 toSupplementaries("zzzaabcazzz"), true); 4095 4096 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4097 toSupplementaries("zzzaabcaazzz"), true); 4098 4099 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 4100 toSupplementaries("abcdefabc"), true); 4101 4102 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 4103 "(?<gname>" + 4104 toSupplementaries("k)") + "\\k<gname>"), 4105 toSupplementaries("abcdefghijkk"), true); 4106 4107 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 4108 "xxxyyyzzzyyy", 4109 "gname", 4110 "yyy"); 4111 4112 //replaceFirst/All 4113 checkReplaceFirst("(?<gn>ab)(c*)", 4114 
"abccczzzabcczzzabccc", 4115 "${gn}", 4116 "abzzzabcczzzabccc"); 4117 4118 checkReplaceAll("(?<gn>ab)(c*)", 4119 "abccczzzabcczzzabccc", 4120 "${gn}", 4121 "abzzzabzzzab"); 4122 4123 4124 checkReplaceFirst("(?<gn>ab)(c*)", 4125 "zzzabccczzzabcczzzabccczzz", 4126 "${gn}", 4127 "zzzabzzzabcczzzabccczzz"); 4128 4129 checkReplaceAll("(?<gn>ab)(c*)", 4130 "zzzabccczzzabcczzzabccczzz", 4131 "${gn}", 4132 "zzzabzzzabzzzabzzz"); 4133 4134 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4135 "zzzabccczzzabcczzzabccczzz", 4136 "${gn2}", 4137 "zzzccczzzabcczzzabccczzz"); 4138 4139 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4140 "zzzabccczzzabcczzzabccczzz", 4141 "${gn2}", 4142 "zzzccczzzcczzzccczzz"); 4143 4144 //toSupplementaries("(ab)(c*)")); 4145 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4146 ")(?<gn2>" + toSupplementaries("c") + "*)", 4147 toSupplementaries("abccczzzabcczzzabccc"), 4148 "${gn1}", 4149 toSupplementaries("abzzzabcczzzabccc")); 4150 4151 4152 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4153 ")(?<gn2>" + toSupplementaries("c") + "*)", 4154 toSupplementaries("abccczzzabcczzzabccc"), 4155 "${gn1}", 4156 toSupplementaries("abzzzabzzzab")); 4157 4158 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4159 ")(?<gn2>" + toSupplementaries("c") + "*)", 4160 toSupplementaries("abccczzzabcczzzabccc"), 4161 "${gn2}", 4162 toSupplementaries("ccczzzabcczzzabccc")); 4163 4164 4165 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4166 ")(?<gn2>" + toSupplementaries("c") + "*)", 4167 toSupplementaries("abccczzzabcczzzabccc"), 4168 "${gn2}", 4169 toSupplementaries("ccczzzcczzzccc")); 4170 4171 checkReplaceFirst("(?<dog>Dog)AndCat", 4172 "zzzDogAndCatzzzDogAndCatzzz", 4173 "${dog}", 4174 "zzzDogzzzDogAndCatzzz"); 4175 4176 4177 checkReplaceAll("(?<dog>Dog)AndCat", 4178 "zzzDogAndCatzzzDogAndCatzzz", 4179 "${dog}", 4180 "zzzDogzzzDogzzz"); 4181 4182 // backref in Matcher & String 4183 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", 
"${gn}").equals("abefij") || 4184 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4185 failCount++; 4186 4187 // negative 4188 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4189 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4190 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4191 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4192 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4193 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4194 "gnameX"); 4195 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4196 report("NamedGroupCapture"); 4197 } 4198 4199 // This is for bug 6919132 4200 private static void nonBmpClassComplementTest() throws Exception { 4201 Pattern p = Pattern.compile("\\P{Lu}"); 4202 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4203 4204 if (m.find() && m.start() == 1) 4205 failCount++; 4206 4207 // from a unicode category 4208 p = Pattern.compile("\\P{Lu}"); 4209 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4210 if (m.find()) 4211 failCount++; 4212 if (!m.hitEnd()) 4213 failCount++; 4214 4215 // block 4216 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4217 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4218 if (m.find() && m.start() == 1) 4219 failCount++; 4220 4221 p = Pattern.compile("\\P{sc=GRANTHA}"); 4222 m = p.matcher(new String(new int[] {0x11350}, 0, 1)); 4223 if (m.find() && m.start() == 1) 4224 failCount++; 4225 4226 report("NonBmpClassComplement"); 4227 } 4228 4229 private static void unicodePropertiesTest() throws Exception { 4230 // different forms 4231 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4232 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4233 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4234 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4235 
!Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4236 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4237 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4238 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4239 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4240 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4241 failCount++; 4242 4243 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4244 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4245 Matcher lastSM = common; 4246 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4247 4248 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4249 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4250 Matcher lastBM = latin; 4251 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4252 4253 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4254 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4255 continue; // only pick couple code points, they are the same 4256 } 4257 4258 // Unicode Script 4259 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4260 Matcher m; 4261 String str = new String(Character.toChars(cp)); 4262 if (script == lastScript) { 4263 m = lastSM; 4264 m.reset(str); 4265 } else { 4266 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4267 } 4268 if (!m.matches()) { 4269 failCount++; 4270 } 4271 Matcher other = (script == Character.UnicodeScript.COMMON)? 
unknown : common; 4272 other.reset(str); 4273 if (other.matches()) { 4274 failCount++; 4275 } 4276 lastSM = m; 4277 lastScript = script; 4278 4279 // Unicode Block 4280 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4281 if (block == null) { 4282 //System.out.printf("Not a Block: cp=%x%n", cp); 4283 continue; 4284 } 4285 if (block == lastBlock) { 4286 m = lastBM; 4287 m.reset(str); 4288 } else { 4289 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4290 } 4291 if (!m.matches()) { 4292 failCount++; 4293 } 4294 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4295 other.reset(str); 4296 if (other.matches()) { 4297 failCount++; 4298 } 4299 lastBM = m; 4300 lastBlock = block; 4301 } 4302 report("unicodeProperties"); 4303 } 4304 4305 private static void unicodeHexNotationTest() throws Exception { 4306 4307 // negative 4308 checkExpectedFail("\\x{-23}"); 4309 checkExpectedFail("\\x{110000}"); 4310 checkExpectedFail("\\x{}"); 4311 checkExpectedFail("\\x{AB[ef]"); 4312 4313 // codepoint 4314 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4315 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4316 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4317 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4318 4319 // in class 4320 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4321 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4322 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4323 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4324 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4325 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4326 4327 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4328 String s = "A" + new String(Character.toChars(cp)) + "B"; 4329 String hexUTF16 = (cp <= 0xFFFF)? 
/**
 * Compares the predefined/POSIX character classes, with and without
 * UNICODE_CHARACTER_CLASS, plus binary properties and java* methods,
 * against the reference predicates for every code point in [1, 0x30000).
 * A code point contributes at most one failure: the comparison chain
 * short-circuits on the first disagreement.
 */
private static void unicodeClassesTest() throws Exception {

    // Default (ASCII-only) behavior of the POSIX classes.
    // NOTE(review): ASCII, ASCIIU and boundU are created but not used in this method.
    Matcher lower = Pattern.compile("\\p{Lower}").matcher("");
    Matcher upper = Pattern.compile("\\p{Upper}").matcher("");
    Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher("");
    Matcher alpha = Pattern.compile("\\p{Alpha}").matcher("");
    Matcher digit = Pattern.compile("\\p{Digit}").matcher("");
    Matcher alnum = Pattern.compile("\\p{Alnum}").matcher("");
    Matcher punct = Pattern.compile("\\p{Punct}").matcher("");
    Matcher graph = Pattern.compile("\\p{Graph}").matcher("");
    Matcher print = Pattern.compile("\\p{Print}").matcher("");
    Matcher blank = Pattern.compile("\\p{Blank}").matcher("");
    Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher("");
    Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
    Matcher space = Pattern.compile("\\p{Space}").matcher("");
    Matcher bound = Pattern.compile("\\b").matcher("");
    Matcher word = Pattern.compile("\\w++").matcher("");
    // UNICODE_CHARACTER_CLASS
    Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
    Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
    Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
    Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
    Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
    Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
    Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
    Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
    Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
    Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
    Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
    Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
    Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
    Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
    Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
    // embedded flag (?U)
    // (these are compiled with the explicit flag as well as the embedded one)
    Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
    Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
    Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");

    // a word run (or single word char) enclosed by word boundaries
    Matcher bwb = Pattern.compile("\\b\\w\\b").matcher("");
    Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
    Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
    // properties
    Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher("");
    Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher("");
    Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher("");
    Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
    Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher("");
    Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher("");
    Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher("");
    Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
    Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
    Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
    Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
    // javaMethod
    Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
    Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher("");
    Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher("");
    Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher("");
    // GC/C
    Matcher gcC = Pattern.compile("\\p{C}").matcher("");

    for (int cp = 1; cp < 0x30000; cp++) {
        String str = new String(Character.toChars(cp));
        int type = Character.getType(cp);
        // Each term is "reference predicate != regex result"; any true term is a failure.
        if (// lower
            POSIX_ASCII.isLower(cp) != lower.reset(str).matches() ||
            Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
            Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
            Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
            Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
            // upper
            POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() ||
            POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
            Character.isUpperCase(cp) != upperP.reset(str).matches() ||
            Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
            // alpha
            POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() ||
            POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
            Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
            Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
            // digit
            POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() ||
            Character.isDigit(cp) != digitU.reset(str).matches() ||
            // alnum
            POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() ||
            POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
            // punct
            POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() ||
            POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
            // graph
            POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() ||
            POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
            POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
            // blank
            POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
                != blank.reset(str).matches() ||
            POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
            // print
            POSIX_ASCII.isPrint(cp) != print.reset(str).matches() ||
            POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
            // cntrl
            POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() ||
            POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
            (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
            // hexdigit
            POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() ||
            POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
            // space
            POSIX_ASCII.isSpace(cp) != space.reset(str).matches() ||
            POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
            POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
            // word
            POSIX_ASCII.isWord(cp) != word.reset(str).matches() ||
            POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
            POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
            // bwordb
            POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
            POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
            // properties
            Character.isTitleCase(cp) != titleP.reset(str).matches() ||
            Character.isLetter(cp) != letterP.reset(str).matches()||
            Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
            Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
            // "==" on purpose: IsAssigned must be the opposite of UNASSIGNED
            (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
            POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
            POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() ||
            // gc_C
            (Character.CONTROL == type || Character.FORMAT == type ||
             Character.PRIVATE_USE == type || Character.SURROGATE == type ||
             Character.UNASSIGNED == type)
            != gcC.reset(str).matches()) {
            failCount++;
        }
    }

    // bounds/word align
    // Default-mode boundary positions around non-ASCII words, then whole-word
    // matches of the same words with UNICODE_CHARACTER_CLASS.
    twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
    if (!bwbU.reset("\u0180sherman\u0400").matches())
        failCount++;
    twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
    if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
        failCount++;
    twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
    if (!bwbU.reset("\u0724\u0739\u0724").matches())
        failCount++;
    if (!bwbEU.reset("\u0724\u0739\u0724").matches())
        failCount++;
    report("unicodePredefinedClasses");
}
ideogJ.reset(str).matches() || 4469 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 4470 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4471 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() || 4472 // gc_C 4473 (Character.CONTROL == type || Character.FORMAT == type || 4474 Character.PRIVATE_USE == type || Character.SURROGATE == type || 4475 Character.UNASSIGNED == type) 4476 != gcC.reset(str).matches()) { 4477 failCount++; 4478 } 4479 } 4480 4481 // bounds/word align 4482 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4483 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4484 failCount++; 4485 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4486 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4487 failCount++; 4488 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4489 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4490 failCount++; 4491 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4492 failCount++; 4493 report("unicodePredefinedClasses"); 4494 } 4495 4496 private static void unicodeCharacterNameTest() throws Exception { 4497 4498 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { 4499 if (!Character.isValidCodePoint(cp) || 4500 Character.getType(cp) == Character.UNASSIGNED) 4501 continue; 4502 String str = new String(Character.toChars(cp)); 4503 // single 4504 String p = "\\N{" + Character.getName(cp) + "}"; 4505 if (!Pattern.compile(p).matcher(str).matches()) { 4506 failCount++; 4507 } 4508 // class[c] 4509 p = "[\\N{" + Character.getName(cp) + "}]"; 4510 if (!Pattern.compile(p).matcher(str).matches()) { 4511 failCount++; 4512 } 4513 } 4514 4515 // range 4516 for (int i = 0; i < 10; i++) { 4517 int start = generator.nextInt(20); 4518 int end = start + generator.nextInt(200); 4519 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; 4520 String str; 4521 for (int cp = start; cp < end; cp++) { 4522 str = new 
String(Character.toChars(cp)); 4523 if (!Pattern.compile(p).matcher(str).matches()) { 4524 failCount++; 4525 } 4526 } 4527 str = new String(Character.toChars(end + 10)); 4528 if (Pattern.compile(p).matcher(str).matches()) { 4529 failCount++; 4530 } 4531 } 4532 4533 // slice 4534 for (int i = 0; i < 10; i++) { 4535 int n = generator.nextInt(256); 4536 int[] buf = new int[n]; 4537 StringBuffer sb = new StringBuffer(1024); 4538 for (int j = 0; j < n; j++) { 4539 int cp = generator.nextInt(1000); 4540 if (!Character.isValidCodePoint(cp) || 4541 Character.getType(cp) == Character.UNASSIGNED) 4542 cp = 0x4e00; // just use 4e00 4543 sb.append("\\N{" + Character.getName(cp) + "}"); 4544 buf[j] = cp; 4545 } 4546 String p = sb.toString(); 4547 String str = new String(buf, 0, buf.length); 4548 if (!Pattern.compile(p).matcher(str).matches()) { 4549 failCount++; 4550 } 4551 } 4552 report("unicodeCharacterName"); 4553 } 4554 4555 private static void horizontalAndVerticalWSTest() throws Exception { 4556 String hws = new String (new char[] { 4557 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4558 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4559 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4560 0x202f, 0x205f, 0x3000 }); 4561 String vws = new String (new char[] { 4562 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4563 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4564 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4565 failCount++; 4566 if (Pattern.compile("\\H").matcher(hws).find() || 4567 Pattern.compile("[\\H]").matcher(hws).find()) 4568 failCount++; 4569 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4570 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4571 failCount++; 4572 if (Pattern.compile("\\V").matcher(vws).find() || 4573 Pattern.compile("[\\V]").matcher(vws).find()) 4574 failCount++; 4575 String prefix = "abcd"; 4576 String suffix = "efgh"; 4577 String ng = "A"; 4578 for (int i = 0; i < hws.length(); i++) { 4579 String c = String.valueOf(hws.charAt(i)); 
4580 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4581 if (!m.find() || !c.equals(m.group())) 4582 failCount++; 4583 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4584 if (!m.find() || !c.equals(m.group())) 4585 failCount++; 4586 4587 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4588 if (!m.find() || !ng.equals(m.group())) 4589 failCount++; 4590 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4591 if (!m.find() || !ng.equals(m.group())) 4592 failCount++; 4593 } 4594 for (int i = 0; i < vws.length(); i++) { 4595 String c = String.valueOf(vws.charAt(i)); 4596 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4597 if (!m.find() || !c.equals(m.group())) 4598 failCount++; 4599 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4600 if (!m.find() || !c.equals(m.group())) 4601 failCount++; 4602 4603 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4604 if (!m.find() || !ng.equals(m.group())) 4605 failCount++; 4606 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4607 if (!m.find() || !ng.equals(m.group())) 4608 failCount++; 4609 } 4610 // \v in range is interpreted as 0x0B. 
This is the undocumented behavior 4611 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4612 failCount++; 4613 report("horizontalAndVerticalWSTest"); 4614 } 4615 4616 private static void linebreakTest() throws Exception { 4617 String linebreaks = new String (new char[] { 4618 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4619 String crnl = "\r\n"; 4620 if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() && 4621 Pattern.compile("\\R").matcher(crnl).matches() && 4622 Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() && 4623 Pattern.compile("\\Rabc").matcher("\rabc").matches() && 4624 Pattern.compile("\\R\\R").matcher(crnl).matches() && // backtracking 4625 Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking 4626 !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029 4627 failCount++; 4628 } 4629 report("linebreakTest"); 4630 } 4631 4632 // #7189363 4633 private static void branchTest() throws Exception { 4634 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4635 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4636 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4637 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4638 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4639 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4640 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4641 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4642 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4643 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4644 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4645 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4646 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4647 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4648 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4649 
!Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4650 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4651 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4652 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4653 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4654 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 4655 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4656 failCount++; 4657 report("branchTest"); 4658 } 4659 4660 // This test is for 8007395 4661 private static void groupCurlyNotFoundSuppTest() throws Exception { 4662 String input = "test this as \ud83d\ude0d"; 4663 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4664 "test(.)*(@[a-zA-Z.]+)", 4665 "test([^B])+(@[a-zA-Z.]+)", 4666 "test([^B])*(@[a-zA-Z.]+)", 4667 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4668 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4669 }) { 4670 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4671 .matcher(input); 4672 try { 4673 if (m.find()) { 4674 failCount++; 4675 } 4676 } catch (Exception x) { 4677 failCount++; 4678 } 4679 } 4680 report("GroupCurly NotFoundSupp"); 4681 } 4682 4683 // This test is for 8023647 4684 private static void groupCurlyBackoffTest() throws Exception { 4685 if (!"abc1c".matches("(\\w)+1\\1") || 4686 "abc11".matches("(\\w)+1\\1")) { 4687 failCount++; 4688 } 4689 report("GroupCurly backoff"); 4690 } 4691 4692 // This test is for 8012646 4693 private static void patternAsPredicate() throws Exception { 4694 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4695 4696 if (p.test("")) { 4697 failCount++; 4698 } 4699 if (!p.test("word")) { 4700 failCount++; 4701 } 4702 if (p.test("1234")) { 4703 failCount++; 4704 } 4705 if (!p.test("word1234")) { 4706 failCount++; 4707 } 4708 report("Pattern.asPredicate"); 4709 } 4710 4711 // This test is for 8184692 4712 private static void patternAsMatchPredicate() throws Exception { 4713 Predicate<String> p = 
Pattern.compile("[a-z]+").asMatchPredicate(); 4714 4715 if (p.test("")) { 4716 failCount++; 4717 } 4718 if (!p.test("word")) { 4719 failCount++; 4720 } 4721 if (p.test("1234word")) { 4722 failCount++; 4723 } 4724 if (p.test("1234")) { 4725 failCount++; 4726 } 4727 report("Pattern.asMatchPredicate"); 4728 } 4729 4730 4731 // This test is for 8035975 4732 private static void invalidFlags() throws Exception { 4733 for (int flag = 1; flag != 0; flag <<= 1) { 4734 switch (flag) { 4735 case Pattern.CASE_INSENSITIVE: 4736 case Pattern.MULTILINE: 4737 case Pattern.DOTALL: 4738 case Pattern.UNICODE_CASE: 4739 case Pattern.CANON_EQ: 4740 case Pattern.UNIX_LINES: 4741 case Pattern.LITERAL: 4742 case Pattern.UNICODE_CHARACTER_CLASS: 4743 case Pattern.COMMENTS: 4744 // valid flag, continue 4745 break; 4746 default: 4747 try { 4748 Pattern.compile(".", flag); 4749 failCount++; 4750 } catch (IllegalArgumentException expected) { 4751 } 4752 } 4753 } 4754 report("Invalid compile flags"); 4755 } 4756 4757 // This test is for 8158482 4758 private static void embeddedFlags() throws Exception { 4759 try { 4760 Pattern.compile("(?i).(?-i)."); 4761 Pattern.compile("(?m).(?-m)."); 4762 Pattern.compile("(?s).(?-s)."); 4763 Pattern.compile("(?d).(?-d)."); 4764 Pattern.compile("(?u).(?-u)."); 4765 Pattern.compile("(?c).(?-c)."); 4766 Pattern.compile("(?x).(?-x)."); 4767 Pattern.compile("(?U).(?-U)."); 4768 Pattern.compile("(?imsducxU).(?-imsducxU)."); 4769 } catch (PatternSyntaxException x) { 4770 failCount++; 4771 } 4772 report("Embedded flags"); 4773 } 4774 4775 private static void grapheme() throws Exception { 4776 Files.lines(UCDFiles.GRAPHEME_BREAK_TEST) 4777 .filter( ln -> ln.length() != 0 && !ln.startsWith("#") ) 4778 .forEach( ln -> { 4779 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); 4780 // System.out.println(str); 4781 String[] strs = ln.split("\u00f7|\u00d7"); 4782 StringBuilder src = new StringBuilder(); 4783 ArrayList<String> graphemes = new 
// hangup/timeout if go into exponential backtracking
/**
 * Runs a table of patterns that are prone to exponential backtracking.
 * Each row is { regex, input, expected result of matches() }; a row whose
 * actual result differs from the expected one counts as a failure.
 * The comment above each group of rows is the bug id it was taken from.
 *
 * NOTE(review): unlike the neighbouring tests this method makes no report
 * call, so its failures stay in failCount until some later call reports
 * them; confirm that this is intended.
 */
private static void expoBacktracking() throws Exception {

    Object[][] patternMatchers = {
        // 6328855
        { "(.*\n*)*",
          "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)",
          false },
        // 6192895
        { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
          "Hello World this is a test this is a test this is a test A",
          true },
        { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
          "Hello World this is a test this is a test this is a test \u4e00 ",
          false },
        { " *([a-z0-9]+ *)+",
          "hello world this is a test this is a test this is a test A",
          false },
        // 4771934 [FIXED] #5013651?
        { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
          "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com",
          true },
        // 4866249 [FIXED]
        { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>",
          "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">",
          true },
        { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
          "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com",
          false },
        // 6345469
        { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+",
          " < br/> < / p> <p> <html> <adfasfdasdf> </p>",
          true }, // --> matched
        { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+",
          " < br/> < / p> <p> <html> <adfasfdasdf> p </p>",
          false },
        // 5026912
        { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$",
          "156580451111112225588087755221111111566969655555555",
          false},
        // 6988218
        { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')",
          "'%)) order by ANGEBOT.ID",
          false}, // find
        // 6693451
        { "^(\\s*foo\\s*)*$",
          "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo",
          true },
        { "^(\\s*foo\\s*)*$",
          "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo",
          false
        },
        // 7006761
        { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true},
        { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false},
        // 8140212
        { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)",
          "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()",
          false
        },
        // classic nested-quantifier shapes, each with a matching and a non-matching input
        { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true},
        { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false},

        { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true },
        { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},

        { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true},
        { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},

        { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false},

        /* not fixed
        //8132141 ---> second level exponential backtracking
        { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*",
          "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" },
        */
    };

    // A row passes when matches() returns the expected value; a hang here
    // is how a backtracking regression would show up.
    for (Object[] pm : patternMatchers) {
        String p = (String)pm[0];
        String s = (String)pm[1];
        boolean r = (Boolean)pm[2];
        if (r != Pattern.compile(p).matcher(s).matches()) {
            failCount++;
        }
    }
}
foo foo fo", 4878 false 4879 }, 4880 // 7006761 4881 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true}, 4882 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false}, 4883 // 8140212 4884 { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)", 4885 "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()", 4886 false 4887 }, 4888 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true}, 4889 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false}, 4890 4891 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true }, 4892 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4893 4894 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true}, 4895 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4896 4897 { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false}, 4898 4899 /* not fixed 4900 //8132141 ---> second level exponential backtracking 4901 { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*", 4902 "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" }, 4903 */ 4904 }; 4905 4906 for (Object[] pm : patternMatchers) { 4907 String p = (String)pm[0]; 4908 String s = (String)pm[1]; 4909 boolean r = (Boolean)pm[2]; 4910 if (r != Pattern.compile(p).matcher(s).matches()) { 4911 failCount++; 4912 } 4913 } 4914 } 4915 4916 private static void invalidGroupName() { 4917 // Invalid start of a group name 4918 for (String groupName : List.of("", ".", "0", "\u0040", "\u005b", 4919 "\u0060", "\u007b", "\u0416")) { 4920 for (String pat : List.of("(?<" + 
groupName + ">)", 4921 "\\k<" + groupName + ">")) { 4922 try { 4923 Pattern.compile(pat); 4924 failCount++; 4925 } catch (PatternSyntaxException e) { 4926 if (!e.getMessage().startsWith( 4927 "capturing group name does not start with a" 4928 + " Latin letter")) { 4929 failCount++; 4930 } 4931 } 4932 } 4933 } 4934 // Invalid char in a group name 4935 for (String groupName : List.of("a.", "b\u0040", "c\u005b", 4936 "d\u0060", "e\u007b", "f\u0416")) { 4937 for (String pat : List.of("(?<" + groupName + ">)", 4938 "\\k<" + groupName + ">")) { 4939 try { 4940 Pattern.compile(pat); 4941 failCount++; 4942 } catch (PatternSyntaxException e) { 4943 if (!e.getMessage().startsWith( 4944 "named capturing group is missing trailing '>'")) { 4945 failCount++; 4946 } 4947 } 4948 } 4949 } 4950 report("Invalid capturing group names"); 4951 } 4952 4953 private static void illegalRepetitionRange() { 4954 // huge integers > (2^31 - 1) 4955 String n = BigInteger.valueOf(1L << 32) 4956 .toString(); 4957 String m = BigInteger.valueOf(1L << 31) 4958 .add(new BigInteger(80, generator)) 4959 .toString(); 4960 for (String rep : List.of("", "x", ".", ",", "-1", "2,1", 4961 n, n + ",", "0," + n, n + "," + m, m, m + ",", "0," + m)) { 4962 String pat = ".{" + rep + "}"; 4963 try { 4964 Pattern.compile(pat); 4965 failCount++; 4966 System.out.println("Expected to fail. 
Pattern: " + pat); 4967 } catch (PatternSyntaxException e) { 4968 if (!e.getMessage().startsWith("Illegal repetition")) { 4969 failCount++; 4970 System.out.println("Unexpected error message: " + e.getMessage()); 4971 } 4972 } catch (Throwable t) { 4973 failCount++; 4974 System.out.println("Unexpected exception: " + t); 4975 } 4976 } 4977 report("illegalRepetitionRange"); 4978 } 4979 4980 private static void surrogatePairWithCanonEq() { 4981 try { 4982 Pattern.compile("\ud834\udd21", Pattern.CANON_EQ); 4983 } catch (Throwable t) { 4984 failCount++; 4985 System.out.println("Unexpected exception: " + t); 4986 } 4987 report("surrogatePairWithCanonEq"); 4988 } 4989 4990 private static void controlCharacters() { 4991 char[] contolCharsPairs = { '@', 0x00, 4992 'A', 0x01, 'B', 0x02, 'C', 0x03, 'D', 0x04, 'E', 0x05, 'F', 0x06, 4993 'G', 0x07, 'H', 0x08, 'I', 0x09, 'J', 0x0a, 'K', 0x0b, 'L', 0x0c, 4994 'M', 0x0d, 'N', 0x0e, 'O', 0x0f, 'P', 0x10, 'Q', 0x11, 'R', 0x12, 4995 'S', 0x13, 'T', 0x14, 'U', 0x15, 'V', 0x16, 'W', 0x17, 'X', 0x18, 4996 'Y', 0x19, 'Z', 0x1a, 4997 '[', 0x1b, '\\', 0x1c, ']', 0x1d, '^', 0x1e, '_', 0x1f, '?', 0x7f }; 4998 var contolChars = new HashMap<Character, Integer>(); 4999 for (int i = 0; i < contolCharsPairs.length; i += 2) 5000 contolChars.put(Character.valueOf(contolCharsPairs[i]), 5001 Integer.valueOf(contolCharsPairs[i + 1])); 5002 5003 for (char chP = 0; chP <= 0xff + 16; ++chP) { 5004 String pat = "\\c"; 5005 if (chP < 0xff) { 5006 // \cx with ASCII x 5007 pat = "\\c" + Character.toString(chP); 5008 } else if (chP == 0xff) { 5009 // incomplete \c at the end of pattern 5010 pat = "\\c"; 5011 } else if (chP <= 0xff + 8) { 5012 // \cx with a random non-ASCII char x 5013 int x = 0xff + generator.nextInt(0xff00 + 1); 5014 pat = "\\c" + Character.toString(x); 5015 } else { 5016 // \cx with a random non-ASCII codepoint x 5017 int x = 0xff + generator.nextInt(Character.MAX_CODE_POINT + 1 - 0xff); 5018 pat = "\\c" + Character.toString(x); 5019 } 5020 
if (contolChars.containsKey(chP)) { 5021 try { 5022 Pattern p = Pattern.compile(pat); 5023 for (int chS = 0; chS < 0xff; ++chS) { 5024 Matcher m = p.matcher(Character.toString(chS)); 5025 if (m.matches() && contolChars.get(chP) != chS) { 5026 failCount++; 5027 System.out.println("Control character 0x" + Integer.toHexString(chS) + 5028 " unexpectedly matched pattern " + pat); 5029 } else if (!m.matches() && contolChars.get(chP) == chS) { 5030 failCount++; 5031 System.out.println("Control character 0x" + Integer.toHexString(chS) + 5032 " failed to match pattern " + pat); 5033 } 5034 if (m.matches() && Character.getType(chS) != Character.CONTROL) { 5035 failCount++; 5036 System.out.println("Non-control character 0x" + Integer.toHexString(chS) + 5037 " unexpectedly matched pattern " + pat); 5038 } 5039 } 5040 } catch (Throwable t) { 5041 failCount++; 5042 System.out.println("Failed to compile pattern " + pat + 5043 " due to exception: " + t); 5044 } 5045 } else { 5046 try { 5047 Pattern p = Pattern.compile(pat); 5048 failCount++; 5049 System.out.println("Expected to throw an exception when compiling " + pat); 5050 } catch (PatternSyntaxException expected) { 5051 } catch (Throwable t) { 5052 failCount++; 5053 System.out.println("Unexpected exception when compiling " + pat + 5054 " : " + t); 5055 } 5056 } 5057 } 5058 report("controlCharacters"); 5059 } 5060 }