1 /* 2 * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895 37 * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706 38 * 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812 39 * 8216332 8214245 40 * 41 * @library /test/lib 42 * @library /lib/testlibrary/java/lang 43 * @build jdk.test.lib.RandomFactory 44 * @run main RegExTest 45 * @key randomness 46 */ 47 48 import java.io.BufferedReader; 49 import java.io.ByteArrayInputStream; 50 import java.io.ByteArrayOutputStream; 51 import java.io.File; 52 import java.io.FileInputStream; 53 import java.io.InputStreamReader; 54 import java.io.ObjectInputStream; 55 import java.io.ObjectOutputStream; 56 import java.math.BigInteger; 57 import java.nio.CharBuffer; 58 import java.nio.file.Files; 59 import java.nio.file.Path; 60 import java.nio.file.Paths; 61 import java.util.ArrayList; 62 import java.util.Arrays; 63 import java.util.HashMap; 64 import java.util.List; 65 import java.util.Map; 66 import java.util.Random; 67 import java.util.Scanner; 68 import java.util.function.Function; 69 import java.util.function.Predicate; 70 import java.util.regex.Matcher; 71 import java.util.regex.MatchResult; 72 import java.util.regex.Pattern; 73 import java.util.regex.PatternSyntaxException; 74 import java.util.stream.Stream; 75 76 import jdk.test.lib.RandomFactory; 77 78 /** 79 * This is a test class created to check the operation of 80 * the Pattern and Matcher classes. 81 */ 82 public class RegExTest { 83 84 private static Random generator = RandomFactory.getRandom(); 85 private static boolean failure = false; 86 private static int failCount = 0; 87 private static String firstFailure = null; 88 89 /** 90 * Main to interpret arguments and run several tests. 91 * 92 */ 93 public static void main(String[] args) throws Exception { 94 // Most of the tests are in a file 95 processFile("TestCases.txt"); 96 //processFile("PerlCases.txt"); 97 processFile("BMPTestCases.txt"); 98 processFile("SupplementaryTestCases.txt"); 99 100 // These test many randomly generated char patterns 101 bm(); 102 slice(); 103 104 // These are hard to put into the file 105 escapes(); 106 blankInput(); 107 108 // Substitition tests on randomly generated sequences 109 globalSubstitute(); 110 stringbufferSubstitute(); 111 stringbuilderSubstitute(); 112 113 substitutionBasher(); 114 substitutionBasher2(); 115 116 // Canonical Equivalence 117 ceTest(); 118 119 // Anchors 120 anchorTest(); 121 122 // boolean match calls 123 matchesTest(); 124 lookingAtTest(); 125 126 // Pattern API 127 patternMatchesTest(); 128 129 // Misc 130 lookbehindTest(); 131 nullArgumentTest(); 132 backRefTest(); 133 groupCaptureTest(); 134 caretTest(); 135 charClassTest(); 136 emptyPatternTest(); 137 findIntTest(); 138 group0Test(); 139 longPatternTest(); 140 octalTest(); 141 ampersandTest(); 142 negationTest(); 143 splitTest(); 144 appendTest(); 145 caseFoldingTest(); 146 commentsTest(); 147 unixLinesTest(); 148 replaceFirstTest(); 149 gTest(); 150 zTest(); 151 serializeTest(); 152 reluctantRepetitionTest(); 153 multilineDollarTest(); 154 dollarAtEndTest(); 155 caretBetweenTerminatorsTest(); 156 // This RFE rejected in Tiger numOccurrencesTest(); 157 javaCharClassTest(); 158 nonCaptureRepetitionTest(); 159 notCapturedGroupCurlyMatchTest(); 160 escapedSegmentTest(); 161 literalPatternTest(); 162 literalReplacementTest(); 163 regionTest(); 164 toStringTest(); 165 negatedCharClassTest(); 166 findFromTest(); 167 boundsTest(); 168 unicodeWordBoundsTest(); 169 caretAtEndTest(); 170 wordSearchTest(); 171 hitEndTest(); 172 toMatchResultTest(); 173 toMatchResultTest2(); 174 surrogatesInClassTest(); 175 removeQEQuotingTest(); 176 namedGroupCaptureTest(); 177 nonBmpClassComplementTest(); 178 unicodePropertiesTest(); 179 unicodeHexNotationTest(); 180 unicodeClassesTest(); 181 unicodeCharacterNameTest(); 182 horizontalAndVerticalWSTest(); 183 linebreakTest(); 184 branchTest(); 185 groupCurlyNotFoundSuppTest(); 186 groupCurlyBackoffTest(); 187 patternAsPredicate(); 188 patternAsMatchPredicate(); 189 invalidFlags(); 190 embeddedFlags(); 191 grapheme(); 192 expoBacktracking(); 193 invalidGroupName(); 194 illegalRepetitionRange(); 195 surrogatePairWithCanonEq(); 196 lineBreakWithQuantifier(); 197 caseInsensitivePMatch(); 198 199 if (failure) { 200 throw new 201 RuntimeException("RegExTest failed, 1st failure: " + 202 firstFailure); 203 } else { 204 System.err.println("OKAY: All tests passed."); 205 } 206 } 207 208 // Utility functions 209 210 private static String getRandomAlphaString(int length) { 211 StringBuffer buf = new StringBuffer(length); 212 for (int i=0; i<length; i++) { 213 char randChar = (char)(97 + generator.nextInt(26)); 214 buf.append(randChar); 215 } 216 return buf.toString(); 217 } 218 219 private static void check(Matcher m, String expected) { 220 m.find(); 221 if (!m.group().equals(expected)) 222 failCount++; 223 } 224 225 private static void check(Matcher m, String result, boolean expected) { 226 m.find(); 227 if (m.group().equals(result) != expected) 228 failCount++; 229 } 230 231 private static void check(Pattern p, String s, boolean expected) { 232 if (p.matcher(s).find() != expected) 233 failCount++; 234 } 235 236 private static void check(String p, String s, boolean expected) { 237 Matcher matcher = Pattern.compile(p).matcher(s); 238 if (matcher.find() != expected) 239 failCount++; 240 } 241 242 private static void check(String p, char c, boolean expected) { 243 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 244 Pattern pattern = Pattern.compile(propertyPattern); 245 char[] ca = new char[1]; ca[0] = c; 246 Matcher matcher = pattern.matcher(new String(ca)); 247 if (!matcher.find()) 248 failCount++; 249 } 250 251 private static void check(String p, int codePoint, boolean expected) { 252 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 253 Pattern pattern = Pattern.compile(propertyPattern); 254 char[] ca = Character.toChars(codePoint); 255 Matcher matcher = pattern.matcher(new String(ca)); 256 if (!matcher.find()) 257 failCount++; 258 } 259 260 private static void check(String p, int flag, String input, String s, 261 boolean expected) 262 { 263 Pattern pattern = Pattern.compile(p, flag); 264 Matcher matcher = pattern.matcher(input); 265 if (expected) 266 check(matcher, s, expected); 267 else 268 check(pattern, input, false); 269 } 270 271 private static void report(String testName) { 272 int spacesToAdd = 30 - testName.length(); 273 StringBuffer paddedNameBuffer = new StringBuffer(testName); 274 for (int i=0; i<spacesToAdd; i++) 275 paddedNameBuffer.append(" "); 276 String paddedName = paddedNameBuffer.toString(); 277 System.err.println(paddedName + ": " + 278 (failCount==0 ? "Passed":"Failed("+failCount+")")); 279 if (failCount > 0) { 280 failure = true; 281 282 if (firstFailure == null) { 283 firstFailure = testName; 284 } 285 } 286 287 failCount = 0; 288 } 289 290 /** 291 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 292 * supplementary characters. This method does NOT fully take care 293 * of the regex syntax. 294 */ 295 private static String toSupplementaries(String s) { 296 int length = s.length(); 297 StringBuffer sb = new StringBuffer(length * 2); 298 299 for (int i = 0; i < length; ) { 300 char c = s.charAt(i++); 301 if (c == '\\') { 302 sb.append(c); 303 if (i < length) { 304 c = s.charAt(i++); 305 sb.append(c); 306 if (c == 'u') { 307 // assume no syntax error 308 sb.append(s.charAt(i++)); 309 sb.append(s.charAt(i++)); 310 sb.append(s.charAt(i++)); 311 sb.append(s.charAt(i++)); 312 } 313 } 314 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 315 sb.append('\ud800').append((char)('\udc00'+c)); 316 } else { 317 sb.append(c); 318 } 319 } 320 return sb.toString(); 321 } 322 323 // Regular expression tests 324 325 // This is for bug 6178785 326 // Test if an expected NPE gets thrown when passing in a null argument 327 private static boolean check(Runnable test) { 328 try { 329 test.run(); 330 failCount++; 331 return false; 332 } catch (NullPointerException npe) { 333 return true; 334 } 335 } 336 337 private static void nullArgumentTest() { 338 check(() -> Pattern.compile(null)); 339 check(() -> Pattern.matches(null, null)); 340 check(() -> Pattern.matches("xyz", null)); 341 check(() -> Pattern.quote(null)); 342 check(() -> Pattern.compile("xyz").split(null)); 343 check(() -> Pattern.compile("xyz").matcher(null)); 344 345 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 346 m.matches(); 347 check(() -> m.appendTail((StringBuffer) null)); 348 check(() -> m.appendTail((StringBuilder)null)); 349 check(() -> m.replaceAll((String) null)); 350 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 351 check(() -> m.replaceFirst((String)null)); 352 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 353 check(() -> m.appendReplacement((StringBuffer)null, null)); 354 check(() -> m.appendReplacement((StringBuilder)null, null)); 355 check(() -> m.reset(null)); 356 check(() -> Matcher.quoteReplacement(null)); 357 //check(() -> m.usePattern(null)); 358 359 report("Null Argument"); 360 } 361 362 // This is for bug6635133 363 // Test if surrogate pair in Unicode escapes can be handled correctly. 364 private static void surrogatesInClassTest() throws Exception { 365 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 366 Matcher matcher = pattern.matcher("\ud834\udd22"); 367 if (!matcher.find()) 368 failCount++; 369 370 report("Surrogate pair in Unicode escape"); 371 } 372 373 // This is for bug6990617 374 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 375 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 376 // char is an octal digit. 377 private static void removeQEQuotingTest() throws Exception { 378 Pattern pattern = 379 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 380 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 381 if (!matcher.find()) 382 failCount++; 383 384 report("Remove Q/E Quoting"); 385 } 386 387 // This is for bug 4988891 388 // Test toMatchResult to see that it is a copy of the Matcher 389 // that is not affected by subsequent operations on the original 390 private static void toMatchResultTest() throws Exception { 391 Pattern pattern = Pattern.compile("squid"); 392 Matcher matcher = pattern.matcher( 393 "agiantsquidofdestinyasmallsquidoffate"); 394 matcher.find(); 395 int matcherStart1 = matcher.start(); 396 MatchResult mr = matcher.toMatchResult(); 397 if (mr == matcher) 398 failCount++; 399 int resultStart1 = mr.start(); 400 if (matcherStart1 != resultStart1) 401 failCount++; 402 matcher.find(); 403 int matcherStart2 = matcher.start(); 404 int resultStart2 = mr.start(); 405 if (matcherStart2 == resultStart2) 406 failCount++; 407 if (resultStart1 != resultStart2) 408 failCount++; 409 MatchResult mr2 = matcher.toMatchResult(); 410 if (mr == mr2) 411 failCount++; 412 if (mr2.start() != matcherStart2) 413 failCount++; 414 report("toMatchResult is a copy"); 415 } 416 417 private static void checkExpectedISE(Runnable test) { 418 try { 419 test.run(); 420 failCount++; 421 } catch (IllegalStateException x) { 422 } catch (IndexOutOfBoundsException xx) { 423 failCount++; 424 } 425 } 426 427 private static void checkExpectedIOOE(Runnable test) { 428 try { 429 test.run(); 430 failCount++; 431 } catch (IndexOutOfBoundsException x) {} 432 } 433 434 // This is for bug 8074678 435 // Test the result of toMatchResult throws ISE if no match is availble 436 private static void toMatchResultTest2() throws Exception { 437 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 438 matcher.find(); 439 MatchResult mr = matcher.toMatchResult(); 440 441 checkExpectedISE(() -> mr.start()); 442 checkExpectedISE(() -> mr.start(2)); 443 checkExpectedISE(() -> mr.end()); 444 checkExpectedISE(() -> mr.end(2)); 445 checkExpectedISE(() -> mr.group()); 446 checkExpectedISE(() -> mr.group(2)); 447 448 matcher = Pattern.compile("(match)").matcher("there is a match"); 449 matcher.find(); 450 MatchResult mr2 = matcher.toMatchResult(); 451 checkExpectedIOOE(() -> mr2.start(2)); 452 checkExpectedIOOE(() -> mr2.end(2)); 453 checkExpectedIOOE(() -> mr2.group(2)); 454 455 report("toMatchResult2 appropriate exceptions"); 456 } 457 458 // This is for bug 5013885 459 // Must test a slice to see if it reports hitEnd correctly 460 private static void hitEndTest() throws Exception { 461 // Basic test of Slice node 462 Pattern p = Pattern.compile("^squidattack"); 463 Matcher m = p.matcher("squack"); 464 m.find(); 465 if (m.hitEnd()) 466 failCount++; 467 m.reset("squid"); 468 m.find(); 469 if (!m.hitEnd()) 470 failCount++; 471 472 // Test Slice, SliceA and SliceU nodes 473 for (int i=0; i<3; i++) { 474 int flags = 0; 475 if (i==1) flags = Pattern.CASE_INSENSITIVE; 476 if (i==2) flags = Pattern.UNICODE_CASE; 477 p = Pattern.compile("^abc", flags); 478 m = p.matcher("ad"); 479 m.find(); 480 if (m.hitEnd()) 481 failCount++; 482 m.reset("ab"); 483 m.find(); 484 if (!m.hitEnd()) 485 failCount++; 486 } 487 488 // Test Boyer-Moore node 489 p = Pattern.compile("catattack"); 490 m = p.matcher("attack"); 491 m.find(); 492 if (!m.hitEnd()) 493 failCount++; 494 495 p = Pattern.compile("catattack"); 496 m = p.matcher("attackattackattackcatatta"); 497 m.find(); 498 if (!m.hitEnd()) 499 failCount++; 500 501 // 8184706: Matching u+0d at EOL against \R should hit-end 502 p = Pattern.compile("...\\R"); 503 m = p.matcher("cat" + (char)0x0a); 504 m.find(); 505 if (m.hitEnd()) 506 failCount++; 507 508 m = p.matcher("cat" + (char)0x0d); 509 m.find(); 510 if (!m.hitEnd()) 511 failCount++; 512 513 m = p.matcher("cat" + (char)0x0d + (char)0x0a); 514 m.find(); 515 if (m.hitEnd()) 516 failCount++; 517 518 report("hitEnd"); 519 } 520 521 // This is for bug 4997476 522 // It is weird code submitted by customer demonstrating a regression 523 private static void wordSearchTest() throws Exception { 524 String testString = new String("word1 word2 word3"); 525 Pattern p = Pattern.compile("\\b"); 526 Matcher m = p.matcher(testString); 527 int position = 0; 528 int start = 0; 529 while (m.find(position)) { 530 start = m.start(); 531 if (start == testString.length()) 532 break; 533 if (m.find(start+1)) { 534 position = m.start(); 535 } else { 536 position = testString.length(); 537 } 538 if (testString.substring(start, position).equals(" ")) 539 continue; 540 if (!testString.substring(start, position-1).startsWith("word")) 541 failCount++; 542 } 543 report("Customer word search"); 544 } 545 546 // This is for bug 4994840 547 private static void caretAtEndTest() throws Exception { 548 // Problem only occurs with multiline patterns 549 // containing a beginning-of-line caret "^" followed 550 // by an expression that also matches the empty string. 551 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 552 Matcher matcher = pattern.matcher("\r"); 553 matcher.find(); 554 matcher.find(); 555 report("Caret at end"); 556 } 557 558 // This test is for 4979006 559 // Check to see if word boundary construct properly handles unicode 560 // non spacing marks 561 private static void unicodeWordBoundsTest() throws Exception { 562 String spaces = " "; 563 String wordChar = "a"; 564 String nsm = "\u030a"; 565 566 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 567 568 Pattern pattern = Pattern.compile("\\b"); 569 Matcher matcher = pattern.matcher(""); 570 // S=other B=word character N=non spacing mark .=word boundary 571 // SS.BB.SS 572 String input = spaces + wordChar + wordChar + spaces; 573 twoFindIndexes(input, matcher, 2, 4); 574 // SS.BBN.SS 575 input = spaces + wordChar +wordChar + nsm + spaces; 576 twoFindIndexes(input, matcher, 2, 5); 577 // SS.BN.SS 578 input = spaces + wordChar + nsm + spaces; 579 twoFindIndexes(input, matcher, 2, 4); 580 // SS.BNN.SS 581 input = spaces + wordChar + nsm + nsm + spaces; 582 twoFindIndexes(input, matcher, 2, 5); 583 // SSN.BB.SS 584 input = spaces + nsm + wordChar + wordChar + spaces; 585 twoFindIndexes(input, matcher, 3, 5); 586 // SS.BNB.SS 587 input = spaces + wordChar + nsm + wordChar + spaces; 588 twoFindIndexes(input, matcher, 2, 5); 589 // SSNNSS 590 input = spaces + nsm + nsm + spaces; 591 matcher.reset(input); 592 if (matcher.find()) 593 failCount++; 594 // SSN.BBN.SS 595 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 596 twoFindIndexes(input, matcher, 3, 6); 597 598 report("Unicode word boundary"); 599 } 600 601 private static void twoFindIndexes(String input, Matcher matcher, int a, 602 int b) throws Exception 603 { 604 matcher.reset(input); 605 matcher.find(); 606 if (matcher.start() != a) 607 failCount++; 608 matcher.find(); 609 if (matcher.start() != b) 610 failCount++; 611 } 612 613 // This test is for 6284152 614 static void check(String regex, String input, String[] expected) { 615 List<String> result = new ArrayList<String>(); 616 Pattern p = Pattern.compile(regex); 617 Matcher m = p.matcher(input); 618 while (m.find()) { 619 result.add(m.group()); 620 } 621 if (!Arrays.asList(expected).equals(result)) 622 failCount++; 623 } 624 625 private static void lookbehindTest() throws Exception { 626 //Positive 627 check("(?<=%.{0,5})foo\\d", 628 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 629 new String[]{"foo1", "foo2", "foo3"}); 630 631 //boundary at end of the lookbehind sub-regex should work consistently 632 //with the boundary just after the lookbehind sub-regex 633 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 634 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 635 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 636 check("(?<!abc \\b)foo", "abc foo", new String[0]); 637 638 //Negative 639 check("(?<!%.{0,5})foo\\d", 640 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 641 new String[] {"foo4", "foo5"}); 642 643 //Positive greedy 644 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 645 646 //Positive reluctant 647 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 648 649 //supplementary 650 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 651 new String[] {"fo\ud800\udc00o"}); 652 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 653 new String[] {"fo\ud800\udc00o"}); 654 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 655 new String[] {"fo\ud800\udc00o"}); 656 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 657 new String[] {"fo\ud800\udc00o"}); 658 report("Lookbehind"); 659 } 660 661 // This test is for 4938995 662 // Check to see if weak region boundaries are transparent to 663 // lookahead and lookbehind constructs 664 private static void boundsTest() throws Exception { 665 String fullMessage = "catdogcat"; 666 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 667 Matcher matcher = pattern.matcher("catdogca"); 668 matcher.useTransparentBounds(true); 669 if (matcher.find()) 670 failCount++; 671 matcher.reset("atdogcat"); 672 if (matcher.find()) 673 failCount++; 674 matcher.reset(fullMessage); 675 if (!matcher.find()) 676 failCount++; 677 matcher.reset(fullMessage); 678 matcher.region(0,9); 679 if (!matcher.find()) 680 failCount++; 681 matcher.reset(fullMessage); 682 matcher.region(0,6); 683 if (!matcher.find()) 684 failCount++; 685 matcher.reset(fullMessage); 686 matcher.region(3,6); 687 if (!matcher.find()) 688 failCount++; 689 matcher.useTransparentBounds(false); 690 if (matcher.find()) 691 failCount++; 692 693 // Negative lookahead/lookbehind 694 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 695 matcher = pattern.matcher("dogcat"); 696 matcher.useTransparentBounds(true); 697 matcher.region(0,3); 698 if (matcher.find()) 699 failCount++; 700 matcher.reset("catdog"); 701 matcher.region(3,6); 702 if (matcher.find()) 703 failCount++; 704 matcher.useTransparentBounds(false); 705 matcher.reset("dogcat"); 706 matcher.region(0,3); 707 if (!matcher.find()) 708 failCount++; 709 matcher.reset("catdog"); 710 matcher.region(3,6); 711 if (!matcher.find()) 712 failCount++; 713 714 report("Region bounds transparency"); 715 } 716 717 // This test is for 4945394 718 private static void findFromTest() throws Exception { 719 String message = "This is 40 $0 message."; 720 Pattern pat = Pattern.compile("\\$0"); 721 Matcher match = pat.matcher(message); 722 if (!match.find()) 723 failCount++; 724 if (match.find()) 725 failCount++; 726 if (match.find()) 727 failCount++; 728 report("Check for alternating find"); 729 } 730 731 // This test is for 4872664 and 4892980 732 private static void negatedCharClassTest() throws Exception { 733 Pattern pattern = Pattern.compile("[^>]"); 734 Matcher matcher = pattern.matcher("\u203A"); 735 if (!matcher.matches()) 736 failCount++; 737 pattern = Pattern.compile("[^fr]"); 738 matcher = pattern.matcher("a"); 739 if (!matcher.find()) 740 failCount++; 741 matcher.reset("\u203A"); 742 if (!matcher.find()) 743 failCount++; 744 String s = "for"; 745 String result[] = s.split("[^fr]"); 746 if (!result[0].equals("f")) 747 failCount++; 748 if (!result[1].equals("r")) 749 failCount++; 750 s = "f\u203Ar"; 751 result = s.split("[^fr]"); 752 if (!result[0].equals("f")) 753 failCount++; 754 if (!result[1].equals("r")) 755 failCount++; 756 757 // Test adding to bits, subtracting a node, then adding to bits again 758 pattern = Pattern.compile("[^f\u203Ar]"); 759 matcher = pattern.matcher("a"); 760 if (!matcher.find()) 761 failCount++; 762 matcher.reset("f"); 763 if (matcher.find()) 764 failCount++; 765 matcher.reset("\u203A"); 766 if (matcher.find()) 767 failCount++; 768 matcher.reset("r"); 769 if (matcher.find()) 770 failCount++; 771 matcher.reset("\u203B"); 772 if (!matcher.find()) 773 failCount++; 774 775 // Test subtracting a node, adding to bits, subtracting again 776 pattern = Pattern.compile("[^\u203Ar\u203B]"); 777 matcher = pattern.matcher("a"); 778 if (!matcher.find()) 779 failCount++; 780 matcher.reset("\u203A"); 781 if (matcher.find()) 782 failCount++; 783 matcher.reset("r"); 784 if (matcher.find()) 785 failCount++; 786 matcher.reset("\u203B"); 787 if (matcher.find()) 788 failCount++; 789 matcher.reset("\u203C"); 790 if (!matcher.find()) 791 failCount++; 792 793 report("Negated Character Class"); 794 } 795 796 // This test is for 4628291 797 private static void toStringTest() throws Exception { 798 Pattern pattern = Pattern.compile("b+"); 799 if (pattern.toString() != "b+") 800 failCount++; 801 Matcher matcher = pattern.matcher("aaabbbccc"); 802 String matcherString = matcher.toString(); // unspecified 803 matcher.find(); 804 matcherString = matcher.toString(); // unspecified 805 matcher.region(0,3); 806 matcherString = matcher.toString(); // unspecified 807 matcher.reset(); 808 matcherString = matcher.toString(); // unspecified 809 report("toString"); 810 } 811 812 // This test is for 4808962 813 private static void literalPatternTest() throws Exception { 814 int flags = Pattern.LITERAL; 815 816 Pattern pattern = Pattern.compile("abc\\t$^", flags); 817 check(pattern, "abc\\t$^", true); 818 819 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 820 check(pattern, "abc\\t$^", true); 821 822 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 823 check(pattern, "\\Qa^$bcabc\\E", true); 824 check(pattern, "a^$bcabc", false); 825 826 pattern = Pattern.compile("\\\\Q\\\\E"); 827 check(pattern, "\\Q\\E", true); 828 829 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 830 check(pattern, "abcefg\\Q\\Ehij", true); 831 832 pattern = Pattern.compile("\\\\\\Q\\\\E"); 833 check(pattern, "\\\\\\\\", true); 834 835 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 836 check(pattern, "\\Qa^$bcabc\\E", true); 837 check(pattern, "a^$bcabc", false); 838 839 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 840 check(pattern, "\\Qabc\\Edef", true); 841 check(pattern, "abcdef", false); 842 843 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 844 check(pattern, "abc\\Edef", true); 845 check(pattern, "abcdef", false); 846 847 pattern = Pattern.compile(Pattern.quote("\\E")); 848 check(pattern, "\\E", true); 849 850 pattern = Pattern.compile("((((abc.+?:)", flags); 851 check(pattern, "((((abc.+?:)", true); 852 853 flags |= Pattern.MULTILINE; 854 855 pattern = Pattern.compile("^cat$", flags); 856 check(pattern, "abc^cat$def", true); 857 check(pattern, "cat", false); 858 859 flags |= Pattern.CASE_INSENSITIVE; 860 861 pattern = Pattern.compile("abcdef", flags); 862 check(pattern, "ABCDEF", true); 863 check(pattern, "AbCdEf", true); 864 865 flags |= Pattern.DOTALL; 866 867 pattern = Pattern.compile("a...b", flags); 868 check(pattern, "A...b", true); 869 check(pattern, "Axxxb", false); 870 871 flags |= Pattern.CANON_EQ; 872 873 Pattern p = Pattern.compile("testa\u030a", flags); 874 check(pattern, "testa\u030a", false); 875 check(pattern, "test\u00e5", false); 876 877 // Supplementary character test 878 flags = Pattern.LITERAL; 879 880 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 881 check(pattern, toSupplementaries("abc\\t$^"), true); 882 883 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 884 check(pattern, toSupplementaries("abc\\t$^"), true); 885 886 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 887 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 888 check(pattern, toSupplementaries("a^$bcabc"), false); 889 890 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 891 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 892 check(pattern, toSupplementaries("a^$bcabc"), false); 893 894 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 895 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 896 check(pattern, toSupplementaries("abcdef"), false); 897 898 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 899 check(pattern, toSupplementaries("abc\\Edef"), true); 900 check(pattern, toSupplementaries("abcdef"), false); 901 902 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 903 check(pattern, toSupplementaries("((((abc.+?:)"), true); 904 905 flags |= Pattern.MULTILINE; 906 907 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 908 check(pattern, toSupplementaries("abc^cat$def"), true); 909 check(pattern, toSupplementaries("cat"), false); 910 911 flags |= Pattern.DOTALL; 912 913 // note: this is case-sensitive. 914 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 915 check(pattern, toSupplementaries("a...b"), true); 916 check(pattern, toSupplementaries("axxxb"), false); 917 918 flags |= Pattern.CANON_EQ; 919 920 String t = toSupplementaries("test"); 921 p = Pattern.compile(t + "a\u030a", flags); 922 check(pattern, t + "a\u030a", false); 923 check(pattern, t + "\u00e5", false); 924 925 report("Literal pattern"); 926 } 927 928 // This test is for 4803179 929 // This test is also for 4808962, replacement parts 930 private static void literalReplacementTest() throws Exception { 931 int flags = Pattern.LITERAL; 932 933 Pattern pattern = Pattern.compile("abc", flags); 934 Matcher matcher = pattern.matcher("zzzabczzz"); 935 String replaceTest = "$0"; 936 String result = matcher.replaceAll(replaceTest); 937 if (!result.equals("zzzabczzz")) 938 failCount++; 939 940 matcher.reset(); 941 String literalReplacement = matcher.quoteReplacement(replaceTest); 942 result = matcher.replaceAll(literalReplacement); 943 if (!result.equals("zzz$0zzz")) 944 failCount++; 945 946 matcher.reset(); 947 replaceTest = "\\t$\\$"; 948 literalReplacement = matcher.quoteReplacement(replaceTest); 949 result = matcher.replaceAll(literalReplacement); 950 if (!result.equals("zzz\\t$\\$zzz")) 951 failCount++; 952 953 // Supplementary character test 954 pattern = Pattern.compile(toSupplementaries("abc"), flags); 955 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 956 replaceTest = "$0"; 957 result = matcher.replaceAll(replaceTest); 958 if (!result.equals(toSupplementaries("zzzabczzz"))) 959 failCount++; 960 961 matcher.reset(); 962 literalReplacement = matcher.quoteReplacement(replaceTest); 963 result = matcher.replaceAll(literalReplacement); 964 if (!result.equals(toSupplementaries("zzz$0zzz"))) 965 failCount++; 966 967 matcher.reset(); 968 replaceTest = "\\t$\\$"; 969 literalReplacement = matcher.quoteReplacement(replaceTest); 970 result = matcher.replaceAll(literalReplacement); 971 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 972 failCount++; 973 974 // IAE should be thrown if backslash or '$' is the last character 975 // in replacement string 976 try { 977 "\uac00".replaceAll("\uac00", "$"); 978 failCount++; 979 } catch (IllegalArgumentException iie) { 980 } catch (Exception e) { 981 failCount++; 982 } 983 try { 984 "\uac00".replaceAll("\uac00", "\\"); 985 failCount++; 986 } catch (IllegalArgumentException iie) { 987 } catch (Exception e) { 988 failCount++; 989 } 990 report("Literal replacement"); 991 } 992 993 // This test is for 4757029 994 private static void regionTest() throws Exception { 995 Pattern pattern = Pattern.compile("abc"); 996 Matcher matcher = pattern.matcher("abcdefabc"); 997 998 matcher.region(0,9); 999 if (!matcher.find()) 1000 failCount++; 1001 if (!matcher.find()) 1002 failCount++; 1003 matcher.region(0,3); 1004 if (!matcher.find()) 1005 failCount++; 1006 matcher.region(3,6); 1007 if (matcher.find()) 1008 failCount++; 1009 matcher.region(0,2); 1010 if (matcher.find()) 1011 failCount++; 1012 1013 expectRegionFail(matcher, 1, -1); 1014 expectRegionFail(matcher, -1, -1); 1015 expectRegionFail(matcher, -1, 1); 1016 expectRegionFail(matcher, 5, 3); 1017 expectRegionFail(matcher, 5, 12); 1018 expectRegionFail(matcher, 12, 12); 1019 1020 pattern = Pattern.compile("^abc$"); 1021 matcher = pattern.matcher("zzzabczzz"); 1022 matcher.region(0,9); 1023 if (matcher.find()) 1024 failCount++; 1025 matcher.region(3,6); 1026 if (!matcher.find()) 1027 failCount++; 1028 matcher.region(3,6); 1029 matcher.useAnchoringBounds(false); 1030 if (matcher.find()) 1031 failCount++; 1032 1033 // Supplementary character test 1034 pattern = Pattern.compile(toSupplementaries("abc")); 1035 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 1036 matcher.region(0,9*2); 1037 if (!matcher.find()) 1038 failCount++; 1039 if (!matcher.find()) 1040 failCount++; 1041 matcher.region(0,3*2); 1042 if (!matcher.find()) 1043 failCount++; 1044 matcher.region(1,3*2); 1045 if (matcher.find()) 1046 failCount++; 1047 matcher.region(3*2,6*2); 1048 if (matcher.find()) 1049 failCount++; 1050 matcher.region(0,2*2); 1051 if (matcher.find()) 1052 failCount++; 1053 matcher.region(0,2*2+1); 1054 if (matcher.find()) 1055 failCount++; 1056 1057 expectRegionFail(matcher, 1*2, -1); 1058 expectRegionFail(matcher, -1, -1); 1059 expectRegionFail(matcher, -1, 1*2); 1060 expectRegionFail(matcher, 5*2, 3*2); 1061 expectRegionFail(matcher, 5*2, 12*2); 1062 expectRegionFail(matcher, 12*2, 12*2); 1063 1064 pattern = Pattern.compile(toSupplementaries("^abc$")); 1065 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1066 matcher.region(0,9*2); 1067 if (matcher.find()) 1068 failCount++; 1069 matcher.region(3*2,6*2); 1070 if (!matcher.find()) 1071 failCount++; 1072 matcher.region(3*2+1,6*2); 1073 if (matcher.find()) 1074 failCount++; 1075 matcher.region(3*2,6*2-1); 1076 if (matcher.find()) 1077 failCount++; 1078 matcher.region(3*2,6*2); 1079 matcher.useAnchoringBounds(false); 1080 if (matcher.find()) 1081 failCount++; 1082 1083 // JDK-8230829 1084 pattern = Pattern.compile("\\ud800\\udc61"); 1085 matcher = pattern.matcher("\ud800\udc61"); 1086 matcher.region(0, 1); 1087 if (matcher.find()) { 1088 failCount++; 1089 System.out.println("Matched a surrogate pair" + 1090 " that crosses border of region"); 1091 } 1092 if (!matcher.hitEnd()) { 1093 failCount++; 1094 System.out.println("Expected to hit the end when" + 1095 " matching a surrogate pair crossing region"); 1096 } 1097 1098 report("Regions"); 1099 } 1100 1101 private static void expectRegionFail(Matcher matcher, int index1, 1102 int index2) 1103 { 1104 try { 1105 matcher.region(index1, index2); 1106 failCount++; 1107 } catch (IndexOutOfBoundsException ioobe) { 1108 // Correct result 1109 } catch (IllegalStateException ise) { 1110 // Correct result 1111 } 1112 } 1113 1114 // This test is for 4803197 1115 private static void escapedSegmentTest() throws Exception { 1116 1117 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1118 check(pattern, "dir1\\dir2", true); 1119 1120 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1121 check(pattern, "dir1\\dir2\\", true); 1122 1123 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1124 check(pattern, "dir1\\dir2\\", true); 1125 1126 // Supplementary character test 1127 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1128 check(pattern, toSupplementaries("dir1\\dir2"), true); 1129 1130 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1131 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1132 1133 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1134 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1135 1136 report("Escaped segment"); 1137 } 1138 1139 // This test is for 4792284 1140 private static void nonCaptureRepetitionTest() throws Exception { 1141 String input = "abcdefgh;"; 1142 1143 String[] patterns = new String[] { 1144 "(?:\\w{4})+;", 1145 "(?:\\w{8})*;", 1146 "(?:\\w{2}){2,4};", 1147 "(?:\\w{4}){2,};", // only matches the 1148 ".*?(?:\\w{5})+;", // specified minimum 1149 ".*?(?:\\w{9})*;", // number of reps - OK 1150 "(?:\\w{4})+?;", // lazy repetition - OK 1151 "(?:\\w{4})++;", // possessive repetition - OK 1152 "(?:\\w{2,}?)+;", // non-deterministic - OK 1153 "(\\w{4})+;", // capturing group - OK 1154 }; 1155 1156 for (int i = 0; i < patterns.length; i++) { 1157 // Check find() 1158 check(patterns[i], 0, input, input, true); 1159 // Check matches() 1160 Pattern p = Pattern.compile(patterns[i]); 1161 Matcher m = p.matcher(input); 1162 1163 if (m.matches()) { 1164 if (!m.group(0).equals(input)) 1165 failCount++; 1166 } else { 1167 failCount++; 1168 } 1169 } 1170 1171 report("Non capturing repetition"); 1172 } 1173 1174 // This test is for 6358731 1175 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1176 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1177 Matcher matcher = pattern.matcher("abcd"); 1178 if (!matcher.matches() || 1179 matcher.group(1) != null || 1180 !matcher.group(2).equals("abcd")) { 1181 failCount++; 1182 } 1183 report("Not captured GroupCurly"); 1184 } 1185 1186 // This test is for 4706545 1187 private static void javaCharClassTest() throws Exception { 1188 for (int i=0; i<1000; i++) { 1189 char c = (char)generator.nextInt(); 1190 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1191 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1192 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1193 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1194 check("{javaDigit}", c, Character.isDigit(c)); 1195 check("{javaDefined}", c, Character.isDefined(c)); 1196 check("{javaLetter}", c, Character.isLetter(c)); 1197 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1198 check("{javaJavaIdentifierStart}", c, 1199 Character.isJavaIdentifierStart(c)); 1200 check("{javaJavaIdentifierPart}", c, 1201 Character.isJavaIdentifierPart(c)); 1202 check("{javaUnicodeIdentifierStart}", c, 1203 Character.isUnicodeIdentifierStart(c)); 1204 check("{javaUnicodeIdentifierPart}", c, 1205 Character.isUnicodeIdentifierPart(c)); 1206 check("{javaIdentifierIgnorable}", c, 1207 Character.isIdentifierIgnorable(c)); 1208 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1209 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1210 check("{javaISOControl}", c, Character.isISOControl(c)); 1211 check("{javaMirrored}", c, Character.isMirrored(c)); 1212 1213 } 1214 1215 // Supplementary character test 1216 for (int i=0; i<1000; i++) { 1217 int c = generator.nextInt(Character.MAX_CODE_POINT 1218 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1219 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1220 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1221 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1222 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1223 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1224 check("{javaDigit}", c, Character.isDigit(c)); 1225 check("{javaDefined}", c, Character.isDefined(c)); 1226 check("{javaLetter}", c, Character.isLetter(c)); 1227 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1228 check("{javaJavaIdentifierStart}", c, 1229 Character.isJavaIdentifierStart(c)); 1230 check("{javaJavaIdentifierPart}", c, 1231 Character.isJavaIdentifierPart(c)); 1232 check("{javaUnicodeIdentifierStart}", c, 1233 Character.isUnicodeIdentifierStart(c)); 1234 check("{javaUnicodeIdentifierPart}", c, 1235 Character.isUnicodeIdentifierPart(c)); 1236 check("{javaIdentifierIgnorable}", c, 1237 Character.isIdentifierIgnorable(c)); 1238 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1239 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1240 check("{javaISOControl}", c, Character.isISOControl(c)); 1241 check("{javaMirrored}", c, Character.isMirrored(c)); 1242 } 1243 1244 report("Java character classes"); 1245 } 1246 1247 // This test is for 4523620 1248 /* 1249 private static void numOccurrencesTest() throws Exception { 1250 Pattern pattern = Pattern.compile("aaa"); 1251 1252 if (pattern.numOccurrences("aaaaaa", false) != 2) 1253 failCount++; 1254 if (pattern.numOccurrences("aaaaaa", true) != 4) 1255 failCount++; 1256 1257 pattern = Pattern.compile("^"); 1258 if (pattern.numOccurrences("aaaaaa", false) != 1) 1259 failCount++; 1260 if (pattern.numOccurrences("aaaaaa", true) != 1) 1261 failCount++; 1262 1263 report("Number of Occurrences"); 1264 } 1265 */ 1266 1267 // This test is for 4776374 1268 private static void caretBetweenTerminatorsTest() throws Exception { 1269 int flags1 = Pattern.DOTALL; 1270 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1271 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1272 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1273 1274 check("^....", flags1, "test\ntest", "test", true); 1275 check(".....^", flags1, "test\ntest", "test", false); 1276 check(".....^", flags1, "test\n", "test", false); 1277 check("....^", flags1, "test\r\n", "test", false); 1278 1279 check("^....", flags2, "test\ntest", "test", true); 1280 check("....^", flags2, "test\ntest", "test", false); 1281 check(".....^", flags2, "test\n", "test", false); 1282 check("....^", flags2, "test\r\n", "test", false); 1283 1284 check("^....", flags3, "test\ntest", "test", true); 1285 check(".....^", flags3, "test\ntest", "test\n", true); 1286 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1287 check(".....^", flags3, "test\n", "test", false); 1288 check(".....^", flags3, "test\r\n", "test", false); 1289 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1290 1291 check("^....", flags4, "test\ntest", "test", true); 1292 check(".....^", flags3, "test\ntest", "test\n", true); 1293 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1294 check(".....^", flags4, "test\n", "test\n", false); 1295 check(".....^", flags4, "test\r\n", "test\r", false); 1296 1297 // Supplementary character test 1298 String t = toSupplementaries("test"); 1299 check("^....", flags1, t+"\n"+t, t, true); 1300 check(".....^", flags1, t+"\n"+t, t, false); 1301 check(".....^", flags1, t+"\n", t, false); 1302 check("....^", flags1, t+"\r\n", t, false); 1303 1304 check("^....", flags2, t+"\n"+t, t, true); 1305 check("....^", flags2, t+"\n"+t, t, false); 1306 check(".....^", flags2, t+"\n", t, false); 1307 check("....^", flags2, t+"\r\n", t, false); 1308 1309 check("^....", flags3, t+"\n"+t, t, true); 1310 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1311 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1312 check(".....^", flags3, t+"\n", t, false); 1313 check(".....^", flags3, t+"\r\n", t, false); 1314 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1315 1316 check("^....", flags4, t+"\n"+t, t, true); 1317 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1318 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1319 check(".....^", flags4, t+"\n", t+"\n", false); 1320 check(".....^", flags4, t+"\r\n", t+"\r", false); 1321 1322 report("Caret between terminators"); 1323 } 1324 1325 // This test is for 4727935 1326 private static void dollarAtEndTest() throws Exception { 1327 int flags1 = Pattern.DOTALL; 1328 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1329 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1330 1331 check("....$", flags1, "test\n", "test", true); 1332 check("....$", flags1, "test\r\n", "test", true); 1333 check(".....$", flags1, "test\n", "test\n", true); 1334 check(".....$", flags1, "test\u0085", "test\u0085", true); 1335 check("....$", flags1, "test\u0085", "test", true); 1336 1337 check("....$", flags2, "test\n", "test", true); 1338 check(".....$", flags2, "test\n", "test\n", true); 1339 check(".....$", flags2, "test\u0085", "test\u0085", true); 1340 check("....$", flags2, "test\u0085", "est\u0085", true); 1341 1342 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1343 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1344 check("....$blah", flags3, "test\nblah", "!!!!", false); 1345 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1346 1347 // Supplementary character test 1348 String t = toSupplementaries("test"); 1349 String b = toSupplementaries("blah"); 1350 check("....$", flags1, t+"\n", t, true); 1351 check("....$", flags1, t+"\r\n", t, true); 1352 check(".....$", flags1, t+"\n", t+"\n", true); 1353 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1354 check("....$", flags1, t+"\u0085", t, true); 1355 1356 check("....$", flags2, t+"\n", t, true); 1357 check(".....$", flags2, t+"\n", t+"\n", true); 1358 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1359 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1360 1361 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1362 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1363 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1364 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1365 1366 report("Dollar at End"); 1367 } 1368 1369 // This test is for 4711773 1370 private static void multilineDollarTest() throws Exception { 1371 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1372 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1373 matcher.find(); 1374 if (matcher.start(0) != 9) 1375 failCount++; 1376 matcher.find(); 1377 if (matcher.start(0) != 20) 1378 failCount++; 1379 1380 // Supplementary character test 1381 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1382 matcher.find(); 1383 if (matcher.start(0) != 9*2) 1384 failCount++; 1385 matcher.find(); 1386 if (matcher.start(0) != 20*2) 1387 failCount++; 1388 1389 report("Multiline Dollar"); 1390 } 1391 1392 private static void reluctantRepetitionTest() throws Exception { 1393 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1394 check(p, "1 word word word 2", true); 1395 check(p, "1 wor wo w 2", true); 1396 check(p, "1 word word 2", true); 1397 check(p, "1 word 2", true); 1398 check(p, "1 wo w w 2", true); 1399 check(p, "1 wo w 2", true); 1400 check(p, "1 wor w 2", true); 1401 1402 p = Pattern.compile("([a-z])+?c"); 1403 Matcher m = p.matcher("ababcdefdec"); 1404 check(m, "ababc"); 1405 1406 // Supplementary character test 1407 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1408 m = p.matcher(toSupplementaries("ababcdefdec")); 1409 check(m, toSupplementaries("ababc")); 1410 1411 report("Reluctant Repetition"); 1412 } 1413 1414 private static Pattern serializedPattern(Pattern p) throws Exception { 1415 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1416 ObjectOutputStream oos = new ObjectOutputStream(baos); 1417 oos.writeObject(p); 1418 oos.close(); 1419 try (ObjectInputStream ois = new ObjectInputStream( 1420 new ByteArrayInputStream(baos.toByteArray()))) { 1421 return (Pattern)ois.readObject(); 1422 } 1423 } 1424 1425 private static void serializeTest() throws Exception { 1426 String patternStr = "(b)"; 1427 String matchStr = "b"; 1428 Pattern pattern = Pattern.compile(patternStr); 1429 Pattern serializedPattern = serializedPattern(pattern); 1430 Matcher matcher = serializedPattern.matcher(matchStr); 1431 if (!matcher.matches()) 1432 failCount++; 1433 if (matcher.groupCount() != 1) 1434 failCount++; 1435 1436 pattern = Pattern.compile("a(?-i)b", Pattern.CASE_INSENSITIVE); 1437 serializedPattern = serializedPattern(pattern); 1438 if (!serializedPattern.matcher("Ab").matches()) 1439 failCount++; 1440 if (serializedPattern.matcher("AB").matches()) 1441 failCount++; 1442 1443 report("Serialization"); 1444 } 1445 1446 private static void gTest() { 1447 Pattern pattern = Pattern.compile("\\G\\w"); 1448 Matcher matcher = pattern.matcher("abc#x#x"); 1449 matcher.find(); 1450 matcher.find(); 1451 matcher.find(); 1452 if (matcher.find()) 1453 failCount++; 1454 1455 pattern = Pattern.compile("\\GA*"); 1456 matcher = pattern.matcher("1A2AA3"); 1457 matcher.find(); 1458 if (matcher.find()) 1459 failCount++; 1460 1461 pattern = Pattern.compile("\\GA*"); 1462 matcher = pattern.matcher("1A2AA3"); 1463 if (!matcher.find(1)) 1464 failCount++; 1465 matcher.find(); 1466 if (matcher.find()) 1467 failCount++; 1468 1469 report("\\G"); 1470 } 1471 1472 private static void zTest() { 1473 Pattern pattern = Pattern.compile("foo\\Z"); 1474 // Positives 1475 check(pattern, "foo\u0085", true); 1476 check(pattern, "foo\u2028", true); 1477 check(pattern, "foo\u2029", true); 1478 check(pattern, "foo\n", true); 1479 check(pattern, "foo\r", true); 1480 check(pattern, "foo\r\n", true); 1481 // Negatives 1482 check(pattern, "fooo", false); 1483 check(pattern, "foo\n\r", false); 1484 1485 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1486 // Positives 1487 check(pattern, "foo", true); 1488 check(pattern, "foo\n", true); 1489 // Negatives 1490 check(pattern, "foo\r", false); 1491 check(pattern, "foo\u0085", false); 1492 check(pattern, "foo\u2028", false); 1493 check(pattern, "foo\u2029", false); 1494 1495 report("\\Z"); 1496 } 1497 1498 private static void replaceFirstTest() { 1499 Pattern pattern = Pattern.compile("(ab)(c*)"); 1500 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1501 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1502 failCount++; 1503 1504 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1505 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1506 failCount++; 1507 1508 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1509 String result = matcher.replaceFirst("$1"); 1510 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1511 failCount++; 1512 1513 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1514 result = matcher.replaceFirst("$2"); 1515 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1516 failCount++; 1517 1518 pattern = Pattern.compile("a*"); 1519 matcher = pattern.matcher("aaaaaaaaaa"); 1520 if (!matcher.replaceFirst("test").equals("test")) 1521 failCount++; 1522 1523 pattern = Pattern.compile("a+"); 1524 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1525 if (!matcher.replaceFirst("test").equals("zzztest")) 1526 failCount++; 1527 1528 // Supplementary character test 1529 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1530 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1531 if (!matcher.replaceFirst(toSupplementaries("test")) 1532 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1533 failCount++; 1534 1535 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1536 if (!matcher.replaceFirst(toSupplementaries("test")). 1537 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1538 failCount++; 1539 1540 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1541 result = matcher.replaceFirst("$1"); 1542 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1543 failCount++; 1544 1545 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1546 result = matcher.replaceFirst("$2"); 1547 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1548 failCount++; 1549 1550 pattern = Pattern.compile(toSupplementaries("a*")); 1551 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1552 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1553 failCount++; 1554 1555 pattern = Pattern.compile(toSupplementaries("a+")); 1556 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1557 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1558 failCount++; 1559 1560 report("Replace First"); 1561 } 1562 1563 private static void unixLinesTest() { 1564 Pattern pattern = Pattern.compile(".*"); 1565 Matcher matcher = pattern.matcher("aa\u2028blah"); 1566 matcher.find(); 1567 if (!matcher.group(0).equals("aa")) 1568 failCount++; 1569 1570 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1571 matcher = pattern.matcher("aa\u2028blah"); 1572 matcher.find(); 1573 if (!matcher.group(0).equals("aa\u2028blah")) 1574 failCount++; 1575 1576 pattern = Pattern.compile("[az]$", 1577 Pattern.MULTILINE | Pattern.UNIX_LINES); 1578 matcher = pattern.matcher("aa\u2028zz"); 1579 check(matcher, "a\u2028", false); 1580 1581 // Supplementary character test 1582 pattern = Pattern.compile(".*"); 1583 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1584 matcher.find(); 1585 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1586 failCount++; 1587 1588 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1589 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1590 matcher.find(); 1591 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1592 failCount++; 1593 1594 pattern = Pattern.compile(toSupplementaries("[az]$"), 1595 Pattern.MULTILINE | Pattern.UNIX_LINES); 1596 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1597 check(matcher, toSupplementaries("a\u2028"), false); 1598 1599 report("Unix Lines"); 1600 } 1601 1602 private static void commentsTest() { 1603 int flags = Pattern.COMMENTS; 1604 1605 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1606 Matcher matcher = pattern.matcher("aa#aa"); 1607 if (!matcher.matches()) 1608 failCount++; 1609 1610 pattern = Pattern.compile("aa # blah", flags); 1611 matcher = pattern.matcher("aa"); 1612 if (!matcher.matches()) 1613 failCount++; 1614 1615 pattern = Pattern.compile("aa blah", flags); 1616 matcher = pattern.matcher("aablah"); 1617 if (!matcher.matches()) 1618 failCount++; 1619 1620 pattern = Pattern.compile("aa # blah blech ", flags); 1621 matcher = pattern.matcher("aa"); 1622 if (!matcher.matches()) 1623 failCount++; 1624 1625 pattern = Pattern.compile("aa # blah\n ", flags); 1626 matcher = pattern.matcher("aa"); 1627 if (!matcher.matches()) 1628 failCount++; 1629 1630 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1631 matcher = pattern.matcher("aabc"); 1632 if (!matcher.matches()) 1633 failCount++; 1634 1635 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1636 matcher = pattern.matcher("aabc"); 1637 if (!matcher.matches()) 1638 failCount++; 1639 1640 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1641 matcher = pattern.matcher("aabc#blech"); 1642 if (!matcher.matches()) 1643 failCount++; 1644 1645 // Supplementary character test 1646 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1647 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1648 if (!matcher.matches()) 1649 failCount++; 1650 1651 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1652 matcher = pattern.matcher(toSupplementaries("aa")); 1653 if (!matcher.matches()) 1654 failCount++; 1655 1656 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1657 matcher = pattern.matcher(toSupplementaries("aablah")); 1658 if (!matcher.matches()) 1659 failCount++; 1660 1661 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1662 matcher = pattern.matcher(toSupplementaries("aa")); 1663 if (!matcher.matches()) 1664 failCount++; 1665 1666 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1667 matcher = pattern.matcher(toSupplementaries("aa")); 1668 if (!matcher.matches()) 1669 failCount++; 1670 1671 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1672 matcher = pattern.matcher(toSupplementaries("aabc")); 1673 if (!matcher.matches()) 1674 failCount++; 1675 1676 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1677 matcher = pattern.matcher(toSupplementaries("aabc")); 1678 if (!matcher.matches()) 1679 failCount++; 1680 1681 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1682 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1683 if (!matcher.matches()) 1684 failCount++; 1685 1686 report("Comments"); 1687 } 1688 1689 private static void caseFoldingTest() { // bug 4504687 1690 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1691 Pattern pattern = Pattern.compile("aa", flags); 1692 Matcher matcher = pattern.matcher("ab"); 1693 if (matcher.matches()) 1694 failCount++; 1695 1696 pattern = Pattern.compile("aA", flags); 1697 matcher = pattern.matcher("ab"); 1698 if (matcher.matches()) 1699 failCount++; 1700 1701 pattern = Pattern.compile("aa", flags); 1702 matcher = pattern.matcher("aB"); 1703 if (matcher.matches()) 1704 failCount++; 1705 matcher = pattern.matcher("Ab"); 1706 if (matcher.matches()) 1707 failCount++; 1708 1709 // ASCII "a" 1710 // Latin-1 Supplement "a" + grave 1711 // Cyrillic "a" 1712 String[] patterns = new String[] { 1713 //single 1714 "a", "\u00e0", "\u0430", 1715 //slice 1716 "ab", "\u00e0\u00e1", "\u0430\u0431", 1717 //class single 1718 "[a]", "[\u00e0]", "[\u0430]", 1719 //class range 1720 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1721 //back reference 1722 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1723 }; 1724 1725 String[] texts = new String[] { 1726 "A", "\u00c0", "\u0410", 1727 "AB", "\u00c0\u00c1", "\u0410\u0411", 1728 "A", "\u00c0", "\u0410", 1729 "B", "\u00c2", "\u0411", 1730 "aA", "\u00e0\u00c0", "\u0430\u0410" 1731 }; 1732 1733 boolean[] expected = new boolean[] { 1734 true, false, false, 1735 true, false, false, 1736 true, false, false, 1737 true, false, false, 1738 true, false, false 1739 }; 1740 1741 flags = Pattern.CASE_INSENSITIVE; 1742 for (int i = 0; i < patterns.length; i++) { 1743 pattern = Pattern.compile(patterns[i], flags); 1744 matcher = pattern.matcher(texts[i]); 1745 if (matcher.matches() != expected[i]) { 1746 System.out.println("<1> Failed at " + i); 1747 failCount++; 1748 } 1749 } 1750 1751 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1752 for (int i = 0; i < patterns.length; i++) { 1753 pattern = Pattern.compile(patterns[i], flags); 1754 matcher = pattern.matcher(texts[i]); 1755 if (!matcher.matches()) { 1756 System.out.println("<2> Failed at " + i); 1757 failCount++; 1758 } 1759 } 1760 // flag unicode_case alone should do nothing 1761 flags = Pattern.UNICODE_CASE; 1762 for (int i = 0; i < patterns.length; i++) { 1763 pattern = Pattern.compile(patterns[i], flags); 1764 matcher = pattern.matcher(texts[i]); 1765 if (matcher.matches()) { 1766 System.out.println("<3> Failed at " + i); 1767 failCount++; 1768 } 1769 } 1770 1771 // Special cases: i, I, u+0131 and u+0130 1772 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1773 pattern = Pattern.compile("[h-j]+", flags); 1774 if (!pattern.matcher("\u0131\u0130").matches()) 1775 failCount++; 1776 report("Case Folding"); 1777 } 1778 1779 private static void appendTest() { 1780 Pattern pattern = Pattern.compile("(ab)(cd)"); 1781 Matcher matcher = pattern.matcher("abcd"); 1782 String result = matcher.replaceAll("$2$1"); 1783 if (!result.equals("cdab")) 1784 failCount++; 1785 1786 String s1 = "Swap all: first = 123, second = 456"; 1787 String s2 = "Swap one: first = 123, second = 456"; 1788 String r = "$3$2$1"; 1789 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1790 matcher = pattern.matcher(s1); 1791 1792 result = matcher.replaceAll(r); 1793 if (!result.equals("Swap all: 123 = first, 456 = second")) 1794 failCount++; 1795 1796 matcher = pattern.matcher(s2); 1797 1798 if (matcher.find()) { 1799 StringBuffer sb = new StringBuffer(); 1800 matcher.appendReplacement(sb, r); 1801 matcher.appendTail(sb); 1802 result = sb.toString(); 1803 if (!result.equals("Swap one: 123 = first, second = 456")) 1804 failCount++; 1805 } 1806 1807 // Supplementary character test 1808 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1809 matcher = pattern.matcher(toSupplementaries("abcd")); 1810 result = matcher.replaceAll("$2$1"); 1811 if (!result.equals(toSupplementaries("cdab"))) 1812 failCount++; 1813 1814 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1815 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1816 r = toSupplementaries("$3$2$1"); 1817 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1818 matcher = pattern.matcher(s1); 1819 1820 result = matcher.replaceAll(r); 1821 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1822 failCount++; 1823 1824 matcher = pattern.matcher(s2); 1825 1826 if (matcher.find()) { 1827 StringBuffer sb = new StringBuffer(); 1828 matcher.appendReplacement(sb, r); 1829 matcher.appendTail(sb); 1830 result = sb.toString(); 1831 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1832 failCount++; 1833 } 1834 report("Append"); 1835 } 1836 1837 private static void splitTest() { 1838 Pattern pattern = Pattern.compile(":"); 1839 String[] result = pattern.split("foo:and:boo", 2); 1840 if (!result[0].equals("foo")) 1841 failCount++; 1842 if (!result[1].equals("and:boo")) 1843 failCount++; 1844 // Supplementary character test 1845 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1846 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1847 if (!result[0].equals(toSupplementaries("foo"))) 1848 failCount++; 1849 if (!result[1].equals(toSupplementaries("andXboo"))) 1850 failCount++; 1851 1852 CharBuffer cb = CharBuffer.allocate(100); 1853 cb.put("foo:and:boo"); 1854 cb.flip(); 1855 result = pattern.split(cb); 1856 if (!result[0].equals("foo")) 1857 failCount++; 1858 if (!result[1].equals("and")) 1859 failCount++; 1860 if (!result[2].equals("boo")) 1861 failCount++; 1862 1863 // Supplementary character test 1864 CharBuffer cbs = CharBuffer.allocate(100); 1865 cbs.put(toSupplementaries("fooXandXboo")); 1866 cbs.flip(); 1867 result = patternX.split(cbs); 1868 if (!result[0].equals(toSupplementaries("foo"))) 1869 failCount++; 1870 if (!result[1].equals(toSupplementaries("and"))) 1871 failCount++; 1872 if (!result[2].equals(toSupplementaries("boo"))) 1873 failCount++; 1874 1875 String source = "0123456789"; 1876 for (int limit=-2; limit<3; limit++) { 1877 for (int x=0; x<10; x++) { 1878 result = source.split(Integer.toString(x), limit); 1879 int expectedLength = limit < 1 ? 2 : limit; 1880 1881 if ((limit == 0) && (x == 9)) { 1882 // expected dropping of "" 1883 if (result.length != 1) 1884 failCount++; 1885 if (!result[0].equals("012345678")) { 1886 failCount++; 1887 } 1888 } else { 1889 if (result.length != expectedLength) { 1890 failCount++; 1891 } 1892 if (!result[0].equals(source.substring(0,x))) { 1893 if (limit != 1) { 1894 failCount++; 1895 } else { 1896 if (!result[0].equals(source.substring(0,10))) { 1897 failCount++; 1898 } 1899 } 1900 } 1901 if (expectedLength > 1) { // Check segment 2 1902 if (!result[1].equals(source.substring(x+1,10))) 1903 failCount++; 1904 } 1905 } 1906 } 1907 } 1908 // Check the case for no match found 1909 for (int limit=-2; limit<3; limit++) { 1910 result = source.split("e", limit); 1911 if (result.length != 1) 1912 failCount++; 1913 if (!result[0].equals(source)) 1914 failCount++; 1915 } 1916 // Check the case for limit == 0, source = ""; 1917 // split() now returns 0-length for empty source "" see #6559590 1918 source = ""; 1919 result = source.split("e", 0); 1920 if (result.length != 1) 1921 failCount++; 1922 if (!result[0].equals(source)) 1923 failCount++; 1924 1925 // Check both split() and splitAsStraem(), especially for zero-lenth 1926 // input and zero-lenth match cases 1927 String[][] input = new String[][] { 1928 { " ", "Abc Efg Hij" }, // normal non-zero-match 1929 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1930 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1931 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1932 { "(?=\\p{Lu})", "AbcEfg" }, 1933 { "(?=\\p{Lu})", "Abc" }, 1934 { " ", "" }, // zero-length input 1935 { ".*", "" }, 1936 1937 // some tests from PatternStreamTest.java 1938 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1939 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1940 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1941 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1942 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1943 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1944 { "\u56da", "" }, 1945 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1946 { "o", "boo:and:foo" }, 1947 { "o", "booooo:and:fooooo" }, 1948 { "o", "fooooo:" }, 1949 }; 1950 1951 String[][] expected = new String[][] { 1952 { "Abc", "Efg", "Hij" }, 1953 { "", "Abc", "Efg", "Hij" }, 1954 { "Abc", "", "Efg", "Hij" }, 1955 { "Abc", "Efg", "Hij" }, 1956 { "Abc", "Efg" }, 1957 { "Abc" }, 1958 { "" }, 1959 { "" }, 1960 1961 { "awgqwefg1fefw", "vssv1vvv1" }, 1962 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1963 { "awgqwefg", "fefw4vssv", "vvv" }, 1964 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1965 { "1", "23", "456", "7890" }, 1966 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1967 { "" }, 1968 { "This", "is", "testing", "", "with", "different", "separators" }, 1969 { "b", "", ":and:f" }, 1970 { "b", "", "", "", "", ":and:f" }, 1971 { "f", "", "", "", "", ":" }, 1972 }; 1973 for (int i = 0; i < input.length; i++) { 1974 pattern = Pattern.compile(input[i][0]); 1975 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1976 failCount++; 1977 } 1978 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1979 // array for zero-length input for now 1980 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1981 expected[i])) { 1982 failCount++; 1983 } 1984 } 1985 report("Split"); 1986 } 1987 1988 private static void negationTest() { 1989 Pattern pattern = Pattern.compile("[\\[@^]+"); 1990 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1991 if (!matcher.find()) 1992 failCount++; 1993 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1994 failCount++; 1995 pattern = Pattern.compile("[@\\[^]+"); 1996 matcher = pattern.matcher("@@@@[[[[^^^^"); 1997 if (!matcher.find()) 1998 failCount++; 1999 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 2000 failCount++; 2001 pattern = Pattern.compile("[@\\[^@]+"); 2002 matcher = pattern.matcher("@@@@[[[[^^^^"); 2003 if (!matcher.find()) 2004 failCount++; 2005 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 2006 failCount++; 2007 2008 pattern = Pattern.compile("\\)"); 2009 matcher = pattern.matcher("xxx)xxx"); 2010 if (!matcher.find()) 2011 failCount++; 2012 2013 report("Negation"); 2014 } 2015 2016 private static void ampersandTest() { 2017 Pattern pattern = Pattern.compile("[&@]+"); 2018 check(pattern, "@@@@&&&&", true); 2019 2020 pattern = Pattern.compile("[@&]+"); 2021 check(pattern, "@@@@&&&&", true); 2022 2023 pattern = Pattern.compile("[@\\&]+"); 2024 check(pattern, "@@@@&&&&", true); 2025 2026 report("Ampersand"); 2027 } 2028 2029 private static void octalTest() throws Exception { 2030 Pattern pattern = Pattern.compile("\\u0007"); 2031 Matcher matcher = pattern.matcher("\u0007"); 2032 if (!matcher.matches()) 2033 failCount++; 2034 pattern = Pattern.compile("\\07"); 2035 matcher = pattern.matcher("\u0007"); 2036 if (!matcher.matches()) 2037 failCount++; 2038 pattern = Pattern.compile("\\007"); 2039 matcher = pattern.matcher("\u0007"); 2040 if (!matcher.matches()) 2041 failCount++; 2042 pattern = Pattern.compile("\\0007"); 2043 matcher = pattern.matcher("\u0007"); 2044 if (!matcher.matches()) 2045 failCount++; 2046 pattern = Pattern.compile("\\040"); 2047 matcher = pattern.matcher("\u0020"); 2048 if (!matcher.matches()) 2049 failCount++; 2050 pattern = Pattern.compile("\\0403"); 2051 matcher = pattern.matcher("\u00203"); 2052 if (!matcher.matches()) 2053 failCount++; 2054 pattern = Pattern.compile("\\0103"); 2055 matcher = pattern.matcher("\u0043"); 2056 if (!matcher.matches()) 2057 failCount++; 2058 2059 report("Octal"); 2060 } 2061 2062 private static void longPatternTest() throws Exception { 2063 try { 2064 Pattern pattern = Pattern.compile( 2065 "a 32-character-long pattern xxxx"); 2066 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 2067 pattern = Pattern.compile("a thirty four character long regex"); 2068 StringBuffer patternToBe = new StringBuffer(101); 2069 for (int i=0; i<100; i++) 2070 patternToBe.append((char)(97 + i%26)); 2071 pattern = Pattern.compile(patternToBe.toString()); 2072 } catch (PatternSyntaxException e) { 2073 failCount++; 2074 } 2075 2076 // Supplementary character test 2077 try { 2078 Pattern pattern = Pattern.compile( 2079 toSupplementaries("a 32-character-long pattern xxxx")); 2080 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 2081 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 2082 StringBuffer patternToBe = new StringBuffer(101*2); 2083 for (int i=0; i<100; i++) 2084 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 2085 + 97 + i%26)); 2086 pattern = Pattern.compile(patternToBe.toString()); 2087 } catch (PatternSyntaxException e) { 2088 failCount++; 2089 } 2090 report("LongPattern"); 2091 } 2092 2093 private static void group0Test() throws Exception { 2094 Pattern pattern = Pattern.compile("(tes)ting"); 2095 Matcher matcher = pattern.matcher("testing"); 2096 check(matcher, "testing"); 2097 2098 matcher.reset("testing"); 2099 if (matcher.lookingAt()) { 2100 if (!matcher.group(0).equals("testing")) 2101 failCount++; 2102 } else { 2103 failCount++; 2104 } 2105 2106 matcher.reset("testing"); 2107 if (matcher.matches()) { 2108 if (!matcher.group(0).equals("testing")) 2109 failCount++; 2110 } else { 2111 failCount++; 2112 } 2113 2114 pattern = Pattern.compile("(tes)ting"); 2115 matcher = pattern.matcher("testing"); 2116 if (matcher.lookingAt()) { 2117 if (!matcher.group(0).equals("testing")) 2118 failCount++; 2119 } else { 2120 failCount++; 2121 } 2122 2123 pattern = Pattern.compile("^(tes)ting"); 2124 matcher = pattern.matcher("testing"); 2125 if (matcher.matches()) { 2126 if (!matcher.group(0).equals("testing")) 2127 failCount++; 2128 } else { 2129 failCount++; 2130 } 2131 2132 // Supplementary character test 2133 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2134 matcher = pattern.matcher(toSupplementaries("testing")); 2135 check(matcher, toSupplementaries("testing")); 2136 2137 matcher.reset(toSupplementaries("testing")); 2138 if (matcher.lookingAt()) { 2139 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2140 failCount++; 2141 } else { 2142 failCount++; 2143 } 2144 2145 matcher.reset(toSupplementaries("testing")); 2146 if (matcher.matches()) { 2147 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2148 failCount++; 2149 } else { 2150 failCount++; 2151 } 2152 2153 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2154 matcher = pattern.matcher(toSupplementaries("testing")); 2155 if (matcher.lookingAt()) { 2156 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2157 failCount++; 2158 } else { 2159 failCount++; 2160 } 2161 2162 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2163 matcher = pattern.matcher(toSupplementaries("testing")); 2164 if (matcher.matches()) { 2165 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2166 failCount++; 2167 } else { 2168 failCount++; 2169 } 2170 2171 report("Group0"); 2172 } 2173 2174 private static void findIntTest() throws Exception { 2175 Pattern p = Pattern.compile("blah"); 2176 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2177 boolean result = m.find(2); 2178 if (!result) 2179 failCount++; 2180 2181 p = Pattern.compile("$"); 2182 m = p.matcher("1234567890"); 2183 result = m.find(10); 2184 if (!result) 2185 failCount++; 2186 try { 2187 result = m.find(11); 2188 failCount++; 2189 } catch (IndexOutOfBoundsException e) { 2190 // correct result 2191 } 2192 2193 // Supplementary character test 2194 p = Pattern.compile(toSupplementaries("blah")); 2195 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2196 result = m.find(2); 2197 if (!result) 2198 failCount++; 2199 2200 report("FindInt"); 2201 } 2202 2203 private static void emptyPatternTest() throws Exception { 2204 Pattern p = Pattern.compile(""); 2205 Matcher m = p.matcher("foo"); 2206 2207 // Should find empty pattern at beginning of input 2208 boolean result = m.find(); 2209 if (result != true) 2210 failCount++; 2211 if (m.start() != 0) 2212 failCount++; 2213 2214 // Should not match entire input if input is not empty 2215 m.reset(); 2216 result = m.matches(); 2217 if (result == true) 2218 failCount++; 2219 2220 try { 2221 m.start(0); 2222 failCount++; 2223 } catch (IllegalStateException e) { 2224 // Correct result 2225 } 2226 2227 // Should match entire input if input is empty 2228 m.reset(""); 2229 result = m.matches(); 2230 if (result != true) 2231 failCount++; 2232 2233 result = Pattern.matches("", ""); 2234 if (result != true) 2235 failCount++; 2236 2237 result = Pattern.matches("", "foo"); 2238 if (result == true) 2239 failCount++; 2240 report("EmptyPattern"); 2241 } 2242 2243 private static void charClassTest() throws Exception { 2244 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2245 check(pattern, "blahb]blech", true); 2246 2247 pattern = Pattern.compile("[abc[def]]"); 2248 check(pattern, "b", true); 2249 2250 // Supplementary character tests 2251 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2252 check(pattern, toSupplementaries("blahb]blech"), true); 2253 2254 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2255 check(pattern, toSupplementaries("b"), true); 2256 2257 try { 2258 // u00ff when UNICODE_CASE 2259 pattern = Pattern.compile("[ab\u00ffcd]", 2260 Pattern.CASE_INSENSITIVE| 2261 Pattern.UNICODE_CASE); 2262 check(pattern, "ab\u00ffcd", true); 2263 check(pattern, "Ab\u0178Cd", true); 2264 2265 // u00b5 when UNICODE_CASE 2266 pattern = Pattern.compile("[ab\u00b5cd]", 2267 Pattern.CASE_INSENSITIVE| 2268 Pattern.UNICODE_CASE); 2269 check(pattern, "ab\u00b5cd", true); 2270 check(pattern, "Ab\u039cCd", true); 2271 } catch (Exception e) { failCount++; } 2272 2273 /* Special cases 2274 (1)LatinSmallLetterLongS u+017f 2275 (2)LatinSmallLetterDotlessI u+0131 2276 (3)LatineCapitalLetterIWithDotAbove u+0130 2277 (4)KelvinSign u+212a 2278 (5)AngstromSign u+212b 2279 */ 2280 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2281 pattern = Pattern.compile("[sik\u00c5]+", flags); 2282 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2283 failCount++; 2284 2285 report("CharClass"); 2286 } 2287 2288 private static void caretTest() throws Exception { 2289 Pattern pattern = Pattern.compile("\\w*"); 2290 Matcher matcher = pattern.matcher("a#bc#def##g"); 2291 check(matcher, "a"); 2292 check(matcher, ""); 2293 check(matcher, "bc"); 2294 check(matcher, ""); 2295 check(matcher, "def"); 2296 check(matcher, ""); 2297 check(matcher, ""); 2298 check(matcher, "g"); 2299 check(matcher, ""); 2300 if (matcher.find()) 2301 failCount++; 2302 2303 pattern = Pattern.compile("^\\w*"); 2304 matcher = pattern.matcher("a#bc#def##g"); 2305 check(matcher, "a"); 2306 if (matcher.find()) 2307 failCount++; 2308 2309 pattern = Pattern.compile("\\w"); 2310 matcher = pattern.matcher("abc##x"); 2311 check(matcher, "a"); 2312 check(matcher, "b"); 2313 check(matcher, "c"); 2314 check(matcher, "x"); 2315 if (matcher.find()) 2316 failCount++; 2317 2318 pattern = Pattern.compile("^\\w"); 2319 matcher = pattern.matcher("abc##x"); 2320 check(matcher, "a"); 2321 if (matcher.find()) 2322 failCount++; 2323 2324 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2325 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2326 check(matcher, "abc"); 2327 if (matcher.find()) 2328 failCount++; 2329 2330 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2331 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2332 check(matcher, "abc"); 2333 check(matcher, "jkl"); 2334 if (matcher.find()) 2335 failCount++; 2336 2337 pattern = Pattern.compile("^", Pattern.MULTILINE); 2338 matcher = pattern.matcher("this is some text"); 2339 String result = matcher.replaceAll("X"); 2340 if (!result.equals("Xthis is some text")) 2341 failCount++; 2342 2343 pattern = Pattern.compile("^"); 2344 matcher = pattern.matcher("this is some text"); 2345 result = matcher.replaceAll("X"); 2346 if (!result.equals("Xthis is some text")) 2347 failCount++; 2348 2349 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2350 matcher = pattern.matcher("this is some text\n"); 2351 result = matcher.replaceAll("X"); 2352 if (!result.equals("Xthis is some text\n")) 2353 failCount++; 2354 2355 report("Caret"); 2356 } 2357 2358 private static void groupCaptureTest() throws Exception { 2359 // Independent group 2360 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2361 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2362 matcher.find(); 2363 try { 2364 String blah = matcher.group(1); 2365 failCount++; 2366 } catch (IndexOutOfBoundsException ioobe) { 2367 // Good result 2368 } 2369 // Pure group 2370 pattern = Pattern.compile("x+(?:y+)z+"); 2371 matcher = pattern.matcher("xxxyyyzzz"); 2372 matcher.find(); 2373 try { 2374 String blah = matcher.group(1); 2375 failCount++; 2376 } catch (IndexOutOfBoundsException ioobe) { 2377 // Good result 2378 } 2379 2380 // Supplementary character tests 2381 // Independent group 2382 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2383 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2384 matcher.find(); 2385 try { 2386 String blah = matcher.group(1); 2387 failCount++; 2388 } catch (IndexOutOfBoundsException ioobe) { 2389 // Good result 2390 } 2391 // Pure group 2392 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2393 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2394 matcher.find(); 2395 try { 2396 String blah = matcher.group(1); 2397 failCount++; 2398 } catch (IndexOutOfBoundsException ioobe) { 2399 // Good result 2400 } 2401 2402 report("GroupCapture"); 2403 } 2404 2405 private static void backRefTest() throws Exception { 2406 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2407 check(pattern, "zzzaabcazzz", true); 2408 2409 pattern = Pattern.compile("(a*)bc\\1"); 2410 check(pattern, "zzzaabcaazzz", true); 2411 2412 pattern = Pattern.compile("(abc)(def)\\1"); 2413 check(pattern, "abcdefabc", true); 2414 2415 pattern = Pattern.compile("(abc)(def)\\3"); 2416 check(pattern, "abcdefabc", false); 2417 2418 try { 2419 for (int i = 1; i < 10; i++) { 2420 // Make sure backref 1-9 are always accepted 2421 pattern = Pattern.compile("abcdef\\" + i); 2422 // and fail to match if the target group does not exit 2423 check(pattern, "abcdef", false); 2424 } 2425 } catch(PatternSyntaxException e) { 2426 failCount++; 2427 } 2428 2429 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2430 check(pattern, "abcdefghija", false); 2431 check(pattern, "abcdefghija1", true); 2432 2433 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2434 check(pattern, "abcdefghijkk", true); 2435 2436 pattern = Pattern.compile("(a)bcdefghij\\11"); 2437 check(pattern, "abcdefghija1", true); 2438 2439 // Supplementary character tests 2440 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2441 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2442 2443 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2444 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2445 2446 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2447 check(pattern, toSupplementaries("abcdefabc"), true); 2448 2449 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2450 check(pattern, toSupplementaries("abcdefabc"), false); 2451 2452 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2453 check(pattern, toSupplementaries("abcdefghija"), false); 2454 check(pattern, toSupplementaries("abcdefghija1"), true); 2455 2456 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2457 check(pattern, toSupplementaries("abcdefghijkk"), true); 2458 2459 report("BackRef"); 2460 } 2461 2462 /** 2463 * Unicode Technical Report #18, section 2.6 End of Line 2464 * There is no empty line to be matched in the sequence \u000D\u000A 2465 * but there is an empty line in the sequence \u000A\u000D. 2466 */ 2467 private static void anchorTest() throws Exception { 2468 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2469 Matcher m = p.matcher("blah1\r\nblah2"); 2470 m.find(); 2471 m.find(); 2472 if (!m.group().equals("blah2")) 2473 failCount++; 2474 2475 m.reset("blah1\n\rblah2"); 2476 m.find(); 2477 m.find(); 2478 m.find(); 2479 if (!m.group().equals("blah2")) 2480 failCount++; 2481 2482 // Test behavior of $ with \r\n at end of input 2483 p = Pattern.compile(".+$"); 2484 m = p.matcher("blah1\r\n"); 2485 if (!m.find()) 2486 failCount++; 2487 if (!m.group().equals("blah1")) 2488 failCount++; 2489 if (m.find()) 2490 failCount++; 2491 2492 // Test behavior of $ with \r\n at end of input in multiline 2493 p = Pattern.compile(".+$", Pattern.MULTILINE); 2494 m = p.matcher("blah1\r\n"); 2495 if (!m.find()) 2496 failCount++; 2497 if (m.find()) 2498 failCount++; 2499 2500 // Test for $ recognition of \u0085 for bug 4527731 2501 p = Pattern.compile(".+$", Pattern.MULTILINE); 2502 m = p.matcher("blah1\u0085"); 2503 if (!m.find()) 2504 failCount++; 2505 2506 // Supplementary character test 2507 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2508 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2509 m.find(); 2510 m.find(); 2511 if (!m.group().equals(toSupplementaries("blah2"))) 2512 failCount++; 2513 2514 m.reset(toSupplementaries("blah1\n\rblah2")); 2515 m.find(); 2516 m.find(); 2517 m.find(); 2518 if (!m.group().equals(toSupplementaries("blah2"))) 2519 failCount++; 2520 2521 // Test behavior of $ with \r\n at end of input 2522 p = Pattern.compile(".+$"); 2523 m = p.matcher(toSupplementaries("blah1\r\n")); 2524 if (!m.find()) 2525 failCount++; 2526 if (!m.group().equals(toSupplementaries("blah1"))) 2527 failCount++; 2528 if (m.find()) 2529 failCount++; 2530 2531 // Test behavior of $ with \r\n at end of input in multiline 2532 p = Pattern.compile(".+$", Pattern.MULTILINE); 2533 m = p.matcher(toSupplementaries("blah1\r\n")); 2534 if (!m.find()) 2535 failCount++; 2536 if (m.find()) 2537 failCount++; 2538 2539 // Test for $ recognition of \u0085 for bug 4527731 2540 p = Pattern.compile(".+$", Pattern.MULTILINE); 2541 m = p.matcher(toSupplementaries("blah1\u0085")); 2542 if (!m.find()) 2543 failCount++; 2544 2545 report("Anchors"); 2546 } 2547 2548 /** 2549 * A basic sanity test of Matcher.lookingAt(). 2550 */ 2551 private static void lookingAtTest() throws Exception { 2552 Pattern p = Pattern.compile("(ab)(c*)"); 2553 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2554 2555 if (!m.lookingAt()) 2556 failCount++; 2557 2558 if (!m.group().equals(m.group(0))) 2559 failCount++; 2560 2561 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2562 if (m.lookingAt()) 2563 failCount++; 2564 2565 // Supplementary character test 2566 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2567 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2568 2569 if (!m.lookingAt()) 2570 failCount++; 2571 2572 if (!m.group().equals(m.group(0))) 2573 failCount++; 2574 2575 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2576 if (m.lookingAt()) 2577 failCount++; 2578 2579 report("Looking At"); 2580 } 2581 2582 /** 2583 * A basic sanity test of Matcher.matches(). 2584 */ 2585 private static void matchesTest() throws Exception { 2586 // matches() 2587 Pattern p = Pattern.compile("ulb(c*)"); 2588 Matcher m = p.matcher("ulbcccccc"); 2589 if (!m.matches()) 2590 failCount++; 2591 2592 // find() but not matches() 2593 m.reset("zzzulbcccccc"); 2594 if (m.matches()) 2595 failCount++; 2596 2597 // lookingAt() but not matches() 2598 m.reset("ulbccccccdef"); 2599 if (m.matches()) 2600 failCount++; 2601 2602 // matches() 2603 p = Pattern.compile("a|ad"); 2604 m = p.matcher("ad"); 2605 if (!m.matches()) 2606 failCount++; 2607 2608 // Supplementary character test 2609 // matches() 2610 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2611 m = p.matcher(toSupplementaries("ulbcccccc")); 2612 if (!m.matches()) 2613 failCount++; 2614 2615 // find() but not matches() 2616 m.reset(toSupplementaries("zzzulbcccccc")); 2617 if (m.matches()) 2618 failCount++; 2619 2620 // lookingAt() but not matches() 2621 m.reset(toSupplementaries("ulbccccccdef")); 2622 if (m.matches()) 2623 failCount++; 2624 2625 // matches() 2626 p = Pattern.compile(toSupplementaries("a|ad")); 2627 m = p.matcher(toSupplementaries("ad")); 2628 if (!m.matches()) 2629 failCount++; 2630 2631 report("Matches"); 2632 } 2633 2634 /** 2635 * A basic sanity test of Pattern.matches(). 2636 */ 2637 private static void patternMatchesTest() throws Exception { 2638 // matches() 2639 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2640 toSupplementaries("ulbcccccc"))) 2641 failCount++; 2642 2643 // find() but not matches() 2644 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2645 toSupplementaries("zzzulbcccccc"))) 2646 failCount++; 2647 2648 // lookingAt() but not matches() 2649 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2650 toSupplementaries("ulbccccccdef"))) 2651 failCount++; 2652 2653 // Supplementary character test 2654 // matches() 2655 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2656 toSupplementaries("ulbcccccc"))) 2657 failCount++; 2658 2659 // find() but not matches() 2660 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2661 toSupplementaries("zzzulbcccccc"))) 2662 failCount++; 2663 2664 // lookingAt() but not matches() 2665 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2666 toSupplementaries("ulbccccccdef"))) 2667 failCount++; 2668 2669 report("Pattern Matches"); 2670 } 2671 2672 /** 2673 * Canonical equivalence testing. Tests the ability of the engine 2674 * to match sequences that are not explicitly specified in the 2675 * pattern when they are considered equivalent by the Unicode Standard. 2676 */ 2677 private static void ceTest() throws Exception { 2678 // Decomposed char outside char classes 2679 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2680 Matcher m = p.matcher("test\u00e5"); 2681 if (!m.matches()) 2682 failCount++; 2683 2684 m.reset("testa\u030a"); 2685 if (!m.matches()) 2686 failCount++; 2687 2688 // Composed char outside char classes 2689 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2690 m = p.matcher("test\u00e5"); 2691 if (!m.matches()) 2692 failCount++; 2693 2694 m.reset("testa\u030a"); 2695 if (!m.find()) 2696 failCount++; 2697 2698 // Decomposed char inside a char class 2699 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2700 m = p.matcher("test\u00e5"); 2701 if (!m.find()) 2702 failCount++; 2703 2704 m.reset("testa\u030a"); 2705 if (!m.find()) 2706 failCount++; 2707 2708 // Composed char inside a char class 2709 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2710 m = p.matcher("test\u00e5"); 2711 if (!m.find()) 2712 failCount++; 2713 2714 m.reset("testa\u0300"); 2715 if (!m.find()) 2716 failCount++; 2717 2718 m.reset("testa\u030a"); 2719 if (!m.find()) 2720 failCount++; 2721 2722 // Marks that cannot legally change order and be equivalent 2723 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2724 check(p, "testa\u0308\u0300", true); 2725 check(p, "testa\u0300\u0308", false); 2726 2727 // Marks that can legally change order and be equivalent 2728 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2729 check(p, "testa\u0308\u0323", true); 2730 check(p, "testa\u0323\u0308", true); 2731 2732 // Test all equivalences of the sequence a\u0308\u0323\u0300 2733 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2734 check(p, "testa\u0308\u0323\u0300", true); 2735 check(p, "testa\u0323\u0308\u0300", true); 2736 check(p, "testa\u0308\u0300\u0323", true); 2737 check(p, "test\u00e4\u0323\u0300", true); 2738 check(p, "test\u00e4\u0300\u0323", true); 2739 2740 Object[][] data = new Object[][] { 2741 2742 // JDK-4867170 2743 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true }, 2744 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true }, 2745 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true }, 2746 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true }, 2747 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true }, 2748 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true }, 2749 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true }, 2750 2751 { "\\p{IsGreek}", "ab\u1f80cd", "f", true }, 2752 { "\\p{IsGreek}", "ab\u1f81cd", "f", true }, 2753 { "\\p{IsGreek}", "ab\u1f82cd", "f", true }, 2754 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true }, 2755 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true }, 2756 2757 // backtracking, force to match "\u1f80", instead of \u1f82" 2758 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true }, 2759 2760 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true }, 2761 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true }, 2762 2763 { "[^\u1f80-\u1f82]","\u1f81", "m", false }, 2764 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false }, 2765 { "[^\u1f01\u0345]", "\u1f81", "f", false }, 2766 2767 { "[^\u1f81]+", "\u1f80\u1f82", "f", true }, 2768 { "[\u1f80]", "ab\u1f80cd", "f", true }, 2769 { "\u1f80", "ab\u1f80cd", "f", true }, 2770 { "\u1f00\u0345\u0300", "\u1f82", "m", true }, 2771 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true }, 2772 { "\u1f82", "\u1f00\u0345\u0300", "m", true }, 2773 { "\u1f82", "\u1f80\u0300", "m", true }, 2774 2775 // JDK-7080302 # compile failed 2776 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true}, 2777 2778 // JDK-6728861, same cause as above one 2779 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true}, 2780 2781 // JDK-6995635 2782 { "(\u00e9)", "e\u0301", "m", true }, 2783 2784 // JDK-6736245 2785 // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc 2786 { "\u2ADC", "\u2ADC", "m", true}, // NFC 2787 { "\u2ADC", "\u2ADD\u0338", "m", true}, // NFD 2788 2789 // 4916384. 2790 // Decomposed hangul (jamos) works inside clazz 2791 { "[\u1100\u1161]", "\u1100\u1161", "m", true}, 2792 { "[\u1100\u1161]", "\uac00", "m", true}, 2793 2794 { "[\uac00]", "\u1100\u1161", "m", true}, 2795 { "[\uac00]", "\uac00", "m", true}, 2796 2797 // Decomposed hangul (jamos) 2798 { "\u1100\u1161", "\u1100\u1161", "m", true}, 2799 { "\u1100\u1161", "\uac00", "m", true}, 2800 2801 // Composed hangul 2802 { "\uac00", "\u1100\u1161", "m", true }, 2803 { "\uac00", "\uac00", "m", true }, 2804 2805 /* Need a NFDSlice to nfd the source to solve this issue 2806 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2807 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165> 2808 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2809 2810 // Decomposed supplementary outside char classes 2811 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2812 // Composed supplementary outside char classes 2813 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2814 */ 2815 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2816 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2817 2818 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true }, 2819 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2820 }; 2821 2822 int failCount = 0; 2823 for (Object[] d : data) { 2824 String pn = (String)d[0]; 2825 String tt = (String)d[1]; 2826 boolean isFind = "f".equals(((String)d[2])); 2827 boolean expected = (boolean)d[3]; 2828 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find() 2829 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches(); 2830 if (ret != expected) { 2831 failCount++; 2832 continue; 2833 } 2834 } 2835 report("Canonical Equivalence"); 2836 } 2837 2838 /** 2839 * A basic sanity test of Matcher.replaceAll(). 2840 */ 2841 private static void globalSubstitute() throws Exception { 2842 // Global substitution with a literal 2843 Pattern p = Pattern.compile("(ab)(c*)"); 2844 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2845 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2846 failCount++; 2847 2848 m.reset("zzzabccczzzabcczzzabccczzz"); 2849 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2850 failCount++; 2851 2852 // Global substitution with groups 2853 m.reset("zzzabccczzzabcczzzabccczzz"); 2854 String result = m.replaceAll("$1"); 2855 if (!result.equals("zzzabzzzabzzzabzzz")) 2856 failCount++; 2857 2858 // Supplementary character test 2859 // Global substitution with a literal 2860 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2861 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2862 if (!m.replaceAll(toSupplementaries("test")). 2863 equals(toSupplementaries("testzzztestzzztest"))) 2864 failCount++; 2865 2866 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2867 if (!m.replaceAll(toSupplementaries("test")). 2868 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2869 failCount++; 2870 2871 // Global substitution with groups 2872 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2873 result = m.replaceAll("$1"); 2874 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2875 failCount++; 2876 2877 report("Global Substitution"); 2878 } 2879 2880 /** 2881 * Tests the usage of Matcher.appendReplacement() with literal 2882 * and group substitutions. 2883 */ 2884 private static void stringbufferSubstitute() throws Exception { 2885 // SB substitution with literal 2886 String blah = "zzzblahzzz"; 2887 Pattern p = Pattern.compile("blah"); 2888 Matcher m = p.matcher(blah); 2889 StringBuffer result = new StringBuffer(); 2890 try { 2891 m.appendReplacement(result, "blech"); 2892 failCount++; 2893 } catch (IllegalStateException e) { 2894 } 2895 m.find(); 2896 m.appendReplacement(result, "blech"); 2897 if (!result.toString().equals("zzzblech")) 2898 failCount++; 2899 2900 m.appendTail(result); 2901 if (!result.toString().equals("zzzblechzzz")) 2902 failCount++; 2903 2904 // SB substitution with groups 2905 blah = "zzzabcdzzz"; 2906 p = Pattern.compile("(ab)(cd)*"); 2907 m = p.matcher(blah); 2908 result = new StringBuffer(); 2909 try { 2910 m.appendReplacement(result, "$1"); 2911 failCount++; 2912 } catch (IllegalStateException e) { 2913 } 2914 m.find(); 2915 m.appendReplacement(result, "$1"); 2916 if (!result.toString().equals("zzzab")) 2917 failCount++; 2918 2919 m.appendTail(result); 2920 if (!result.toString().equals("zzzabzzz")) 2921 failCount++; 2922 2923 // SB substitution with 3 groups 2924 blah = "zzzabcdcdefzzz"; 2925 p = Pattern.compile("(ab)(cd)*(ef)"); 2926 m = p.matcher(blah); 2927 result = new StringBuffer(); 2928 try { 2929 m.appendReplacement(result, "$1w$2w$3"); 2930 failCount++; 2931 } catch (IllegalStateException e) { 2932 } 2933 m.find(); 2934 m.appendReplacement(result, "$1w$2w$3"); 2935 if (!result.toString().equals("zzzabwcdwef")) 2936 failCount++; 2937 2938 m.appendTail(result); 2939 if (!result.toString().equals("zzzabwcdwefzzz")) 2940 failCount++; 2941 2942 // SB substitution with groups and three matches 2943 // skipping middle match 2944 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2945 p = Pattern.compile("(ab)(cd*)"); 2946 m = p.matcher(blah); 2947 result = new StringBuffer(); 2948 try { 2949 m.appendReplacement(result, "$1"); 2950 failCount++; 2951 } catch (IllegalStateException e) { 2952 } 2953 m.find(); 2954 m.appendReplacement(result, "$1"); 2955 if (!result.toString().equals("zzzab")) 2956 failCount++; 2957 2958 m.find(); 2959 m.find(); 2960 m.appendReplacement(result, "$2"); 2961 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2962 failCount++; 2963 2964 m.appendTail(result); 2965 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2966 failCount++; 2967 2968 // Check to make sure escaped $ is ignored 2969 blah = "zzzabcdcdefzzz"; 2970 p = Pattern.compile("(ab)(cd)*(ef)"); 2971 m = p.matcher(blah); 2972 result = new StringBuffer(); 2973 m.find(); 2974 m.appendReplacement(result, "$1w\\$2w$3"); 2975 if (!result.toString().equals("zzzabw$2wef")) 2976 failCount++; 2977 2978 m.appendTail(result); 2979 if (!result.toString().equals("zzzabw$2wefzzz")) 2980 failCount++; 2981 2982 // Check to make sure a reference to nonexistent group causes error 2983 blah = "zzzabcdcdefzzz"; 2984 p = Pattern.compile("(ab)(cd)*(ef)"); 2985 m = p.matcher(blah); 2986 result = new StringBuffer(); 2987 m.find(); 2988 try { 2989 m.appendReplacement(result, "$1w$5w$3"); 2990 failCount++; 2991 } catch (IndexOutOfBoundsException ioobe) { 2992 // Correct result 2993 } 2994 2995 // Check double digit group references 2996 blah = "zzz123456789101112zzz"; 2997 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2998 m = p.matcher(blah); 2999 result = new StringBuffer(); 3000 m.find(); 3001 m.appendReplacement(result, "$1w$11w$3"); 3002 if (!result.toString().equals("zzz1w11w3")) 3003 failCount++; 3004 3005 // Check to make sure it backs off $15 to $1 if only three groups 3006 blah = "zzzabcdcdefzzz"; 3007 p = Pattern.compile("(ab)(cd)*(ef)"); 3008 m = p.matcher(blah); 3009 result = new StringBuffer(); 3010 m.find(); 3011 m.appendReplacement(result, "$1w$15w$3"); 3012 if (!result.toString().equals("zzzabwab5wef")) 3013 failCount++; 3014 3015 3016 // Supplementary character test 3017 // SB substitution with literal 3018 blah = toSupplementaries("zzzblahzzz"); 3019 p = Pattern.compile(toSupplementaries("blah")); 3020 m = p.matcher(blah); 3021 result = new StringBuffer(); 3022 try { 3023 m.appendReplacement(result, toSupplementaries("blech")); 3024 failCount++; 3025 } catch (IllegalStateException e) { 3026 } 3027 m.find(); 3028 m.appendReplacement(result, toSupplementaries("blech")); 3029 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3030 failCount++; 3031 3032 m.appendTail(result); 3033 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3034 failCount++; 3035 3036 // SB substitution with groups 3037 blah = toSupplementaries("zzzabcdzzz"); 3038 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3039 m = p.matcher(blah); 3040 result = new StringBuffer(); 3041 try { 3042 m.appendReplacement(result, "$1"); 3043 failCount++; 3044 } catch (IllegalStateException e) { 3045 } 3046 m.find(); 3047 m.appendReplacement(result, "$1"); 3048 if (!result.toString().equals(toSupplementaries("zzzab"))) 3049 failCount++; 3050 3051 m.appendTail(result); 3052 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3053 failCount++; 3054 3055 // SB substitution with 3 groups 3056 blah = toSupplementaries("zzzabcdcdefzzz"); 3057 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3058 m = p.matcher(blah); 3059 result = new StringBuffer(); 3060 try { 3061 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3062 failCount++; 3063 } catch (IllegalStateException e) { 3064 } 3065 m.find(); 3066 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3067 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3068 failCount++; 3069 3070 m.appendTail(result); 3071 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3072 failCount++; 3073 3074 // SB substitution with groups and three matches 3075 // skipping middle match 3076 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3077 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3078 m = p.matcher(blah); 3079 result = new StringBuffer(); 3080 try { 3081 m.appendReplacement(result, "$1"); 3082 failCount++; 3083 } catch (IllegalStateException e) { 3084 } 3085 m.find(); 3086 m.appendReplacement(result, "$1"); 3087 if (!result.toString().equals(toSupplementaries("zzzab"))) 3088 failCount++; 3089 3090 m.find(); 3091 m.find(); 3092 m.appendReplacement(result, "$2"); 3093 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3094 failCount++; 3095 3096 m.appendTail(result); 3097 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3098 failCount++; 3099 3100 // Check to make sure escaped $ is ignored 3101 blah = toSupplementaries("zzzabcdcdefzzz"); 3102 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3103 m = p.matcher(blah); 3104 result = new StringBuffer(); 3105 m.find(); 3106 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3107 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3108 failCount++; 3109 3110 m.appendTail(result); 3111 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3112 failCount++; 3113 3114 // Check to make sure a reference to nonexistent group causes error 3115 blah = toSupplementaries("zzzabcdcdefzzz"); 3116 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3117 m = p.matcher(blah); 3118 result = new StringBuffer(); 3119 m.find(); 3120 try { 3121 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3122 failCount++; 3123 } catch (IndexOutOfBoundsException ioobe) { 3124 // Correct result 3125 } 3126 3127 // Check double digit group references 3128 blah = toSupplementaries("zzz123456789101112zzz"); 3129 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3130 m = p.matcher(blah); 3131 result = new StringBuffer(); 3132 m.find(); 3133 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3134 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3135 failCount++; 3136 3137 // Check to make sure it backs off $15 to $1 if only three groups 3138 blah = toSupplementaries("zzzabcdcdefzzz"); 3139 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3140 m = p.matcher(blah); 3141 result = new StringBuffer(); 3142 m.find(); 3143 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3144 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3145 failCount++; 3146 3147 // Check nothing has been appended into the output buffer if 3148 // the replacement string triggers IllegalArgumentException. 3149 p = Pattern.compile("(abc)"); 3150 m = p.matcher("abcd"); 3151 result = new StringBuffer(); 3152 m.find(); 3153 try { 3154 m.appendReplacement(result, ("xyz$g")); 3155 failCount++; 3156 } catch (IllegalArgumentException iae) { 3157 if (result.length() != 0) 3158 failCount++; 3159 } 3160 3161 report("SB Substitution"); 3162 } 3163 3164 /** 3165 * Tests the usage of Matcher.appendReplacement() with literal 3166 * and group substitutions. 3167 */ 3168 private static void stringbuilderSubstitute() throws Exception { 3169 // SB substitution with literal 3170 String blah = "zzzblahzzz"; 3171 Pattern p = Pattern.compile("blah"); 3172 Matcher m = p.matcher(blah); 3173 StringBuilder result = new StringBuilder(); 3174 try { 3175 m.appendReplacement(result, "blech"); 3176 failCount++; 3177 } catch (IllegalStateException e) { 3178 } 3179 m.find(); 3180 m.appendReplacement(result, "blech"); 3181 if (!result.toString().equals("zzzblech")) 3182 failCount++; 3183 3184 m.appendTail(result); 3185 if (!result.toString().equals("zzzblechzzz")) 3186 failCount++; 3187 3188 // SB substitution with groups 3189 blah = "zzzabcdzzz"; 3190 p = Pattern.compile("(ab)(cd)*"); 3191 m = p.matcher(blah); 3192 result = new StringBuilder(); 3193 try { 3194 m.appendReplacement(result, "$1"); 3195 failCount++; 3196 } catch (IllegalStateException e) { 3197 } 3198 m.find(); 3199 m.appendReplacement(result, "$1"); 3200 if (!result.toString().equals("zzzab")) 3201 failCount++; 3202 3203 m.appendTail(result); 3204 if (!result.toString().equals("zzzabzzz")) 3205 failCount++; 3206 3207 // SB substitution with 3 groups 3208 blah = "zzzabcdcdefzzz"; 3209 p = Pattern.compile("(ab)(cd)*(ef)"); 3210 m = p.matcher(blah); 3211 result = new StringBuilder(); 3212 try { 3213 m.appendReplacement(result, "$1w$2w$3"); 3214 failCount++; 3215 } catch (IllegalStateException e) { 3216 } 3217 m.find(); 3218 m.appendReplacement(result, "$1w$2w$3"); 3219 if (!result.toString().equals("zzzabwcdwef")) 3220 failCount++; 3221 3222 m.appendTail(result); 3223 if (!result.toString().equals("zzzabwcdwefzzz")) 3224 failCount++; 3225 3226 // SB substitution with groups and three matches 3227 // skipping middle match 3228 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3229 p = Pattern.compile("(ab)(cd*)"); 3230 m = p.matcher(blah); 3231 result = new StringBuilder(); 3232 try { 3233 m.appendReplacement(result, "$1"); 3234 failCount++; 3235 } catch (IllegalStateException e) { 3236 } 3237 m.find(); 3238 m.appendReplacement(result, "$1"); 3239 if (!result.toString().equals("zzzab")) 3240 failCount++; 3241 3242 m.find(); 3243 m.find(); 3244 m.appendReplacement(result, "$2"); 3245 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3246 failCount++; 3247 3248 m.appendTail(result); 3249 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3250 failCount++; 3251 3252 // Check to make sure escaped $ is ignored 3253 blah = "zzzabcdcdefzzz"; 3254 p = Pattern.compile("(ab)(cd)*(ef)"); 3255 m = p.matcher(blah); 3256 result = new StringBuilder(); 3257 m.find(); 3258 m.appendReplacement(result, "$1w\\$2w$3"); 3259 if (!result.toString().equals("zzzabw$2wef")) 3260 failCount++; 3261 3262 m.appendTail(result); 3263 if (!result.toString().equals("zzzabw$2wefzzz")) 3264 failCount++; 3265 3266 // Check to make sure a reference to nonexistent group causes error 3267 blah = "zzzabcdcdefzzz"; 3268 p = Pattern.compile("(ab)(cd)*(ef)"); 3269 m = p.matcher(blah); 3270 result = new StringBuilder(); 3271 m.find(); 3272 try { 3273 m.appendReplacement(result, "$1w$5w$3"); 3274 failCount++; 3275 } catch (IndexOutOfBoundsException ioobe) { 3276 // Correct result 3277 } 3278 3279 // Check double digit group references 3280 blah = "zzz123456789101112zzz"; 3281 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3282 m = p.matcher(blah); 3283 result = new StringBuilder(); 3284 m.find(); 3285 m.appendReplacement(result, "$1w$11w$3"); 3286 if (!result.toString().equals("zzz1w11w3")) 3287 failCount++; 3288 3289 // Check to make sure it backs off $15 to $1 if only three groups 3290 blah = "zzzabcdcdefzzz"; 3291 p = Pattern.compile("(ab)(cd)*(ef)"); 3292 m = p.matcher(blah); 3293 result = new StringBuilder(); 3294 m.find(); 3295 m.appendReplacement(result, "$1w$15w$3"); 3296 if (!result.toString().equals("zzzabwab5wef")) 3297 failCount++; 3298 3299 3300 // Supplementary character test 3301 // SB substitution with literal 3302 blah = toSupplementaries("zzzblahzzz"); 3303 p = Pattern.compile(toSupplementaries("blah")); 3304 m = p.matcher(blah); 3305 result = new StringBuilder(); 3306 try { 3307 m.appendReplacement(result, toSupplementaries("blech")); 3308 failCount++; 3309 } catch (IllegalStateException e) { 3310 } 3311 m.find(); 3312 m.appendReplacement(result, toSupplementaries("blech")); 3313 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3314 failCount++; 3315 m.appendTail(result); 3316 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3317 failCount++; 3318 3319 // SB substitution with groups 3320 blah = toSupplementaries("zzzabcdzzz"); 3321 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3322 m = p.matcher(blah); 3323 result = new StringBuilder(); 3324 try { 3325 m.appendReplacement(result, "$1"); 3326 failCount++; 3327 } catch (IllegalStateException e) { 3328 } 3329 m.find(); 3330 m.appendReplacement(result, "$1"); 3331 if (!result.toString().equals(toSupplementaries("zzzab"))) 3332 failCount++; 3333 3334 m.appendTail(result); 3335 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3336 failCount++; 3337 3338 // SB substitution with 3 groups 3339 blah = toSupplementaries("zzzabcdcdefzzz"); 3340 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3341 m = p.matcher(blah); 3342 result = new StringBuilder(); 3343 try { 3344 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3345 failCount++; 3346 } catch (IllegalStateException e) { 3347 } 3348 m.find(); 3349 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3350 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3351 failCount++; 3352 3353 m.appendTail(result); 3354 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3355 failCount++; 3356 3357 // SB substitution with groups and three matches 3358 // skipping middle match 3359 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3360 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3361 m = p.matcher(blah); 3362 result = new StringBuilder(); 3363 try { 3364 m.appendReplacement(result, "$1"); 3365 failCount++; 3366 } catch (IllegalStateException e) { 3367 } 3368 m.find(); 3369 m.appendReplacement(result, "$1"); 3370 if (!result.toString().equals(toSupplementaries("zzzab"))) 3371 failCount++; 3372 3373 m.find(); 3374 m.find(); 3375 m.appendReplacement(result, "$2"); 3376 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3377 failCount++; 3378 3379 m.appendTail(result); 3380 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3381 failCount++; 3382 3383 // Check to make sure escaped $ is ignored 3384 blah = toSupplementaries("zzzabcdcdefzzz"); 3385 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3386 m = p.matcher(blah); 3387 result = new StringBuilder(); 3388 m.find(); 3389 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3390 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3391 failCount++; 3392 3393 m.appendTail(result); 3394 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3395 failCount++; 3396 3397 // Check to make sure a reference to nonexistent group causes error 3398 blah = toSupplementaries("zzzabcdcdefzzz"); 3399 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3400 m = p.matcher(blah); 3401 result = new StringBuilder(); 3402 m.find(); 3403 try { 3404 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3405 failCount++; 3406 } catch (IndexOutOfBoundsException ioobe) { 3407 // Correct result 3408 } 3409 // Check double digit group references 3410 blah = toSupplementaries("zzz123456789101112zzz"); 3411 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3412 m = p.matcher(blah); 3413 result = new StringBuilder(); 3414 m.find(); 3415 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3416 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3417 failCount++; 3418 3419 // Check to make sure it backs off $15 to $1 if only three groups 3420 blah = toSupplementaries("zzzabcdcdefzzz"); 3421 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3422 m = p.matcher(blah); 3423 result = new StringBuilder(); 3424 m.find(); 3425 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3426 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3427 failCount++; 3428 // Check nothing has been appended into the output buffer if 3429 // the replacement string triggers IllegalArgumentException. 3430 p = Pattern.compile("(abc)"); 3431 m = p.matcher("abcd"); 3432 result = new StringBuilder(); 3433 m.find(); 3434 try { 3435 m.appendReplacement(result, ("xyz$g")); 3436 failCount++; 3437 } catch (IllegalArgumentException iae) { 3438 if (result.length() != 0) 3439 failCount++; 3440 } 3441 report("SB Substitution 2"); 3442 } 3443 3444 /* 3445 * 5 groups of characters are created to make a substitution string. 3446 * A base string will be created including random lead chars, the 3447 * substitution string, and random trailing chars. 3448 * A pattern containing the 5 groups is searched for and replaced with: 3449 * random group + random string + random group. 3450 * The results are checked for correctness. 3451 */ 3452 private static void substitutionBasher() { 3453 for (int runs = 0; runs<1000; runs++) { 3454 // Create a base string to work in 3455 int leadingChars = generator.nextInt(10); 3456 StringBuffer baseBuffer = new StringBuffer(100); 3457 String leadingString = getRandomAlphaString(leadingChars); 3458 baseBuffer.append(leadingString); 3459 3460 // Create 5 groups of random number of random chars 3461 // Create the string to substitute 3462 // Create the pattern string to search for 3463 StringBuffer bufferToSub = new StringBuffer(25); 3464 StringBuffer bufferToPat = new StringBuffer(50); 3465 String[] groups = new String[5]; 3466 for(int i=0; i<5; i++) { 3467 int aGroupSize = generator.nextInt(5)+1; 3468 groups[i] = getRandomAlphaString(aGroupSize); 3469 bufferToSub.append(groups[i]); 3470 bufferToPat.append('('); 3471 bufferToPat.append(groups[i]); 3472 bufferToPat.append(')'); 3473 } 3474 String stringToSub = bufferToSub.toString(); 3475 String pattern = bufferToPat.toString(); 3476 3477 // Place sub string into working string at random index 3478 baseBuffer.append(stringToSub); 3479 3480 // Append random chars to end 3481 int trailingChars = generator.nextInt(10); 3482 String trailingString = getRandomAlphaString(trailingChars); 3483 baseBuffer.append(trailingString); 3484 String baseString = baseBuffer.toString(); 3485 3486 // Create test pattern and matcher 3487 Pattern p = Pattern.compile(pattern); 3488 Matcher m = p.matcher(baseString); 3489 3490 // Reject candidate if pattern happens to start early 3491 m.find(); 3492 if (m.start() < leadingChars) 3493 continue; 3494 3495 // Reject candidate if more than one match 3496 if (m.find()) 3497 continue; 3498 3499 // Construct a replacement string with : 3500 // random group + random string + random group 3501 StringBuffer bufferToRep = new StringBuffer(); 3502 int groupIndex1 = generator.nextInt(5); 3503 bufferToRep.append("$" + (groupIndex1 + 1)); 3504 String randomMidString = getRandomAlphaString(5); 3505 bufferToRep.append(randomMidString); 3506 int groupIndex2 = generator.nextInt(5); 3507 bufferToRep.append("$" + (groupIndex2 + 1)); 3508 String replacement = bufferToRep.toString(); 3509 3510 // Do the replacement 3511 String result = m.replaceAll(replacement); 3512 3513 // Construct expected result 3514 StringBuffer bufferToRes = new StringBuffer(); 3515 bufferToRes.append(leadingString); 3516 bufferToRes.append(groups[groupIndex1]); 3517 bufferToRes.append(randomMidString); 3518 bufferToRes.append(groups[groupIndex2]); 3519 bufferToRes.append(trailingString); 3520 String expectedResult = bufferToRes.toString(); 3521 3522 // Check results 3523 if (!result.equals(expectedResult)) 3524 failCount++; 3525 } 3526 3527 report("Substitution Basher"); 3528 } 3529 3530 /* 3531 * 5 groups of characters are created to make a substitution string. 3532 * A base string will be created including random lead chars, the 3533 * substitution string, and random trailing chars. 3534 * A pattern containing the 5 groups is searched for and replaced with: 3535 * random group + random string + random group. 3536 * The results are checked for correctness. 3537 */ 3538 private static void substitutionBasher2() { 3539 for (int runs = 0; runs<1000; runs++) { 3540 // Create a base string to work in 3541 int leadingChars = generator.nextInt(10); 3542 StringBuilder baseBuffer = new StringBuilder(100); 3543 String leadingString = getRandomAlphaString(leadingChars); 3544 baseBuffer.append(leadingString); 3545 3546 // Create 5 groups of random number of random chars 3547 // Create the string to substitute 3548 // Create the pattern string to search for 3549 StringBuilder bufferToSub = new StringBuilder(25); 3550 StringBuilder bufferToPat = new StringBuilder(50); 3551 String[] groups = new String[5]; 3552 for(int i=0; i<5; i++) { 3553 int aGroupSize = generator.nextInt(5)+1; 3554 groups[i] = getRandomAlphaString(aGroupSize); 3555 bufferToSub.append(groups[i]); 3556 bufferToPat.append('('); 3557 bufferToPat.append(groups[i]); 3558 bufferToPat.append(')'); 3559 } 3560 String stringToSub = bufferToSub.toString(); 3561 String pattern = bufferToPat.toString(); 3562 3563 // Place sub string into working string at random index 3564 baseBuffer.append(stringToSub); 3565 3566 // Append random chars to end 3567 int trailingChars = generator.nextInt(10); 3568 String trailingString = getRandomAlphaString(trailingChars); 3569 baseBuffer.append(trailingString); 3570 String baseString = baseBuffer.toString(); 3571 3572 // Create test pattern and matcher 3573 Pattern p = Pattern.compile(pattern); 3574 Matcher m = p.matcher(baseString); 3575 3576 // Reject candidate if pattern happens to start early 3577 m.find(); 3578 if (m.start() < leadingChars) 3579 continue; 3580 3581 // Reject candidate if more than one match 3582 if (m.find()) 3583 continue; 3584 3585 // Construct a replacement string with : 3586 // random group + random string + random group 3587 StringBuilder bufferToRep = new StringBuilder(); 3588 int groupIndex1 = generator.nextInt(5); 3589 bufferToRep.append("$" + (groupIndex1 + 1)); 3590 String randomMidString = getRandomAlphaString(5); 3591 bufferToRep.append(randomMidString); 3592 int groupIndex2 = generator.nextInt(5); 3593 bufferToRep.append("$" + (groupIndex2 + 1)); 3594 String replacement = bufferToRep.toString(); 3595 3596 // Do the replacement 3597 String result = m.replaceAll(replacement); 3598 3599 // Construct expected result 3600 StringBuilder bufferToRes = new StringBuilder(); 3601 bufferToRes.append(leadingString); 3602 bufferToRes.append(groups[groupIndex1]); 3603 bufferToRes.append(randomMidString); 3604 bufferToRes.append(groups[groupIndex2]); 3605 bufferToRes.append(trailingString); 3606 String expectedResult = bufferToRes.toString(); 3607 3608 // Check results 3609 if (!result.equals(expectedResult)) { 3610 failCount++; 3611 } 3612 } 3613 3614 report("Substitution Basher 2"); 3615 } 3616 3617 /** 3618 * Checks the handling of some escape sequences that the Pattern 3619 * class should process instead of the java compiler. These are 3620 * not in the file because the escapes should be be processed 3621 * by the Pattern class when the regex is compiled. 3622 */ 3623 private static void escapes() throws Exception { 3624 Pattern p = Pattern.compile("\\043"); 3625 Matcher m = p.matcher("#"); 3626 if (!m.find()) 3627 failCount++; 3628 3629 p = Pattern.compile("\\x23"); 3630 m = p.matcher("#"); 3631 if (!m.find()) 3632 failCount++; 3633 3634 p = Pattern.compile("\\u0023"); 3635 m = p.matcher("#"); 3636 if (!m.find()) 3637 failCount++; 3638 3639 report("Escape sequences"); 3640 } 3641 3642 /** 3643 * Checks the handling of blank input situations. These 3644 * tests are incompatible with my test file format. 3645 */ 3646 private static void blankInput() throws Exception { 3647 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3648 Matcher m = p.matcher(""); 3649 if (m.find()) 3650 failCount++; 3651 3652 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3653 m = p.matcher(""); 3654 if (!m.find()) 3655 failCount++; 3656 3657 p = Pattern.compile("abc"); 3658 m = p.matcher(""); 3659 if (m.find()) 3660 failCount++; 3661 3662 p = Pattern.compile("a*"); 3663 m = p.matcher(""); 3664 if (!m.find()) 3665 failCount++; 3666 3667 report("Blank input"); 3668 } 3669 3670 /** 3671 * Tests the Boyer-Moore pattern matching of a character sequence 3672 * on randomly generated patterns. 3673 */ 3674 private static void bm() throws Exception { 3675 doBnM('a'); 3676 report("Boyer Moore (ASCII)"); 3677 3678 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3679 report("Boyer Moore (Supplementary)"); 3680 } 3681 3682 private static void doBnM(int baseCharacter) throws Exception { 3683 int achar=0; 3684 3685 for (int i=0; i<100; i++) { 3686 // Create a short pattern to search for 3687 int patternLength = generator.nextInt(7) + 4; 3688 StringBuffer patternBuffer = new StringBuffer(patternLength); 3689 String pattern; 3690 retry: for (;;) { 3691 for (int x=0; x<patternLength; x++) { 3692 int ch = baseCharacter + generator.nextInt(26); 3693 if (Character.isSupplementaryCodePoint(ch)) { 3694 patternBuffer.append(Character.toChars(ch)); 3695 } else { 3696 patternBuffer.append((char)ch); 3697 } 3698 } 3699 pattern = patternBuffer.toString(); 3700 3701 // Avoid patterns that start and end with the same substring 3702 // See JDK-6854417 3703 for (int x=1; x < pattern.length(); x++) { 3704 if (pattern.startsWith(pattern.substring(x))) 3705 continue retry; 3706 } 3707 break; 3708 } 3709 Pattern p = Pattern.compile(pattern); 3710 3711 // Create a buffer with random ASCII chars that does 3712 // not match the sample 3713 String toSearch = null; 3714 StringBuffer s = null; 3715 Matcher m = p.matcher(""); 3716 do { 3717 s = new StringBuffer(100); 3718 for (int x=0; x<100; x++) { 3719 int ch = baseCharacter + generator.nextInt(26); 3720 if (Character.isSupplementaryCodePoint(ch)) { 3721 s.append(Character.toChars(ch)); 3722 } else { 3723 s.append((char)ch); 3724 } 3725 } 3726 toSearch = s.toString(); 3727 m.reset(toSearch); 3728 } while (m.find()); 3729 3730 // Insert the pattern at a random spot 3731 int insertIndex = generator.nextInt(99); 3732 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3733 insertIndex++; 3734 s = s.insert(insertIndex, pattern); 3735 toSearch = s.toString(); 3736 3737 // Make sure that the pattern is found 3738 m.reset(toSearch); 3739 if (!m.find()) 3740 failCount++; 3741 3742 // Make sure that the match text is the pattern 3743 if (!m.group().equals(pattern)) 3744 failCount++; 3745 3746 // Make sure match occured at insertion point 3747 if (m.start() != insertIndex) 3748 failCount++; 3749 } 3750 } 3751 3752 /** 3753 * Tests the matching of slices on randomly generated patterns. 3754 * The Boyer-Moore optimization is not done on these patterns 3755 * because it uses unicode case folding. 3756 */ 3757 private static void slice() throws Exception { 3758 doSlice(Character.MAX_VALUE); 3759 report("Slice"); 3760 3761 doSlice(Character.MAX_CODE_POINT); 3762 report("Slice (Supplementary)"); 3763 } 3764 3765 private static void doSlice(int maxCharacter) throws Exception { 3766 Random generator = new Random(); 3767 int achar=0; 3768 3769 for (int i=0; i<100; i++) { 3770 // Create a short pattern to search for 3771 int patternLength = generator.nextInt(7) + 4; 3772 StringBuffer patternBuffer = new StringBuffer(patternLength); 3773 for (int x=0; x<patternLength; x++) { 3774 int randomChar = 0; 3775 while (!Character.isLetterOrDigit(randomChar)) 3776 randomChar = generator.nextInt(maxCharacter); 3777 if (Character.isSupplementaryCodePoint(randomChar)) { 3778 patternBuffer.append(Character.toChars(randomChar)); 3779 } else { 3780 patternBuffer.append((char) randomChar); 3781 } 3782 } 3783 String pattern = patternBuffer.toString(); 3784 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3785 3786 // Create a buffer with random chars that does not match the sample 3787 String toSearch = null; 3788 StringBuffer s = null; 3789 Matcher m = p.matcher(""); 3790 do { 3791 s = new StringBuffer(100); 3792 for (int x=0; x<100; x++) { 3793 int randomChar = 0; 3794 while (!Character.isLetterOrDigit(randomChar)) 3795 randomChar = generator.nextInt(maxCharacter); 3796 if (Character.isSupplementaryCodePoint(randomChar)) { 3797 s.append(Character.toChars(randomChar)); 3798 } else { 3799 s.append((char) randomChar); 3800 } 3801 } 3802 toSearch = s.toString(); 3803 m.reset(toSearch); 3804 } while (m.find()); 3805 3806 // Insert the pattern at a random spot 3807 int insertIndex = generator.nextInt(99); 3808 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3809 insertIndex++; 3810 s = s.insert(insertIndex, pattern); 3811 toSearch = s.toString(); 3812 3813 // Make sure that the pattern is found 3814 m.reset(toSearch); 3815 if (!m.find()) 3816 failCount++; 3817 3818 // Make sure that the match text is the pattern 3819 if (!m.group().equals(pattern)) 3820 failCount++; 3821 3822 // Make sure match occured at insertion point 3823 if (m.start() != insertIndex) 3824 failCount++; 3825 } 3826 } 3827 3828 private static void explainFailure(String pattern, String data, 3829 String expected, String actual) { 3830 System.err.println("----------------------------------------"); 3831 System.err.println("Pattern = "+pattern); 3832 System.err.println("Data = "+data); 3833 System.err.println("Expected = " + expected); 3834 System.err.println("Actual = " + actual); 3835 } 3836 3837 private static void explainFailure(String pattern, String data, 3838 Throwable t) { 3839 System.err.println("----------------------------------------"); 3840 System.err.println("Pattern = "+pattern); 3841 System.err.println("Data = "+data); 3842 t.printStackTrace(System.err); 3843 } 3844 3845 // Testing examples from a file 3846 3847 /** 3848 * Goes through the file "TestCases.txt" and creates many patterns 3849 * described in the file, matching the patterns against input lines in 3850 * the file, and comparing the results against the correct results 3851 * also found in the file. The file format is described in comments 3852 * at the head of the file. 3853 */ 3854 private static void processFile(String fileName) throws Exception { 3855 File testCases = new File(System.getProperty("test.src", "."), 3856 fileName); 3857 FileInputStream in = new FileInputStream(testCases); 3858 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3859 3860 // Process next test case. 3861 String aLine; 3862 while((aLine = r.readLine()) != null) { 3863 // Read a line for pattern 3864 String patternString = grabLine(r); 3865 Pattern p = null; 3866 try { 3867 p = compileTestPattern(patternString); 3868 } catch (PatternSyntaxException e) { 3869 String dataString = grabLine(r); 3870 String expectedResult = grabLine(r); 3871 if (expectedResult.startsWith("error")) 3872 continue; 3873 explainFailure(patternString, dataString, e); 3874 failCount++; 3875 continue; 3876 } 3877 3878 // Read a line for input string 3879 String dataString = grabLine(r); 3880 Matcher m = p.matcher(dataString); 3881 StringBuffer result = new StringBuffer(); 3882 3883 // Check for IllegalStateExceptions before a match 3884 failCount += preMatchInvariants(m); 3885 3886 boolean found = m.find(); 3887 3888 if (found) 3889 failCount += postTrueMatchInvariants(m); 3890 else 3891 failCount += postFalseMatchInvariants(m); 3892 3893 if (found) { 3894 result.append("true "); 3895 result.append(m.group(0) + " "); 3896 } else { 3897 result.append("false "); 3898 } 3899 3900 result.append(m.groupCount()); 3901 3902 if (found) { 3903 for (int i=1; i<m.groupCount()+1; i++) 3904 if (m.group(i) != null) 3905 result.append(" " +m.group(i)); 3906 } 3907 3908 // Read a line for the expected result 3909 String expectedResult = grabLine(r); 3910 3911 if (!result.toString().equals(expectedResult)) { 3912 explainFailure(patternString, dataString, expectedResult, result.toString()); 3913 failCount++; 3914 } 3915 } 3916 3917 report(fileName); 3918 } 3919 3920 private static int preMatchInvariants(Matcher m) { 3921 int failCount = 0; 3922 try { 3923 m.start(); 3924 failCount++; 3925 } catch (IllegalStateException ise) {} 3926 try { 3927 m.end(); 3928 failCount++; 3929 } catch (IllegalStateException ise) {} 3930 try { 3931 m.group(); 3932 failCount++; 3933 } catch (IllegalStateException ise) {} 3934 return failCount; 3935 } 3936 3937 private static int postFalseMatchInvariants(Matcher m) { 3938 int failCount = 0; 3939 try { 3940 m.group(); 3941 failCount++; 3942 } catch (IllegalStateException ise) {} 3943 try { 3944 m.start(); 3945 failCount++; 3946 } catch (IllegalStateException ise) {} 3947 try { 3948 m.end(); 3949 failCount++; 3950 } catch (IllegalStateException ise) {} 3951 return failCount; 3952 } 3953 3954 private static int postTrueMatchInvariants(Matcher m) { 3955 int failCount = 0; 3956 //assert(m.start() = m.start(0); 3957 if (m.start() != m.start(0)) 3958 failCount++; 3959 //assert(m.end() = m.end(0); 3960 if (m.start() != m.start(0)) 3961 failCount++; 3962 //assert(m.group() = m.group(0); 3963 if (!m.group().equals(m.group(0))) 3964 failCount++; 3965 try { 3966 m.group(50); 3967 failCount++; 3968 } catch (IndexOutOfBoundsException ise) {} 3969 3970 return failCount; 3971 } 3972 3973 private static Pattern compileTestPattern(String patternString) { 3974 if (!patternString.startsWith("'")) { 3975 return Pattern.compile(patternString); 3976 } 3977 int break1 = patternString.lastIndexOf("'"); 3978 String flagString = patternString.substring( 3979 break1+1, patternString.length()); 3980 patternString = patternString.substring(1, break1); 3981 3982 if (flagString.equals("i")) 3983 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3984 3985 if (flagString.equals("m")) 3986 return Pattern.compile(patternString, Pattern.MULTILINE); 3987 3988 return Pattern.compile(patternString); 3989 } 3990 3991 /** 3992 * Reads a line from the input file. Keeps reading lines until a non 3993 * empty non comment line is read. If the line contains a \n then 3994 * these two characters are replaced by a newline char. If a \\uxxxx 3995 * sequence is read then the sequence is replaced by the unicode char. 3996 */ 3997 private static String grabLine(BufferedReader r) throws Exception { 3998 int index = 0; 3999 String line = r.readLine(); 4000 while (line.startsWith("//") || line.length() < 1) 4001 line = r.readLine(); 4002 while ((index = line.indexOf("\\n")) != -1) { 4003 StringBuffer temp = new StringBuffer(line); 4004 temp.replace(index, index+2, "\n"); 4005 line = temp.toString(); 4006 } 4007 while ((index = line.indexOf("\\u")) != -1) { 4008 StringBuffer temp = new StringBuffer(line); 4009 String value = temp.substring(index+2, index+6); 4010 char aChar = (char)Integer.parseInt(value, 16); 4011 String unicodeChar = "" + aChar; 4012 temp.replace(index, index+6, unicodeChar); 4013 line = temp.toString(); 4014 } 4015 4016 return line; 4017 } 4018 4019 private static void check(Pattern p, String s, String g, String expected) { 4020 Matcher m = p.matcher(s); 4021 m.find(); 4022 if (!m.group(g).equals(expected) || 4023 s.charAt(m.start(g)) != expected.charAt(0) || 4024 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 4025 failCount++; 4026 } 4027 4028 private static void checkReplaceFirst(String p, String s, String r, String expected) 4029 { 4030 if (!expected.equals(Pattern.compile(p) 4031 .matcher(s) 4032 .replaceFirst(r))) 4033 failCount++; 4034 } 4035 4036 private static void checkReplaceAll(String p, String s, String r, String expected) 4037 { 4038 if (!expected.equals(Pattern.compile(p) 4039 .matcher(s) 4040 .replaceAll(r))) 4041 failCount++; 4042 } 4043 4044 private static void checkExpectedFail(String p) { 4045 try { 4046 Pattern.compile(p); 4047 } catch (PatternSyntaxException pse) { 4048 //pse.printStackTrace(); 4049 return; 4050 } 4051 failCount++; 4052 } 4053 4054 private static void checkExpectedIAE(Matcher m, String g) { 4055 m.find(); 4056 try { 4057 m.group(g); 4058 } catch (IllegalArgumentException x) { 4059 //iae.printStackTrace(); 4060 try { 4061 m.start(g); 4062 } catch (IllegalArgumentException xx) { 4063 try { 4064 m.start(g); 4065 } catch (IllegalArgumentException xxx) { 4066 return; 4067 } 4068 } 4069 } 4070 failCount++; 4071 } 4072 4073 private static void checkExpectedNPE(Matcher m) { 4074 m.find(); 4075 try { 4076 m.group(null); 4077 } catch (NullPointerException x) { 4078 try { 4079 m.start(null); 4080 } catch (NullPointerException xx) { 4081 try { 4082 m.end(null); 4083 } catch (NullPointerException xxx) { 4084 return; 4085 } 4086 } 4087 } 4088 failCount++; 4089 } 4090 4091 private static void namedGroupCaptureTest() throws Exception { 4092 check(Pattern.compile("x+(?<gname>y+)z+"), 4093 "xxxyyyzzz", 4094 "gname", 4095 "yyy"); 4096 4097 check(Pattern.compile("x+(?<gname8>y+)z+"), 4098 "xxxyyyzzz", 4099 "gname8", 4100 "yyy"); 4101 4102 //backref 4103 Pattern pattern = Pattern.compile("(a*)bc\\1"); 4104 check(pattern, "zzzaabcazzz", true); // found "abca" 4105 4106 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 4107 "zzzaabcaazzz", true); 4108 4109 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 4110 "abcdefabc", true); 4111 4112 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 4113 "abcdefghijkk", true); 4114 4115 // Supplementary character tests 4116 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4117 toSupplementaries("zzzaabcazzz"), true); 4118 4119 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4120 toSupplementaries("zzzaabcaazzz"), true); 4121 4122 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 4123 toSupplementaries("abcdefabc"), true); 4124 4125 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 4126 "(?<gname>" + 4127 toSupplementaries("k)") + "\\k<gname>"), 4128 toSupplementaries("abcdefghijkk"), true); 4129 4130 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 4131 "xxxyyyzzzyyy", 4132 "gname", 4133 "yyy"); 4134 4135 //replaceFirst/All 4136 checkReplaceFirst("(?<gn>ab)(c*)", 4137 "abccczzzabcczzzabccc", 4138 "${gn}", 4139 "abzzzabcczzzabccc"); 4140 4141 checkReplaceAll("(?<gn>ab)(c*)", 4142 "abccczzzabcczzzabccc", 4143 "${gn}", 4144 "abzzzabzzzab"); 4145 4146 4147 checkReplaceFirst("(?<gn>ab)(c*)", 4148 "zzzabccczzzabcczzzabccczzz", 4149 "${gn}", 4150 "zzzabzzzabcczzzabccczzz"); 4151 4152 checkReplaceAll("(?<gn>ab)(c*)", 4153 "zzzabccczzzabcczzzabccczzz", 4154 "${gn}", 4155 "zzzabzzzabzzzabzzz"); 4156 4157 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4158 "zzzabccczzzabcczzzabccczzz", 4159 "${gn2}", 4160 "zzzccczzzabcczzzabccczzz"); 4161 4162 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4163 "zzzabccczzzabcczzzabccczzz", 4164 "${gn2}", 4165 "zzzccczzzcczzzccczzz"); 4166 4167 //toSupplementaries("(ab)(c*)")); 4168 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4169 ")(?<gn2>" + toSupplementaries("c") + "*)", 4170 toSupplementaries("abccczzzabcczzzabccc"), 4171 "${gn1}", 4172 toSupplementaries("abzzzabcczzzabccc")); 4173 4174 4175 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4176 ")(?<gn2>" + toSupplementaries("c") + "*)", 4177 toSupplementaries("abccczzzabcczzzabccc"), 4178 "${gn1}", 4179 toSupplementaries("abzzzabzzzab")); 4180 4181 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4182 ")(?<gn2>" + toSupplementaries("c") + "*)", 4183 toSupplementaries("abccczzzabcczzzabccc"), 4184 "${gn2}", 4185 toSupplementaries("ccczzzabcczzzabccc")); 4186 4187 4188 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4189 ")(?<gn2>" + toSupplementaries("c") + "*)", 4190 toSupplementaries("abccczzzabcczzzabccc"), 4191 "${gn2}", 4192 toSupplementaries("ccczzzcczzzccc")); 4193 4194 checkReplaceFirst("(?<dog>Dog)AndCat", 4195 "zzzDogAndCatzzzDogAndCatzzz", 4196 "${dog}", 4197 "zzzDogzzzDogAndCatzzz"); 4198 4199 4200 checkReplaceAll("(?<dog>Dog)AndCat", 4201 "zzzDogAndCatzzzDogAndCatzzz", 4202 "${dog}", 4203 "zzzDogzzzDogzzz"); 4204 4205 // backref in Matcher & String 4206 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") || 4207 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4208 failCount++; 4209 4210 // negative 4211 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4212 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4213 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4214 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4215 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4216 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4217 "gnameX"); 4218 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4219 report("NamedGroupCapture"); 4220 } 4221 4222 // This is for bug 6919132 4223 private static void nonBmpClassComplementTest() throws Exception { 4224 Pattern p = Pattern.compile("\\P{Lu}"); 4225 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4226 4227 if (m.find() && m.start() == 1) 4228 failCount++; 4229 4230 // from a unicode category 4231 p = Pattern.compile("\\P{Lu}"); 4232 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4233 if (m.find()) 4234 failCount++; 4235 if (!m.hitEnd()) 4236 failCount++; 4237 4238 // block 4239 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4240 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4241 if (m.find() && m.start() == 1) 4242 failCount++; 4243 4244 p = Pattern.compile("\\P{sc=GRANTHA}"); 4245 m = p.matcher(new String(new int[] {0x11350}, 0, 1)); 4246 if (m.find() && m.start() == 1) 4247 failCount++; 4248 4249 report("NonBmpClassComplement"); 4250 } 4251 4252 private static void unicodePropertiesTest() throws Exception { 4253 // different forms 4254 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4255 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4256 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4257 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4258 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4259 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4260 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4261 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4262 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4263 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4264 failCount++; 4265 4266 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4267 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4268 Matcher lastSM = common; 4269 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4270 4271 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4272 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4273 Matcher lastBM = latin; 4274 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4275 4276 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4277 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4278 continue; // only pick couple code points, they are the same 4279 } 4280 4281 // Unicode Script 4282 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4283 Matcher m; 4284 String str = new String(Character.toChars(cp)); 4285 if (script == lastScript) { 4286 m = lastSM; 4287 m.reset(str); 4288 } else { 4289 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4290 } 4291 if (!m.matches()) { 4292 failCount++; 4293 } 4294 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common; 4295 other.reset(str); 4296 if (other.matches()) { 4297 failCount++; 4298 } 4299 lastSM = m; 4300 lastScript = script; 4301 4302 // Unicode Block 4303 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4304 if (block == null) { 4305 //System.out.printf("Not a Block: cp=%x%n", cp); 4306 continue; 4307 } 4308 if (block == lastBlock) { 4309 m = lastBM; 4310 m.reset(str); 4311 } else { 4312 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4313 } 4314 if (!m.matches()) { 4315 failCount++; 4316 } 4317 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4318 other.reset(str); 4319 if (other.matches()) { 4320 failCount++; 4321 } 4322 lastBM = m; 4323 lastBlock = block; 4324 } 4325 report("unicodeProperties"); 4326 } 4327 4328 private static void unicodeHexNotationTest() throws Exception { 4329 4330 // negative 4331 checkExpectedFail("\\x{-23}"); 4332 checkExpectedFail("\\x{110000}"); 4333 checkExpectedFail("\\x{}"); 4334 checkExpectedFail("\\x{AB[ef]"); 4335 4336 // codepoint 4337 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4338 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4339 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4340 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4341 4342 // in class 4343 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4344 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4345 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4346 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4347 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4348 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4349 4350 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4351 String s = "A" + new String(Character.toChars(cp)) + "B"; 4352 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp) 4353 : String.format("\\u%04x\\u%04x", 4354 (int) Character.toChars(cp)[0], 4355 (int) Character.toChars(cp)[1]); 4356 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4357 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4358 failCount++; 4359 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4360 failCount++; 4361 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4362 failCount++; 4363 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4364 failCount++; 4365 } 4366 report("unicodeHexNotation"); 4367 } 4368 4369 private static void unicodeClassesTest() throws Exception { 4370 4371 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4372 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4373 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4374 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4375 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4376 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4377 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4378 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4379 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4380 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4381 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4382 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4383 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4384 Matcher bound = Pattern.compile("\\b").matcher(""); 4385 Matcher word = Pattern.compile("\\w++").matcher(""); 4386 // UNICODE_CHARACTER_CLASS 4387 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4388 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4389 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4390 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4391 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4392 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4393 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4394 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4395 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4396 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4397 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4398 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4399 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4400 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4401 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4402 // embedded flag (?U) 4403 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4404 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4405 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4406 4407 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4408 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4409 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4410 // properties 4411 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4412 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4413 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4414 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4415 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4416 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4417 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4418 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4419 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4420 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4421 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4422 // javaMethod 4423 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4424 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4425 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4426 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4427 // GC/C 4428 Matcher gcC = Pattern.compile("\\p{C}").matcher(""); 4429 4430 for (int cp = 1; cp < 0x30000; cp++) { 4431 String str = new String(Character.toChars(cp)); 4432 int type = Character.getType(cp); 4433 if (// lower 4434 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4435 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4436 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4437 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4438 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4439 // upper 4440 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4441 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4442 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4443 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4444 // alpha 4445 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4446 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4447 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4448 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4449 // digit 4450 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4451 Character.isDigit(cp) != digitU.reset(str).matches() || 4452 // alnum 4453 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4454 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4455 // punct 4456 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4457 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4458 // graph 4459 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4460 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4461 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4462 // blank 4463 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4464 != blank.reset(str).matches() || 4465 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4466 // print 4467 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4468 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4469 // cntrl 4470 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4471 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4472 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4473 // hexdigit 4474 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4475 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4476 // space 4477 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4478 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4479 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4480 // word 4481 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4482 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4483 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4484 // bwordb 4485 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4486 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4487 // properties 4488 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4489 Character.isLetter(cp) != letterP.reset(str).matches()|| 4490 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4491 Character.isIdeographic(cp) != ideogJ.reset(str).matches() || 4492 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 4493 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4494 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() || 4495 // gc_C 4496 (Character.CONTROL == type || Character.FORMAT == type || 4497 Character.PRIVATE_USE == type || Character.SURROGATE == type || 4498 Character.UNASSIGNED == type) 4499 != gcC.reset(str).matches()) { 4500 failCount++; 4501 } 4502 } 4503 4504 // bounds/word align 4505 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4506 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4507 failCount++; 4508 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4509 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4510 failCount++; 4511 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4512 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4513 failCount++; 4514 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4515 failCount++; 4516 report("unicodePredefinedClasses"); 4517 } 4518 4519 private static void unicodeCharacterNameTest() throws Exception { 4520 4521 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { 4522 if (!Character.isValidCodePoint(cp) || 4523 Character.getType(cp) == Character.UNASSIGNED) 4524 continue; 4525 String str = new String(Character.toChars(cp)); 4526 // single 4527 String p = "\\N{" + Character.getName(cp) + "}"; 4528 if (!Pattern.compile(p).matcher(str).matches()) { 4529 failCount++; 4530 } 4531 // class[c] 4532 p = "[\\N{" + Character.getName(cp) + "}]"; 4533 if (!Pattern.compile(p).matcher(str).matches()) { 4534 failCount++; 4535 } 4536 } 4537 4538 // range 4539 for (int i = 0; i < 10; i++) { 4540 int start = generator.nextInt(20); 4541 int end = start + generator.nextInt(200); 4542 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; 4543 String str; 4544 for (int cp = start; cp < end; cp++) { 4545 str = new String(Character.toChars(cp)); 4546 if (!Pattern.compile(p).matcher(str).matches()) { 4547 failCount++; 4548 } 4549 } 4550 str = new String(Character.toChars(end + 10)); 4551 if (Pattern.compile(p).matcher(str).matches()) { 4552 failCount++; 4553 } 4554 } 4555 4556 // slice 4557 for (int i = 0; i < 10; i++) { 4558 int n = generator.nextInt(256); 4559 int[] buf = new int[n]; 4560 StringBuffer sb = new StringBuffer(1024); 4561 for (int j = 0; j < n; j++) { 4562 int cp = generator.nextInt(1000); 4563 if (!Character.isValidCodePoint(cp) || 4564 Character.getType(cp) == Character.UNASSIGNED) 4565 cp = 0x4e00; // just use 4e00 4566 sb.append("\\N{" + Character.getName(cp) + "}"); 4567 buf[j] = cp; 4568 } 4569 String p = sb.toString(); 4570 String str = new String(buf, 0, buf.length); 4571 if (!Pattern.compile(p).matcher(str).matches()) { 4572 failCount++; 4573 } 4574 } 4575 report("unicodeCharacterName"); 4576 } 4577 4578 private static void horizontalAndVerticalWSTest() throws Exception { 4579 String hws = new String (new char[] { 4580 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4581 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4582 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4583 0x202f, 0x205f, 0x3000 }); 4584 String vws = new String (new char[] { 4585 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4586 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4587 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4588 failCount++; 4589 if (Pattern.compile("\\H").matcher(hws).find() || 4590 Pattern.compile("[\\H]").matcher(hws).find()) 4591 failCount++; 4592 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4593 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4594 failCount++; 4595 if (Pattern.compile("\\V").matcher(vws).find() || 4596 Pattern.compile("[\\V]").matcher(vws).find()) 4597 failCount++; 4598 String prefix = "abcd"; 4599 String suffix = "efgh"; 4600 String ng = "A"; 4601 for (int i = 0; i < hws.length(); i++) { 4602 String c = String.valueOf(hws.charAt(i)); 4603 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4604 if (!m.find() || !c.equals(m.group())) 4605 failCount++; 4606 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4607 if (!m.find() || !c.equals(m.group())) 4608 failCount++; 4609 4610 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4611 if (!m.find() || !ng.equals(m.group())) 4612 failCount++; 4613 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4614 if (!m.find() || !ng.equals(m.group())) 4615 failCount++; 4616 } 4617 for (int i = 0; i < vws.length(); i++) { 4618 String c = String.valueOf(vws.charAt(i)); 4619 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4620 if (!m.find() || !c.equals(m.group())) 4621 failCount++; 4622 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4623 if (!m.find() || !c.equals(m.group())) 4624 failCount++; 4625 4626 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4627 if (!m.find() || !ng.equals(m.group())) 4628 failCount++; 4629 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4630 if (!m.find() || !ng.equals(m.group())) 4631 failCount++; 4632 } 4633 // \v in range is interpreted as 0x0B. This is the undocumented behavior 4634 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4635 failCount++; 4636 report("horizontalAndVerticalWSTest"); 4637 } 4638 4639 private static void linebreakTest() throws Exception { 4640 String linebreaks = new String (new char[] { 4641 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4642 String crnl = "\r\n"; 4643 if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() && 4644 Pattern.compile("\\R").matcher(crnl).matches() && 4645 Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() && 4646 Pattern.compile("\\Rabc").matcher("\rabc").matches() && 4647 Pattern.compile("\\R\\R").matcher(crnl).matches() && // backtracking 4648 Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking 4649 !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029 4650 failCount++; 4651 } 4652 report("linebreakTest"); 4653 } 4654 4655 // #7189363 4656 private static void branchTest() throws Exception { 4657 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4658 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4659 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4660 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4661 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4662 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4663 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4664 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4665 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4666 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4667 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4668 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4669 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4670 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4671 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4672 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4673 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4674 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4675 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4676 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4677 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 4678 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4679 failCount++; 4680 report("branchTest"); 4681 } 4682 4683 // This test is for 8007395 4684 private static void groupCurlyNotFoundSuppTest() throws Exception { 4685 String input = "test this as \ud83d\ude0d"; 4686 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4687 "test(.)*(@[a-zA-Z.]+)", 4688 "test([^B])+(@[a-zA-Z.]+)", 4689 "test([^B])*(@[a-zA-Z.]+)", 4690 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4691 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4692 }) { 4693 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4694 .matcher(input); 4695 try { 4696 if (m.find()) { 4697 failCount++; 4698 } 4699 } catch (Exception x) { 4700 failCount++; 4701 } 4702 } 4703 report("GroupCurly NotFoundSupp"); 4704 } 4705 4706 // This test is for 8023647 4707 private static void groupCurlyBackoffTest() throws Exception { 4708 if (!"abc1c".matches("(\\w)+1\\1") || 4709 "abc11".matches("(\\w)+1\\1")) { 4710 failCount++; 4711 } 4712 report("GroupCurly backoff"); 4713 } 4714 4715 // This test is for 8012646 4716 private static void patternAsPredicate() throws Exception { 4717 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4718 4719 if (p.test("")) { 4720 failCount++; 4721 } 4722 if (!p.test("word")) { 4723 failCount++; 4724 } 4725 if (p.test("1234")) { 4726 failCount++; 4727 } 4728 if (!p.test("word1234")) { 4729 failCount++; 4730 } 4731 report("Pattern.asPredicate"); 4732 } 4733 4734 // This test is for 8184692 4735 private static void patternAsMatchPredicate() throws Exception { 4736 Predicate<String> p = Pattern.compile("[a-z]+").asMatchPredicate(); 4737 4738 if (p.test("")) { 4739 failCount++; 4740 } 4741 if (!p.test("word")) { 4742 failCount++; 4743 } 4744 if (p.test("1234word")) { 4745 failCount++; 4746 } 4747 if (p.test("1234")) { 4748 failCount++; 4749 } 4750 report("Pattern.asMatchPredicate"); 4751 } 4752 4753 4754 // This test is for 8035975 4755 private static void invalidFlags() throws Exception { 4756 for (int flag = 1; flag != 0; flag <<= 1) { 4757 switch (flag) { 4758 case Pattern.CASE_INSENSITIVE: 4759 case Pattern.MULTILINE: 4760 case Pattern.DOTALL: 4761 case Pattern.UNICODE_CASE: 4762 case Pattern.CANON_EQ: 4763 case Pattern.UNIX_LINES: 4764 case Pattern.LITERAL: 4765 case Pattern.UNICODE_CHARACTER_CLASS: 4766 case Pattern.COMMENTS: 4767 // valid flag, continue 4768 break; 4769 default: 4770 try { 4771 Pattern.compile(".", flag); 4772 failCount++; 4773 } catch (IllegalArgumentException expected) { 4774 } 4775 } 4776 } 4777 report("Invalid compile flags"); 4778 } 4779 4780 // This test is for 8158482 4781 private static void embeddedFlags() throws Exception { 4782 try { 4783 Pattern.compile("(?i).(?-i)."); 4784 Pattern.compile("(?m).(?-m)."); 4785 Pattern.compile("(?s).(?-s)."); 4786 Pattern.compile("(?d).(?-d)."); 4787 Pattern.compile("(?u).(?-u)."); 4788 Pattern.compile("(?c).(?-c)."); 4789 Pattern.compile("(?x).(?-x)."); 4790 Pattern.compile("(?U).(?-U)."); 4791 Pattern.compile("(?imsducxU).(?-imsducxU)."); 4792 } catch (PatternSyntaxException x) { 4793 failCount++; 4794 } 4795 report("Embedded flags"); 4796 } 4797 4798 private static void grapheme() throws Exception { 4799 Stream.concat(Files.lines(UCDFiles.GRAPHEME_BREAK_TEST), 4800 Files.lines(Paths.get(System.getProperty("test.src", "."), "GraphemeTestCases.txt"))) 4801 .filter( ln -> ln.length() != 0 && !ln.startsWith("#") ) 4802 .forEach( ln -> { 4803 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); 4804 // System.out.println(str); 4805 String[] strs = ln.split("\u00f7|\u00d7"); 4806 StringBuilder src = new StringBuilder(); 4807 ArrayList<String> graphemes = new ArrayList<>(); 4808 StringBuilder buf = new StringBuilder(); 4809 int offBk = 0; 4810 for (String str : strs) { 4811 if (str.length() == 0) // first empty str 4812 continue; 4813 int cp = Integer.parseInt(str, 16); 4814 src.appendCodePoint(cp); 4815 buf.appendCodePoint(cp); 4816 offBk += (str.length() + 1); 4817 if (ln.charAt(offBk) == '\u00f7') { // DIV 4818 graphemes.add(buf.toString()); 4819 buf = new StringBuilder(); 4820 } 4821 } 4822 Pattern p = Pattern.compile("\\X"); 4823 Matcher m = p.matcher(src.toString()); 4824 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4825 for (String g : graphemes) { 4826 // System.out.printf(" grapheme:=[%s]%n", g); 4827 // (1) test \\X directly 4828 if (!m.find() || !m.group().equals(g)) { 4829 System.out.println("Failed \\X [" + ln + "] : " + g); 4830 failCount++; 4831 } 4832 // (2) test \\b{g} + \\X via Scanner 4833 boolean hasNext = s.hasNext(p); 4834 // if (!s.hasNext() || !s.next().equals(next)) { 4835 if (!s.hasNext(p) || !s.next(p).equals(g)) { 4836 System.out.println("Failed b{g} [" + ln + "] : " + g); 4837 failCount++; 4838 } 4839 } 4840 }); 4841 // some sanity checks 4842 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() || 4843 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() || 4844 !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches()) 4845 failCount++; 4846 // make sure "\b{n}" still works 4847 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches()) 4848 failCount++; 4849 report("Unicode extended grapheme cluster"); 4850 } 4851 4852 // hangup/timeout if go into exponential backtracking 4853 private static void expoBacktracking() throws Exception { 4854 4855 Object[][] patternMatchers = { 4856 // 6328855 4857 { "(.*\n*)*", 4858 "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)", 4859 false }, 4860 // 6192895 4861 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4862 "Hello World this is a test this is a test this is a test A", 4863 true }, 4864 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4865 "Hello World this is a test this is a test this is a test \u4e00 ", 4866 false }, 4867 { " *([a-z0-9]+ *)+", 4868 "hello world this is a test this is a test this is a test A", 4869 false }, 4870 // 4771934 [FIXED] #5013651? 4871 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4872 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com", 4873 true }, 4874 // 4866249 [FIXED] 4875 { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>", 4876 "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">", 4877 true }, 4878 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4879 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com", 4880 false }, 4881 // 6345469 4882 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4883 " < br/> < / p> <p> <html> <adfasfdasdf> </p>", 4884 true }, // --> matched 4885 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4886 " < br/> < / p> <p> <html> <adfasfdasdf> p </p>", 4887 false }, 4888 // 5026912 4889 { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$", 4890 "156580451111112225588087755221111111566969655555555", 4891 false}, 4892 // 6988218 4893 { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')", 4894 "'%)) order by ANGEBOT.ID", 4895 false}, // find 4896 // 6693451 4897 { "^(\\s*foo\\s*)*$", 4898 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo", 4899 true }, 4900 { "^(\\s*foo\\s*)*$", 4901 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo", 4902 false 4903 }, 4904 // 7006761 4905 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true}, 4906 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false}, 4907 // 8140212 4908 { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)", 4909 "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()", 4910 false 4911 }, 4912 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true}, 4913 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false}, 4914 4915 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true }, 4916 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4917 4918 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true}, 4919 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4920 4921 { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false}, 4922 4923 /* not fixed 4924 //8132141 ---> second level exponential backtracking 4925 { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*", 4926 "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" }, 4927 */ 4928 }; 4929 4930 for (Object[] pm : patternMatchers) { 4931 String p = (String)pm[0]; 4932 String s = (String)pm[1]; 4933 boolean r = (Boolean)pm[2]; 4934 if (r != Pattern.compile(p).matcher(s).matches()) { 4935 failCount++; 4936 } 4937 } 4938 } 4939 4940 private static void invalidGroupName() { 4941 // Invalid start of a group name 4942 for (String groupName : List.of("", ".", "0", "\u0040", "\u005b", 4943 "\u0060", "\u007b", "\u0416")) { 4944 for (String pat : List.of("(?<" + groupName + ">)", 4945 "\\k<" + groupName + ">")) { 4946 try { 4947 Pattern.compile(pat); 4948 failCount++; 4949 } catch (PatternSyntaxException e) { 4950 if (!e.getMessage().startsWith( 4951 "capturing group name does not start with a" 4952 + " Latin letter")) { 4953 failCount++; 4954 } 4955 } 4956 } 4957 } 4958 // Invalid char in a group name 4959 for (String groupName : List.of("a.", "b\u0040", "c\u005b", 4960 "d\u0060", "e\u007b", "f\u0416")) { 4961 for (String pat : List.of("(?<" + groupName + ">)", 4962 "\\k<" + groupName + ">")) { 4963 try { 4964 Pattern.compile(pat); 4965 failCount++; 4966 } catch (PatternSyntaxException e) { 4967 if (!e.getMessage().startsWith( 4968 "named capturing group is missing trailing '>'")) { 4969 failCount++; 4970 } 4971 } 4972 } 4973 } 4974 report("Invalid capturing group names"); 4975 } 4976 4977 private static void illegalRepetitionRange() { 4978 // huge integers > (2^31 - 1) 4979 String n = BigInteger.valueOf(1L << 32) 4980 .toString(); 4981 String m = BigInteger.valueOf(1L << 31) 4982 .add(new BigInteger(80, generator)) 4983 .toString(); 4984 for (String rep : List.of("", "x", ".", ",", "-1", "2,1", 4985 n, n + ",", "0," + n, n + "," + m, m, m + ",", "0," + m)) { 4986 String pat = ".{" + rep + "}"; 4987 try { 4988 Pattern.compile(pat); 4989 failCount++; 4990 System.out.println("Expected to fail. Pattern: " + pat); 4991 } catch (PatternSyntaxException e) { 4992 if (!e.getMessage().startsWith("Illegal repetition")) { 4993 failCount++; 4994 System.out.println("Unexpected error message: " + e.getMessage()); 4995 } 4996 } catch (Throwable t) { 4997 failCount++; 4998 System.out.println("Unexpected exception: " + t); 4999 } 5000 } 5001 report("illegalRepetitionRange"); 5002 } 5003 5004 private static void surrogatePairWithCanonEq() { 5005 try { 5006 Pattern.compile("\ud834\udd21", Pattern.CANON_EQ); 5007 } catch (Throwable t) { 5008 failCount++; 5009 System.out.println("Unexpected exception: " + t); 5010 } 5011 report("surrogatePairWithCanonEq"); 5012 } 5013 5014 // This test is for 8235812 5015 private static void lineBreakWithQuantifier() { 5016 // key: pattern 5017 // value: lengths of input that must match the pattern 5018 Map<String, List<Integer>> cases = Map.ofEntries( 5019 Map.entry("\\R?", List.of(0, 1)), 5020 Map.entry("\\R*", List.of(0, 1, 2, 3)), 5021 Map.entry("\\R+", List.of(1, 2, 3)), 5022 Map.entry("\\R{0}", List.of(0)), 5023 Map.entry("\\R{1}", List.of(1)), 5024 Map.entry("\\R{2}", List.of(2)), 5025 Map.entry("\\R{3}", List.of(3)), 5026 Map.entry("\\R{0,}", List.of(0, 1, 2, 3)), 5027 Map.entry("\\R{1,}", List.of(1, 2, 3)), 5028 Map.entry("\\R{2,}", List.of(2, 3)), 5029 Map.entry("\\R{3,}", List.of(3)), 5030 Map.entry("\\R{0,0}", List.of(0)), 5031 Map.entry("\\R{0,1}", List.of(0, 1)), 5032 Map.entry("\\R{0,2}", List.of(0, 1, 2)), 5033 Map.entry("\\R{0,3}", List.of(0, 1, 2, 3)), 5034 Map.entry("\\R{1,1}", List.of(1)), 5035 Map.entry("\\R{1,2}", List.of(1, 2)), 5036 Map.entry("\\R{1,3}", List.of(1, 2, 3)), 5037 Map.entry("\\R{2,2}", List.of(2)), 5038 Map.entry("\\R{2,3}", List.of(2, 3)), 5039 Map.entry("\\R{3,3}", List.of(3)), 5040 Map.entry("\\R", List.of(1)), 5041 Map.entry("\\R\\R", List.of(2)), 5042 Map.entry("\\R\\R\\R", List.of(3)) 5043 ); 5044 5045 // key: length of input 5046 // value: all possible inputs of given length 5047 Map<Integer, List<String>> inputs = new HashMap<>(); 5048 String[] Rs = { "\r\n", "\r", "\n", 5049 "\u000B", "\u000C", "\u0085", "\u2028", "\u2029" }; 5050 StringBuilder sb = new StringBuilder(); 5051 for (int len = 0; len <= 3; ++len) { 5052 int[] idx = new int[len + 1]; 5053 do { 5054 sb.setLength(0); 5055 for (int j = 0; j < len; ++j) 5056 sb.append(Rs[idx[j]]); 5057 inputs.computeIfAbsent(len, ArrayList::new).add(sb.toString()); 5058 idx[0]++; 5059 for (int j = 0; j < len; ++j) { 5060 if (idx[j] < Rs.length) 5061 break; 5062 idx[j] = 0; 5063 idx[j+1]++; 5064 } 5065 } while (idx[len] == 0); 5066 } 5067 5068 // exhaustive testing 5069 for (String patStr : cases.keySet()) { 5070 Pattern[] pats = patStr.endsWith("R") 5071 ? new Pattern[] { Pattern.compile(patStr) } // no quantifiers 5072 : new Pattern[] { Pattern.compile(patStr), // greedy 5073 Pattern.compile(patStr + "?") }; // reluctant 5074 Matcher m = pats[0].matcher(""); 5075 for (Pattern p : pats) { 5076 m.usePattern(p); 5077 for (int len : cases.get(patStr)) { 5078 for (String in : inputs.get(len)) { 5079 if (!m.reset(in).matches()) { 5080 failCount++; 5081 System.err.println("Expected to match '" + 5082 in + "' =~ /" + p + "/"); 5083 } 5084 } 5085 } 5086 } 5087 } 5088 report("lineBreakWithQuantifier"); 5089 } 5090 5091 // This test is for 8214245 5092 private static void caseInsensitivePMatch() { 5093 for (String input : List.of("abcd", "AbCd", "ABCD")) { 5094 for (String pattern : List.of("abcd", "aBcD", "[a-d]{4}", 5095 "(?:a|b|c|d){4}", "\\p{Lower}{4}", "\\p{Ll}{4}", 5096 "\\p{IsLl}{4}", "\\p{gc=Ll}{4}", 5097 "\\p{general_category=Ll}{4}", "\\p{IsLowercase}{4}", 5098 "\\p{javaLowerCase}{4}", "\\p{Upper}{4}", "\\p{Lu}{4}", 5099 "\\p{IsLu}{4}", "\\p{gc=Lu}{4}", "\\p{general_category=Lu}{4}", 5100 "\\p{IsUppercase}{4}", "\\p{javaUpperCase}{4}", 5101 "\\p{Lt}{4}", "\\p{IsLt}{4}", "\\p{gc=Lt}{4}", 5102 "\\p{general_category=Lt}{4}", "\\p{IsTitlecase}{4}", 5103 "\\p{javaTitleCase}{4}", "[\\p{Lower}]{4}", "[\\p{Ll}]{4}", 5104 "[\\p{IsLl}]{4}", "[\\p{gc=Ll}]{4}", 5105 "[\\p{general_category=Ll}]{4}", "[\\p{IsLowercase}]{4}", 5106 "[\\p{javaLowerCase}]{4}", "[\\p{Upper}]{4}", "[\\p{Lu}]{4}", 5107 "[\\p{IsLu}]{4}", "[\\p{gc=Lu}]{4}", 5108 "[\\p{general_category=Lu}]{4}", "[\\p{IsUppercase}]{4}", 5109 "[\\p{javaUpperCase}]{4}", "[\\p{Lt}]{4}", "[\\p{IsLt}]{4}", 5110 "[\\p{gc=Lt}]{4}", "[\\p{general_category=Lt}]{4}", 5111 "[\\p{IsTitlecase}]{4}", "[\\p{javaTitleCase}]{4}")) 5112 { 5113 if (!Pattern.compile(pattern, Pattern.CASE_INSENSITIVE) 5114 .matcher(input) 5115 .matches()) 5116 { 5117 failCount++; 5118 System.err.println("Expected to match: " + 5119 "'" + input + "' =~ /" + pattern + "/"); 5120 } 5121 } 5122 } 5123 5124 for (String input : List.of("\u01c7", "\u01c8", "\u01c9")) { 5125 for (String pattern : List.of("\u01c7", "\u01c8", "\u01c9", 5126 "[\u01c7\u01c8]", "[\u01c7\u01c9]", "[\u01c8\u01c9]", 5127 "[\u01c7-\u01c8]", "[\u01c8-\u01c9]", "[\u01c7-\u01c9]", 5128 "\\p{Lower}", "\\p{Ll}", "\\p{IsLl}", "\\p{gc=Ll}", 5129 "\\p{general_category=Ll}", "\\p{IsLowercase}", 5130 "\\p{javaLowerCase}", "\\p{Upper}", "\\p{Lu}", 5131 "\\p{IsLu}", "\\p{gc=Lu}", "\\p{general_category=Lu}", 5132 "\\p{IsUppercase}", "\\p{javaUpperCase}", 5133 "\\p{Lt}", "\\p{IsLt}", "\\p{gc=Lt}", 5134 "\\p{general_category=Lt}", "\\p{IsTitlecase}", 5135 "\\p{javaTitleCase}", "[\\p{Lower}]", "[\\p{Ll}]", 5136 "[\\p{IsLl}]", "[\\p{gc=Ll}]", 5137 "[\\p{general_category=Ll}]", "[\\p{IsLowercase}]", 5138 "[\\p{javaLowerCase}]", "[\\p{Upper}]", "[\\p{Lu}]", 5139 "[\\p{IsLu}]", "[\\p{gc=Lu}]", 5140 "[\\p{general_category=Lu}]", "[\\p{IsUppercase}]", 5141 "[\\p{javaUpperCase}]", "[\\p{Lt}]", "[\\p{IsLt}]", 5142 "[\\p{gc=Lt}]", "[\\p{general_category=Lt}]", 5143 "[\\p{IsTitlecase}]", "[\\p{javaTitleCase}]")) 5144 { 5145 if (!Pattern.compile(pattern, Pattern.CASE_INSENSITIVE 5146 | Pattern.UNICODE_CHARACTER_CLASS) 5147 .matcher(input) 5148 .matches()) 5149 { 5150 failCount++; 5151 System.err.println("Expected to match: " + 5152 "'" + input + "' =~ /" + pattern + "/"); 5153 } 5154 } 5155 } 5156 report("caseInsensitivePMatch"); 5157 } 5158 }