1 /* 2 * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895 37 * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706 38 * 8194667 8197462 39 * 40 * @library /test/lib 41 * @build jdk.test.lib.RandomFactory 42 * @run main RegExTest 43 * @key randomness 44 */ 45 46 import java.util.function.Function; 47 import java.util.regex.*; 48 import java.util.Random; 49 import java.util.Scanner; 50 import java.io.*; 51 import java.nio.file.*; 52 import java.util.*; 53 import java.nio.CharBuffer; 54 import java.util.function.Predicate; 55 import jdk.test.lib.RandomFactory; 56 57 /** 58 * This is a test class created to check the operation of 59 * the Pattern and Matcher classes. 60 */ 61 public class RegExTest { 62 63 private static Random generator = RandomFactory.getRandom(); 64 private static boolean failure = false; 65 private static int failCount = 0; 66 private static String firstFailure = null; 67 68 /** 69 * Main to interpret arguments and run several tests. 70 * 71 */ 72 public static void main(String[] args) throws Exception { 73 // Most of the tests are in a file 74 processFile("TestCases.txt"); 75 //processFile("PerlCases.txt"); 76 processFile("BMPTestCases.txt"); 77 processFile("SupplementaryTestCases.txt"); 78 79 // These test many randomly generated char patterns 80 bm(); 81 slice(); 82 83 // These are hard to put into the file 84 escapes(); 85 blankInput(); 86 87 // Substitition tests on randomly generated sequences 88 globalSubstitute(); 89 stringbufferSubstitute(); 90 stringbuilderSubstitute(); 91 92 substitutionBasher(); 93 substitutionBasher2(); 94 95 // Canonical Equivalence 96 ceTest(); 97 98 // Anchors 99 anchorTest(); 100 101 // boolean match calls 102 matchesTest(); 103 lookingAtTest(); 104 105 // Pattern API 106 patternMatchesTest(); 107 108 // Misc 109 lookbehindTest(); 110 nullArgumentTest(); 111 backRefTest(); 112 groupCaptureTest(); 113 caretTest(); 114 charClassTest(); 115 emptyPatternTest(); 116 findIntTest(); 117 group0Test(); 118 longPatternTest(); 119 octalTest(); 120 ampersandTest(); 121 negationTest(); 122 splitTest(); 123 appendTest(); 124 caseFoldingTest(); 125 commentsTest(); 126 unixLinesTest(); 127 replaceFirstTest(); 128 gTest(); 129 zTest(); 130 serializeTest(); 131 reluctantRepetitionTest(); 132 multilineDollarTest(); 133 dollarAtEndTest(); 134 caretBetweenTerminatorsTest(); 135 // This RFE rejected in Tiger numOccurrencesTest(); 136 javaCharClassTest(); 137 nonCaptureRepetitionTest(); 138 notCapturedGroupCurlyMatchTest(); 139 escapedSegmentTest(); 140 literalPatternTest(); 141 literalReplacementTest(); 142 regionTest(); 143 toStringTest(); 144 negatedCharClassTest(); 145 findFromTest(); 146 boundsTest(); 147 unicodeWordBoundsTest(); 148 caretAtEndTest(); 149 wordSearchTest(); 150 hitEndTest(); 151 toMatchResultTest(); 152 toMatchResultTest2(); 153 surrogatesInClassTest(); 154 removeQEQuotingTest(); 155 namedGroupCaptureTest(); 156 nonBmpClassComplementTest(); 157 unicodePropertiesTest(); 158 unicodeHexNotationTest(); 159 unicodeClassesTest(); 160 unicodeCharacterNameTest(); 161 horizontalAndVerticalWSTest(); 162 linebreakTest(); 163 branchTest(); 164 groupCurlyNotFoundSuppTest(); 165 groupCurlyBackoffTest(); 166 patternAsPredicate(); 167 invalidFlags(); 168 embeddedFlags(); 169 grapheme(); 170 expoBacktracking(); 171 invalidGroupName(); 172 173 if (failure) { 174 throw new 175 RuntimeException("RegExTest failed, 1st failure: " + 176 firstFailure); 177 } else { 178 System.err.println("OKAY: All tests passed."); 179 } 180 } 181 182 // Utility functions 183 184 private static String getRandomAlphaString(int length) { 185 StringBuffer buf = new StringBuffer(length); 186 for (int i=0; i<length; i++) { 187 char randChar = (char)(97 + generator.nextInt(26)); 188 buf.append(randChar); 189 } 190 return buf.toString(); 191 } 192 193 private static void check(Matcher m, String expected) { 194 m.find(); 195 if (!m.group().equals(expected)) 196 failCount++; 197 } 198 199 private static void check(Matcher m, String result, boolean expected) { 200 m.find(); 201 if (m.group().equals(result) != expected) 202 failCount++; 203 } 204 205 private static void check(Pattern p, String s, boolean expected) { 206 if (p.matcher(s).find() != expected) 207 failCount++; 208 } 209 210 private static void check(String p, String s, boolean expected) { 211 Matcher matcher = Pattern.compile(p).matcher(s); 212 if (matcher.find() != expected) 213 failCount++; 214 } 215 216 private static void check(String p, char c, boolean expected) { 217 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 218 Pattern pattern = Pattern.compile(propertyPattern); 219 char[] ca = new char[1]; ca[0] = c; 220 Matcher matcher = pattern.matcher(new String(ca)); 221 if (!matcher.find()) 222 failCount++; 223 } 224 225 private static void check(String p, int codePoint, boolean expected) { 226 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 227 Pattern pattern = Pattern.compile(propertyPattern); 228 char[] ca = Character.toChars(codePoint); 229 Matcher matcher = pattern.matcher(new String(ca)); 230 if (!matcher.find()) 231 failCount++; 232 } 233 234 private static void check(String p, int flag, String input, String s, 235 boolean expected) 236 { 237 Pattern pattern = Pattern.compile(p, flag); 238 Matcher matcher = pattern.matcher(input); 239 if (expected) 240 check(matcher, s, expected); 241 else 242 check(pattern, input, false); 243 } 244 245 private static void report(String testName) { 246 int spacesToAdd = 30 - testName.length(); 247 StringBuffer paddedNameBuffer = new StringBuffer(testName); 248 for (int i=0; i<spacesToAdd; i++) 249 paddedNameBuffer.append(" "); 250 String paddedName = paddedNameBuffer.toString(); 251 System.err.println(paddedName + ": " + 252 (failCount==0 ? "Passed":"Failed("+failCount+")")); 253 if (failCount > 0) { 254 failure = true; 255 256 if (firstFailure == null) { 257 firstFailure = testName; 258 } 259 } 260 261 failCount = 0; 262 } 263 264 /** 265 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 266 * supplementary characters. This method does NOT fully take care 267 * of the regex syntax. 268 */ 269 private static String toSupplementaries(String s) { 270 int length = s.length(); 271 StringBuffer sb = new StringBuffer(length * 2); 272 273 for (int i = 0; i < length; ) { 274 char c = s.charAt(i++); 275 if (c == '\\') { 276 sb.append(c); 277 if (i < length) { 278 c = s.charAt(i++); 279 sb.append(c); 280 if (c == 'u') { 281 // assume no syntax error 282 sb.append(s.charAt(i++)); 283 sb.append(s.charAt(i++)); 284 sb.append(s.charAt(i++)); 285 sb.append(s.charAt(i++)); 286 } 287 } 288 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 289 sb.append('\ud800').append((char)('\udc00'+c)); 290 } else { 291 sb.append(c); 292 } 293 } 294 return sb.toString(); 295 } 296 297 // Regular expression tests 298 299 // This is for bug 6178785 300 // Test if an expected NPE gets thrown when passing in a null argument 301 private static boolean check(Runnable test) { 302 try { 303 test.run(); 304 failCount++; 305 return false; 306 } catch (NullPointerException npe) { 307 return true; 308 } 309 } 310 311 private static void nullArgumentTest() { 312 check(() -> Pattern.compile(null)); 313 check(() -> Pattern.matches(null, null)); 314 check(() -> Pattern.matches("xyz", null)); 315 check(() -> Pattern.quote(null)); 316 check(() -> Pattern.compile("xyz").split(null)); 317 check(() -> Pattern.compile("xyz").matcher(null)); 318 319 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 320 m.matches(); 321 check(() -> m.appendTail((StringBuffer) null)); 322 check(() -> m.appendTail((StringBuilder)null)); 323 check(() -> m.replaceAll((String) null)); 324 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 325 check(() -> m.replaceFirst((String)null)); 326 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 327 check(() -> m.appendReplacement((StringBuffer)null, null)); 328 check(() -> m.appendReplacement((StringBuilder)null, null)); 329 check(() -> m.reset(null)); 330 check(() -> Matcher.quoteReplacement(null)); 331 //check(() -> m.usePattern(null)); 332 333 report("Null Argument"); 334 } 335 336 // This is for bug6635133 337 // Test if surrogate pair in Unicode escapes can be handled correctly. 338 private static void surrogatesInClassTest() throws Exception { 339 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 340 Matcher matcher = pattern.matcher("\ud834\udd22"); 341 if (!matcher.find()) 342 failCount++; 343 344 report("Surrogate pair in Unicode escape"); 345 } 346 347 // This is for bug6990617 348 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 349 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 350 // char is an octal digit. 351 private static void removeQEQuotingTest() throws Exception { 352 Pattern pattern = 353 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 354 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 355 if (!matcher.find()) 356 failCount++; 357 358 report("Remove Q/E Quoting"); 359 } 360 361 // This is for bug 4988891 362 // Test toMatchResult to see that it is a copy of the Matcher 363 // that is not affected by subsequent operations on the original 364 private static void toMatchResultTest() throws Exception { 365 Pattern pattern = Pattern.compile("squid"); 366 Matcher matcher = pattern.matcher( 367 "agiantsquidofdestinyasmallsquidoffate"); 368 matcher.find(); 369 int matcherStart1 = matcher.start(); 370 MatchResult mr = matcher.toMatchResult(); 371 if (mr == matcher) 372 failCount++; 373 int resultStart1 = mr.start(); 374 if (matcherStart1 != resultStart1) 375 failCount++; 376 matcher.find(); 377 int matcherStart2 = matcher.start(); 378 int resultStart2 = mr.start(); 379 if (matcherStart2 == resultStart2) 380 failCount++; 381 if (resultStart1 != resultStart2) 382 failCount++; 383 MatchResult mr2 = matcher.toMatchResult(); 384 if (mr == mr2) 385 failCount++; 386 if (mr2.start() != matcherStart2) 387 failCount++; 388 report("toMatchResult is a copy"); 389 } 390 391 private static void checkExpectedISE(Runnable test) { 392 try { 393 test.run(); 394 failCount++; 395 } catch (IllegalStateException x) { 396 } catch (IndexOutOfBoundsException xx) { 397 failCount++; 398 } 399 } 400 401 private static void checkExpectedIOOE(Runnable test) { 402 try { 403 test.run(); 404 failCount++; 405 } catch (IndexOutOfBoundsException x) {} 406 } 407 408 // This is for bug 8074678 409 // Test the result of toMatchResult throws ISE if no match is availble 410 private static void toMatchResultTest2() throws Exception { 411 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 412 matcher.find(); 413 MatchResult mr = matcher.toMatchResult(); 414 415 checkExpectedISE(() -> mr.start()); 416 checkExpectedISE(() -> mr.start(2)); 417 checkExpectedISE(() -> mr.end()); 418 checkExpectedISE(() -> mr.end(2)); 419 checkExpectedISE(() -> mr.group()); 420 checkExpectedISE(() -> mr.group(2)); 421 422 matcher = Pattern.compile("(match)").matcher("there is a match"); 423 matcher.find(); 424 MatchResult mr2 = matcher.toMatchResult(); 425 checkExpectedIOOE(() -> mr2.start(2)); 426 checkExpectedIOOE(() -> mr2.end(2)); 427 checkExpectedIOOE(() -> mr2.group(2)); 428 429 report("toMatchResult2 appropriate exceptions"); 430 } 431 432 // This is for bug 5013885 433 // Must test a slice to see if it reports hitEnd correctly 434 private static void hitEndTest() throws Exception { 435 // Basic test of Slice node 436 Pattern p = Pattern.compile("^squidattack"); 437 Matcher m = p.matcher("squack"); 438 m.find(); 439 if (m.hitEnd()) 440 failCount++; 441 m.reset("squid"); 442 m.find(); 443 if (!m.hitEnd()) 444 failCount++; 445 446 // Test Slice, SliceA and SliceU nodes 447 for (int i=0; i<3; i++) { 448 int flags = 0; 449 if (i==1) flags = Pattern.CASE_INSENSITIVE; 450 if (i==2) flags = Pattern.UNICODE_CASE; 451 p = Pattern.compile("^abc", flags); 452 m = p.matcher("ad"); 453 m.find(); 454 if (m.hitEnd()) 455 failCount++; 456 m.reset("ab"); 457 m.find(); 458 if (!m.hitEnd()) 459 failCount++; 460 } 461 462 // Test Boyer-Moore node 463 p = Pattern.compile("catattack"); 464 m = p.matcher("attack"); 465 m.find(); 466 if (!m.hitEnd()) 467 failCount++; 468 469 p = Pattern.compile("catattack"); 470 m = p.matcher("attackattackattackcatatta"); 471 m.find(); 472 if (!m.hitEnd()) 473 failCount++; 474 475 // 8184706: Matching u+0d at EOL against \R should hit-end 476 p = Pattern.compile("...\\R"); 477 m = p.matcher("cat" + (char)0x0a); 478 m.find(); 479 if (m.hitEnd()) 480 failCount++; 481 482 m = p.matcher("cat" + (char)0x0d); 483 m.find(); 484 if (!m.hitEnd()) 485 failCount++; 486 487 m = p.matcher("cat" + (char)0x0d + (char)0x0a); 488 m.find(); 489 if (m.hitEnd()) 490 failCount++; 491 492 report("hitEnd"); 493 } 494 495 // This is for bug 4997476 496 // It is weird code submitted by customer demonstrating a regression 497 private static void wordSearchTest() throws Exception { 498 String testString = new String("word1 word2 word3"); 499 Pattern p = Pattern.compile("\\b"); 500 Matcher m = p.matcher(testString); 501 int position = 0; 502 int start = 0; 503 while (m.find(position)) { 504 start = m.start(); 505 if (start == testString.length()) 506 break; 507 if (m.find(start+1)) { 508 position = m.start(); 509 } else { 510 position = testString.length(); 511 } 512 if (testString.substring(start, position).equals(" ")) 513 continue; 514 if (!testString.substring(start, position-1).startsWith("word")) 515 failCount++; 516 } 517 report("Customer word search"); 518 } 519 520 // This is for bug 4994840 521 private static void caretAtEndTest() throws Exception { 522 // Problem only occurs with multiline patterns 523 // containing a beginning-of-line caret "^" followed 524 // by an expression that also matches the empty string. 525 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 526 Matcher matcher = pattern.matcher("\r"); 527 matcher.find(); 528 matcher.find(); 529 report("Caret at end"); 530 } 531 532 // This test is for 4979006 533 // Check to see if word boundary construct properly handles unicode 534 // non spacing marks 535 private static void unicodeWordBoundsTest() throws Exception { 536 String spaces = " "; 537 String wordChar = "a"; 538 String nsm = "\u030a"; 539 540 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 541 542 Pattern pattern = Pattern.compile("\\b"); 543 Matcher matcher = pattern.matcher(""); 544 // S=other B=word character N=non spacing mark .=word boundary 545 // SS.BB.SS 546 String input = spaces + wordChar + wordChar + spaces; 547 twoFindIndexes(input, matcher, 2, 4); 548 // SS.BBN.SS 549 input = spaces + wordChar +wordChar + nsm + spaces; 550 twoFindIndexes(input, matcher, 2, 5); 551 // SS.BN.SS 552 input = spaces + wordChar + nsm + spaces; 553 twoFindIndexes(input, matcher, 2, 4); 554 // SS.BNN.SS 555 input = spaces + wordChar + nsm + nsm + spaces; 556 twoFindIndexes(input, matcher, 2, 5); 557 // SSN.BB.SS 558 input = spaces + nsm + wordChar + wordChar + spaces; 559 twoFindIndexes(input, matcher, 3, 5); 560 // SS.BNB.SS 561 input = spaces + wordChar + nsm + wordChar + spaces; 562 twoFindIndexes(input, matcher, 2, 5); 563 // SSNNSS 564 input = spaces + nsm + nsm + spaces; 565 matcher.reset(input); 566 if (matcher.find()) 567 failCount++; 568 // SSN.BBN.SS 569 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 570 twoFindIndexes(input, matcher, 3, 6); 571 572 report("Unicode word boundary"); 573 } 574 575 private static void twoFindIndexes(String input, Matcher matcher, int a, 576 int b) throws Exception 577 { 578 matcher.reset(input); 579 matcher.find(); 580 if (matcher.start() != a) 581 failCount++; 582 matcher.find(); 583 if (matcher.start() != b) 584 failCount++; 585 } 586 587 // This test is for 6284152 588 static void check(String regex, String input, String[] expected) { 589 List<String> result = new ArrayList<String>(); 590 Pattern p = Pattern.compile(regex); 591 Matcher m = p.matcher(input); 592 while (m.find()) { 593 result.add(m.group()); 594 } 595 if (!Arrays.asList(expected).equals(result)) 596 failCount++; 597 } 598 599 private static void lookbehindTest() throws Exception { 600 //Positive 601 check("(?<=%.{0,5})foo\\d", 602 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 603 new String[]{"foo1", "foo2", "foo3"}); 604 605 //boundary at end of the lookbehind sub-regex should work consistently 606 //with the boundary just after the lookbehind sub-regex 607 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 608 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 609 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 610 check("(?<!abc \\b)foo", "abc foo", new String[0]); 611 612 //Negative 613 check("(?<!%.{0,5})foo\\d", 614 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 615 new String[] {"foo4", "foo5"}); 616 617 //Positive greedy 618 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 619 620 //Positive reluctant 621 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 622 623 //supplementary 624 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 625 new String[] {"fo\ud800\udc00o"}); 626 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 627 new String[] {"fo\ud800\udc00o"}); 628 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 629 new String[] {"fo\ud800\udc00o"}); 630 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 631 new String[] {"fo\ud800\udc00o"}); 632 report("Lookbehind"); 633 } 634 635 // This test is for 4938995 636 // Check to see if weak region boundaries are transparent to 637 // lookahead and lookbehind constructs 638 private static void boundsTest() throws Exception { 639 String fullMessage = "catdogcat"; 640 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 641 Matcher matcher = pattern.matcher("catdogca"); 642 matcher.useTransparentBounds(true); 643 if (matcher.find()) 644 failCount++; 645 matcher.reset("atdogcat"); 646 if (matcher.find()) 647 failCount++; 648 matcher.reset(fullMessage); 649 if (!matcher.find()) 650 failCount++; 651 matcher.reset(fullMessage); 652 matcher.region(0,9); 653 if (!matcher.find()) 654 failCount++; 655 matcher.reset(fullMessage); 656 matcher.region(0,6); 657 if (!matcher.find()) 658 failCount++; 659 matcher.reset(fullMessage); 660 matcher.region(3,6); 661 if (!matcher.find()) 662 failCount++; 663 matcher.useTransparentBounds(false); 664 if (matcher.find()) 665 failCount++; 666 667 // Negative lookahead/lookbehind 668 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 669 matcher = pattern.matcher("dogcat"); 670 matcher.useTransparentBounds(true); 671 matcher.region(0,3); 672 if (matcher.find()) 673 failCount++; 674 matcher.reset("catdog"); 675 matcher.region(3,6); 676 if (matcher.find()) 677 failCount++; 678 matcher.useTransparentBounds(false); 679 matcher.reset("dogcat"); 680 matcher.region(0,3); 681 if (!matcher.find()) 682 failCount++; 683 matcher.reset("catdog"); 684 matcher.region(3,6); 685 if (!matcher.find()) 686 failCount++; 687 688 report("Region bounds transparency"); 689 } 690 691 // This test is for 4945394 692 private static void findFromTest() throws Exception { 693 String message = "This is 40 $0 message."; 694 Pattern pat = Pattern.compile("\\$0"); 695 Matcher match = pat.matcher(message); 696 if (!match.find()) 697 failCount++; 698 if (match.find()) 699 failCount++; 700 if (match.find()) 701 failCount++; 702 report("Check for alternating find"); 703 } 704 705 // This test is for 4872664 and 4892980 706 private static void negatedCharClassTest() throws Exception { 707 Pattern pattern = Pattern.compile("[^>]"); 708 Matcher matcher = pattern.matcher("\u203A"); 709 if (!matcher.matches()) 710 failCount++; 711 pattern = Pattern.compile("[^fr]"); 712 matcher = pattern.matcher("a"); 713 if (!matcher.find()) 714 failCount++; 715 matcher.reset("\u203A"); 716 if (!matcher.find()) 717 failCount++; 718 String s = "for"; 719 String result[] = s.split("[^fr]"); 720 if (!result[0].equals("f")) 721 failCount++; 722 if (!result[1].equals("r")) 723 failCount++; 724 s = "f\u203Ar"; 725 result = s.split("[^fr]"); 726 if (!result[0].equals("f")) 727 failCount++; 728 if (!result[1].equals("r")) 729 failCount++; 730 731 // Test adding to bits, subtracting a node, then adding to bits again 732 pattern = Pattern.compile("[^f\u203Ar]"); 733 matcher = pattern.matcher("a"); 734 if (!matcher.find()) 735 failCount++; 736 matcher.reset("f"); 737 if (matcher.find()) 738 failCount++; 739 matcher.reset("\u203A"); 740 if (matcher.find()) 741 failCount++; 742 matcher.reset("r"); 743 if (matcher.find()) 744 failCount++; 745 matcher.reset("\u203B"); 746 if (!matcher.find()) 747 failCount++; 748 749 // Test subtracting a node, adding to bits, subtracting again 750 pattern = Pattern.compile("[^\u203Ar\u203B]"); 751 matcher = pattern.matcher("a"); 752 if (!matcher.find()) 753 failCount++; 754 matcher.reset("\u203A"); 755 if (matcher.find()) 756 failCount++; 757 matcher.reset("r"); 758 if (matcher.find()) 759 failCount++; 760 matcher.reset("\u203B"); 761 if (matcher.find()) 762 failCount++; 763 matcher.reset("\u203C"); 764 if (!matcher.find()) 765 failCount++; 766 767 report("Negated Character Class"); 768 } 769 770 // This test is for 4628291 771 private static void toStringTest() throws Exception { 772 Pattern pattern = Pattern.compile("b+"); 773 if (pattern.toString() != "b+") 774 failCount++; 775 Matcher matcher = pattern.matcher("aaabbbccc"); 776 String matcherString = matcher.toString(); // unspecified 777 matcher.find(); 778 matcherString = matcher.toString(); // unspecified 779 matcher.region(0,3); 780 matcherString = matcher.toString(); // unspecified 781 matcher.reset(); 782 matcherString = matcher.toString(); // unspecified 783 report("toString"); 784 } 785 786 // This test is for 4808962 787 private static void literalPatternTest() throws Exception { 788 int flags = Pattern.LITERAL; 789 790 Pattern pattern = Pattern.compile("abc\\t$^", flags); 791 check(pattern, "abc\\t$^", true); 792 793 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 794 check(pattern, "abc\\t$^", true); 795 796 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 797 check(pattern, "\\Qa^$bcabc\\E", true); 798 check(pattern, "a^$bcabc", false); 799 800 pattern = Pattern.compile("\\\\Q\\\\E"); 801 check(pattern, "\\Q\\E", true); 802 803 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 804 check(pattern, "abcefg\\Q\\Ehij", true); 805 806 pattern = Pattern.compile("\\\\\\Q\\\\E"); 807 check(pattern, "\\\\\\\\", true); 808 809 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 810 check(pattern, "\\Qa^$bcabc\\E", true); 811 check(pattern, "a^$bcabc", false); 812 813 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 814 check(pattern, "\\Qabc\\Edef", true); 815 check(pattern, "abcdef", false); 816 817 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 818 check(pattern, "abc\\Edef", true); 819 check(pattern, "abcdef", false); 820 821 pattern = Pattern.compile(Pattern.quote("\\E")); 822 check(pattern, "\\E", true); 823 824 pattern = Pattern.compile("((((abc.+?:)", flags); 825 check(pattern, "((((abc.+?:)", true); 826 827 flags |= Pattern.MULTILINE; 828 829 pattern = Pattern.compile("^cat$", flags); 830 check(pattern, "abc^cat$def", true); 831 check(pattern, "cat", false); 832 833 flags |= Pattern.CASE_INSENSITIVE; 834 835 pattern = Pattern.compile("abcdef", flags); 836 check(pattern, "ABCDEF", true); 837 check(pattern, "AbCdEf", true); 838 839 flags |= Pattern.DOTALL; 840 841 pattern = Pattern.compile("a...b", flags); 842 check(pattern, "A...b", true); 843 check(pattern, "Axxxb", false); 844 845 flags |= Pattern.CANON_EQ; 846 847 Pattern p = Pattern.compile("testa\u030a", flags); 848 check(pattern, "testa\u030a", false); 849 check(pattern, "test\u00e5", false); 850 851 // Supplementary character test 852 flags = Pattern.LITERAL; 853 854 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 855 check(pattern, toSupplementaries("abc\\t$^"), true); 856 857 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 858 check(pattern, toSupplementaries("abc\\t$^"), true); 859 860 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 861 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 862 check(pattern, toSupplementaries("a^$bcabc"), false); 863 864 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 865 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 866 check(pattern, toSupplementaries("a^$bcabc"), false); 867 868 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 869 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 870 check(pattern, toSupplementaries("abcdef"), false); 871 872 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 873 check(pattern, toSupplementaries("abc\\Edef"), true); 874 check(pattern, toSupplementaries("abcdef"), false); 875 876 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 877 check(pattern, toSupplementaries("((((abc.+?:)"), true); 878 879 flags |= Pattern.MULTILINE; 880 881 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 882 check(pattern, toSupplementaries("abc^cat$def"), true); 883 check(pattern, toSupplementaries("cat"), false); 884 885 flags |= Pattern.DOTALL; 886 887 // note: this is case-sensitive. 888 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 889 check(pattern, toSupplementaries("a...b"), true); 890 check(pattern, toSupplementaries("axxxb"), false); 891 892 flags |= Pattern.CANON_EQ; 893 894 String t = toSupplementaries("test"); 895 p = Pattern.compile(t + "a\u030a", flags); 896 check(pattern, t + "a\u030a", false); 897 check(pattern, t + "\u00e5", false); 898 899 report("Literal pattern"); 900 } 901 902 // This test is for 4803179 903 // This test is also for 4808962, replacement parts 904 private static void literalReplacementTest() throws Exception { 905 int flags = Pattern.LITERAL; 906 907 Pattern pattern = Pattern.compile("abc", flags); 908 Matcher matcher = pattern.matcher("zzzabczzz"); 909 String replaceTest = "$0"; 910 String result = matcher.replaceAll(replaceTest); 911 if (!result.equals("zzzabczzz")) 912 failCount++; 913 914 matcher.reset(); 915 String literalReplacement = matcher.quoteReplacement(replaceTest); 916 result = matcher.replaceAll(literalReplacement); 917 if (!result.equals("zzz$0zzz")) 918 failCount++; 919 920 matcher.reset(); 921 replaceTest = "\\t$\\$"; 922 literalReplacement = matcher.quoteReplacement(replaceTest); 923 result = matcher.replaceAll(literalReplacement); 924 if (!result.equals("zzz\\t$\\$zzz")) 925 failCount++; 926 927 // Supplementary character test 928 pattern = Pattern.compile(toSupplementaries("abc"), flags); 929 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 930 replaceTest = "$0"; 931 result = matcher.replaceAll(replaceTest); 932 if (!result.equals(toSupplementaries("zzzabczzz"))) 933 failCount++; 934 935 matcher.reset(); 936 literalReplacement = matcher.quoteReplacement(replaceTest); 937 result = matcher.replaceAll(literalReplacement); 938 if (!result.equals(toSupplementaries("zzz$0zzz"))) 939 failCount++; 940 941 matcher.reset(); 942 replaceTest = "\\t$\\$"; 943 literalReplacement = matcher.quoteReplacement(replaceTest); 944 result = matcher.replaceAll(literalReplacement); 945 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 946 failCount++; 947 948 // IAE should be thrown if backslash or '$' is the last character 949 // in replacement string 950 try { 951 "\uac00".replaceAll("\uac00", "$"); 952 failCount++; 953 } catch (IllegalArgumentException iie) { 954 } catch (Exception e) { 955 failCount++; 956 } 957 try { 958 "\uac00".replaceAll("\uac00", "\\"); 959 failCount++; 960 } catch (IllegalArgumentException iie) { 961 } catch (Exception e) { 962 failCount++; 963 } 964 report("Literal replacement"); 965 } 966 967 // This test is for 4757029 968 private static void regionTest() throws Exception { 969 Pattern pattern = Pattern.compile("abc"); 970 Matcher matcher = pattern.matcher("abcdefabc"); 971 972 matcher.region(0,9); 973 if (!matcher.find()) 974 failCount++; 975 if (!matcher.find()) 976 failCount++; 977 matcher.region(0,3); 978 if (!matcher.find()) 979 failCount++; 980 matcher.region(3,6); 981 if (matcher.find()) 982 failCount++; 983 matcher.region(0,2); 984 if (matcher.find()) 985 failCount++; 986 987 expectRegionFail(matcher, 1, -1); 988 expectRegionFail(matcher, -1, -1); 989 expectRegionFail(matcher, -1, 1); 990 expectRegionFail(matcher, 5, 3); 991 expectRegionFail(matcher, 5, 12); 992 expectRegionFail(matcher, 12, 12); 993 994 pattern = Pattern.compile("^abc$"); 995 matcher = pattern.matcher("zzzabczzz"); 996 matcher.region(0,9); 997 if (matcher.find()) 998 failCount++; 999 matcher.region(3,6); 1000 if (!matcher.find()) 1001 failCount++; 1002 matcher.region(3,6); 1003 matcher.useAnchoringBounds(false); 1004 if (matcher.find()) 1005 failCount++; 1006 1007 // Supplementary character test 1008 pattern = Pattern.compile(toSupplementaries("abc")); 1009 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 1010 matcher.region(0,9*2); 1011 if (!matcher.find()) 1012 failCount++; 1013 if (!matcher.find()) 1014 failCount++; 1015 matcher.region(0,3*2); 1016 if (!matcher.find()) 1017 failCount++; 1018 matcher.region(1,3*2); 1019 if (matcher.find()) 1020 failCount++; 1021 matcher.region(3*2,6*2); 1022 if (matcher.find()) 1023 failCount++; 1024 matcher.region(0,2*2); 1025 if (matcher.find()) 1026 failCount++; 1027 matcher.region(0,2*2+1); 1028 if (matcher.find()) 1029 failCount++; 1030 1031 expectRegionFail(matcher, 1*2, -1); 1032 expectRegionFail(matcher, -1, -1); 1033 expectRegionFail(matcher, -1, 1*2); 1034 expectRegionFail(matcher, 5*2, 3*2); 1035 expectRegionFail(matcher, 5*2, 12*2); 1036 expectRegionFail(matcher, 12*2, 12*2); 1037 1038 pattern = Pattern.compile(toSupplementaries("^abc$")); 1039 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1040 matcher.region(0,9*2); 1041 if (matcher.find()) 1042 failCount++; 1043 matcher.region(3*2,6*2); 1044 if (!matcher.find()) 1045 failCount++; 1046 matcher.region(3*2+1,6*2); 1047 if (matcher.find()) 1048 failCount++; 1049 matcher.region(3*2,6*2-1); 1050 if (matcher.find()) 1051 failCount++; 1052 matcher.region(3*2,6*2); 1053 matcher.useAnchoringBounds(false); 1054 if (matcher.find()) 1055 failCount++; 1056 report("Regions"); 1057 } 1058 1059 private static void expectRegionFail(Matcher matcher, int index1, 1060 int index2) 1061 { 1062 try { 1063 matcher.region(index1, index2); 1064 failCount++; 1065 } catch (IndexOutOfBoundsException ioobe) { 1066 // Correct result 1067 } catch (IllegalStateException ise) { 1068 // Correct result 1069 } 1070 } 1071 1072 // This test is for 4803197 1073 private static void escapedSegmentTest() throws Exception { 1074 1075 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1076 check(pattern, "dir1\\dir2", true); 1077 1078 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1079 check(pattern, "dir1\\dir2\\", true); 1080 1081 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1082 check(pattern, "dir1\\dir2\\", true); 1083 1084 // Supplementary character test 1085 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1086 check(pattern, toSupplementaries("dir1\\dir2"), true); 1087 1088 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1089 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1090 1091 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1092 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1093 1094 report("Escaped segment"); 1095 } 1096 1097 // This test is for 4792284 1098 private static void nonCaptureRepetitionTest() throws Exception { 1099 String input = "abcdefgh;"; 1100 1101 String[] patterns = new String[] { 1102 "(?:\\w{4})+;", 1103 "(?:\\w{8})*;", 1104 "(?:\\w{2}){2,4};", 1105 "(?:\\w{4}){2,};", // only matches the 1106 ".*?(?:\\w{5})+;", // specified minimum 1107 ".*?(?:\\w{9})*;", // number of reps - OK 1108 "(?:\\w{4})+?;", // lazy repetition - OK 1109 "(?:\\w{4})++;", // possessive repetition - OK 1110 "(?:\\w{2,}?)+;", // non-deterministic - OK 1111 "(\\w{4})+;", // capturing group - OK 1112 }; 1113 1114 for (int i = 0; i < patterns.length; i++) { 1115 // Check find() 1116 check(patterns[i], 0, input, input, true); 1117 // Check matches() 1118 Pattern p = Pattern.compile(patterns[i]); 1119 Matcher m = p.matcher(input); 1120 1121 if (m.matches()) { 1122 if (!m.group(0).equals(input)) 1123 failCount++; 1124 } else { 1125 failCount++; 1126 } 1127 } 1128 1129 report("Non capturing repetition"); 1130 } 1131 1132 // This test is for 6358731 1133 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1134 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1135 Matcher matcher = pattern.matcher("abcd"); 1136 if (!matcher.matches() || 1137 matcher.group(1) != null || 1138 !matcher.group(2).equals("abcd")) { 1139 failCount++; 1140 } 1141 report("Not captured GroupCurly"); 1142 } 1143 1144 // This test is for 4706545 1145 private static void javaCharClassTest() throws Exception { 1146 for (int i=0; i<1000; i++) { 1147 char c = (char)generator.nextInt(); 1148 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1149 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1150 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1151 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1152 check("{javaDigit}", c, Character.isDigit(c)); 1153 check("{javaDefined}", c, Character.isDefined(c)); 1154 check("{javaLetter}", c, Character.isLetter(c)); 1155 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1156 check("{javaJavaIdentifierStart}", c, 1157 Character.isJavaIdentifierStart(c)); 1158 check("{javaJavaIdentifierPart}", c, 1159 Character.isJavaIdentifierPart(c)); 1160 check("{javaUnicodeIdentifierStart}", c, 1161 Character.isUnicodeIdentifierStart(c)); 1162 check("{javaUnicodeIdentifierPart}", c, 1163 Character.isUnicodeIdentifierPart(c)); 1164 check("{javaIdentifierIgnorable}", c, 1165 Character.isIdentifierIgnorable(c)); 1166 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1167 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1168 check("{javaISOControl}", c, Character.isISOControl(c)); 1169 check("{javaMirrored}", c, Character.isMirrored(c)); 1170 1171 } 1172 1173 // Supplementary character test 1174 for (int i=0; i<1000; i++) { 1175 int c = generator.nextInt(Character.MAX_CODE_POINT 1176 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1177 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1178 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1179 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1180 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1181 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1182 check("{javaDigit}", c, Character.isDigit(c)); 1183 check("{javaDefined}", c, Character.isDefined(c)); 1184 check("{javaLetter}", c, Character.isLetter(c)); 1185 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1186 check("{javaJavaIdentifierStart}", c, 1187 Character.isJavaIdentifierStart(c)); 1188 check("{javaJavaIdentifierPart}", c, 1189 Character.isJavaIdentifierPart(c)); 1190 check("{javaUnicodeIdentifierStart}", c, 1191 Character.isUnicodeIdentifierStart(c)); 1192 check("{javaUnicodeIdentifierPart}", c, 1193 Character.isUnicodeIdentifierPart(c)); 1194 check("{javaIdentifierIgnorable}", c, 1195 Character.isIdentifierIgnorable(c)); 1196 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1197 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1198 check("{javaISOControl}", c, Character.isISOControl(c)); 1199 check("{javaMirrored}", c, Character.isMirrored(c)); 1200 } 1201 1202 report("Java character classes"); 1203 } 1204 1205 // This test is for 4523620 1206 /* 1207 private static void numOccurrencesTest() throws Exception { 1208 Pattern pattern = Pattern.compile("aaa"); 1209 1210 if (pattern.numOccurrences("aaaaaa", false) != 2) 1211 failCount++; 1212 if (pattern.numOccurrences("aaaaaa", true) != 4) 1213 failCount++; 1214 1215 pattern = Pattern.compile("^"); 1216 if (pattern.numOccurrences("aaaaaa", false) != 1) 1217 failCount++; 1218 if (pattern.numOccurrences("aaaaaa", true) != 1) 1219 failCount++; 1220 1221 report("Number of Occurrences"); 1222 } 1223 */ 1224 1225 // This test is for 4776374 1226 private static void caretBetweenTerminatorsTest() throws Exception { 1227 int flags1 = Pattern.DOTALL; 1228 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1229 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1230 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1231 1232 check("^....", flags1, "test\ntest", "test", true); 1233 check(".....^", flags1, "test\ntest", "test", false); 1234 check(".....^", flags1, "test\n", "test", false); 1235 check("....^", flags1, "test\r\n", "test", false); 1236 1237 check("^....", flags2, "test\ntest", "test", true); 1238 check("....^", flags2, "test\ntest", "test", false); 1239 check(".....^", flags2, "test\n", "test", false); 1240 check("....^", flags2, "test\r\n", "test", false); 1241 1242 check("^....", flags3, "test\ntest", "test", true); 1243 check(".....^", flags3, "test\ntest", "test\n", true); 1244 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1245 check(".....^", flags3, "test\n", "test", false); 1246 check(".....^", flags3, "test\r\n", "test", false); 1247 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1248 1249 check("^....", flags4, "test\ntest", "test", true); 1250 check(".....^", flags3, "test\ntest", "test\n", true); 1251 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1252 check(".....^", flags4, "test\n", "test\n", false); 1253 check(".....^", flags4, "test\r\n", "test\r", false); 1254 1255 // Supplementary character test 1256 String t = toSupplementaries("test"); 1257 check("^....", flags1, t+"\n"+t, t, true); 1258 check(".....^", flags1, t+"\n"+t, t, false); 1259 check(".....^", flags1, t+"\n", t, false); 1260 check("....^", flags1, t+"\r\n", t, false); 1261 1262 check("^....", flags2, t+"\n"+t, t, true); 1263 check("....^", flags2, t+"\n"+t, t, false); 1264 check(".....^", flags2, t+"\n", t, false); 1265 check("....^", flags2, t+"\r\n", t, false); 1266 1267 check("^....", flags3, t+"\n"+t, t, true); 1268 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1269 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1270 check(".....^", flags3, t+"\n", t, false); 1271 check(".....^", flags3, t+"\r\n", t, false); 1272 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1273 1274 check("^....", flags4, t+"\n"+t, t, true); 1275 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1276 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1277 check(".....^", flags4, t+"\n", t+"\n", false); 1278 check(".....^", flags4, t+"\r\n", t+"\r", false); 1279 1280 report("Caret between terminators"); 1281 } 1282 1283 // This test is for 4727935 1284 private static void dollarAtEndTest() throws Exception { 1285 int flags1 = Pattern.DOTALL; 1286 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1287 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1288 1289 check("....$", flags1, "test\n", "test", true); 1290 check("....$", flags1, "test\r\n", "test", true); 1291 check(".....$", flags1, "test\n", "test\n", true); 1292 check(".....$", flags1, "test\u0085", "test\u0085", true); 1293 check("....$", flags1, "test\u0085", "test", true); 1294 1295 check("....$", flags2, "test\n", "test", true); 1296 check(".....$", flags2, "test\n", "test\n", true); 1297 check(".....$", flags2, "test\u0085", "test\u0085", true); 1298 check("....$", flags2, "test\u0085", "est\u0085", true); 1299 1300 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1301 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1302 check("....$blah", flags3, "test\nblah", "!!!!", false); 1303 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1304 1305 // Supplementary character test 1306 String t = toSupplementaries("test"); 1307 String b = toSupplementaries("blah"); 1308 check("....$", flags1, t+"\n", t, true); 1309 check("....$", flags1, t+"\r\n", t, true); 1310 check(".....$", flags1, t+"\n", t+"\n", true); 1311 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1312 check("....$", flags1, t+"\u0085", t, true); 1313 1314 check("....$", flags2, t+"\n", t, true); 1315 check(".....$", flags2, t+"\n", t+"\n", true); 1316 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1317 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1318 1319 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1320 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1321 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1322 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1323 1324 report("Dollar at End"); 1325 } 1326 1327 // This test is for 4711773 1328 private static void multilineDollarTest() throws Exception { 1329 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1330 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1331 matcher.find(); 1332 if (matcher.start(0) != 9) 1333 failCount++; 1334 matcher.find(); 1335 if (matcher.start(0) != 20) 1336 failCount++; 1337 1338 // Supplementary character test 1339 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1340 matcher.find(); 1341 if (matcher.start(0) != 9*2) 1342 failCount++; 1343 matcher.find(); 1344 if (matcher.start(0) != 20*2) 1345 failCount++; 1346 1347 report("Multiline Dollar"); 1348 } 1349 1350 private static void reluctantRepetitionTest() throws Exception { 1351 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1352 check(p, "1 word word word 2", true); 1353 check(p, "1 wor wo w 2", true); 1354 check(p, "1 word word 2", true); 1355 check(p, "1 word 2", true); 1356 check(p, "1 wo w w 2", true); 1357 check(p, "1 wo w 2", true); 1358 check(p, "1 wor w 2", true); 1359 1360 p = Pattern.compile("([a-z])+?c"); 1361 Matcher m = p.matcher("ababcdefdec"); 1362 check(m, "ababc"); 1363 1364 // Supplementary character test 1365 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1366 m = p.matcher(toSupplementaries("ababcdefdec")); 1367 check(m, toSupplementaries("ababc")); 1368 1369 report("Reluctant Repetition"); 1370 } 1371 1372 private static Pattern serializedPattern(Pattern p) throws Exception { 1373 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1374 ObjectOutputStream oos = new ObjectOutputStream(baos); 1375 oos.writeObject(p); 1376 oos.close(); 1377 try (ObjectInputStream ois = new ObjectInputStream( 1378 new ByteArrayInputStream(baos.toByteArray()))) { 1379 return (Pattern)ois.readObject(); 1380 } 1381 } 1382 1383 private static void serializeTest() throws Exception { 1384 String patternStr = "(b)"; 1385 String matchStr = "b"; 1386 Pattern pattern = Pattern.compile(patternStr); 1387 Pattern serializedPattern = serializedPattern(pattern); 1388 Matcher matcher = serializedPattern.matcher(matchStr); 1389 if (!matcher.matches()) 1390 failCount++; 1391 if (matcher.groupCount() != 1) 1392 failCount++; 1393 1394 pattern = Pattern.compile("a(?-i)b", Pattern.CASE_INSENSITIVE); 1395 serializedPattern = serializedPattern(pattern); 1396 if (!serializedPattern.matcher("Ab").matches()) 1397 failCount++; 1398 if (serializedPattern.matcher("AB").matches()) 1399 failCount++; 1400 1401 report("Serialization"); 1402 } 1403 1404 private static void gTest() { 1405 Pattern pattern = Pattern.compile("\\G\\w"); 1406 Matcher matcher = pattern.matcher("abc#x#x"); 1407 matcher.find(); 1408 matcher.find(); 1409 matcher.find(); 1410 if (matcher.find()) 1411 failCount++; 1412 1413 pattern = Pattern.compile("\\GA*"); 1414 matcher = pattern.matcher("1A2AA3"); 1415 matcher.find(); 1416 if (matcher.find()) 1417 failCount++; 1418 1419 pattern = Pattern.compile("\\GA*"); 1420 matcher = pattern.matcher("1A2AA3"); 1421 if (!matcher.find(1)) 1422 failCount++; 1423 matcher.find(); 1424 if (matcher.find()) 1425 failCount++; 1426 1427 report("\\G"); 1428 } 1429 1430 private static void zTest() { 1431 Pattern pattern = Pattern.compile("foo\\Z"); 1432 // Positives 1433 check(pattern, "foo\u0085", true); 1434 check(pattern, "foo\u2028", true); 1435 check(pattern, "foo\u2029", true); 1436 check(pattern, "foo\n", true); 1437 check(pattern, "foo\r", true); 1438 check(pattern, "foo\r\n", true); 1439 // Negatives 1440 check(pattern, "fooo", false); 1441 check(pattern, "foo\n\r", false); 1442 1443 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1444 // Positives 1445 check(pattern, "foo", true); 1446 check(pattern, "foo\n", true); 1447 // Negatives 1448 check(pattern, "foo\r", false); 1449 check(pattern, "foo\u0085", false); 1450 check(pattern, "foo\u2028", false); 1451 check(pattern, "foo\u2029", false); 1452 1453 report("\\Z"); 1454 } 1455 1456 private static void replaceFirstTest() { 1457 Pattern pattern = Pattern.compile("(ab)(c*)"); 1458 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1459 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1460 failCount++; 1461 1462 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1463 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1464 failCount++; 1465 1466 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1467 String result = matcher.replaceFirst("$1"); 1468 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1469 failCount++; 1470 1471 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1472 result = matcher.replaceFirst("$2"); 1473 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1474 failCount++; 1475 1476 pattern = Pattern.compile("a*"); 1477 matcher = pattern.matcher("aaaaaaaaaa"); 1478 if (!matcher.replaceFirst("test").equals("test")) 1479 failCount++; 1480 1481 pattern = Pattern.compile("a+"); 1482 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1483 if (!matcher.replaceFirst("test").equals("zzztest")) 1484 failCount++; 1485 1486 // Supplementary character test 1487 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1488 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1489 if (!matcher.replaceFirst(toSupplementaries("test")) 1490 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1491 failCount++; 1492 1493 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1494 if (!matcher.replaceFirst(toSupplementaries("test")). 1495 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1496 failCount++; 1497 1498 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1499 result = matcher.replaceFirst("$1"); 1500 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1501 failCount++; 1502 1503 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1504 result = matcher.replaceFirst("$2"); 1505 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1506 failCount++; 1507 1508 pattern = Pattern.compile(toSupplementaries("a*")); 1509 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1510 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1511 failCount++; 1512 1513 pattern = Pattern.compile(toSupplementaries("a+")); 1514 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1515 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1516 failCount++; 1517 1518 report("Replace First"); 1519 } 1520 1521 private static void unixLinesTest() { 1522 Pattern pattern = Pattern.compile(".*"); 1523 Matcher matcher = pattern.matcher("aa\u2028blah"); 1524 matcher.find(); 1525 if (!matcher.group(0).equals("aa")) 1526 failCount++; 1527 1528 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1529 matcher = pattern.matcher("aa\u2028blah"); 1530 matcher.find(); 1531 if (!matcher.group(0).equals("aa\u2028blah")) 1532 failCount++; 1533 1534 pattern = Pattern.compile("[az]$", 1535 Pattern.MULTILINE | Pattern.UNIX_LINES); 1536 matcher = pattern.matcher("aa\u2028zz"); 1537 check(matcher, "a\u2028", false); 1538 1539 // Supplementary character test 1540 pattern = Pattern.compile(".*"); 1541 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1542 matcher.find(); 1543 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1544 failCount++; 1545 1546 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1547 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1548 matcher.find(); 1549 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1550 failCount++; 1551 1552 pattern = Pattern.compile(toSupplementaries("[az]$"), 1553 Pattern.MULTILINE | Pattern.UNIX_LINES); 1554 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1555 check(matcher, toSupplementaries("a\u2028"), false); 1556 1557 report("Unix Lines"); 1558 } 1559 1560 private static void commentsTest() { 1561 int flags = Pattern.COMMENTS; 1562 1563 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1564 Matcher matcher = pattern.matcher("aa#aa"); 1565 if (!matcher.matches()) 1566 failCount++; 1567 1568 pattern = Pattern.compile("aa # blah", flags); 1569 matcher = pattern.matcher("aa"); 1570 if (!matcher.matches()) 1571 failCount++; 1572 1573 pattern = Pattern.compile("aa blah", flags); 1574 matcher = pattern.matcher("aablah"); 1575 if (!matcher.matches()) 1576 failCount++; 1577 1578 pattern = Pattern.compile("aa # blah blech ", flags); 1579 matcher = pattern.matcher("aa"); 1580 if (!matcher.matches()) 1581 failCount++; 1582 1583 pattern = Pattern.compile("aa # blah\n ", flags); 1584 matcher = pattern.matcher("aa"); 1585 if (!matcher.matches()) 1586 failCount++; 1587 1588 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1589 matcher = pattern.matcher("aabc"); 1590 if (!matcher.matches()) 1591 failCount++; 1592 1593 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1594 matcher = pattern.matcher("aabc"); 1595 if (!matcher.matches()) 1596 failCount++; 1597 1598 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1599 matcher = pattern.matcher("aabc#blech"); 1600 if (!matcher.matches()) 1601 failCount++; 1602 1603 // Supplementary character test 1604 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1605 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1606 if (!matcher.matches()) 1607 failCount++; 1608 1609 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1610 matcher = pattern.matcher(toSupplementaries("aa")); 1611 if (!matcher.matches()) 1612 failCount++; 1613 1614 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1615 matcher = pattern.matcher(toSupplementaries("aablah")); 1616 if (!matcher.matches()) 1617 failCount++; 1618 1619 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1620 matcher = pattern.matcher(toSupplementaries("aa")); 1621 if (!matcher.matches()) 1622 failCount++; 1623 1624 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1625 matcher = pattern.matcher(toSupplementaries("aa")); 1626 if (!matcher.matches()) 1627 failCount++; 1628 1629 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1630 matcher = pattern.matcher(toSupplementaries("aabc")); 1631 if (!matcher.matches()) 1632 failCount++; 1633 1634 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1635 matcher = pattern.matcher(toSupplementaries("aabc")); 1636 if (!matcher.matches()) 1637 failCount++; 1638 1639 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1640 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1641 if (!matcher.matches()) 1642 failCount++; 1643 1644 report("Comments"); 1645 } 1646 1647 private static void caseFoldingTest() { // bug 4504687 1648 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1649 Pattern pattern = Pattern.compile("aa", flags); 1650 Matcher matcher = pattern.matcher("ab"); 1651 if (matcher.matches()) 1652 failCount++; 1653 1654 pattern = Pattern.compile("aA", flags); 1655 matcher = pattern.matcher("ab"); 1656 if (matcher.matches()) 1657 failCount++; 1658 1659 pattern = Pattern.compile("aa", flags); 1660 matcher = pattern.matcher("aB"); 1661 if (matcher.matches()) 1662 failCount++; 1663 matcher = pattern.matcher("Ab"); 1664 if (matcher.matches()) 1665 failCount++; 1666 1667 // ASCII "a" 1668 // Latin-1 Supplement "a" + grave 1669 // Cyrillic "a" 1670 String[] patterns = new String[] { 1671 //single 1672 "a", "\u00e0", "\u0430", 1673 //slice 1674 "ab", "\u00e0\u00e1", "\u0430\u0431", 1675 //class single 1676 "[a]", "[\u00e0]", "[\u0430]", 1677 //class range 1678 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1679 //back reference 1680 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1681 }; 1682 1683 String[] texts = new String[] { 1684 "A", "\u00c0", "\u0410", 1685 "AB", "\u00c0\u00c1", "\u0410\u0411", 1686 "A", "\u00c0", "\u0410", 1687 "B", "\u00c2", "\u0411", 1688 "aA", "\u00e0\u00c0", "\u0430\u0410" 1689 }; 1690 1691 boolean[] expected = new boolean[] { 1692 true, false, false, 1693 true, false, false, 1694 true, false, false, 1695 true, false, false, 1696 true, false, false 1697 }; 1698 1699 flags = Pattern.CASE_INSENSITIVE; 1700 for (int i = 0; i < patterns.length; i++) { 1701 pattern = Pattern.compile(patterns[i], flags); 1702 matcher = pattern.matcher(texts[i]); 1703 if (matcher.matches() != expected[i]) { 1704 System.out.println("<1> Failed at " + i); 1705 failCount++; 1706 } 1707 } 1708 1709 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1710 for (int i = 0; i < patterns.length; i++) { 1711 pattern = Pattern.compile(patterns[i], flags); 1712 matcher = pattern.matcher(texts[i]); 1713 if (!matcher.matches()) { 1714 System.out.println("<2> Failed at " + i); 1715 failCount++; 1716 } 1717 } 1718 // flag unicode_case alone should do nothing 1719 flags = Pattern.UNICODE_CASE; 1720 for (int i = 0; i < patterns.length; i++) { 1721 pattern = Pattern.compile(patterns[i], flags); 1722 matcher = pattern.matcher(texts[i]); 1723 if (matcher.matches()) { 1724 System.out.println("<3> Failed at " + i); 1725 failCount++; 1726 } 1727 } 1728 1729 // Special cases: i, I, u+0131 and u+0130 1730 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1731 pattern = Pattern.compile("[h-j]+", flags); 1732 if (!pattern.matcher("\u0131\u0130").matches()) 1733 failCount++; 1734 report("Case Folding"); 1735 } 1736 1737 private static void appendTest() { 1738 Pattern pattern = Pattern.compile("(ab)(cd)"); 1739 Matcher matcher = pattern.matcher("abcd"); 1740 String result = matcher.replaceAll("$2$1"); 1741 if (!result.equals("cdab")) 1742 failCount++; 1743 1744 String s1 = "Swap all: first = 123, second = 456"; 1745 String s2 = "Swap one: first = 123, second = 456"; 1746 String r = "$3$2$1"; 1747 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1748 matcher = pattern.matcher(s1); 1749 1750 result = matcher.replaceAll(r); 1751 if (!result.equals("Swap all: 123 = first, 456 = second")) 1752 failCount++; 1753 1754 matcher = pattern.matcher(s2); 1755 1756 if (matcher.find()) { 1757 StringBuffer sb = new StringBuffer(); 1758 matcher.appendReplacement(sb, r); 1759 matcher.appendTail(sb); 1760 result = sb.toString(); 1761 if (!result.equals("Swap one: 123 = first, second = 456")) 1762 failCount++; 1763 } 1764 1765 // Supplementary character test 1766 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1767 matcher = pattern.matcher(toSupplementaries("abcd")); 1768 result = matcher.replaceAll("$2$1"); 1769 if (!result.equals(toSupplementaries("cdab"))) 1770 failCount++; 1771 1772 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1773 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1774 r = toSupplementaries("$3$2$1"); 1775 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1776 matcher = pattern.matcher(s1); 1777 1778 result = matcher.replaceAll(r); 1779 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1780 failCount++; 1781 1782 matcher = pattern.matcher(s2); 1783 1784 if (matcher.find()) { 1785 StringBuffer sb = new StringBuffer(); 1786 matcher.appendReplacement(sb, r); 1787 matcher.appendTail(sb); 1788 result = sb.toString(); 1789 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1790 failCount++; 1791 } 1792 report("Append"); 1793 } 1794 1795 private static void splitTest() { 1796 Pattern pattern = Pattern.compile(":"); 1797 String[] result = pattern.split("foo:and:boo", 2); 1798 if (!result[0].equals("foo")) 1799 failCount++; 1800 if (!result[1].equals("and:boo")) 1801 failCount++; 1802 // Supplementary character test 1803 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1804 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1805 if (!result[0].equals(toSupplementaries("foo"))) 1806 failCount++; 1807 if (!result[1].equals(toSupplementaries("andXboo"))) 1808 failCount++; 1809 1810 CharBuffer cb = CharBuffer.allocate(100); 1811 cb.put("foo:and:boo"); 1812 cb.flip(); 1813 result = pattern.split(cb); 1814 if (!result[0].equals("foo")) 1815 failCount++; 1816 if (!result[1].equals("and")) 1817 failCount++; 1818 if (!result[2].equals("boo")) 1819 failCount++; 1820 1821 // Supplementary character test 1822 CharBuffer cbs = CharBuffer.allocate(100); 1823 cbs.put(toSupplementaries("fooXandXboo")); 1824 cbs.flip(); 1825 result = patternX.split(cbs); 1826 if (!result[0].equals(toSupplementaries("foo"))) 1827 failCount++; 1828 if (!result[1].equals(toSupplementaries("and"))) 1829 failCount++; 1830 if (!result[2].equals(toSupplementaries("boo"))) 1831 failCount++; 1832 1833 String source = "0123456789"; 1834 for (int limit=-2; limit<3; limit++) { 1835 for (int x=0; x<10; x++) { 1836 result = source.split(Integer.toString(x), limit); 1837 int expectedLength = limit < 1 ? 2 : limit; 1838 1839 if ((limit == 0) && (x == 9)) { 1840 // expected dropping of "" 1841 if (result.length != 1) 1842 failCount++; 1843 if (!result[0].equals("012345678")) { 1844 failCount++; 1845 } 1846 } else { 1847 if (result.length != expectedLength) { 1848 failCount++; 1849 } 1850 if (!result[0].equals(source.substring(0,x))) { 1851 if (limit != 1) { 1852 failCount++; 1853 } else { 1854 if (!result[0].equals(source.substring(0,10))) { 1855 failCount++; 1856 } 1857 } 1858 } 1859 if (expectedLength > 1) { // Check segment 2 1860 if (!result[1].equals(source.substring(x+1,10))) 1861 failCount++; 1862 } 1863 } 1864 } 1865 } 1866 // Check the case for no match found 1867 for (int limit=-2; limit<3; limit++) { 1868 result = source.split("e", limit); 1869 if (result.length != 1) 1870 failCount++; 1871 if (!result[0].equals(source)) 1872 failCount++; 1873 } 1874 // Check the case for limit == 0, source = ""; 1875 // split() now returns 0-length for empty source "" see #6559590 1876 source = ""; 1877 result = source.split("e", 0); 1878 if (result.length != 1) 1879 failCount++; 1880 if (!result[0].equals(source)) 1881 failCount++; 1882 1883 // Check both split() and splitAsStraem(), especially for zero-lenth 1884 // input and zero-lenth match cases 1885 String[][] input = new String[][] { 1886 { " ", "Abc Efg Hij" }, // normal non-zero-match 1887 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1888 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1889 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1890 { "(?=\\p{Lu})", "AbcEfg" }, 1891 { "(?=\\p{Lu})", "Abc" }, 1892 { " ", "" }, // zero-length input 1893 { ".*", "" }, 1894 1895 // some tests from PatternStreamTest.java 1896 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1897 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1898 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1899 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1900 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1901 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1902 { "\u56da", "" }, 1903 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1904 { "o", "boo:and:foo" }, 1905 { "o", "booooo:and:fooooo" }, 1906 { "o", "fooooo:" }, 1907 }; 1908 1909 String[][] expected = new String[][] { 1910 { "Abc", "Efg", "Hij" }, 1911 { "", "Abc", "Efg", "Hij" }, 1912 { "Abc", "", "Efg", "Hij" }, 1913 { "Abc", "Efg", "Hij" }, 1914 { "Abc", "Efg" }, 1915 { "Abc" }, 1916 { "" }, 1917 { "" }, 1918 1919 { "awgqwefg1fefw", "vssv1vvv1" }, 1920 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1921 { "awgqwefg", "fefw4vssv", "vvv" }, 1922 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1923 { "1", "23", "456", "7890" }, 1924 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1925 { "" }, 1926 { "This", "is", "testing", "", "with", "different", "separators" }, 1927 { "b", "", ":and:f" }, 1928 { "b", "", "", "", "", ":and:f" }, 1929 { "f", "", "", "", "", ":" }, 1930 }; 1931 for (int i = 0; i < input.length; i++) { 1932 pattern = Pattern.compile(input[i][0]); 1933 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1934 failCount++; 1935 } 1936 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1937 // array for zero-length input for now 1938 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1939 expected[i])) { 1940 failCount++; 1941 } 1942 } 1943 report("Split"); 1944 } 1945 1946 private static void negationTest() { 1947 Pattern pattern = Pattern.compile("[\\[@^]+"); 1948 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1949 if (!matcher.find()) 1950 failCount++; 1951 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1952 failCount++; 1953 pattern = Pattern.compile("[@\\[^]+"); 1954 matcher = pattern.matcher("@@@@[[[[^^^^"); 1955 if (!matcher.find()) 1956 failCount++; 1957 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1958 failCount++; 1959 pattern = Pattern.compile("[@\\[^@]+"); 1960 matcher = pattern.matcher("@@@@[[[[^^^^"); 1961 if (!matcher.find()) 1962 failCount++; 1963 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1964 failCount++; 1965 1966 pattern = Pattern.compile("\\)"); 1967 matcher = pattern.matcher("xxx)xxx"); 1968 if (!matcher.find()) 1969 failCount++; 1970 1971 report("Negation"); 1972 } 1973 1974 private static void ampersandTest() { 1975 Pattern pattern = Pattern.compile("[&@]+"); 1976 check(pattern, "@@@@&&&&", true); 1977 1978 pattern = Pattern.compile("[@&]+"); 1979 check(pattern, "@@@@&&&&", true); 1980 1981 pattern = Pattern.compile("[@\\&]+"); 1982 check(pattern, "@@@@&&&&", true); 1983 1984 report("Ampersand"); 1985 } 1986 1987 private static void octalTest() throws Exception { 1988 Pattern pattern = Pattern.compile("\\u0007"); 1989 Matcher matcher = pattern.matcher("\u0007"); 1990 if (!matcher.matches()) 1991 failCount++; 1992 pattern = Pattern.compile("\\07"); 1993 matcher = pattern.matcher("\u0007"); 1994 if (!matcher.matches()) 1995 failCount++; 1996 pattern = Pattern.compile("\\007"); 1997 matcher = pattern.matcher("\u0007"); 1998 if (!matcher.matches()) 1999 failCount++; 2000 pattern = Pattern.compile("\\0007"); 2001 matcher = pattern.matcher("\u0007"); 2002 if (!matcher.matches()) 2003 failCount++; 2004 pattern = Pattern.compile("\\040"); 2005 matcher = pattern.matcher("\u0020"); 2006 if (!matcher.matches()) 2007 failCount++; 2008 pattern = Pattern.compile("\\0403"); 2009 matcher = pattern.matcher("\u00203"); 2010 if (!matcher.matches()) 2011 failCount++; 2012 pattern = Pattern.compile("\\0103"); 2013 matcher = pattern.matcher("\u0043"); 2014 if (!matcher.matches()) 2015 failCount++; 2016 2017 report("Octal"); 2018 } 2019 2020 private static void longPatternTest() throws Exception { 2021 try { 2022 Pattern pattern = Pattern.compile( 2023 "a 32-character-long pattern xxxx"); 2024 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 2025 pattern = Pattern.compile("a thirty four character long regex"); 2026 StringBuffer patternToBe = new StringBuffer(101); 2027 for (int i=0; i<100; i++) 2028 patternToBe.append((char)(97 + i%26)); 2029 pattern = Pattern.compile(patternToBe.toString()); 2030 } catch (PatternSyntaxException e) { 2031 failCount++; 2032 } 2033 2034 // Supplementary character test 2035 try { 2036 Pattern pattern = Pattern.compile( 2037 toSupplementaries("a 32-character-long pattern xxxx")); 2038 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 2039 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 2040 StringBuffer patternToBe = new StringBuffer(101*2); 2041 for (int i=0; i<100; i++) 2042 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 2043 + 97 + i%26)); 2044 pattern = Pattern.compile(patternToBe.toString()); 2045 } catch (PatternSyntaxException e) { 2046 failCount++; 2047 } 2048 report("LongPattern"); 2049 } 2050 2051 private static void group0Test() throws Exception { 2052 Pattern pattern = Pattern.compile("(tes)ting"); 2053 Matcher matcher = pattern.matcher("testing"); 2054 check(matcher, "testing"); 2055 2056 matcher.reset("testing"); 2057 if (matcher.lookingAt()) { 2058 if (!matcher.group(0).equals("testing")) 2059 failCount++; 2060 } else { 2061 failCount++; 2062 } 2063 2064 matcher.reset("testing"); 2065 if (matcher.matches()) { 2066 if (!matcher.group(0).equals("testing")) 2067 failCount++; 2068 } else { 2069 failCount++; 2070 } 2071 2072 pattern = Pattern.compile("(tes)ting"); 2073 matcher = pattern.matcher("testing"); 2074 if (matcher.lookingAt()) { 2075 if (!matcher.group(0).equals("testing")) 2076 failCount++; 2077 } else { 2078 failCount++; 2079 } 2080 2081 pattern = Pattern.compile("^(tes)ting"); 2082 matcher = pattern.matcher("testing"); 2083 if (matcher.matches()) { 2084 if (!matcher.group(0).equals("testing")) 2085 failCount++; 2086 } else { 2087 failCount++; 2088 } 2089 2090 // Supplementary character test 2091 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2092 matcher = pattern.matcher(toSupplementaries("testing")); 2093 check(matcher, toSupplementaries("testing")); 2094 2095 matcher.reset(toSupplementaries("testing")); 2096 if (matcher.lookingAt()) { 2097 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2098 failCount++; 2099 } else { 2100 failCount++; 2101 } 2102 2103 matcher.reset(toSupplementaries("testing")); 2104 if (matcher.matches()) { 2105 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2106 failCount++; 2107 } else { 2108 failCount++; 2109 } 2110 2111 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2112 matcher = pattern.matcher(toSupplementaries("testing")); 2113 if (matcher.lookingAt()) { 2114 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2115 failCount++; 2116 } else { 2117 failCount++; 2118 } 2119 2120 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2121 matcher = pattern.matcher(toSupplementaries("testing")); 2122 if (matcher.matches()) { 2123 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2124 failCount++; 2125 } else { 2126 failCount++; 2127 } 2128 2129 report("Group0"); 2130 } 2131 2132 private static void findIntTest() throws Exception { 2133 Pattern p = Pattern.compile("blah"); 2134 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2135 boolean result = m.find(2); 2136 if (!result) 2137 failCount++; 2138 2139 p = Pattern.compile("$"); 2140 m = p.matcher("1234567890"); 2141 result = m.find(10); 2142 if (!result) 2143 failCount++; 2144 try { 2145 result = m.find(11); 2146 failCount++; 2147 } catch (IndexOutOfBoundsException e) { 2148 // correct result 2149 } 2150 2151 // Supplementary character test 2152 p = Pattern.compile(toSupplementaries("blah")); 2153 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2154 result = m.find(2); 2155 if (!result) 2156 failCount++; 2157 2158 report("FindInt"); 2159 } 2160 2161 private static void emptyPatternTest() throws Exception { 2162 Pattern p = Pattern.compile(""); 2163 Matcher m = p.matcher("foo"); 2164 2165 // Should find empty pattern at beginning of input 2166 boolean result = m.find(); 2167 if (result != true) 2168 failCount++; 2169 if (m.start() != 0) 2170 failCount++; 2171 2172 // Should not match entire input if input is not empty 2173 m.reset(); 2174 result = m.matches(); 2175 if (result == true) 2176 failCount++; 2177 2178 try { 2179 m.start(0); 2180 failCount++; 2181 } catch (IllegalStateException e) { 2182 // Correct result 2183 } 2184 2185 // Should match entire input if input is empty 2186 m.reset(""); 2187 result = m.matches(); 2188 if (result != true) 2189 failCount++; 2190 2191 result = Pattern.matches("", ""); 2192 if (result != true) 2193 failCount++; 2194 2195 result = Pattern.matches("", "foo"); 2196 if (result == true) 2197 failCount++; 2198 report("EmptyPattern"); 2199 } 2200 2201 private static void charClassTest() throws Exception { 2202 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2203 check(pattern, "blahb]blech", true); 2204 2205 pattern = Pattern.compile("[abc[def]]"); 2206 check(pattern, "b", true); 2207 2208 // Supplementary character tests 2209 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2210 check(pattern, toSupplementaries("blahb]blech"), true); 2211 2212 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2213 check(pattern, toSupplementaries("b"), true); 2214 2215 try { 2216 // u00ff when UNICODE_CASE 2217 pattern = Pattern.compile("[ab\u00ffcd]", 2218 Pattern.CASE_INSENSITIVE| 2219 Pattern.UNICODE_CASE); 2220 check(pattern, "ab\u00ffcd", true); 2221 check(pattern, "Ab\u0178Cd", true); 2222 2223 // u00b5 when UNICODE_CASE 2224 pattern = Pattern.compile("[ab\u00b5cd]", 2225 Pattern.CASE_INSENSITIVE| 2226 Pattern.UNICODE_CASE); 2227 check(pattern, "ab\u00b5cd", true); 2228 check(pattern, "Ab\u039cCd", true); 2229 } catch (Exception e) { failCount++; } 2230 2231 /* Special cases 2232 (1)LatinSmallLetterLongS u+017f 2233 (2)LatinSmallLetterDotlessI u+0131 2234 (3)LatineCapitalLetterIWithDotAbove u+0130 2235 (4)KelvinSign u+212a 2236 (5)AngstromSign u+212b 2237 */ 2238 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2239 pattern = Pattern.compile("[sik\u00c5]+", flags); 2240 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2241 failCount++; 2242 2243 report("CharClass"); 2244 } 2245 2246 private static void caretTest() throws Exception { 2247 Pattern pattern = Pattern.compile("\\w*"); 2248 Matcher matcher = pattern.matcher("a#bc#def##g"); 2249 check(matcher, "a"); 2250 check(matcher, ""); 2251 check(matcher, "bc"); 2252 check(matcher, ""); 2253 check(matcher, "def"); 2254 check(matcher, ""); 2255 check(matcher, ""); 2256 check(matcher, "g"); 2257 check(matcher, ""); 2258 if (matcher.find()) 2259 failCount++; 2260 2261 pattern = Pattern.compile("^\\w*"); 2262 matcher = pattern.matcher("a#bc#def##g"); 2263 check(matcher, "a"); 2264 if (matcher.find()) 2265 failCount++; 2266 2267 pattern = Pattern.compile("\\w"); 2268 matcher = pattern.matcher("abc##x"); 2269 check(matcher, "a"); 2270 check(matcher, "b"); 2271 check(matcher, "c"); 2272 check(matcher, "x"); 2273 if (matcher.find()) 2274 failCount++; 2275 2276 pattern = Pattern.compile("^\\w"); 2277 matcher = pattern.matcher("abc##x"); 2278 check(matcher, "a"); 2279 if (matcher.find()) 2280 failCount++; 2281 2282 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2283 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2284 check(matcher, "abc"); 2285 if (matcher.find()) 2286 failCount++; 2287 2288 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2289 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2290 check(matcher, "abc"); 2291 check(matcher, "jkl"); 2292 if (matcher.find()) 2293 failCount++; 2294 2295 pattern = Pattern.compile("^", Pattern.MULTILINE); 2296 matcher = pattern.matcher("this is some text"); 2297 String result = matcher.replaceAll("X"); 2298 if (!result.equals("Xthis is some text")) 2299 failCount++; 2300 2301 pattern = Pattern.compile("^"); 2302 matcher = pattern.matcher("this is some text"); 2303 result = matcher.replaceAll("X"); 2304 if (!result.equals("Xthis is some text")) 2305 failCount++; 2306 2307 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2308 matcher = pattern.matcher("this is some text\n"); 2309 result = matcher.replaceAll("X"); 2310 if (!result.equals("Xthis is some text\n")) 2311 failCount++; 2312 2313 report("Caret"); 2314 } 2315 2316 private static void groupCaptureTest() throws Exception { 2317 // Independent group 2318 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2319 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2320 matcher.find(); 2321 try { 2322 String blah = matcher.group(1); 2323 failCount++; 2324 } catch (IndexOutOfBoundsException ioobe) { 2325 // Good result 2326 } 2327 // Pure group 2328 pattern = Pattern.compile("x+(?:y+)z+"); 2329 matcher = pattern.matcher("xxxyyyzzz"); 2330 matcher.find(); 2331 try { 2332 String blah = matcher.group(1); 2333 failCount++; 2334 } catch (IndexOutOfBoundsException ioobe) { 2335 // Good result 2336 } 2337 2338 // Supplementary character tests 2339 // Independent group 2340 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2341 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2342 matcher.find(); 2343 try { 2344 String blah = matcher.group(1); 2345 failCount++; 2346 } catch (IndexOutOfBoundsException ioobe) { 2347 // Good result 2348 } 2349 // Pure group 2350 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2351 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2352 matcher.find(); 2353 try { 2354 String blah = matcher.group(1); 2355 failCount++; 2356 } catch (IndexOutOfBoundsException ioobe) { 2357 // Good result 2358 } 2359 2360 report("GroupCapture"); 2361 } 2362 2363 private static void backRefTest() throws Exception { 2364 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2365 check(pattern, "zzzaabcazzz", true); 2366 2367 pattern = Pattern.compile("(a*)bc\\1"); 2368 check(pattern, "zzzaabcaazzz", true); 2369 2370 pattern = Pattern.compile("(abc)(def)\\1"); 2371 check(pattern, "abcdefabc", true); 2372 2373 pattern = Pattern.compile("(abc)(def)\\3"); 2374 check(pattern, "abcdefabc", false); 2375 2376 try { 2377 for (int i = 1; i < 10; i++) { 2378 // Make sure backref 1-9 are always accepted 2379 pattern = Pattern.compile("abcdef\\" + i); 2380 // and fail to match if the target group does not exit 2381 check(pattern, "abcdef", false); 2382 } 2383 } catch(PatternSyntaxException e) { 2384 failCount++; 2385 } 2386 2387 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2388 check(pattern, "abcdefghija", false); 2389 check(pattern, "abcdefghija1", true); 2390 2391 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2392 check(pattern, "abcdefghijkk", true); 2393 2394 pattern = Pattern.compile("(a)bcdefghij\\11"); 2395 check(pattern, "abcdefghija1", true); 2396 2397 // Supplementary character tests 2398 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2399 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2400 2401 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2402 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2403 2404 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2405 check(pattern, toSupplementaries("abcdefabc"), true); 2406 2407 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2408 check(pattern, toSupplementaries("abcdefabc"), false); 2409 2410 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2411 check(pattern, toSupplementaries("abcdefghija"), false); 2412 check(pattern, toSupplementaries("abcdefghija1"), true); 2413 2414 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2415 check(pattern, toSupplementaries("abcdefghijkk"), true); 2416 2417 report("BackRef"); 2418 } 2419 2420 /** 2421 * Unicode Technical Report #18, section 2.6 End of Line 2422 * There is no empty line to be matched in the sequence \u000D\u000A 2423 * but there is an empty line in the sequence \u000A\u000D. 2424 */ 2425 private static void anchorTest() throws Exception { 2426 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2427 Matcher m = p.matcher("blah1\r\nblah2"); 2428 m.find(); 2429 m.find(); 2430 if (!m.group().equals("blah2")) 2431 failCount++; 2432 2433 m.reset("blah1\n\rblah2"); 2434 m.find(); 2435 m.find(); 2436 m.find(); 2437 if (!m.group().equals("blah2")) 2438 failCount++; 2439 2440 // Test behavior of $ with \r\n at end of input 2441 p = Pattern.compile(".+$"); 2442 m = p.matcher("blah1\r\n"); 2443 if (!m.find()) 2444 failCount++; 2445 if (!m.group().equals("blah1")) 2446 failCount++; 2447 if (m.find()) 2448 failCount++; 2449 2450 // Test behavior of $ with \r\n at end of input in multiline 2451 p = Pattern.compile(".+$", Pattern.MULTILINE); 2452 m = p.matcher("blah1\r\n"); 2453 if (!m.find()) 2454 failCount++; 2455 if (m.find()) 2456 failCount++; 2457 2458 // Test for $ recognition of \u0085 for bug 4527731 2459 p = Pattern.compile(".+$", Pattern.MULTILINE); 2460 m = p.matcher("blah1\u0085"); 2461 if (!m.find()) 2462 failCount++; 2463 2464 // Supplementary character test 2465 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2466 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2467 m.find(); 2468 m.find(); 2469 if (!m.group().equals(toSupplementaries("blah2"))) 2470 failCount++; 2471 2472 m.reset(toSupplementaries("blah1\n\rblah2")); 2473 m.find(); 2474 m.find(); 2475 m.find(); 2476 if (!m.group().equals(toSupplementaries("blah2"))) 2477 failCount++; 2478 2479 // Test behavior of $ with \r\n at end of input 2480 p = Pattern.compile(".+$"); 2481 m = p.matcher(toSupplementaries("blah1\r\n")); 2482 if (!m.find()) 2483 failCount++; 2484 if (!m.group().equals(toSupplementaries("blah1"))) 2485 failCount++; 2486 if (m.find()) 2487 failCount++; 2488 2489 // Test behavior of $ with \r\n at end of input in multiline 2490 p = Pattern.compile(".+$", Pattern.MULTILINE); 2491 m = p.matcher(toSupplementaries("blah1\r\n")); 2492 if (!m.find()) 2493 failCount++; 2494 if (m.find()) 2495 failCount++; 2496 2497 // Test for $ recognition of \u0085 for bug 4527731 2498 p = Pattern.compile(".+$", Pattern.MULTILINE); 2499 m = p.matcher(toSupplementaries("blah1\u0085")); 2500 if (!m.find()) 2501 failCount++; 2502 2503 report("Anchors"); 2504 } 2505 2506 /** 2507 * A basic sanity test of Matcher.lookingAt(). 2508 */ 2509 private static void lookingAtTest() throws Exception { 2510 Pattern p = Pattern.compile("(ab)(c*)"); 2511 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2512 2513 if (!m.lookingAt()) 2514 failCount++; 2515 2516 if (!m.group().equals(m.group(0))) 2517 failCount++; 2518 2519 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2520 if (m.lookingAt()) 2521 failCount++; 2522 2523 // Supplementary character test 2524 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2525 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2526 2527 if (!m.lookingAt()) 2528 failCount++; 2529 2530 if (!m.group().equals(m.group(0))) 2531 failCount++; 2532 2533 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2534 if (m.lookingAt()) 2535 failCount++; 2536 2537 report("Looking At"); 2538 } 2539 2540 /** 2541 * A basic sanity test of Matcher.matches(). 2542 */ 2543 private static void matchesTest() throws Exception { 2544 // matches() 2545 Pattern p = Pattern.compile("ulb(c*)"); 2546 Matcher m = p.matcher("ulbcccccc"); 2547 if (!m.matches()) 2548 failCount++; 2549 2550 // find() but not matches() 2551 m.reset("zzzulbcccccc"); 2552 if (m.matches()) 2553 failCount++; 2554 2555 // lookingAt() but not matches() 2556 m.reset("ulbccccccdef"); 2557 if (m.matches()) 2558 failCount++; 2559 2560 // matches() 2561 p = Pattern.compile("a|ad"); 2562 m = p.matcher("ad"); 2563 if (!m.matches()) 2564 failCount++; 2565 2566 // Supplementary character test 2567 // matches() 2568 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2569 m = p.matcher(toSupplementaries("ulbcccccc")); 2570 if (!m.matches()) 2571 failCount++; 2572 2573 // find() but not matches() 2574 m.reset(toSupplementaries("zzzulbcccccc")); 2575 if (m.matches()) 2576 failCount++; 2577 2578 // lookingAt() but not matches() 2579 m.reset(toSupplementaries("ulbccccccdef")); 2580 if (m.matches()) 2581 failCount++; 2582 2583 // matches() 2584 p = Pattern.compile(toSupplementaries("a|ad")); 2585 m = p.matcher(toSupplementaries("ad")); 2586 if (!m.matches()) 2587 failCount++; 2588 2589 report("Matches"); 2590 } 2591 2592 /** 2593 * A basic sanity test of Pattern.matches(). 2594 */ 2595 private static void patternMatchesTest() throws Exception { 2596 // matches() 2597 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2598 toSupplementaries("ulbcccccc"))) 2599 failCount++; 2600 2601 // find() but not matches() 2602 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2603 toSupplementaries("zzzulbcccccc"))) 2604 failCount++; 2605 2606 // lookingAt() but not matches() 2607 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2608 toSupplementaries("ulbccccccdef"))) 2609 failCount++; 2610 2611 // Supplementary character test 2612 // matches() 2613 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2614 toSupplementaries("ulbcccccc"))) 2615 failCount++; 2616 2617 // find() but not matches() 2618 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2619 toSupplementaries("zzzulbcccccc"))) 2620 failCount++; 2621 2622 // lookingAt() but not matches() 2623 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2624 toSupplementaries("ulbccccccdef"))) 2625 failCount++; 2626 2627 report("Pattern Matches"); 2628 } 2629 2630 /** 2631 * Canonical equivalence testing. Tests the ability of the engine 2632 * to match sequences that are not explicitly specified in the 2633 * pattern when they are considered equivalent by the Unicode Standard. 2634 */ 2635 private static void ceTest() throws Exception { 2636 // Decomposed char outside char classes 2637 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2638 Matcher m = p.matcher("test\u00e5"); 2639 if (!m.matches()) 2640 failCount++; 2641 2642 m.reset("testa\u030a"); 2643 if (!m.matches()) 2644 failCount++; 2645 2646 // Composed char outside char classes 2647 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2648 m = p.matcher("test\u00e5"); 2649 if (!m.matches()) 2650 failCount++; 2651 2652 m.reset("testa\u030a"); 2653 if (!m.find()) 2654 failCount++; 2655 2656 // Decomposed char inside a char class 2657 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2658 m = p.matcher("test\u00e5"); 2659 if (!m.find()) 2660 failCount++; 2661 2662 m.reset("testa\u030a"); 2663 if (!m.find()) 2664 failCount++; 2665 2666 // Composed char inside a char class 2667 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2668 m = p.matcher("test\u00e5"); 2669 if (!m.find()) 2670 failCount++; 2671 2672 m.reset("testa\u0300"); 2673 if (!m.find()) 2674 failCount++; 2675 2676 m.reset("testa\u030a"); 2677 if (!m.find()) 2678 failCount++; 2679 2680 // Marks that cannot legally change order and be equivalent 2681 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2682 check(p, "testa\u0308\u0300", true); 2683 check(p, "testa\u0300\u0308", false); 2684 2685 // Marks that can legally change order and be equivalent 2686 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2687 check(p, "testa\u0308\u0323", true); 2688 check(p, "testa\u0323\u0308", true); 2689 2690 // Test all equivalences of the sequence a\u0308\u0323\u0300 2691 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2692 check(p, "testa\u0308\u0323\u0300", true); 2693 check(p, "testa\u0323\u0308\u0300", true); 2694 check(p, "testa\u0308\u0300\u0323", true); 2695 check(p, "test\u00e4\u0323\u0300", true); 2696 check(p, "test\u00e4\u0300\u0323", true); 2697 2698 Object[][] data = new Object[][] { 2699 2700 // JDK-4867170 2701 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true }, 2702 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true }, 2703 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true }, 2704 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true }, 2705 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true }, 2706 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true }, 2707 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true }, 2708 2709 { "\\p{IsGreek}", "ab\u1f80cd", "f", true }, 2710 { "\\p{IsGreek}", "ab\u1f81cd", "f", true }, 2711 { "\\p{IsGreek}", "ab\u1f82cd", "f", true }, 2712 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true }, 2713 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true }, 2714 2715 // backtracking, force to match "\u1f80", instead of \u1f82" 2716 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true }, 2717 2718 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true }, 2719 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true }, 2720 2721 { "[^\u1f80-\u1f82]","\u1f81", "m", false }, 2722 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false }, 2723 { "[^\u1f01\u0345]", "\u1f81", "f", false }, 2724 2725 { "[^\u1f81]+", "\u1f80\u1f82", "f", true }, 2726 { "[\u1f80]", "ab\u1f80cd", "f", true }, 2727 { "\u1f80", "ab\u1f80cd", "f", true }, 2728 { "\u1f00\u0345\u0300", "\u1f82", "m", true }, 2729 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true }, 2730 { "\u1f82", "\u1f00\u0345\u0300", "m", true }, 2731 { "\u1f82", "\u1f80\u0300", "m", true }, 2732 2733 // JDK-7080302 # compile failed 2734 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true}, 2735 2736 // JDK-6728861, same cause as above one 2737 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true}, 2738 2739 // JDK-6995635 2740 { "(\u00e9)", "e\u0301", "m", true }, 2741 2742 // JDK-6736245 2743 // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc 2744 { "\u2ADC", "\u2ADC", "m", true}, // NFC 2745 { "\u2ADC", "\u2ADD\u0338", "m", true}, // NFD 2746 2747 // 4916384. 2748 // Decomposed hangul (jamos) works inside clazz 2749 { "[\u1100\u1161]", "\u1100\u1161", "m", true}, 2750 { "[\u1100\u1161]", "\uac00", "m", true}, 2751 2752 { "[\uac00]", "\u1100\u1161", "m", true}, 2753 { "[\uac00]", "\uac00", "m", true}, 2754 2755 // Decomposed hangul (jamos) 2756 { "\u1100\u1161", "\u1100\u1161", "m", true}, 2757 { "\u1100\u1161", "\uac00", "m", true}, 2758 2759 // Composed hangul 2760 { "\uac00", "\u1100\u1161", "m", true }, 2761 { "\uac00", "\uac00", "m", true }, 2762 2763 /* Need a NFDSlice to nfd the source to solve this issue 2764 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2765 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165> 2766 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2767 2768 // Decomposed supplementary outside char classes 2769 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2770 // Composed supplementary outside char classes 2771 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2772 */ 2773 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2774 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2775 2776 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true }, 2777 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2778 }; 2779 2780 int failCount = 0; 2781 for (Object[] d : data) { 2782 String pn = (String)d[0]; 2783 String tt = (String)d[1]; 2784 boolean isFind = "f".equals(((String)d[2])); 2785 boolean expected = (boolean)d[3]; 2786 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find() 2787 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches(); 2788 if (ret != expected) { 2789 failCount++; 2790 continue; 2791 } 2792 } 2793 report("Canonical Equivalence"); 2794 } 2795 2796 /** 2797 * A basic sanity test of Matcher.replaceAll(). 2798 */ 2799 private static void globalSubstitute() throws Exception { 2800 // Global substitution with a literal 2801 Pattern p = Pattern.compile("(ab)(c*)"); 2802 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2803 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2804 failCount++; 2805 2806 m.reset("zzzabccczzzabcczzzabccczzz"); 2807 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2808 failCount++; 2809 2810 // Global substitution with groups 2811 m.reset("zzzabccczzzabcczzzabccczzz"); 2812 String result = m.replaceAll("$1"); 2813 if (!result.equals("zzzabzzzabzzzabzzz")) 2814 failCount++; 2815 2816 // Supplementary character test 2817 // Global substitution with a literal 2818 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2819 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2820 if (!m.replaceAll(toSupplementaries("test")). 2821 equals(toSupplementaries("testzzztestzzztest"))) 2822 failCount++; 2823 2824 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2825 if (!m.replaceAll(toSupplementaries("test")). 2826 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2827 failCount++; 2828 2829 // Global substitution with groups 2830 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2831 result = m.replaceAll("$1"); 2832 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2833 failCount++; 2834 2835 report("Global Substitution"); 2836 } 2837 2838 /** 2839 * Tests the usage of Matcher.appendReplacement() with literal 2840 * and group substitutions. 2841 */ 2842 private static void stringbufferSubstitute() throws Exception { 2843 // SB substitution with literal 2844 String blah = "zzzblahzzz"; 2845 Pattern p = Pattern.compile("blah"); 2846 Matcher m = p.matcher(blah); 2847 StringBuffer result = new StringBuffer(); 2848 try { 2849 m.appendReplacement(result, "blech"); 2850 failCount++; 2851 } catch (IllegalStateException e) { 2852 } 2853 m.find(); 2854 m.appendReplacement(result, "blech"); 2855 if (!result.toString().equals("zzzblech")) 2856 failCount++; 2857 2858 m.appendTail(result); 2859 if (!result.toString().equals("zzzblechzzz")) 2860 failCount++; 2861 2862 // SB substitution with groups 2863 blah = "zzzabcdzzz"; 2864 p = Pattern.compile("(ab)(cd)*"); 2865 m = p.matcher(blah); 2866 result = new StringBuffer(); 2867 try { 2868 m.appendReplacement(result, "$1"); 2869 failCount++; 2870 } catch (IllegalStateException e) { 2871 } 2872 m.find(); 2873 m.appendReplacement(result, "$1"); 2874 if (!result.toString().equals("zzzab")) 2875 failCount++; 2876 2877 m.appendTail(result); 2878 if (!result.toString().equals("zzzabzzz")) 2879 failCount++; 2880 2881 // SB substitution with 3 groups 2882 blah = "zzzabcdcdefzzz"; 2883 p = Pattern.compile("(ab)(cd)*(ef)"); 2884 m = p.matcher(blah); 2885 result = new StringBuffer(); 2886 try { 2887 m.appendReplacement(result, "$1w$2w$3"); 2888 failCount++; 2889 } catch (IllegalStateException e) { 2890 } 2891 m.find(); 2892 m.appendReplacement(result, "$1w$2w$3"); 2893 if (!result.toString().equals("zzzabwcdwef")) 2894 failCount++; 2895 2896 m.appendTail(result); 2897 if (!result.toString().equals("zzzabwcdwefzzz")) 2898 failCount++; 2899 2900 // SB substitution with groups and three matches 2901 // skipping middle match 2902 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2903 p = Pattern.compile("(ab)(cd*)"); 2904 m = p.matcher(blah); 2905 result = new StringBuffer(); 2906 try { 2907 m.appendReplacement(result, "$1"); 2908 failCount++; 2909 } catch (IllegalStateException e) { 2910 } 2911 m.find(); 2912 m.appendReplacement(result, "$1"); 2913 if (!result.toString().equals("zzzab")) 2914 failCount++; 2915 2916 m.find(); 2917 m.find(); 2918 m.appendReplacement(result, "$2"); 2919 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2920 failCount++; 2921 2922 m.appendTail(result); 2923 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2924 failCount++; 2925 2926 // Check to make sure escaped $ is ignored 2927 blah = "zzzabcdcdefzzz"; 2928 p = Pattern.compile("(ab)(cd)*(ef)"); 2929 m = p.matcher(blah); 2930 result = new StringBuffer(); 2931 m.find(); 2932 m.appendReplacement(result, "$1w\\$2w$3"); 2933 if (!result.toString().equals("zzzabw$2wef")) 2934 failCount++; 2935 2936 m.appendTail(result); 2937 if (!result.toString().equals("zzzabw$2wefzzz")) 2938 failCount++; 2939 2940 // Check to make sure a reference to nonexistent group causes error 2941 blah = "zzzabcdcdefzzz"; 2942 p = Pattern.compile("(ab)(cd)*(ef)"); 2943 m = p.matcher(blah); 2944 result = new StringBuffer(); 2945 m.find(); 2946 try { 2947 m.appendReplacement(result, "$1w$5w$3"); 2948 failCount++; 2949 } catch (IndexOutOfBoundsException ioobe) { 2950 // Correct result 2951 } 2952 2953 // Check double digit group references 2954 blah = "zzz123456789101112zzz"; 2955 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2956 m = p.matcher(blah); 2957 result = new StringBuffer(); 2958 m.find(); 2959 m.appendReplacement(result, "$1w$11w$3"); 2960 if (!result.toString().equals("zzz1w11w3")) 2961 failCount++; 2962 2963 // Check to make sure it backs off $15 to $1 if only three groups 2964 blah = "zzzabcdcdefzzz"; 2965 p = Pattern.compile("(ab)(cd)*(ef)"); 2966 m = p.matcher(blah); 2967 result = new StringBuffer(); 2968 m.find(); 2969 m.appendReplacement(result, "$1w$15w$3"); 2970 if (!result.toString().equals("zzzabwab5wef")) 2971 failCount++; 2972 2973 2974 // Supplementary character test 2975 // SB substitution with literal 2976 blah = toSupplementaries("zzzblahzzz"); 2977 p = Pattern.compile(toSupplementaries("blah")); 2978 m = p.matcher(blah); 2979 result = new StringBuffer(); 2980 try { 2981 m.appendReplacement(result, toSupplementaries("blech")); 2982 failCount++; 2983 } catch (IllegalStateException e) { 2984 } 2985 m.find(); 2986 m.appendReplacement(result, toSupplementaries("blech")); 2987 if (!result.toString().equals(toSupplementaries("zzzblech"))) 2988 failCount++; 2989 2990 m.appendTail(result); 2991 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 2992 failCount++; 2993 2994 // SB substitution with groups 2995 blah = toSupplementaries("zzzabcdzzz"); 2996 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 2997 m = p.matcher(blah); 2998 result = new StringBuffer(); 2999 try { 3000 m.appendReplacement(result, "$1"); 3001 failCount++; 3002 } catch (IllegalStateException e) { 3003 } 3004 m.find(); 3005 m.appendReplacement(result, "$1"); 3006 if (!result.toString().equals(toSupplementaries("zzzab"))) 3007 failCount++; 3008 3009 m.appendTail(result); 3010 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3011 failCount++; 3012 3013 // SB substitution with 3 groups 3014 blah = toSupplementaries("zzzabcdcdefzzz"); 3015 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3016 m = p.matcher(blah); 3017 result = new StringBuffer(); 3018 try { 3019 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3020 failCount++; 3021 } catch (IllegalStateException e) { 3022 } 3023 m.find(); 3024 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3025 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3026 failCount++; 3027 3028 m.appendTail(result); 3029 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3030 failCount++; 3031 3032 // SB substitution with groups and three matches 3033 // skipping middle match 3034 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3035 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3036 m = p.matcher(blah); 3037 result = new StringBuffer(); 3038 try { 3039 m.appendReplacement(result, "$1"); 3040 failCount++; 3041 } catch (IllegalStateException e) { 3042 } 3043 m.find(); 3044 m.appendReplacement(result, "$1"); 3045 if (!result.toString().equals(toSupplementaries("zzzab"))) 3046 failCount++; 3047 3048 m.find(); 3049 m.find(); 3050 m.appendReplacement(result, "$2"); 3051 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3052 failCount++; 3053 3054 m.appendTail(result); 3055 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3056 failCount++; 3057 3058 // Check to make sure escaped $ is ignored 3059 blah = toSupplementaries("zzzabcdcdefzzz"); 3060 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3061 m = p.matcher(blah); 3062 result = new StringBuffer(); 3063 m.find(); 3064 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3065 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3066 failCount++; 3067 3068 m.appendTail(result); 3069 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3070 failCount++; 3071 3072 // Check to make sure a reference to nonexistent group causes error 3073 blah = toSupplementaries("zzzabcdcdefzzz"); 3074 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3075 m = p.matcher(blah); 3076 result = new StringBuffer(); 3077 m.find(); 3078 try { 3079 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3080 failCount++; 3081 } catch (IndexOutOfBoundsException ioobe) { 3082 // Correct result 3083 } 3084 3085 // Check double digit group references 3086 blah = toSupplementaries("zzz123456789101112zzz"); 3087 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3088 m = p.matcher(blah); 3089 result = new StringBuffer(); 3090 m.find(); 3091 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3092 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3093 failCount++; 3094 3095 // Check to make sure it backs off $15 to $1 if only three groups 3096 blah = toSupplementaries("zzzabcdcdefzzz"); 3097 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3098 m = p.matcher(blah); 3099 result = new StringBuffer(); 3100 m.find(); 3101 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3102 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3103 failCount++; 3104 3105 // Check nothing has been appended into the output buffer if 3106 // the replacement string triggers IllegalArgumentException. 3107 p = Pattern.compile("(abc)"); 3108 m = p.matcher("abcd"); 3109 result = new StringBuffer(); 3110 m.find(); 3111 try { 3112 m.appendReplacement(result, ("xyz$g")); 3113 failCount++; 3114 } catch (IllegalArgumentException iae) { 3115 if (result.length() != 0) 3116 failCount++; 3117 } 3118 3119 report("SB Substitution"); 3120 } 3121 3122 /** 3123 * Tests the usage of Matcher.appendReplacement() with literal 3124 * and group substitutions. 3125 */ 3126 private static void stringbuilderSubstitute() throws Exception { 3127 // SB substitution with literal 3128 String blah = "zzzblahzzz"; 3129 Pattern p = Pattern.compile("blah"); 3130 Matcher m = p.matcher(blah); 3131 StringBuilder result = new StringBuilder(); 3132 try { 3133 m.appendReplacement(result, "blech"); 3134 failCount++; 3135 } catch (IllegalStateException e) { 3136 } 3137 m.find(); 3138 m.appendReplacement(result, "blech"); 3139 if (!result.toString().equals("zzzblech")) 3140 failCount++; 3141 3142 m.appendTail(result); 3143 if (!result.toString().equals("zzzblechzzz")) 3144 failCount++; 3145 3146 // SB substitution with groups 3147 blah = "zzzabcdzzz"; 3148 p = Pattern.compile("(ab)(cd)*"); 3149 m = p.matcher(blah); 3150 result = new StringBuilder(); 3151 try { 3152 m.appendReplacement(result, "$1"); 3153 failCount++; 3154 } catch (IllegalStateException e) { 3155 } 3156 m.find(); 3157 m.appendReplacement(result, "$1"); 3158 if (!result.toString().equals("zzzab")) 3159 failCount++; 3160 3161 m.appendTail(result); 3162 if (!result.toString().equals("zzzabzzz")) 3163 failCount++; 3164 3165 // SB substitution with 3 groups 3166 blah = "zzzabcdcdefzzz"; 3167 p = Pattern.compile("(ab)(cd)*(ef)"); 3168 m = p.matcher(blah); 3169 result = new StringBuilder(); 3170 try { 3171 m.appendReplacement(result, "$1w$2w$3"); 3172 failCount++; 3173 } catch (IllegalStateException e) { 3174 } 3175 m.find(); 3176 m.appendReplacement(result, "$1w$2w$3"); 3177 if (!result.toString().equals("zzzabwcdwef")) 3178 failCount++; 3179 3180 m.appendTail(result); 3181 if (!result.toString().equals("zzzabwcdwefzzz")) 3182 failCount++; 3183 3184 // SB substitution with groups and three matches 3185 // skipping middle match 3186 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3187 p = Pattern.compile("(ab)(cd*)"); 3188 m = p.matcher(blah); 3189 result = new StringBuilder(); 3190 try { 3191 m.appendReplacement(result, "$1"); 3192 failCount++; 3193 } catch (IllegalStateException e) { 3194 } 3195 m.find(); 3196 m.appendReplacement(result, "$1"); 3197 if (!result.toString().equals("zzzab")) 3198 failCount++; 3199 3200 m.find(); 3201 m.find(); 3202 m.appendReplacement(result, "$2"); 3203 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3204 failCount++; 3205 3206 m.appendTail(result); 3207 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3208 failCount++; 3209 3210 // Check to make sure escaped $ is ignored 3211 blah = "zzzabcdcdefzzz"; 3212 p = Pattern.compile("(ab)(cd)*(ef)"); 3213 m = p.matcher(blah); 3214 result = new StringBuilder(); 3215 m.find(); 3216 m.appendReplacement(result, "$1w\\$2w$3"); 3217 if (!result.toString().equals("zzzabw$2wef")) 3218 failCount++; 3219 3220 m.appendTail(result); 3221 if (!result.toString().equals("zzzabw$2wefzzz")) 3222 failCount++; 3223 3224 // Check to make sure a reference to nonexistent group causes error 3225 blah = "zzzabcdcdefzzz"; 3226 p = Pattern.compile("(ab)(cd)*(ef)"); 3227 m = p.matcher(blah); 3228 result = new StringBuilder(); 3229 m.find(); 3230 try { 3231 m.appendReplacement(result, "$1w$5w$3"); 3232 failCount++; 3233 } catch (IndexOutOfBoundsException ioobe) { 3234 // Correct result 3235 } 3236 3237 // Check double digit group references 3238 blah = "zzz123456789101112zzz"; 3239 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3240 m = p.matcher(blah); 3241 result = new StringBuilder(); 3242 m.find(); 3243 m.appendReplacement(result, "$1w$11w$3"); 3244 if (!result.toString().equals("zzz1w11w3")) 3245 failCount++; 3246 3247 // Check to make sure it backs off $15 to $1 if only three groups 3248 blah = "zzzabcdcdefzzz"; 3249 p = Pattern.compile("(ab)(cd)*(ef)"); 3250 m = p.matcher(blah); 3251 result = new StringBuilder(); 3252 m.find(); 3253 m.appendReplacement(result, "$1w$15w$3"); 3254 if (!result.toString().equals("zzzabwab5wef")) 3255 failCount++; 3256 3257 3258 // Supplementary character test 3259 // SB substitution with literal 3260 blah = toSupplementaries("zzzblahzzz"); 3261 p = Pattern.compile(toSupplementaries("blah")); 3262 m = p.matcher(blah); 3263 result = new StringBuilder(); 3264 try { 3265 m.appendReplacement(result, toSupplementaries("blech")); 3266 failCount++; 3267 } catch (IllegalStateException e) { 3268 } 3269 m.find(); 3270 m.appendReplacement(result, toSupplementaries("blech")); 3271 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3272 failCount++; 3273 m.appendTail(result); 3274 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3275 failCount++; 3276 3277 // SB substitution with groups 3278 blah = toSupplementaries("zzzabcdzzz"); 3279 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3280 m = p.matcher(blah); 3281 result = new StringBuilder(); 3282 try { 3283 m.appendReplacement(result, "$1"); 3284 failCount++; 3285 } catch (IllegalStateException e) { 3286 } 3287 m.find(); 3288 m.appendReplacement(result, "$1"); 3289 if (!result.toString().equals(toSupplementaries("zzzab"))) 3290 failCount++; 3291 3292 m.appendTail(result); 3293 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3294 failCount++; 3295 3296 // SB substitution with 3 groups 3297 blah = toSupplementaries("zzzabcdcdefzzz"); 3298 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3299 m = p.matcher(blah); 3300 result = new StringBuilder(); 3301 try { 3302 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3303 failCount++; 3304 } catch (IllegalStateException e) { 3305 } 3306 m.find(); 3307 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3308 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3309 failCount++; 3310 3311 m.appendTail(result); 3312 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3313 failCount++; 3314 3315 // SB substitution with groups and three matches 3316 // skipping middle match 3317 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3318 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3319 m = p.matcher(blah); 3320 result = new StringBuilder(); 3321 try { 3322 m.appendReplacement(result, "$1"); 3323 failCount++; 3324 } catch (IllegalStateException e) { 3325 } 3326 m.find(); 3327 m.appendReplacement(result, "$1"); 3328 if (!result.toString().equals(toSupplementaries("zzzab"))) 3329 failCount++; 3330 3331 m.find(); 3332 m.find(); 3333 m.appendReplacement(result, "$2"); 3334 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3335 failCount++; 3336 3337 m.appendTail(result); 3338 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3339 failCount++; 3340 3341 // Check to make sure escaped $ is ignored 3342 blah = toSupplementaries("zzzabcdcdefzzz"); 3343 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3344 m = p.matcher(blah); 3345 result = new StringBuilder(); 3346 m.find(); 3347 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3348 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3349 failCount++; 3350 3351 m.appendTail(result); 3352 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3353 failCount++; 3354 3355 // Check to make sure a reference to nonexistent group causes error 3356 blah = toSupplementaries("zzzabcdcdefzzz"); 3357 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3358 m = p.matcher(blah); 3359 result = new StringBuilder(); 3360 m.find(); 3361 try { 3362 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3363 failCount++; 3364 } catch (IndexOutOfBoundsException ioobe) { 3365 // Correct result 3366 } 3367 // Check double digit group references 3368 blah = toSupplementaries("zzz123456789101112zzz"); 3369 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3370 m = p.matcher(blah); 3371 result = new StringBuilder(); 3372 m.find(); 3373 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3374 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3375 failCount++; 3376 3377 // Check to make sure it backs off $15 to $1 if only three groups 3378 blah = toSupplementaries("zzzabcdcdefzzz"); 3379 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3380 m = p.matcher(blah); 3381 result = new StringBuilder(); 3382 m.find(); 3383 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3384 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3385 failCount++; 3386 // Check nothing has been appended into the output buffer if 3387 // the replacement string triggers IllegalArgumentException. 3388 p = Pattern.compile("(abc)"); 3389 m = p.matcher("abcd"); 3390 result = new StringBuilder(); 3391 m.find(); 3392 try { 3393 m.appendReplacement(result, ("xyz$g")); 3394 failCount++; 3395 } catch (IllegalArgumentException iae) { 3396 if (result.length() != 0) 3397 failCount++; 3398 } 3399 report("SB Substitution 2"); 3400 } 3401 3402 /* 3403 * 5 groups of characters are created to make a substitution string. 3404 * A base string will be created including random lead chars, the 3405 * substitution string, and random trailing chars. 3406 * A pattern containing the 5 groups is searched for and replaced with: 3407 * random group + random string + random group. 3408 * The results are checked for correctness. 3409 */ 3410 private static void substitutionBasher() { 3411 for (int runs = 0; runs<1000; runs++) { 3412 // Create a base string to work in 3413 int leadingChars = generator.nextInt(10); 3414 StringBuffer baseBuffer = new StringBuffer(100); 3415 String leadingString = getRandomAlphaString(leadingChars); 3416 baseBuffer.append(leadingString); 3417 3418 // Create 5 groups of random number of random chars 3419 // Create the string to substitute 3420 // Create the pattern string to search for 3421 StringBuffer bufferToSub = new StringBuffer(25); 3422 StringBuffer bufferToPat = new StringBuffer(50); 3423 String[] groups = new String[5]; 3424 for(int i=0; i<5; i++) { 3425 int aGroupSize = generator.nextInt(5)+1; 3426 groups[i] = getRandomAlphaString(aGroupSize); 3427 bufferToSub.append(groups[i]); 3428 bufferToPat.append('('); 3429 bufferToPat.append(groups[i]); 3430 bufferToPat.append(')'); 3431 } 3432 String stringToSub = bufferToSub.toString(); 3433 String pattern = bufferToPat.toString(); 3434 3435 // Place sub string into working string at random index 3436 baseBuffer.append(stringToSub); 3437 3438 // Append random chars to end 3439 int trailingChars = generator.nextInt(10); 3440 String trailingString = getRandomAlphaString(trailingChars); 3441 baseBuffer.append(trailingString); 3442 String baseString = baseBuffer.toString(); 3443 3444 // Create test pattern and matcher 3445 Pattern p = Pattern.compile(pattern); 3446 Matcher m = p.matcher(baseString); 3447 3448 // Reject candidate if pattern happens to start early 3449 m.find(); 3450 if (m.start() < leadingChars) 3451 continue; 3452 3453 // Reject candidate if more than one match 3454 if (m.find()) 3455 continue; 3456 3457 // Construct a replacement string with : 3458 // random group + random string + random group 3459 StringBuffer bufferToRep = new StringBuffer(); 3460 int groupIndex1 = generator.nextInt(5); 3461 bufferToRep.append("$" + (groupIndex1 + 1)); 3462 String randomMidString = getRandomAlphaString(5); 3463 bufferToRep.append(randomMidString); 3464 int groupIndex2 = generator.nextInt(5); 3465 bufferToRep.append("$" + (groupIndex2 + 1)); 3466 String replacement = bufferToRep.toString(); 3467 3468 // Do the replacement 3469 String result = m.replaceAll(replacement); 3470 3471 // Construct expected result 3472 StringBuffer bufferToRes = new StringBuffer(); 3473 bufferToRes.append(leadingString); 3474 bufferToRes.append(groups[groupIndex1]); 3475 bufferToRes.append(randomMidString); 3476 bufferToRes.append(groups[groupIndex2]); 3477 bufferToRes.append(trailingString); 3478 String expectedResult = bufferToRes.toString(); 3479 3480 // Check results 3481 if (!result.equals(expectedResult)) 3482 failCount++; 3483 } 3484 3485 report("Substitution Basher"); 3486 } 3487 3488 /* 3489 * 5 groups of characters are created to make a substitution string. 3490 * A base string will be created including random lead chars, the 3491 * substitution string, and random trailing chars. 3492 * A pattern containing the 5 groups is searched for and replaced with: 3493 * random group + random string + random group. 3494 * The results are checked for correctness. 3495 */ 3496 private static void substitutionBasher2() { 3497 for (int runs = 0; runs<1000; runs++) { 3498 // Create a base string to work in 3499 int leadingChars = generator.nextInt(10); 3500 StringBuilder baseBuffer = new StringBuilder(100); 3501 String leadingString = getRandomAlphaString(leadingChars); 3502 baseBuffer.append(leadingString); 3503 3504 // Create 5 groups of random number of random chars 3505 // Create the string to substitute 3506 // Create the pattern string to search for 3507 StringBuilder bufferToSub = new StringBuilder(25); 3508 StringBuilder bufferToPat = new StringBuilder(50); 3509 String[] groups = new String[5]; 3510 for(int i=0; i<5; i++) { 3511 int aGroupSize = generator.nextInt(5)+1; 3512 groups[i] = getRandomAlphaString(aGroupSize); 3513 bufferToSub.append(groups[i]); 3514 bufferToPat.append('('); 3515 bufferToPat.append(groups[i]); 3516 bufferToPat.append(')'); 3517 } 3518 String stringToSub = bufferToSub.toString(); 3519 String pattern = bufferToPat.toString(); 3520 3521 // Place sub string into working string at random index 3522 baseBuffer.append(stringToSub); 3523 3524 // Append random chars to end 3525 int trailingChars = generator.nextInt(10); 3526 String trailingString = getRandomAlphaString(trailingChars); 3527 baseBuffer.append(trailingString); 3528 String baseString = baseBuffer.toString(); 3529 3530 // Create test pattern and matcher 3531 Pattern p = Pattern.compile(pattern); 3532 Matcher m = p.matcher(baseString); 3533 3534 // Reject candidate if pattern happens to start early 3535 m.find(); 3536 if (m.start() < leadingChars) 3537 continue; 3538 3539 // Reject candidate if more than one match 3540 if (m.find()) 3541 continue; 3542 3543 // Construct a replacement string with : 3544 // random group + random string + random group 3545 StringBuilder bufferToRep = new StringBuilder(); 3546 int groupIndex1 = generator.nextInt(5); 3547 bufferToRep.append("$" + (groupIndex1 + 1)); 3548 String randomMidString = getRandomAlphaString(5); 3549 bufferToRep.append(randomMidString); 3550 int groupIndex2 = generator.nextInt(5); 3551 bufferToRep.append("$" + (groupIndex2 + 1)); 3552 String replacement = bufferToRep.toString(); 3553 3554 // Do the replacement 3555 String result = m.replaceAll(replacement); 3556 3557 // Construct expected result 3558 StringBuilder bufferToRes = new StringBuilder(); 3559 bufferToRes.append(leadingString); 3560 bufferToRes.append(groups[groupIndex1]); 3561 bufferToRes.append(randomMidString); 3562 bufferToRes.append(groups[groupIndex2]); 3563 bufferToRes.append(trailingString); 3564 String expectedResult = bufferToRes.toString(); 3565 3566 // Check results 3567 if (!result.equals(expectedResult)) { 3568 failCount++; 3569 } 3570 } 3571 3572 report("Substitution Basher 2"); 3573 } 3574 3575 /** 3576 * Checks the handling of some escape sequences that the Pattern 3577 * class should process instead of the java compiler. These are 3578 * not in the file because the escapes should be be processed 3579 * by the Pattern class when the regex is compiled. 3580 */ 3581 private static void escapes() throws Exception { 3582 Pattern p = Pattern.compile("\\043"); 3583 Matcher m = p.matcher("#"); 3584 if (!m.find()) 3585 failCount++; 3586 3587 p = Pattern.compile("\\x23"); 3588 m = p.matcher("#"); 3589 if (!m.find()) 3590 failCount++; 3591 3592 p = Pattern.compile("\\u0023"); 3593 m = p.matcher("#"); 3594 if (!m.find()) 3595 failCount++; 3596 3597 report("Escape sequences"); 3598 } 3599 3600 /** 3601 * Checks the handling of blank input situations. These 3602 * tests are incompatible with my test file format. 3603 */ 3604 private static void blankInput() throws Exception { 3605 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3606 Matcher m = p.matcher(""); 3607 if (m.find()) 3608 failCount++; 3609 3610 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3611 m = p.matcher(""); 3612 if (!m.find()) 3613 failCount++; 3614 3615 p = Pattern.compile("abc"); 3616 m = p.matcher(""); 3617 if (m.find()) 3618 failCount++; 3619 3620 p = Pattern.compile("a*"); 3621 m = p.matcher(""); 3622 if (!m.find()) 3623 failCount++; 3624 3625 report("Blank input"); 3626 } 3627 3628 /** 3629 * Tests the Boyer-Moore pattern matching of a character sequence 3630 * on randomly generated patterns. 3631 */ 3632 private static void bm() throws Exception { 3633 doBnM('a'); 3634 report("Boyer Moore (ASCII)"); 3635 3636 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3637 report("Boyer Moore (Supplementary)"); 3638 } 3639 3640 private static void doBnM(int baseCharacter) throws Exception { 3641 int achar=0; 3642 3643 for (int i=0; i<100; i++) { 3644 // Create a short pattern to search for 3645 int patternLength = generator.nextInt(7) + 4; 3646 StringBuffer patternBuffer = new StringBuffer(patternLength); 3647 String pattern; 3648 retry: for (;;) { 3649 for (int x=0; x<patternLength; x++) { 3650 int ch = baseCharacter + generator.nextInt(26); 3651 if (Character.isSupplementaryCodePoint(ch)) { 3652 patternBuffer.append(Character.toChars(ch)); 3653 } else { 3654 patternBuffer.append((char)ch); 3655 } 3656 } 3657 pattern = patternBuffer.toString(); 3658 3659 // Avoid patterns that start and end with the same substring 3660 // See JDK-6854417 3661 for (int x=1; x < pattern.length(); x++) { 3662 if (pattern.startsWith(pattern.substring(x))) 3663 continue retry; 3664 } 3665 break; 3666 } 3667 Pattern p = Pattern.compile(pattern); 3668 3669 // Create a buffer with random ASCII chars that does 3670 // not match the sample 3671 String toSearch = null; 3672 StringBuffer s = null; 3673 Matcher m = p.matcher(""); 3674 do { 3675 s = new StringBuffer(100); 3676 for (int x=0; x<100; x++) { 3677 int ch = baseCharacter + generator.nextInt(26); 3678 if (Character.isSupplementaryCodePoint(ch)) { 3679 s.append(Character.toChars(ch)); 3680 } else { 3681 s.append((char)ch); 3682 } 3683 } 3684 toSearch = s.toString(); 3685 m.reset(toSearch); 3686 } while (m.find()); 3687 3688 // Insert the pattern at a random spot 3689 int insertIndex = generator.nextInt(99); 3690 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3691 insertIndex++; 3692 s = s.insert(insertIndex, pattern); 3693 toSearch = s.toString(); 3694 3695 // Make sure that the pattern is found 3696 m.reset(toSearch); 3697 if (!m.find()) 3698 failCount++; 3699 3700 // Make sure that the match text is the pattern 3701 if (!m.group().equals(pattern)) 3702 failCount++; 3703 3704 // Make sure match occured at insertion point 3705 if (m.start() != insertIndex) 3706 failCount++; 3707 } 3708 } 3709 3710 /** 3711 * Tests the matching of slices on randomly generated patterns. 3712 * The Boyer-Moore optimization is not done on these patterns 3713 * because it uses unicode case folding. 3714 */ 3715 private static void slice() throws Exception { 3716 doSlice(Character.MAX_VALUE); 3717 report("Slice"); 3718 3719 doSlice(Character.MAX_CODE_POINT); 3720 report("Slice (Supplementary)"); 3721 } 3722 3723 private static void doSlice(int maxCharacter) throws Exception { 3724 Random generator = new Random(); 3725 int achar=0; 3726 3727 for (int i=0; i<100; i++) { 3728 // Create a short pattern to search for 3729 int patternLength = generator.nextInt(7) + 4; 3730 StringBuffer patternBuffer = new StringBuffer(patternLength); 3731 for (int x=0; x<patternLength; x++) { 3732 int randomChar = 0; 3733 while (!Character.isLetterOrDigit(randomChar)) 3734 randomChar = generator.nextInt(maxCharacter); 3735 if (Character.isSupplementaryCodePoint(randomChar)) { 3736 patternBuffer.append(Character.toChars(randomChar)); 3737 } else { 3738 patternBuffer.append((char) randomChar); 3739 } 3740 } 3741 String pattern = patternBuffer.toString(); 3742 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3743 3744 // Create a buffer with random chars that does not match the sample 3745 String toSearch = null; 3746 StringBuffer s = null; 3747 Matcher m = p.matcher(""); 3748 do { 3749 s = new StringBuffer(100); 3750 for (int x=0; x<100; x++) { 3751 int randomChar = 0; 3752 while (!Character.isLetterOrDigit(randomChar)) 3753 randomChar = generator.nextInt(maxCharacter); 3754 if (Character.isSupplementaryCodePoint(randomChar)) { 3755 s.append(Character.toChars(randomChar)); 3756 } else { 3757 s.append((char) randomChar); 3758 } 3759 } 3760 toSearch = s.toString(); 3761 m.reset(toSearch); 3762 } while (m.find()); 3763 3764 // Insert the pattern at a random spot 3765 int insertIndex = generator.nextInt(99); 3766 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3767 insertIndex++; 3768 s = s.insert(insertIndex, pattern); 3769 toSearch = s.toString(); 3770 3771 // Make sure that the pattern is found 3772 m.reset(toSearch); 3773 if (!m.find()) 3774 failCount++; 3775 3776 // Make sure that the match text is the pattern 3777 if (!m.group().equals(pattern)) 3778 failCount++; 3779 3780 // Make sure match occured at insertion point 3781 if (m.start() != insertIndex) 3782 failCount++; 3783 } 3784 } 3785 3786 private static void explainFailure(String pattern, String data, 3787 String expected, String actual) { 3788 System.err.println("----------------------------------------"); 3789 System.err.println("Pattern = "+pattern); 3790 System.err.println("Data = "+data); 3791 System.err.println("Expected = " + expected); 3792 System.err.println("Actual = " + actual); 3793 } 3794 3795 private static void explainFailure(String pattern, String data, 3796 Throwable t) { 3797 System.err.println("----------------------------------------"); 3798 System.err.println("Pattern = "+pattern); 3799 System.err.println("Data = "+data); 3800 t.printStackTrace(System.err); 3801 } 3802 3803 // Testing examples from a file 3804 3805 /** 3806 * Goes through the file "TestCases.txt" and creates many patterns 3807 * described in the file, matching the patterns against input lines in 3808 * the file, and comparing the results against the correct results 3809 * also found in the file. The file format is described in comments 3810 * at the head of the file. 3811 */ 3812 private static void processFile(String fileName) throws Exception { 3813 File testCases = new File(System.getProperty("test.src", "."), 3814 fileName); 3815 FileInputStream in = new FileInputStream(testCases); 3816 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3817 3818 // Process next test case. 3819 String aLine; 3820 while((aLine = r.readLine()) != null) { 3821 // Read a line for pattern 3822 String patternString = grabLine(r); 3823 Pattern p = null; 3824 try { 3825 p = compileTestPattern(patternString); 3826 } catch (PatternSyntaxException e) { 3827 String dataString = grabLine(r); 3828 String expectedResult = grabLine(r); 3829 if (expectedResult.startsWith("error")) 3830 continue; 3831 explainFailure(patternString, dataString, e); 3832 failCount++; 3833 continue; 3834 } 3835 3836 // Read a line for input string 3837 String dataString = grabLine(r); 3838 Matcher m = p.matcher(dataString); 3839 StringBuffer result = new StringBuffer(); 3840 3841 // Check for IllegalStateExceptions before a match 3842 failCount += preMatchInvariants(m); 3843 3844 boolean found = m.find(); 3845 3846 if (found) 3847 failCount += postTrueMatchInvariants(m); 3848 else 3849 failCount += postFalseMatchInvariants(m); 3850 3851 if (found) { 3852 result.append("true "); 3853 result.append(m.group(0) + " "); 3854 } else { 3855 result.append("false "); 3856 } 3857 3858 result.append(m.groupCount()); 3859 3860 if (found) { 3861 for (int i=1; i<m.groupCount()+1; i++) 3862 if (m.group(i) != null) 3863 result.append(" " +m.group(i)); 3864 } 3865 3866 // Read a line for the expected result 3867 String expectedResult = grabLine(r); 3868 3869 if (!result.toString().equals(expectedResult)) { 3870 explainFailure(patternString, dataString, expectedResult, result.toString()); 3871 failCount++; 3872 } 3873 } 3874 3875 report(fileName); 3876 } 3877 3878 private static int preMatchInvariants(Matcher m) { 3879 int failCount = 0; 3880 try { 3881 m.start(); 3882 failCount++; 3883 } catch (IllegalStateException ise) {} 3884 try { 3885 m.end(); 3886 failCount++; 3887 } catch (IllegalStateException ise) {} 3888 try { 3889 m.group(); 3890 failCount++; 3891 } catch (IllegalStateException ise) {} 3892 return failCount; 3893 } 3894 3895 private static int postFalseMatchInvariants(Matcher m) { 3896 int failCount = 0; 3897 try { 3898 m.group(); 3899 failCount++; 3900 } catch (IllegalStateException ise) {} 3901 try { 3902 m.start(); 3903 failCount++; 3904 } catch (IllegalStateException ise) {} 3905 try { 3906 m.end(); 3907 failCount++; 3908 } catch (IllegalStateException ise) {} 3909 return failCount; 3910 } 3911 3912 private static int postTrueMatchInvariants(Matcher m) { 3913 int failCount = 0; 3914 //assert(m.start() = m.start(0); 3915 if (m.start() != m.start(0)) 3916 failCount++; 3917 //assert(m.end() = m.end(0); 3918 if (m.start() != m.start(0)) 3919 failCount++; 3920 //assert(m.group() = m.group(0); 3921 if (!m.group().equals(m.group(0))) 3922 failCount++; 3923 try { 3924 m.group(50); 3925 failCount++; 3926 } catch (IndexOutOfBoundsException ise) {} 3927 3928 return failCount; 3929 } 3930 3931 private static Pattern compileTestPattern(String patternString) { 3932 if (!patternString.startsWith("'")) { 3933 return Pattern.compile(patternString); 3934 } 3935 int break1 = patternString.lastIndexOf("'"); 3936 String flagString = patternString.substring( 3937 break1+1, patternString.length()); 3938 patternString = patternString.substring(1, break1); 3939 3940 if (flagString.equals("i")) 3941 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3942 3943 if (flagString.equals("m")) 3944 return Pattern.compile(patternString, Pattern.MULTILINE); 3945 3946 return Pattern.compile(patternString); 3947 } 3948 3949 /** 3950 * Reads a line from the input file. Keeps reading lines until a non 3951 * empty non comment line is read. If the line contains a \n then 3952 * these two characters are replaced by a newline char. If a \\uxxxx 3953 * sequence is read then the sequence is replaced by the unicode char. 3954 */ 3955 private static String grabLine(BufferedReader r) throws Exception { 3956 int index = 0; 3957 String line = r.readLine(); 3958 while (line.startsWith("//") || line.length() < 1) 3959 line = r.readLine(); 3960 while ((index = line.indexOf("\\n")) != -1) { 3961 StringBuffer temp = new StringBuffer(line); 3962 temp.replace(index, index+2, "\n"); 3963 line = temp.toString(); 3964 } 3965 while ((index = line.indexOf("\\u")) != -1) { 3966 StringBuffer temp = new StringBuffer(line); 3967 String value = temp.substring(index+2, index+6); 3968 char aChar = (char)Integer.parseInt(value, 16); 3969 String unicodeChar = "" + aChar; 3970 temp.replace(index, index+6, unicodeChar); 3971 line = temp.toString(); 3972 } 3973 3974 return line; 3975 } 3976 3977 private static void check(Pattern p, String s, String g, String expected) { 3978 Matcher m = p.matcher(s); 3979 m.find(); 3980 if (!m.group(g).equals(expected) || 3981 s.charAt(m.start(g)) != expected.charAt(0) || 3982 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 3983 failCount++; 3984 } 3985 3986 private static void checkReplaceFirst(String p, String s, String r, String expected) 3987 { 3988 if (!expected.equals(Pattern.compile(p) 3989 .matcher(s) 3990 .replaceFirst(r))) 3991 failCount++; 3992 } 3993 3994 private static void checkReplaceAll(String p, String s, String r, String expected) 3995 { 3996 if (!expected.equals(Pattern.compile(p) 3997 .matcher(s) 3998 .replaceAll(r))) 3999 failCount++; 4000 } 4001 4002 private static void checkExpectedFail(String p) { 4003 try { 4004 Pattern.compile(p); 4005 } catch (PatternSyntaxException pse) { 4006 //pse.printStackTrace(); 4007 return; 4008 } 4009 failCount++; 4010 } 4011 4012 private static void checkExpectedIAE(Matcher m, String g) { 4013 m.find(); 4014 try { 4015 m.group(g); 4016 } catch (IllegalArgumentException x) { 4017 //iae.printStackTrace(); 4018 try { 4019 m.start(g); 4020 } catch (IllegalArgumentException xx) { 4021 try { 4022 m.start(g); 4023 } catch (IllegalArgumentException xxx) { 4024 return; 4025 } 4026 } 4027 } 4028 failCount++; 4029 } 4030 4031 private static void checkExpectedNPE(Matcher m) { 4032 m.find(); 4033 try { 4034 m.group(null); 4035 } catch (NullPointerException x) { 4036 try { 4037 m.start(null); 4038 } catch (NullPointerException xx) { 4039 try { 4040 m.end(null); 4041 } catch (NullPointerException xxx) { 4042 return; 4043 } 4044 } 4045 } 4046 failCount++; 4047 } 4048 4049 private static void namedGroupCaptureTest() throws Exception { 4050 check(Pattern.compile("x+(?<gname>y+)z+"), 4051 "xxxyyyzzz", 4052 "gname", 4053 "yyy"); 4054 4055 check(Pattern.compile("x+(?<gname8>y+)z+"), 4056 "xxxyyyzzz", 4057 "gname8", 4058 "yyy"); 4059 4060 //backref 4061 Pattern pattern = Pattern.compile("(a*)bc\\1"); 4062 check(pattern, "zzzaabcazzz", true); // found "abca" 4063 4064 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 4065 "zzzaabcaazzz", true); 4066 4067 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 4068 "abcdefabc", true); 4069 4070 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 4071 "abcdefghijkk", true); 4072 4073 // Supplementary character tests 4074 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4075 toSupplementaries("zzzaabcazzz"), true); 4076 4077 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4078 toSupplementaries("zzzaabcaazzz"), true); 4079 4080 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 4081 toSupplementaries("abcdefabc"), true); 4082 4083 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 4084 "(?<gname>" + 4085 toSupplementaries("k)") + "\\k<gname>"), 4086 toSupplementaries("abcdefghijkk"), true); 4087 4088 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 4089 "xxxyyyzzzyyy", 4090 "gname", 4091 "yyy"); 4092 4093 //replaceFirst/All 4094 checkReplaceFirst("(?<gn>ab)(c*)", 4095 "abccczzzabcczzzabccc", 4096 "${gn}", 4097 "abzzzabcczzzabccc"); 4098 4099 checkReplaceAll("(?<gn>ab)(c*)", 4100 "abccczzzabcczzzabccc", 4101 "${gn}", 4102 "abzzzabzzzab"); 4103 4104 4105 checkReplaceFirst("(?<gn>ab)(c*)", 4106 "zzzabccczzzabcczzzabccczzz", 4107 "${gn}", 4108 "zzzabzzzabcczzzabccczzz"); 4109 4110 checkReplaceAll("(?<gn>ab)(c*)", 4111 "zzzabccczzzabcczzzabccczzz", 4112 "${gn}", 4113 "zzzabzzzabzzzabzzz"); 4114 4115 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4116 "zzzabccczzzabcczzzabccczzz", 4117 "${gn2}", 4118 "zzzccczzzabcczzzabccczzz"); 4119 4120 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4121 "zzzabccczzzabcczzzabccczzz", 4122 "${gn2}", 4123 "zzzccczzzcczzzccczzz"); 4124 4125 //toSupplementaries("(ab)(c*)")); 4126 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4127 ")(?<gn2>" + toSupplementaries("c") + "*)", 4128 toSupplementaries("abccczzzabcczzzabccc"), 4129 "${gn1}", 4130 toSupplementaries("abzzzabcczzzabccc")); 4131 4132 4133 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4134 ")(?<gn2>" + toSupplementaries("c") + "*)", 4135 toSupplementaries("abccczzzabcczzzabccc"), 4136 "${gn1}", 4137 toSupplementaries("abzzzabzzzab")); 4138 4139 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4140 ")(?<gn2>" + toSupplementaries("c") + "*)", 4141 toSupplementaries("abccczzzabcczzzabccc"), 4142 "${gn2}", 4143 toSupplementaries("ccczzzabcczzzabccc")); 4144 4145 4146 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4147 ")(?<gn2>" + toSupplementaries("c") + "*)", 4148 toSupplementaries("abccczzzabcczzzabccc"), 4149 "${gn2}", 4150 toSupplementaries("ccczzzcczzzccc")); 4151 4152 checkReplaceFirst("(?<dog>Dog)AndCat", 4153 "zzzDogAndCatzzzDogAndCatzzz", 4154 "${dog}", 4155 "zzzDogzzzDogAndCatzzz"); 4156 4157 4158 checkReplaceAll("(?<dog>Dog)AndCat", 4159 "zzzDogAndCatzzzDogAndCatzzz", 4160 "${dog}", 4161 "zzzDogzzzDogzzz"); 4162 4163 // backref in Matcher & String 4164 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") || 4165 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4166 failCount++; 4167 4168 // negative 4169 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4170 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4171 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4172 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4173 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4174 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4175 "gnameX"); 4176 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4177 report("NamedGroupCapture"); 4178 } 4179 4180 // This is for bug 6919132 4181 private static void nonBmpClassComplementTest() throws Exception { 4182 Pattern p = Pattern.compile("\\P{Lu}"); 4183 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4184 4185 if (m.find() && m.start() == 1) 4186 failCount++; 4187 4188 // from a unicode category 4189 p = Pattern.compile("\\P{Lu}"); 4190 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4191 if (m.find()) 4192 failCount++; 4193 if (!m.hitEnd()) 4194 failCount++; 4195 4196 // block 4197 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4198 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4199 if (m.find() && m.start() == 1) 4200 failCount++; 4201 4202 p = Pattern.compile("\\P{sc=GRANTHA}"); 4203 m = p.matcher(new String(new int[] {0x11350}, 0, 1)); 4204 if (m.find() && m.start() == 1) 4205 failCount++; 4206 4207 report("NonBmpClassComplement"); 4208 } 4209 4210 private static void unicodePropertiesTest() throws Exception { 4211 // different forms 4212 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4213 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4214 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4215 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4216 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4217 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4218 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4219 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4220 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4221 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4222 failCount++; 4223 4224 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4225 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4226 Matcher lastSM = common; 4227 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4228 4229 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4230 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4231 Matcher lastBM = latin; 4232 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4233 4234 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4235 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4236 continue; // only pick couple code points, they are the same 4237 } 4238 4239 // Unicode Script 4240 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4241 Matcher m; 4242 String str = new String(Character.toChars(cp)); 4243 if (script == lastScript) { 4244 m = lastSM; 4245 m.reset(str); 4246 } else { 4247 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4248 } 4249 if (!m.matches()) { 4250 failCount++; 4251 } 4252 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common; 4253 other.reset(str); 4254 if (other.matches()) { 4255 failCount++; 4256 } 4257 lastSM = m; 4258 lastScript = script; 4259 4260 // Unicode Block 4261 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4262 if (block == null) { 4263 //System.out.printf("Not a Block: cp=%x%n", cp); 4264 continue; 4265 } 4266 if (block == lastBlock) { 4267 m = lastBM; 4268 m.reset(str); 4269 } else { 4270 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4271 } 4272 if (!m.matches()) { 4273 failCount++; 4274 } 4275 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4276 other.reset(str); 4277 if (other.matches()) { 4278 failCount++; 4279 } 4280 lastBM = m; 4281 lastBlock = block; 4282 } 4283 report("unicodeProperties"); 4284 } 4285 4286 private static void unicodeHexNotationTest() throws Exception { 4287 4288 // negative 4289 checkExpectedFail("\\x{-23}"); 4290 checkExpectedFail("\\x{110000}"); 4291 checkExpectedFail("\\x{}"); 4292 checkExpectedFail("\\x{AB[ef]"); 4293 4294 // codepoint 4295 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4296 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4297 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4298 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4299 4300 // in class 4301 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4302 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4303 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4304 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4305 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4306 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4307 4308 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4309 String s = "A" + new String(Character.toChars(cp)) + "B"; 4310 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp) 4311 : String.format("\\u%04x\\u%04x", 4312 (int) Character.toChars(cp)[0], 4313 (int) Character.toChars(cp)[1]); 4314 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4315 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4316 failCount++; 4317 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4318 failCount++; 4319 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4320 failCount++; 4321 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4322 failCount++; 4323 } 4324 report("unicodeHexNotation"); 4325 } 4326 4327 private static void unicodeClassesTest() throws Exception { 4328 4329 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4330 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4331 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4332 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4333 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4334 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4335 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4336 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4337 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4338 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4339 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4340 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4341 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4342 Matcher bound = Pattern.compile("\\b").matcher(""); 4343 Matcher word = Pattern.compile("\\w++").matcher(""); 4344 // UNICODE_CHARACTER_CLASS 4345 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4346 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4347 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4348 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4349 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4350 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4351 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4352 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4353 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4354 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4355 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4356 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4357 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4358 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4359 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4360 // embedded flag (?U) 4361 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4362 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4363 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4364 4365 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4366 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4367 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4368 // properties 4369 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4370 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4371 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4372 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4373 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4374 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4375 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4376 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4377 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4378 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4379 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4380 // javaMethod 4381 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4382 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4383 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4384 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4385 // GC/C 4386 Matcher gcC = Pattern.compile("\\p{C}").matcher(""); 4387 4388 for (int cp = 1; cp < 0x30000; cp++) { 4389 String str = new String(Character.toChars(cp)); 4390 int type = Character.getType(cp); 4391 if (// lower 4392 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4393 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4394 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4395 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4396 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4397 // upper 4398 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4399 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4400 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4401 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4402 // alpha 4403 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4404 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4405 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4406 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4407 // digit 4408 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4409 Character.isDigit(cp) != digitU.reset(str).matches() || 4410 // alnum 4411 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4412 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4413 // punct 4414 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4415 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4416 // graph 4417 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4418 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4419 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4420 // blank 4421 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4422 != blank.reset(str).matches() || 4423 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4424 // print 4425 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4426 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4427 // cntrl 4428 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4429 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4430 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4431 // hexdigit 4432 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4433 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4434 // space 4435 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4436 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4437 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4438 // word 4439 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4440 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4441 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4442 // bwordb 4443 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4444 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4445 // properties 4446 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4447 Character.isLetter(cp) != letterP.reset(str).matches()|| 4448 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4449 Character.isIdeographic(cp) != ideogJ.reset(str).matches() || 4450 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 4451 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4452 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() || 4453 // gc_C 4454 (Character.CONTROL == type || Character.FORMAT == type || 4455 Character.PRIVATE_USE == type || Character.SURROGATE == type || 4456 Character.UNASSIGNED == type) 4457 != gcC.reset(str).matches()) { 4458 failCount++; 4459 } 4460 } 4461 4462 // bounds/word align 4463 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4464 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4465 failCount++; 4466 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4467 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4468 failCount++; 4469 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4470 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4471 failCount++; 4472 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4473 failCount++; 4474 report("unicodePredefinedClasses"); 4475 } 4476 4477 private static void unicodeCharacterNameTest() throws Exception { 4478 4479 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { 4480 if (!Character.isValidCodePoint(cp) || 4481 Character.getType(cp) == Character.UNASSIGNED) 4482 continue; 4483 String str = new String(Character.toChars(cp)); 4484 // single 4485 String p = "\\N{" + Character.getName(cp) + "}"; 4486 if (!Pattern.compile(p).matcher(str).matches()) { 4487 failCount++; 4488 } 4489 // class[c] 4490 p = "[\\N{" + Character.getName(cp) + "}]"; 4491 if (!Pattern.compile(p).matcher(str).matches()) { 4492 failCount++; 4493 } 4494 } 4495 4496 // range 4497 for (int i = 0; i < 10; i++) { 4498 int start = generator.nextInt(20); 4499 int end = start + generator.nextInt(200); 4500 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; 4501 String str; 4502 for (int cp = start; cp < end; cp++) { 4503 str = new String(Character.toChars(cp)); 4504 if (!Pattern.compile(p).matcher(str).matches()) { 4505 failCount++; 4506 } 4507 } 4508 str = new String(Character.toChars(end + 10)); 4509 if (Pattern.compile(p).matcher(str).matches()) { 4510 failCount++; 4511 } 4512 } 4513 4514 // slice 4515 for (int i = 0; i < 10; i++) { 4516 int n = generator.nextInt(256); 4517 int[] buf = new int[n]; 4518 StringBuffer sb = new StringBuffer(1024); 4519 for (int j = 0; j < n; j++) { 4520 int cp = generator.nextInt(1000); 4521 if (!Character.isValidCodePoint(cp) || 4522 Character.getType(cp) == Character.UNASSIGNED) 4523 cp = 0x4e00; // just use 4e00 4524 sb.append("\\N{" + Character.getName(cp) + "}"); 4525 buf[j] = cp; 4526 } 4527 String p = sb.toString(); 4528 String str = new String(buf, 0, buf.length); 4529 if (!Pattern.compile(p).matcher(str).matches()) { 4530 failCount++; 4531 } 4532 } 4533 report("unicodeCharacterName"); 4534 } 4535 4536 private static void horizontalAndVerticalWSTest() throws Exception { 4537 String hws = new String (new char[] { 4538 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4539 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4540 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4541 0x202f, 0x205f, 0x3000 }); 4542 String vws = new String (new char[] { 4543 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4544 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4545 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4546 failCount++; 4547 if (Pattern.compile("\\H").matcher(hws).find() || 4548 Pattern.compile("[\\H]").matcher(hws).find()) 4549 failCount++; 4550 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4551 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4552 failCount++; 4553 if (Pattern.compile("\\V").matcher(vws).find() || 4554 Pattern.compile("[\\V]").matcher(vws).find()) 4555 failCount++; 4556 String prefix = "abcd"; 4557 String suffix = "efgh"; 4558 String ng = "A"; 4559 for (int i = 0; i < hws.length(); i++) { 4560 String c = String.valueOf(hws.charAt(i)); 4561 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4562 if (!m.find() || !c.equals(m.group())) 4563 failCount++; 4564 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4565 if (!m.find() || !c.equals(m.group())) 4566 failCount++; 4567 4568 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4569 if (!m.find() || !ng.equals(m.group())) 4570 failCount++; 4571 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4572 if (!m.find() || !ng.equals(m.group())) 4573 failCount++; 4574 } 4575 for (int i = 0; i < vws.length(); i++) { 4576 String c = String.valueOf(vws.charAt(i)); 4577 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4578 if (!m.find() || !c.equals(m.group())) 4579 failCount++; 4580 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4581 if (!m.find() || !c.equals(m.group())) 4582 failCount++; 4583 4584 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4585 if (!m.find() || !ng.equals(m.group())) 4586 failCount++; 4587 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4588 if (!m.find() || !ng.equals(m.group())) 4589 failCount++; 4590 } 4591 // \v in range is interpreted as 0x0B. This is the undocumented behavior 4592 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4593 failCount++; 4594 report("horizontalAndVerticalWSTest"); 4595 } 4596 4597 private static void linebreakTest() throws Exception { 4598 String linebreaks = new String (new char[] { 4599 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4600 String crnl = "\r\n"; 4601 if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() && 4602 Pattern.compile("\\R").matcher(crnl).matches() && 4603 Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() && 4604 Pattern.compile("\\Rabc").matcher("\rabc").matches() && 4605 Pattern.compile("\\R\\R").matcher(crnl).matches() && // backtracking 4606 Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking 4607 !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029 4608 failCount++; 4609 } 4610 report("linebreakTest"); 4611 } 4612 4613 // #7189363 4614 private static void branchTest() throws Exception { 4615 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4616 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4617 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4618 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4619 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4620 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4621 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4622 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4623 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4624 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4625 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4626 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4627 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4628 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4629 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4630 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4631 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4632 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4633 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4634 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4635 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 4636 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4637 failCount++; 4638 report("branchTest"); 4639 } 4640 4641 // This test is for 8007395 4642 private static void groupCurlyNotFoundSuppTest() throws Exception { 4643 String input = "test this as \ud83d\ude0d"; 4644 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4645 "test(.)*(@[a-zA-Z.]+)", 4646 "test([^B])+(@[a-zA-Z.]+)", 4647 "test([^B])*(@[a-zA-Z.]+)", 4648 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4649 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4650 }) { 4651 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4652 .matcher(input); 4653 try { 4654 if (m.find()) { 4655 failCount++; 4656 } 4657 } catch (Exception x) { 4658 failCount++; 4659 } 4660 } 4661 report("GroupCurly NotFoundSupp"); 4662 } 4663 4664 // This test is for 8023647 4665 private static void groupCurlyBackoffTest() throws Exception { 4666 if (!"abc1c".matches("(\\w)+1\\1") || 4667 "abc11".matches("(\\w)+1\\1")) { 4668 failCount++; 4669 } 4670 report("GroupCurly backoff"); 4671 } 4672 4673 // This test is for 8012646 4674 private static void patternAsPredicate() throws Exception { 4675 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4676 4677 if (p.test("")) { 4678 failCount++; 4679 } 4680 if (!p.test("word")) { 4681 failCount++; 4682 } 4683 if (p.test("1234")) { 4684 failCount++; 4685 } 4686 report("Pattern.asPredicate"); 4687 } 4688 4689 // This test is for 8035975 4690 private static void invalidFlags() throws Exception { 4691 for (int flag = 1; flag != 0; flag <<= 1) { 4692 switch (flag) { 4693 case Pattern.CASE_INSENSITIVE: 4694 case Pattern.MULTILINE: 4695 case Pattern.DOTALL: 4696 case Pattern.UNICODE_CASE: 4697 case Pattern.CANON_EQ: 4698 case Pattern.UNIX_LINES: 4699 case Pattern.LITERAL: 4700 case Pattern.UNICODE_CHARACTER_CLASS: 4701 case Pattern.COMMENTS: 4702 // valid flag, continue 4703 break; 4704 default: 4705 try { 4706 Pattern.compile(".", flag); 4707 failCount++; 4708 } catch (IllegalArgumentException expected) { 4709 } 4710 } 4711 } 4712 report("Invalid compile flags"); 4713 } 4714 4715 // This test is for 8158482 4716 private static void embeddedFlags() throws Exception { 4717 try { 4718 Pattern.compile("(?i).(?-i)."); 4719 Pattern.compile("(?m).(?-m)."); 4720 Pattern.compile("(?s).(?-s)."); 4721 Pattern.compile("(?d).(?-d)."); 4722 Pattern.compile("(?u).(?-u)."); 4723 Pattern.compile("(?c).(?-c)."); 4724 Pattern.compile("(?x).(?-x)."); 4725 Pattern.compile("(?U).(?-U)."); 4726 Pattern.compile("(?imsducxU).(?-imsducxU)."); 4727 } catch (PatternSyntaxException x) { 4728 failCount++; 4729 } 4730 report("Embedded flags"); 4731 } 4732 4733 private static void grapheme() throws Exception { 4734 Files.lines(Paths.get(System.getProperty("test.src", "."), 4735 "GraphemeBreakTest.txt")) 4736 .filter( ln -> ln.length() != 0 && !ln.startsWith("#") ) 4737 .forEach( ln -> { 4738 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); 4739 // System.out.println(str); 4740 String[] strs = ln.split("\u00f7|\u00d7"); 4741 StringBuilder src = new StringBuilder(); 4742 ArrayList<String> graphemes = new ArrayList<>(); 4743 StringBuilder buf = new StringBuilder(); 4744 int offBk = 0; 4745 for (String str : strs) { 4746 if (str.length() == 0) // first empty str 4747 continue; 4748 int cp = Integer.parseInt(str, 16); 4749 src.appendCodePoint(cp); 4750 buf.appendCodePoint(cp); 4751 offBk += (str.length() + 1); 4752 if (ln.charAt(offBk) == '\u00f7') { // DIV 4753 graphemes.add(buf.toString()); 4754 buf = new StringBuilder(); 4755 } 4756 } 4757 Pattern p = Pattern.compile("\\X"); 4758 Matcher m = p.matcher(src.toString()); 4759 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4760 for (String g : graphemes) { 4761 // System.out.printf(" grapheme:=[%s]%n", g); 4762 // (1) test \\X directly 4763 if (!m.find() || !m.group().equals(g)) { 4764 System.out.println("Failed \\X [" + ln + "] : " + g); 4765 failCount++; 4766 } 4767 // (2) test \\b{g} + \\X via Scanner 4768 boolean hasNext = s.hasNext(p); 4769 // if (!s.hasNext() || !s.next().equals(next)) { 4770 if (!s.hasNext(p) || !s.next(p).equals(g)) { 4771 System.out.println("Failed b{g} [" + ln + "] : " + g); 4772 failCount++; 4773 } 4774 } 4775 }); 4776 // some sanity checks 4777 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() || 4778 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() || 4779 !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches()) 4780 failCount++; 4781 // make sure "\b{n}" still works 4782 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches()) 4783 failCount++; 4784 report("Unicode extended grapheme cluster"); 4785 } 4786 4787 // hangup/timeout if go into exponential backtracking 4788 private static void expoBacktracking() throws Exception { 4789 4790 Object[][] patternMatchers = { 4791 // 6328855 4792 { "(.*\n*)*", 4793 "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)", 4794 false }, 4795 // 6192895 4796 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4797 "Hello World this is a test this is a test this is a test A", 4798 true }, 4799 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4800 "Hello World this is a test this is a test this is a test \u4e00 ", 4801 false }, 4802 { " *([a-z0-9]+ *)+", 4803 "hello world this is a test this is a test this is a test A", 4804 false }, 4805 // 4771934 [FIXED] #5013651? 4806 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4807 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com", 4808 true }, 4809 // 4866249 [FIXED] 4810 { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>", 4811 "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">", 4812 true }, 4813 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4814 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com", 4815 false }, 4816 // 6345469 4817 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4818 " < br/> < / p> <p> <html> <adfasfdasdf> </p>", 4819 true }, // --> matched 4820 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4821 " < br/> < / p> <p> <html> <adfasfdasdf> p </p>", 4822 false }, 4823 // 5026912 4824 { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$", 4825 "156580451111112225588087755221111111566969655555555", 4826 false}, 4827 // 6988218 4828 { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')", 4829 "'%)) order by ANGEBOT.ID", 4830 false}, // find 4831 // 6693451 4832 { "^(\\s*foo\\s*)*$", 4833 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo", 4834 true }, 4835 { "^(\\s*foo\\s*)*$", 4836 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo", 4837 false 4838 }, 4839 // 7006761 4840 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true}, 4841 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false}, 4842 // 8140212 4843 { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)", 4844 "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()", 4845 false 4846 }, 4847 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true}, 4848 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false}, 4849 4850 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true }, 4851 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4852 4853 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true}, 4854 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4855 4856 { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false}, 4857 4858 /* not fixed 4859 //8132141 ---> second level exponential backtracking 4860 { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*", 4861 "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" }, 4862 */ 4863 }; 4864 4865 for (Object[] pm : patternMatchers) { 4866 String p = (String)pm[0]; 4867 String s = (String)pm[1]; 4868 boolean r = (Boolean)pm[2]; 4869 if (r != Pattern.compile(p).matcher(s).matches()) { 4870 failCount++; 4871 } 4872 } 4873 } 4874 4875 private static void invalidGroupName() { 4876 // Invalid start of a group name 4877 for (String groupName : List.of("", ".", "0", "\u0040", "\u005b", 4878 "\u0060", "\u007b", "\u0416")) { 4879 for (String pat : List.of("(?<" + groupName + ">)", 4880 "\\k<" + groupName + ">")) { 4881 try { 4882 Pattern.compile(pat); 4883 failCount++; 4884 } catch (PatternSyntaxException e) { 4885 if (!e.getMessage().startsWith( 4886 "capturing group name does not start with a" 4887 + " Latin letter")) { 4888 failCount++; 4889 } 4890 } 4891 } 4892 } 4893 // Invalid char in a group name 4894 for (String groupName : List.of("a.", "b\u0040", "c\u005b", 4895 "d\u0060", "e\u007b", "f\u0416")) { 4896 for (String pat : List.of("(?<" + groupName + ">)", 4897 "\\k<" + groupName + ">")) { 4898 try { 4899 Pattern.compile(pat); 4900 failCount++; 4901 } catch (PatternSyntaxException e) { 4902 if (!e.getMessage().startsWith( 4903 "named capturing group is missing trailing '>'")) { 4904 failCount++; 4905 } 4906 } 4907 } 4908 } 4909 report("Invalid capturing group names"); 4910 } 4911 }