1 /* 2 * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895 37 * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706 38 * 8194667 8197462 8184692 8221431 39 * 40 * @library /test/lib 41 * @library /lib/testlibrary/java/lang 42 * @build jdk.test.lib.RandomFactory 43 * @run main RegExTest 44 * @key randomness 45 */ 46 47 import java.util.function.Function; 48 import java.util.regex.*; 49 import java.util.Random; 50 import java.util.Scanner; 51 import java.io.*; 52 import java.nio.file.*; 53 import java.util.*; 54 import java.nio.CharBuffer; 55 import java.util.function.Predicate; 56 import jdk.test.lib.RandomFactory; 57 58 /** 59 * This is a test class created to check the operation of 60 * the Pattern and Matcher classes. 61 */ 62 public class RegExTest { 63 64 private static Random generator = RandomFactory.getRandom(); 65 private static boolean failure = false; 66 private static int failCount = 0; 67 private static String firstFailure = null; 68 69 /** 70 * Main to interpret arguments and run several tests. 71 * 72 */ 73 public static void main(String[] args) throws Exception { 74 // Most of the tests are in a file 75 processFile("TestCases.txt"); 76 //processFile("PerlCases.txt"); 77 processFile("BMPTestCases.txt"); 78 processFile("SupplementaryTestCases.txt"); 79 80 // These test many randomly generated char patterns 81 bm(); 82 slice(); 83 84 // These are hard to put into the file 85 escapes(); 86 blankInput(); 87 88 // Substitition tests on randomly generated sequences 89 globalSubstitute(); 90 stringbufferSubstitute(); 91 stringbuilderSubstitute(); 92 93 substitutionBasher(); 94 substitutionBasher2(); 95 96 // Canonical Equivalence 97 ceTest(); 98 99 // Anchors 100 anchorTest(); 101 102 // boolean match calls 103 matchesTest(); 104 lookingAtTest(); 105 106 // Pattern API 107 patternMatchesTest(); 108 109 // Misc 110 lookbehindTest(); 111 nullArgumentTest(); 112 backRefTest(); 113 groupCaptureTest(); 114 caretTest(); 115 charClassTest(); 116 emptyPatternTest(); 117 findIntTest(); 118 group0Test(); 119 longPatternTest(); 120 octalTest(); 121 ampersandTest(); 122 negationTest(); 123 splitTest(); 124 appendTest(); 125 caseFoldingTest(); 126 commentsTest(); 127 unixLinesTest(); 128 replaceFirstTest(); 129 gTest(); 130 zTest(); 131 serializeTest(); 132 reluctantRepetitionTest(); 133 multilineDollarTest(); 134 dollarAtEndTest(); 135 caretBetweenTerminatorsTest(); 136 // This RFE rejected in Tiger numOccurrencesTest(); 137 javaCharClassTest(); 138 nonCaptureRepetitionTest(); 139 notCapturedGroupCurlyMatchTest(); 140 escapedSegmentTest(); 141 literalPatternTest(); 142 literalReplacementTest(); 143 regionTest(); 144 toStringTest(); 145 negatedCharClassTest(); 146 findFromTest(); 147 boundsTest(); 148 unicodeWordBoundsTest(); 149 caretAtEndTest(); 150 wordSearchTest(); 151 hitEndTest(); 152 toMatchResultTest(); 153 toMatchResultTest2(); 154 surrogatesInClassTest(); 155 removeQEQuotingTest(); 156 namedGroupCaptureTest(); 157 nonBmpClassComplementTest(); 158 unicodePropertiesTest(); 159 unicodeHexNotationTest(); 160 unicodeClassesTest(); 161 unicodeCharacterNameTest(); 162 horizontalAndVerticalWSTest(); 163 linebreakTest(); 164 branchTest(); 165 groupCurlyNotFoundSuppTest(); 166 groupCurlyBackoffTest(); 167 patternAsPredicate(); 168 patternAsMatchPredicate(); 169 invalidFlags(); 170 embeddedFlags(); 171 grapheme(); 172 expoBacktracking(); 173 invalidGroupName(); 174 175 if (failure) { 176 throw new 177 RuntimeException("RegExTest failed, 1st failure: " + 178 firstFailure); 179 } else { 180 System.err.println("OKAY: All tests passed."); 181 } 182 } 183 184 // Utility functions 185 186 private static String getRandomAlphaString(int length) { 187 StringBuffer buf = new StringBuffer(length); 188 for (int i=0; i<length; i++) { 189 char randChar = (char)(97 + generator.nextInt(26)); 190 buf.append(randChar); 191 } 192 return buf.toString(); 193 } 194 195 private static void check(Matcher m, String expected) { 196 m.find(); 197 if (!m.group().equals(expected)) 198 failCount++; 199 } 200 201 private static void check(Matcher m, String result, boolean expected) { 202 m.find(); 203 if (m.group().equals(result) != expected) 204 failCount++; 205 } 206 207 private static void check(Pattern p, String s, boolean expected) { 208 if (p.matcher(s).find() != expected) 209 failCount++; 210 } 211 212 private static void check(String p, String s, boolean expected) { 213 Matcher matcher = Pattern.compile(p).matcher(s); 214 if (matcher.find() != expected) 215 failCount++; 216 } 217 218 private static void check(String p, char c, boolean expected) { 219 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 220 Pattern pattern = Pattern.compile(propertyPattern); 221 char[] ca = new char[1]; ca[0] = c; 222 Matcher matcher = pattern.matcher(new String(ca)); 223 if (!matcher.find()) 224 failCount++; 225 } 226 227 private static void check(String p, int codePoint, boolean expected) { 228 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 229 Pattern pattern = Pattern.compile(propertyPattern); 230 char[] ca = Character.toChars(codePoint); 231 Matcher matcher = pattern.matcher(new String(ca)); 232 if (!matcher.find()) 233 failCount++; 234 } 235 236 private static void check(String p, int flag, String input, String s, 237 boolean expected) 238 { 239 Pattern pattern = Pattern.compile(p, flag); 240 Matcher matcher = pattern.matcher(input); 241 if (expected) 242 check(matcher, s, expected); 243 else 244 check(pattern, input, false); 245 } 246 247 private static void report(String testName) { 248 int spacesToAdd = 30 - testName.length(); 249 StringBuffer paddedNameBuffer = new StringBuffer(testName); 250 for (int i=0; i<spacesToAdd; i++) 251 paddedNameBuffer.append(" "); 252 String paddedName = paddedNameBuffer.toString(); 253 System.err.println(paddedName + ": " + 254 (failCount==0 ? "Passed":"Failed("+failCount+")")); 255 if (failCount > 0) { 256 failure = true; 257 258 if (firstFailure == null) { 259 firstFailure = testName; 260 } 261 } 262 263 failCount = 0; 264 } 265 266 /** 267 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 268 * supplementary characters. This method does NOT fully take care 269 * of the regex syntax. 270 */ 271 private static String toSupplementaries(String s) { 272 int length = s.length(); 273 StringBuffer sb = new StringBuffer(length * 2); 274 275 for (int i = 0; i < length; ) { 276 char c = s.charAt(i++); 277 if (c == '\\') { 278 sb.append(c); 279 if (i < length) { 280 c = s.charAt(i++); 281 sb.append(c); 282 if (c == 'u') { 283 // assume no syntax error 284 sb.append(s.charAt(i++)); 285 sb.append(s.charAt(i++)); 286 sb.append(s.charAt(i++)); 287 sb.append(s.charAt(i++)); 288 } 289 } 290 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 291 sb.append('\ud800').append((char)('\udc00'+c)); 292 } else { 293 sb.append(c); 294 } 295 } 296 return sb.toString(); 297 } 298 299 // Regular expression tests 300 301 // This is for bug 6178785 302 // Test if an expected NPE gets thrown when passing in a null argument 303 private static boolean check(Runnable test) { 304 try { 305 test.run(); 306 failCount++; 307 return false; 308 } catch (NullPointerException npe) { 309 return true; 310 } 311 } 312 313 private static void nullArgumentTest() { 314 check(() -> Pattern.compile(null)); 315 check(() -> Pattern.matches(null, null)); 316 check(() -> Pattern.matches("xyz", null)); 317 check(() -> Pattern.quote(null)); 318 check(() -> Pattern.compile("xyz").split(null)); 319 check(() -> Pattern.compile("xyz").matcher(null)); 320 321 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 322 m.matches(); 323 check(() -> m.appendTail((StringBuffer) null)); 324 check(() -> m.appendTail((StringBuilder)null)); 325 check(() -> m.replaceAll((String) null)); 326 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 327 check(() -> m.replaceFirst((String)null)); 328 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 329 check(() -> m.appendReplacement((StringBuffer)null, null)); 330 check(() -> m.appendReplacement((StringBuilder)null, null)); 331 check(() -> m.reset(null)); 332 check(() -> Matcher.quoteReplacement(null)); 333 //check(() -> m.usePattern(null)); 334 335 report("Null Argument"); 336 } 337 338 // This is for bug6635133 339 // Test if surrogate pair in Unicode escapes can be handled correctly. 340 private static void surrogatesInClassTest() throws Exception { 341 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 342 Matcher matcher = pattern.matcher("\ud834\udd22"); 343 if (!matcher.find()) 344 failCount++; 345 346 report("Surrogate pair in Unicode escape"); 347 } 348 349 // This is for bug6990617 350 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 351 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 352 // char is an octal digit. 353 private static void removeQEQuotingTest() throws Exception { 354 Pattern pattern = 355 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 356 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 357 if (!matcher.find()) 358 failCount++; 359 360 report("Remove Q/E Quoting"); 361 } 362 363 // This is for bug 4988891 364 // Test toMatchResult to see that it is a copy of the Matcher 365 // that is not affected by subsequent operations on the original 366 private static void toMatchResultTest() throws Exception { 367 Pattern pattern = Pattern.compile("squid"); 368 Matcher matcher = pattern.matcher( 369 "agiantsquidofdestinyasmallsquidoffate"); 370 matcher.find(); 371 int matcherStart1 = matcher.start(); 372 MatchResult mr = matcher.toMatchResult(); 373 if (mr == matcher) 374 failCount++; 375 int resultStart1 = mr.start(); 376 if (matcherStart1 != resultStart1) 377 failCount++; 378 matcher.find(); 379 int matcherStart2 = matcher.start(); 380 int resultStart2 = mr.start(); 381 if (matcherStart2 == resultStart2) 382 failCount++; 383 if (resultStart1 != resultStart2) 384 failCount++; 385 MatchResult mr2 = matcher.toMatchResult(); 386 if (mr == mr2) 387 failCount++; 388 if (mr2.start() != matcherStart2) 389 failCount++; 390 report("toMatchResult is a copy"); 391 } 392 393 private static void checkExpectedISE(Runnable test) { 394 try { 395 test.run(); 396 failCount++; 397 } catch (IllegalStateException x) { 398 } catch (IndexOutOfBoundsException xx) { 399 failCount++; 400 } 401 } 402 403 private static void checkExpectedIOOE(Runnable test) { 404 try { 405 test.run(); 406 failCount++; 407 } catch (IndexOutOfBoundsException x) {} 408 } 409 410 // This is for bug 8074678 411 // Test the result of toMatchResult throws ISE if no match is availble 412 private static void toMatchResultTest2() throws Exception { 413 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 414 matcher.find(); 415 MatchResult mr = matcher.toMatchResult(); 416 417 checkExpectedISE(() -> mr.start()); 418 checkExpectedISE(() -> mr.start(2)); 419 checkExpectedISE(() -> mr.end()); 420 checkExpectedISE(() -> mr.end(2)); 421 checkExpectedISE(() -> mr.group()); 422 checkExpectedISE(() -> mr.group(2)); 423 424 matcher = Pattern.compile("(match)").matcher("there is a match"); 425 matcher.find(); 426 MatchResult mr2 = matcher.toMatchResult(); 427 checkExpectedIOOE(() -> mr2.start(2)); 428 checkExpectedIOOE(() -> mr2.end(2)); 429 checkExpectedIOOE(() -> mr2.group(2)); 430 431 report("toMatchResult2 appropriate exceptions"); 432 } 433 434 // This is for bug 5013885 435 // Must test a slice to see if it reports hitEnd correctly 436 private static void hitEndTest() throws Exception { 437 // Basic test of Slice node 438 Pattern p = Pattern.compile("^squidattack"); 439 Matcher m = p.matcher("squack"); 440 m.find(); 441 if (m.hitEnd()) 442 failCount++; 443 m.reset("squid"); 444 m.find(); 445 if (!m.hitEnd()) 446 failCount++; 447 448 // Test Slice, SliceA and SliceU nodes 449 for (int i=0; i<3; i++) { 450 int flags = 0; 451 if (i==1) flags = Pattern.CASE_INSENSITIVE; 452 if (i==2) flags = Pattern.UNICODE_CASE; 453 p = Pattern.compile("^abc", flags); 454 m = p.matcher("ad"); 455 m.find(); 456 if (m.hitEnd()) 457 failCount++; 458 m.reset("ab"); 459 m.find(); 460 if (!m.hitEnd()) 461 failCount++; 462 } 463 464 // Test Boyer-Moore node 465 p = Pattern.compile("catattack"); 466 m = p.matcher("attack"); 467 m.find(); 468 if (!m.hitEnd()) 469 failCount++; 470 471 p = Pattern.compile("catattack"); 472 m = p.matcher("attackattackattackcatatta"); 473 m.find(); 474 if (!m.hitEnd()) 475 failCount++; 476 477 // 8184706: Matching u+0d at EOL against \R should hit-end 478 p = Pattern.compile("...\\R"); 479 m = p.matcher("cat" + (char)0x0a); 480 m.find(); 481 if (m.hitEnd()) 482 failCount++; 483 484 m = p.matcher("cat" + (char)0x0d); 485 m.find(); 486 if (!m.hitEnd()) 487 failCount++; 488 489 m = p.matcher("cat" + (char)0x0d + (char)0x0a); 490 m.find(); 491 if (m.hitEnd()) 492 failCount++; 493 494 report("hitEnd"); 495 } 496 497 // This is for bug 4997476 498 // It is weird code submitted by customer demonstrating a regression 499 private static void wordSearchTest() throws Exception { 500 String testString = new String("word1 word2 word3"); 501 Pattern p = Pattern.compile("\\b"); 502 Matcher m = p.matcher(testString); 503 int position = 0; 504 int start = 0; 505 while (m.find(position)) { 506 start = m.start(); 507 if (start == testString.length()) 508 break; 509 if (m.find(start+1)) { 510 position = m.start(); 511 } else { 512 position = testString.length(); 513 } 514 if (testString.substring(start, position).equals(" ")) 515 continue; 516 if (!testString.substring(start, position-1).startsWith("word")) 517 failCount++; 518 } 519 report("Customer word search"); 520 } 521 522 // This is for bug 4994840 523 private static void caretAtEndTest() throws Exception { 524 // Problem only occurs with multiline patterns 525 // containing a beginning-of-line caret "^" followed 526 // by an expression that also matches the empty string. 527 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 528 Matcher matcher = pattern.matcher("\r"); 529 matcher.find(); 530 matcher.find(); 531 report("Caret at end"); 532 } 533 534 // This test is for 4979006 535 // Check to see if word boundary construct properly handles unicode 536 // non spacing marks 537 private static void unicodeWordBoundsTest() throws Exception { 538 String spaces = " "; 539 String wordChar = "a"; 540 String nsm = "\u030a"; 541 542 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 543 544 Pattern pattern = Pattern.compile("\\b"); 545 Matcher matcher = pattern.matcher(""); 546 // S=other B=word character N=non spacing mark .=word boundary 547 // SS.BB.SS 548 String input = spaces + wordChar + wordChar + spaces; 549 twoFindIndexes(input, matcher, 2, 4); 550 // SS.BBN.SS 551 input = spaces + wordChar +wordChar + nsm + spaces; 552 twoFindIndexes(input, matcher, 2, 5); 553 // SS.BN.SS 554 input = spaces + wordChar + nsm + spaces; 555 twoFindIndexes(input, matcher, 2, 4); 556 // SS.BNN.SS 557 input = spaces + wordChar + nsm + nsm + spaces; 558 twoFindIndexes(input, matcher, 2, 5); 559 // SSN.BB.SS 560 input = spaces + nsm + wordChar + wordChar + spaces; 561 twoFindIndexes(input, matcher, 3, 5); 562 // SS.BNB.SS 563 input = spaces + wordChar + nsm + wordChar + spaces; 564 twoFindIndexes(input, matcher, 2, 5); 565 // SSNNSS 566 input = spaces + nsm + nsm + spaces; 567 matcher.reset(input); 568 if (matcher.find()) 569 failCount++; 570 // SSN.BBN.SS 571 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 572 twoFindIndexes(input, matcher, 3, 6); 573 574 report("Unicode word boundary"); 575 } 576 577 private static void twoFindIndexes(String input, Matcher matcher, int a, 578 int b) throws Exception 579 { 580 matcher.reset(input); 581 matcher.find(); 582 if (matcher.start() != a) 583 failCount++; 584 matcher.find(); 585 if (matcher.start() != b) 586 failCount++; 587 } 588 589 // This test is for 6284152 590 static void check(String regex, String input, String[] expected) { 591 List<String> result = new ArrayList<String>(); 592 Pattern p = Pattern.compile(regex); 593 Matcher m = p.matcher(input); 594 while (m.find()) { 595 result.add(m.group()); 596 } 597 if (!Arrays.asList(expected).equals(result)) 598 failCount++; 599 } 600 601 private static void lookbehindTest() throws Exception { 602 //Positive 603 check("(?<=%.{0,5})foo\\d", 604 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 605 new String[]{"foo1", "foo2", "foo3"}); 606 607 //boundary at end of the lookbehind sub-regex should work consistently 608 //with the boundary just after the lookbehind sub-regex 609 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 610 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 611 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 612 check("(?<!abc \\b)foo", "abc foo", new String[0]); 613 614 //Negative 615 check("(?<!%.{0,5})foo\\d", 616 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 617 new String[] {"foo4", "foo5"}); 618 619 //Positive greedy 620 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 621 622 //Positive reluctant 623 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 624 625 //supplementary 626 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 627 new String[] {"fo\ud800\udc00o"}); 628 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 629 new String[] {"fo\ud800\udc00o"}); 630 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 631 new String[] {"fo\ud800\udc00o"}); 632 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 633 new String[] {"fo\ud800\udc00o"}); 634 report("Lookbehind"); 635 } 636 637 // This test is for 4938995 638 // Check to see if weak region boundaries are transparent to 639 // lookahead and lookbehind constructs 640 private static void boundsTest() throws Exception { 641 String fullMessage = "catdogcat"; 642 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 643 Matcher matcher = pattern.matcher("catdogca"); 644 matcher.useTransparentBounds(true); 645 if (matcher.find()) 646 failCount++; 647 matcher.reset("atdogcat"); 648 if (matcher.find()) 649 failCount++; 650 matcher.reset(fullMessage); 651 if (!matcher.find()) 652 failCount++; 653 matcher.reset(fullMessage); 654 matcher.region(0,9); 655 if (!matcher.find()) 656 failCount++; 657 matcher.reset(fullMessage); 658 matcher.region(0,6); 659 if (!matcher.find()) 660 failCount++; 661 matcher.reset(fullMessage); 662 matcher.region(3,6); 663 if (!matcher.find()) 664 failCount++; 665 matcher.useTransparentBounds(false); 666 if (matcher.find()) 667 failCount++; 668 669 // Negative lookahead/lookbehind 670 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 671 matcher = pattern.matcher("dogcat"); 672 matcher.useTransparentBounds(true); 673 matcher.region(0,3); 674 if (matcher.find()) 675 failCount++; 676 matcher.reset("catdog"); 677 matcher.region(3,6); 678 if (matcher.find()) 679 failCount++; 680 matcher.useTransparentBounds(false); 681 matcher.reset("dogcat"); 682 matcher.region(0,3); 683 if (!matcher.find()) 684 failCount++; 685 matcher.reset("catdog"); 686 matcher.region(3,6); 687 if (!matcher.find()) 688 failCount++; 689 690 report("Region bounds transparency"); 691 } 692 693 // This test is for 4945394 694 private static void findFromTest() throws Exception { 695 String message = "This is 40 $0 message."; 696 Pattern pat = Pattern.compile("\\$0"); 697 Matcher match = pat.matcher(message); 698 if (!match.find()) 699 failCount++; 700 if (match.find()) 701 failCount++; 702 if (match.find()) 703 failCount++; 704 report("Check for alternating find"); 705 } 706 707 // This test is for 4872664 and 4892980 708 private static void negatedCharClassTest() throws Exception { 709 Pattern pattern = Pattern.compile("[^>]"); 710 Matcher matcher = pattern.matcher("\u203A"); 711 if (!matcher.matches()) 712 failCount++; 713 pattern = Pattern.compile("[^fr]"); 714 matcher = pattern.matcher("a"); 715 if (!matcher.find()) 716 failCount++; 717 matcher.reset("\u203A"); 718 if (!matcher.find()) 719 failCount++; 720 String s = "for"; 721 String result[] = s.split("[^fr]"); 722 if (!result[0].equals("f")) 723 failCount++; 724 if (!result[1].equals("r")) 725 failCount++; 726 s = "f\u203Ar"; 727 result = s.split("[^fr]"); 728 if (!result[0].equals("f")) 729 failCount++; 730 if (!result[1].equals("r")) 731 failCount++; 732 733 // Test adding to bits, subtracting a node, then adding to bits again 734 pattern = Pattern.compile("[^f\u203Ar]"); 735 matcher = pattern.matcher("a"); 736 if (!matcher.find()) 737 failCount++; 738 matcher.reset("f"); 739 if (matcher.find()) 740 failCount++; 741 matcher.reset("\u203A"); 742 if (matcher.find()) 743 failCount++; 744 matcher.reset("r"); 745 if (matcher.find()) 746 failCount++; 747 matcher.reset("\u203B"); 748 if (!matcher.find()) 749 failCount++; 750 751 // Test subtracting a node, adding to bits, subtracting again 752 pattern = Pattern.compile("[^\u203Ar\u203B]"); 753 matcher = pattern.matcher("a"); 754 if (!matcher.find()) 755 failCount++; 756 matcher.reset("\u203A"); 757 if (matcher.find()) 758 failCount++; 759 matcher.reset("r"); 760 if (matcher.find()) 761 failCount++; 762 matcher.reset("\u203B"); 763 if (matcher.find()) 764 failCount++; 765 matcher.reset("\u203C"); 766 if (!matcher.find()) 767 failCount++; 768 769 report("Negated Character Class"); 770 } 771 772 // This test is for 4628291 773 private static void toStringTest() throws Exception { 774 Pattern pattern = Pattern.compile("b+"); 775 if (pattern.toString() != "b+") 776 failCount++; 777 Matcher matcher = pattern.matcher("aaabbbccc"); 778 String matcherString = matcher.toString(); // unspecified 779 matcher.find(); 780 matcherString = matcher.toString(); // unspecified 781 matcher.region(0,3); 782 matcherString = matcher.toString(); // unspecified 783 matcher.reset(); 784 matcherString = matcher.toString(); // unspecified 785 report("toString"); 786 } 787 788 // This test is for 4808962 789 private static void literalPatternTest() throws Exception { 790 int flags = Pattern.LITERAL; 791 792 Pattern pattern = Pattern.compile("abc\\t$^", flags); 793 check(pattern, "abc\\t$^", true); 794 795 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 796 check(pattern, "abc\\t$^", true); 797 798 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 799 check(pattern, "\\Qa^$bcabc\\E", true); 800 check(pattern, "a^$bcabc", false); 801 802 pattern = Pattern.compile("\\\\Q\\\\E"); 803 check(pattern, "\\Q\\E", true); 804 805 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 806 check(pattern, "abcefg\\Q\\Ehij", true); 807 808 pattern = Pattern.compile("\\\\\\Q\\\\E"); 809 check(pattern, "\\\\\\\\", true); 810 811 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 812 check(pattern, "\\Qa^$bcabc\\E", true); 813 check(pattern, "a^$bcabc", false); 814 815 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 816 check(pattern, "\\Qabc\\Edef", true); 817 check(pattern, "abcdef", false); 818 819 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 820 check(pattern, "abc\\Edef", true); 821 check(pattern, "abcdef", false); 822 823 pattern = Pattern.compile(Pattern.quote("\\E")); 824 check(pattern, "\\E", true); 825 826 pattern = Pattern.compile("((((abc.+?:)", flags); 827 check(pattern, "((((abc.+?:)", true); 828 829 flags |= Pattern.MULTILINE; 830 831 pattern = Pattern.compile("^cat$", flags); 832 check(pattern, "abc^cat$def", true); 833 check(pattern, "cat", false); 834 835 flags |= Pattern.CASE_INSENSITIVE; 836 837 pattern = Pattern.compile("abcdef", flags); 838 check(pattern, "ABCDEF", true); 839 check(pattern, "AbCdEf", true); 840 841 flags |= Pattern.DOTALL; 842 843 pattern = Pattern.compile("a...b", flags); 844 check(pattern, "A...b", true); 845 check(pattern, "Axxxb", false); 846 847 flags |= Pattern.CANON_EQ; 848 849 Pattern p = Pattern.compile("testa\u030a", flags); 850 check(pattern, "testa\u030a", false); 851 check(pattern, "test\u00e5", false); 852 853 // Supplementary character test 854 flags = Pattern.LITERAL; 855 856 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 857 check(pattern, toSupplementaries("abc\\t$^"), true); 858 859 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 860 check(pattern, toSupplementaries("abc\\t$^"), true); 861 862 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 863 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 864 check(pattern, toSupplementaries("a^$bcabc"), false); 865 866 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 867 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 868 check(pattern, toSupplementaries("a^$bcabc"), false); 869 870 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 871 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 872 check(pattern, toSupplementaries("abcdef"), false); 873 874 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 875 check(pattern, toSupplementaries("abc\\Edef"), true); 876 check(pattern, toSupplementaries("abcdef"), false); 877 878 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 879 check(pattern, toSupplementaries("((((abc.+?:)"), true); 880 881 flags |= Pattern.MULTILINE; 882 883 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 884 check(pattern, toSupplementaries("abc^cat$def"), true); 885 check(pattern, toSupplementaries("cat"), false); 886 887 flags |= Pattern.DOTALL; 888 889 // note: this is case-sensitive. 890 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 891 check(pattern, toSupplementaries("a...b"), true); 892 check(pattern, toSupplementaries("axxxb"), false); 893 894 flags |= Pattern.CANON_EQ; 895 896 String t = toSupplementaries("test"); 897 p = Pattern.compile(t + "a\u030a", flags); 898 check(pattern, t + "a\u030a", false); 899 check(pattern, t + "\u00e5", false); 900 901 report("Literal pattern"); 902 } 903 904 // This test is for 4803179 905 // This test is also for 4808962, replacement parts 906 private static void literalReplacementTest() throws Exception { 907 int flags = Pattern.LITERAL; 908 909 Pattern pattern = Pattern.compile("abc", flags); 910 Matcher matcher = pattern.matcher("zzzabczzz"); 911 String replaceTest = "$0"; 912 String result = matcher.replaceAll(replaceTest); 913 if (!result.equals("zzzabczzz")) 914 failCount++; 915 916 matcher.reset(); 917 String literalReplacement = matcher.quoteReplacement(replaceTest); 918 result = matcher.replaceAll(literalReplacement); 919 if (!result.equals("zzz$0zzz")) 920 failCount++; 921 922 matcher.reset(); 923 replaceTest = "\\t$\\$"; 924 literalReplacement = matcher.quoteReplacement(replaceTest); 925 result = matcher.replaceAll(literalReplacement); 926 if (!result.equals("zzz\\t$\\$zzz")) 927 failCount++; 928 929 // Supplementary character test 930 pattern = Pattern.compile(toSupplementaries("abc"), flags); 931 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 932 replaceTest = "$0"; 933 result = matcher.replaceAll(replaceTest); 934 if (!result.equals(toSupplementaries("zzzabczzz"))) 935 failCount++; 936 937 matcher.reset(); 938 literalReplacement = matcher.quoteReplacement(replaceTest); 939 result = matcher.replaceAll(literalReplacement); 940 if (!result.equals(toSupplementaries("zzz$0zzz"))) 941 failCount++; 942 943 matcher.reset(); 944 replaceTest = "\\t$\\$"; 945 literalReplacement = matcher.quoteReplacement(replaceTest); 946 result = matcher.replaceAll(literalReplacement); 947 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 948 failCount++; 949 950 // IAE should be thrown if backslash or '$' is the last character 951 // in replacement string 952 try { 953 "\uac00".replaceAll("\uac00", "$"); 954 failCount++; 955 } catch (IllegalArgumentException iie) { 956 } catch (Exception e) { 957 failCount++; 958 } 959 try { 960 "\uac00".replaceAll("\uac00", "\\"); 961 failCount++; 962 } catch (IllegalArgumentException iie) { 963 } catch (Exception e) { 964 failCount++; 965 } 966 report("Literal replacement"); 967 } 968 969 // This test is for 4757029 970 private static void regionTest() throws Exception { 971 Pattern pattern = Pattern.compile("abc"); 972 Matcher matcher = pattern.matcher("abcdefabc"); 973 974 matcher.region(0,9); 975 if (!matcher.find()) 976 failCount++; 977 if (!matcher.find()) 978 failCount++; 979 matcher.region(0,3); 980 if (!matcher.find()) 981 failCount++; 982 matcher.region(3,6); 983 if (matcher.find()) 984 failCount++; 985 matcher.region(0,2); 986 if (matcher.find()) 987 failCount++; 988 989 expectRegionFail(matcher, 1, -1); 990 expectRegionFail(matcher, -1, -1); 991 expectRegionFail(matcher, -1, 1); 992 expectRegionFail(matcher, 5, 3); 993 expectRegionFail(matcher, 5, 12); 994 expectRegionFail(matcher, 12, 12); 995 996 pattern = Pattern.compile("^abc$"); 997 matcher = pattern.matcher("zzzabczzz"); 998 matcher.region(0,9); 999 if (matcher.find()) 1000 failCount++; 1001 matcher.region(3,6); 1002 if (!matcher.find()) 1003 failCount++; 1004 matcher.region(3,6); 1005 matcher.useAnchoringBounds(false); 1006 if (matcher.find()) 1007 failCount++; 1008 1009 // Supplementary character test 1010 pattern = Pattern.compile(toSupplementaries("abc")); 1011 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 1012 matcher.region(0,9*2); 1013 if (!matcher.find()) 1014 failCount++; 1015 if (!matcher.find()) 1016 failCount++; 1017 matcher.region(0,3*2); 1018 if (!matcher.find()) 1019 failCount++; 1020 matcher.region(1,3*2); 1021 if (matcher.find()) 1022 failCount++; 1023 matcher.region(3*2,6*2); 1024 if (matcher.find()) 1025 failCount++; 1026 matcher.region(0,2*2); 1027 if (matcher.find()) 1028 failCount++; 1029 matcher.region(0,2*2+1); 1030 if (matcher.find()) 1031 failCount++; 1032 1033 expectRegionFail(matcher, 1*2, -1); 1034 expectRegionFail(matcher, -1, -1); 1035 expectRegionFail(matcher, -1, 1*2); 1036 expectRegionFail(matcher, 5*2, 3*2); 1037 expectRegionFail(matcher, 5*2, 12*2); 1038 expectRegionFail(matcher, 12*2, 12*2); 1039 1040 pattern = Pattern.compile(toSupplementaries("^abc$")); 1041 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1042 matcher.region(0,9*2); 1043 if (matcher.find()) 1044 failCount++; 1045 matcher.region(3*2,6*2); 1046 if (!matcher.find()) 1047 failCount++; 1048 matcher.region(3*2+1,6*2); 1049 if (matcher.find()) 1050 failCount++; 1051 matcher.region(3*2,6*2-1); 1052 if (matcher.find()) 1053 failCount++; 1054 matcher.region(3*2,6*2); 1055 matcher.useAnchoringBounds(false); 1056 if (matcher.find()) 1057 failCount++; 1058 report("Regions"); 1059 } 1060 1061 private static void expectRegionFail(Matcher matcher, int index1, 1062 int index2) 1063 { 1064 try { 1065 matcher.region(index1, index2); 1066 failCount++; 1067 } catch (IndexOutOfBoundsException ioobe) { 1068 // Correct result 1069 } catch (IllegalStateException ise) { 1070 // Correct result 1071 } 1072 } 1073 1074 // This test is for 4803197 1075 private static void escapedSegmentTest() throws Exception { 1076 1077 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1078 check(pattern, "dir1\\dir2", true); 1079 1080 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1081 check(pattern, "dir1\\dir2\\", true); 1082 1083 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1084 check(pattern, "dir1\\dir2\\", true); 1085 1086 // Supplementary character test 1087 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1088 check(pattern, toSupplementaries("dir1\\dir2"), true); 1089 1090 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1091 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1092 1093 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1094 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1095 1096 report("Escaped segment"); 1097 } 1098 1099 // This test is for 4792284 1100 private static void nonCaptureRepetitionTest() throws Exception { 1101 String input = "abcdefgh;"; 1102 1103 String[] patterns = new String[] { 1104 "(?:\\w{4})+;", 1105 "(?:\\w{8})*;", 1106 "(?:\\w{2}){2,4};", 1107 "(?:\\w{4}){2,};", // only matches the 1108 ".*?(?:\\w{5})+;", // specified minimum 1109 ".*?(?:\\w{9})*;", // number of reps - OK 1110 "(?:\\w{4})+?;", // lazy repetition - OK 1111 "(?:\\w{4})++;", // possessive repetition - OK 1112 "(?:\\w{2,}?)+;", // non-deterministic - OK 1113 "(\\w{4})+;", // capturing group - OK 1114 }; 1115 1116 for (int i = 0; i < patterns.length; i++) { 1117 // Check find() 1118 check(patterns[i], 0, input, input, true); 1119 // Check matches() 1120 Pattern p = Pattern.compile(patterns[i]); 1121 Matcher m = p.matcher(input); 1122 1123 if (m.matches()) { 1124 if (!m.group(0).equals(input)) 1125 failCount++; 1126 } else { 1127 failCount++; 1128 } 1129 } 1130 1131 report("Non capturing repetition"); 1132 } 1133 1134 // This test is for 6358731 1135 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1136 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1137 Matcher matcher = pattern.matcher("abcd"); 1138 if (!matcher.matches() || 1139 matcher.group(1) != null || 1140 !matcher.group(2).equals("abcd")) { 1141 failCount++; 1142 } 1143 report("Not captured GroupCurly"); 1144 } 1145 1146 // This test is for 4706545 1147 private static void javaCharClassTest() throws Exception { 1148 for (int i=0; i<1000; i++) { 1149 char c = (char)generator.nextInt(); 1150 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1151 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1152 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1153 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1154 check("{javaDigit}", c, Character.isDigit(c)); 1155 check("{javaDefined}", c, Character.isDefined(c)); 1156 check("{javaLetter}", c, Character.isLetter(c)); 1157 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1158 check("{javaJavaIdentifierStart}", c, 1159 Character.isJavaIdentifierStart(c)); 1160 check("{javaJavaIdentifierPart}", c, 1161 Character.isJavaIdentifierPart(c)); 1162 check("{javaUnicodeIdentifierStart}", c, 1163 Character.isUnicodeIdentifierStart(c)); 1164 check("{javaUnicodeIdentifierPart}", c, 1165 Character.isUnicodeIdentifierPart(c)); 1166 check("{javaIdentifierIgnorable}", c, 1167 Character.isIdentifierIgnorable(c)); 1168 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1169 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1170 check("{javaISOControl}", c, Character.isISOControl(c)); 1171 check("{javaMirrored}", c, Character.isMirrored(c)); 1172 1173 } 1174 1175 // Supplementary character test 1176 for (int i=0; i<1000; i++) { 1177 int c = generator.nextInt(Character.MAX_CODE_POINT 1178 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1179 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1180 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1181 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1182 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1183 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1184 check("{javaDigit}", c, Character.isDigit(c)); 1185 check("{javaDefined}", c, Character.isDefined(c)); 1186 check("{javaLetter}", c, Character.isLetter(c)); 1187 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1188 check("{javaJavaIdentifierStart}", c, 1189 Character.isJavaIdentifierStart(c)); 1190 check("{javaJavaIdentifierPart}", c, 1191 Character.isJavaIdentifierPart(c)); 1192 check("{javaUnicodeIdentifierStart}", c, 1193 Character.isUnicodeIdentifierStart(c)); 1194 check("{javaUnicodeIdentifierPart}", c, 1195 Character.isUnicodeIdentifierPart(c)); 1196 check("{javaIdentifierIgnorable}", c, 1197 Character.isIdentifierIgnorable(c)); 1198 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1199 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1200 check("{javaISOControl}", c, Character.isISOControl(c)); 1201 check("{javaMirrored}", c, Character.isMirrored(c)); 1202 } 1203 1204 report("Java character classes"); 1205 } 1206 1207 // This test is for 4523620 1208 /* 1209 private static void numOccurrencesTest() throws Exception { 1210 Pattern pattern = Pattern.compile("aaa"); 1211 1212 if (pattern.numOccurrences("aaaaaa", false) != 2) 1213 failCount++; 1214 if (pattern.numOccurrences("aaaaaa", true) != 4) 1215 failCount++; 1216 1217 pattern = Pattern.compile("^"); 1218 if (pattern.numOccurrences("aaaaaa", false) != 1) 1219 failCount++; 1220 if (pattern.numOccurrences("aaaaaa", true) != 1) 1221 failCount++; 1222 1223 report("Number of Occurrences"); 1224 } 1225 */ 1226 1227 // This test is for 4776374 1228 private static void caretBetweenTerminatorsTest() throws Exception { 1229 int flags1 = Pattern.DOTALL; 1230 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1231 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1232 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1233 1234 check("^....", flags1, "test\ntest", "test", true); 1235 check(".....^", flags1, "test\ntest", "test", false); 1236 check(".....^", flags1, "test\n", "test", false); 1237 check("....^", flags1, "test\r\n", "test", false); 1238 1239 check("^....", flags2, "test\ntest", "test", true); 1240 check("....^", flags2, "test\ntest", "test", false); 1241 check(".....^", flags2, "test\n", "test", false); 1242 check("....^", flags2, "test\r\n", "test", false); 1243 1244 check("^....", flags3, "test\ntest", "test", true); 1245 check(".....^", flags3, "test\ntest", "test\n", true); 1246 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1247 check(".....^", flags3, "test\n", "test", false); 1248 check(".....^", flags3, "test\r\n", "test", false); 1249 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1250 1251 check("^....", flags4, "test\ntest", "test", true); 1252 check(".....^", flags3, "test\ntest", "test\n", true); 1253 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1254 check(".....^", flags4, "test\n", "test\n", false); 1255 check(".....^", flags4, "test\r\n", "test\r", false); 1256 1257 // Supplementary character test 1258 String t = toSupplementaries("test"); 1259 check("^....", flags1, t+"\n"+t, t, true); 1260 check(".....^", flags1, t+"\n"+t, t, false); 1261 check(".....^", flags1, t+"\n", t, false); 1262 check("....^", flags1, t+"\r\n", t, false); 1263 1264 check("^....", flags2, t+"\n"+t, t, true); 1265 check("....^", flags2, t+"\n"+t, t, false); 1266 check(".....^", flags2, t+"\n", t, false); 1267 check("....^", flags2, t+"\r\n", t, false); 1268 1269 check("^....", flags3, t+"\n"+t, t, true); 1270 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1271 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1272 check(".....^", flags3, t+"\n", t, false); 1273 check(".....^", flags3, t+"\r\n", t, false); 1274 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1275 1276 check("^....", flags4, t+"\n"+t, t, true); 1277 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1278 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1279 check(".....^", flags4, t+"\n", t+"\n", false); 1280 check(".....^", flags4, t+"\r\n", t+"\r", false); 1281 1282 report("Caret between terminators"); 1283 } 1284 1285 // This test is for 4727935 1286 private static void dollarAtEndTest() throws Exception { 1287 int flags1 = Pattern.DOTALL; 1288 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1289 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1290 1291 check("....$", flags1, "test\n", "test", true); 1292 check("....$", flags1, "test\r\n", "test", true); 1293 check(".....$", flags1, "test\n", "test\n", true); 1294 check(".....$", flags1, "test\u0085", "test\u0085", true); 1295 check("....$", flags1, "test\u0085", "test", true); 1296 1297 check("....$", flags2, "test\n", "test", true); 1298 check(".....$", flags2, "test\n", "test\n", true); 1299 check(".....$", flags2, "test\u0085", "test\u0085", true); 1300 check("....$", flags2, "test\u0085", "est\u0085", true); 1301 1302 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1303 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1304 check("....$blah", flags3, "test\nblah", "!!!!", false); 1305 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1306 1307 // Supplementary character test 1308 String t = toSupplementaries("test"); 1309 String b = toSupplementaries("blah"); 1310 check("....$", flags1, t+"\n", t, true); 1311 check("....$", flags1, t+"\r\n", t, true); 1312 check(".....$", flags1, t+"\n", t+"\n", true); 1313 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1314 check("....$", flags1, t+"\u0085", t, true); 1315 1316 check("....$", flags2, t+"\n", t, true); 1317 check(".....$", flags2, t+"\n", t+"\n", true); 1318 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1319 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1320 1321 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1322 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1323 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1324 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1325 1326 report("Dollar at End"); 1327 } 1328 1329 // This test is for 4711773 1330 private static void multilineDollarTest() throws Exception { 1331 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1332 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1333 matcher.find(); 1334 if (matcher.start(0) != 9) 1335 failCount++; 1336 matcher.find(); 1337 if (matcher.start(0) != 20) 1338 failCount++; 1339 1340 // Supplementary character test 1341 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1342 matcher.find(); 1343 if (matcher.start(0) != 9*2) 1344 failCount++; 1345 matcher.find(); 1346 if (matcher.start(0) != 20*2) 1347 failCount++; 1348 1349 report("Multiline Dollar"); 1350 } 1351 1352 private static void reluctantRepetitionTest() throws Exception { 1353 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1354 check(p, "1 word word word 2", true); 1355 check(p, "1 wor wo w 2", true); 1356 check(p, "1 word word 2", true); 1357 check(p, "1 word 2", true); 1358 check(p, "1 wo w w 2", true); 1359 check(p, "1 wo w 2", true); 1360 check(p, "1 wor w 2", true); 1361 1362 p = Pattern.compile("([a-z])+?c"); 1363 Matcher m = p.matcher("ababcdefdec"); 1364 check(m, "ababc"); 1365 1366 // Supplementary character test 1367 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1368 m = p.matcher(toSupplementaries("ababcdefdec")); 1369 check(m, toSupplementaries("ababc")); 1370 1371 report("Reluctant Repetition"); 1372 } 1373 1374 private static Pattern serializedPattern(Pattern p) throws Exception { 1375 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1376 ObjectOutputStream oos = new ObjectOutputStream(baos); 1377 oos.writeObject(p); 1378 oos.close(); 1379 try (ObjectInputStream ois = new ObjectInputStream( 1380 new ByteArrayInputStream(baos.toByteArray()))) { 1381 return (Pattern)ois.readObject(); 1382 } 1383 } 1384 1385 private static void serializeTest() throws Exception { 1386 String patternStr = "(b)"; 1387 String matchStr = "b"; 1388 Pattern pattern = Pattern.compile(patternStr); 1389 Pattern serializedPattern = serializedPattern(pattern); 1390 Matcher matcher = serializedPattern.matcher(matchStr); 1391 if (!matcher.matches()) 1392 failCount++; 1393 if (matcher.groupCount() != 1) 1394 failCount++; 1395 1396 pattern = Pattern.compile("a(?-i)b", Pattern.CASE_INSENSITIVE); 1397 serializedPattern = serializedPattern(pattern); 1398 if (!serializedPattern.matcher("Ab").matches()) 1399 failCount++; 1400 if (serializedPattern.matcher("AB").matches()) 1401 failCount++; 1402 1403 report("Serialization"); 1404 } 1405 1406 private static void gTest() { 1407 Pattern pattern = Pattern.compile("\\G\\w"); 1408 Matcher matcher = pattern.matcher("abc#x#x"); 1409 matcher.find(); 1410 matcher.find(); 1411 matcher.find(); 1412 if (matcher.find()) 1413 failCount++; 1414 1415 pattern = Pattern.compile("\\GA*"); 1416 matcher = pattern.matcher("1A2AA3"); 1417 matcher.find(); 1418 if (matcher.find()) 1419 failCount++; 1420 1421 pattern = Pattern.compile("\\GA*"); 1422 matcher = pattern.matcher("1A2AA3"); 1423 if (!matcher.find(1)) 1424 failCount++; 1425 matcher.find(); 1426 if (matcher.find()) 1427 failCount++; 1428 1429 report("\\G"); 1430 } 1431 1432 private static void zTest() { 1433 Pattern pattern = Pattern.compile("foo\\Z"); 1434 // Positives 1435 check(pattern, "foo\u0085", true); 1436 check(pattern, "foo\u2028", true); 1437 check(pattern, "foo\u2029", true); 1438 check(pattern, "foo\n", true); 1439 check(pattern, "foo\r", true); 1440 check(pattern, "foo\r\n", true); 1441 // Negatives 1442 check(pattern, "fooo", false); 1443 check(pattern, "foo\n\r", false); 1444 1445 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1446 // Positives 1447 check(pattern, "foo", true); 1448 check(pattern, "foo\n", true); 1449 // Negatives 1450 check(pattern, "foo\r", false); 1451 check(pattern, "foo\u0085", false); 1452 check(pattern, "foo\u2028", false); 1453 check(pattern, "foo\u2029", false); 1454 1455 report("\\Z"); 1456 } 1457 1458 private static void replaceFirstTest() { 1459 Pattern pattern = Pattern.compile("(ab)(c*)"); 1460 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1461 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1462 failCount++; 1463 1464 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1465 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1466 failCount++; 1467 1468 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1469 String result = matcher.replaceFirst("$1"); 1470 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1471 failCount++; 1472 1473 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1474 result = matcher.replaceFirst("$2"); 1475 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1476 failCount++; 1477 1478 pattern = Pattern.compile("a*"); 1479 matcher = pattern.matcher("aaaaaaaaaa"); 1480 if (!matcher.replaceFirst("test").equals("test")) 1481 failCount++; 1482 1483 pattern = Pattern.compile("a+"); 1484 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1485 if (!matcher.replaceFirst("test").equals("zzztest")) 1486 failCount++; 1487 1488 // Supplementary character test 1489 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1490 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1491 if (!matcher.replaceFirst(toSupplementaries("test")) 1492 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1493 failCount++; 1494 1495 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1496 if (!matcher.replaceFirst(toSupplementaries("test")). 1497 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1498 failCount++; 1499 1500 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1501 result = matcher.replaceFirst("$1"); 1502 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1503 failCount++; 1504 1505 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1506 result = matcher.replaceFirst("$2"); 1507 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1508 failCount++; 1509 1510 pattern = Pattern.compile(toSupplementaries("a*")); 1511 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1512 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1513 failCount++; 1514 1515 pattern = Pattern.compile(toSupplementaries("a+")); 1516 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1517 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1518 failCount++; 1519 1520 report("Replace First"); 1521 } 1522 1523 private static void unixLinesTest() { 1524 Pattern pattern = Pattern.compile(".*"); 1525 Matcher matcher = pattern.matcher("aa\u2028blah"); 1526 matcher.find(); 1527 if (!matcher.group(0).equals("aa")) 1528 failCount++; 1529 1530 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1531 matcher = pattern.matcher("aa\u2028blah"); 1532 matcher.find(); 1533 if (!matcher.group(0).equals("aa\u2028blah")) 1534 failCount++; 1535 1536 pattern = Pattern.compile("[az]$", 1537 Pattern.MULTILINE | Pattern.UNIX_LINES); 1538 matcher = pattern.matcher("aa\u2028zz"); 1539 check(matcher, "a\u2028", false); 1540 1541 // Supplementary character test 1542 pattern = Pattern.compile(".*"); 1543 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1544 matcher.find(); 1545 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1546 failCount++; 1547 1548 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1549 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1550 matcher.find(); 1551 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1552 failCount++; 1553 1554 pattern = Pattern.compile(toSupplementaries("[az]$"), 1555 Pattern.MULTILINE | Pattern.UNIX_LINES); 1556 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1557 check(matcher, toSupplementaries("a\u2028"), false); 1558 1559 report("Unix Lines"); 1560 } 1561 1562 private static void commentsTest() { 1563 int flags = Pattern.COMMENTS; 1564 1565 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1566 Matcher matcher = pattern.matcher("aa#aa"); 1567 if (!matcher.matches()) 1568 failCount++; 1569 1570 pattern = Pattern.compile("aa # blah", flags); 1571 matcher = pattern.matcher("aa"); 1572 if (!matcher.matches()) 1573 failCount++; 1574 1575 pattern = Pattern.compile("aa blah", flags); 1576 matcher = pattern.matcher("aablah"); 1577 if (!matcher.matches()) 1578 failCount++; 1579 1580 pattern = Pattern.compile("aa # blah blech ", flags); 1581 matcher = pattern.matcher("aa"); 1582 if (!matcher.matches()) 1583 failCount++; 1584 1585 pattern = Pattern.compile("aa # blah\n ", flags); 1586 matcher = pattern.matcher("aa"); 1587 if (!matcher.matches()) 1588 failCount++; 1589 1590 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1591 matcher = pattern.matcher("aabc"); 1592 if (!matcher.matches()) 1593 failCount++; 1594 1595 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1596 matcher = pattern.matcher("aabc"); 1597 if (!matcher.matches()) 1598 failCount++; 1599 1600 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1601 matcher = pattern.matcher("aabc#blech"); 1602 if (!matcher.matches()) 1603 failCount++; 1604 1605 // Supplementary character test 1606 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1607 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1608 if (!matcher.matches()) 1609 failCount++; 1610 1611 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1612 matcher = pattern.matcher(toSupplementaries("aa")); 1613 if (!matcher.matches()) 1614 failCount++; 1615 1616 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1617 matcher = pattern.matcher(toSupplementaries("aablah")); 1618 if (!matcher.matches()) 1619 failCount++; 1620 1621 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1622 matcher = pattern.matcher(toSupplementaries("aa")); 1623 if (!matcher.matches()) 1624 failCount++; 1625 1626 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1627 matcher = pattern.matcher(toSupplementaries("aa")); 1628 if (!matcher.matches()) 1629 failCount++; 1630 1631 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1632 matcher = pattern.matcher(toSupplementaries("aabc")); 1633 if (!matcher.matches()) 1634 failCount++; 1635 1636 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1637 matcher = pattern.matcher(toSupplementaries("aabc")); 1638 if (!matcher.matches()) 1639 failCount++; 1640 1641 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1642 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1643 if (!matcher.matches()) 1644 failCount++; 1645 1646 report("Comments"); 1647 } 1648 1649 private static void caseFoldingTest() { // bug 4504687 1650 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1651 Pattern pattern = Pattern.compile("aa", flags); 1652 Matcher matcher = pattern.matcher("ab"); 1653 if (matcher.matches()) 1654 failCount++; 1655 1656 pattern = Pattern.compile("aA", flags); 1657 matcher = pattern.matcher("ab"); 1658 if (matcher.matches()) 1659 failCount++; 1660 1661 pattern = Pattern.compile("aa", flags); 1662 matcher = pattern.matcher("aB"); 1663 if (matcher.matches()) 1664 failCount++; 1665 matcher = pattern.matcher("Ab"); 1666 if (matcher.matches()) 1667 failCount++; 1668 1669 // ASCII "a" 1670 // Latin-1 Supplement "a" + grave 1671 // Cyrillic "a" 1672 String[] patterns = new String[] { 1673 //single 1674 "a", "\u00e0", "\u0430", 1675 //slice 1676 "ab", "\u00e0\u00e1", "\u0430\u0431", 1677 //class single 1678 "[a]", "[\u00e0]", "[\u0430]", 1679 //class range 1680 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1681 //back reference 1682 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1683 }; 1684 1685 String[] texts = new String[] { 1686 "A", "\u00c0", "\u0410", 1687 "AB", "\u00c0\u00c1", "\u0410\u0411", 1688 "A", "\u00c0", "\u0410", 1689 "B", "\u00c2", "\u0411", 1690 "aA", "\u00e0\u00c0", "\u0430\u0410" 1691 }; 1692 1693 boolean[] expected = new boolean[] { 1694 true, false, false, 1695 true, false, false, 1696 true, false, false, 1697 true, false, false, 1698 true, false, false 1699 }; 1700 1701 flags = Pattern.CASE_INSENSITIVE; 1702 for (int i = 0; i < patterns.length; i++) { 1703 pattern = Pattern.compile(patterns[i], flags); 1704 matcher = pattern.matcher(texts[i]); 1705 if (matcher.matches() != expected[i]) { 1706 System.out.println("<1> Failed at " + i); 1707 failCount++; 1708 } 1709 } 1710 1711 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1712 for (int i = 0; i < patterns.length; i++) { 1713 pattern = Pattern.compile(patterns[i], flags); 1714 matcher = pattern.matcher(texts[i]); 1715 if (!matcher.matches()) { 1716 System.out.println("<2> Failed at " + i); 1717 failCount++; 1718 } 1719 } 1720 // flag unicode_case alone should do nothing 1721 flags = Pattern.UNICODE_CASE; 1722 for (int i = 0; i < patterns.length; i++) { 1723 pattern = Pattern.compile(patterns[i], flags); 1724 matcher = pattern.matcher(texts[i]); 1725 if (matcher.matches()) { 1726 System.out.println("<3> Failed at " + i); 1727 failCount++; 1728 } 1729 } 1730 1731 // Special cases: i, I, u+0131 and u+0130 1732 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1733 pattern = Pattern.compile("[h-j]+", flags); 1734 if (!pattern.matcher("\u0131\u0130").matches()) 1735 failCount++; 1736 report("Case Folding"); 1737 } 1738 1739 private static void appendTest() { 1740 Pattern pattern = Pattern.compile("(ab)(cd)"); 1741 Matcher matcher = pattern.matcher("abcd"); 1742 String result = matcher.replaceAll("$2$1"); 1743 if (!result.equals("cdab")) 1744 failCount++; 1745 1746 String s1 = "Swap all: first = 123, second = 456"; 1747 String s2 = "Swap one: first = 123, second = 456"; 1748 String r = "$3$2$1"; 1749 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1750 matcher = pattern.matcher(s1); 1751 1752 result = matcher.replaceAll(r); 1753 if (!result.equals("Swap all: 123 = first, 456 = second")) 1754 failCount++; 1755 1756 matcher = pattern.matcher(s2); 1757 1758 if (matcher.find()) { 1759 StringBuffer sb = new StringBuffer(); 1760 matcher.appendReplacement(sb, r); 1761 matcher.appendTail(sb); 1762 result = sb.toString(); 1763 if (!result.equals("Swap one: 123 = first, second = 456")) 1764 failCount++; 1765 } 1766 1767 // Supplementary character test 1768 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1769 matcher = pattern.matcher(toSupplementaries("abcd")); 1770 result = matcher.replaceAll("$2$1"); 1771 if (!result.equals(toSupplementaries("cdab"))) 1772 failCount++; 1773 1774 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1775 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1776 r = toSupplementaries("$3$2$1"); 1777 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1778 matcher = pattern.matcher(s1); 1779 1780 result = matcher.replaceAll(r); 1781 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1782 failCount++; 1783 1784 matcher = pattern.matcher(s2); 1785 1786 if (matcher.find()) { 1787 StringBuffer sb = new StringBuffer(); 1788 matcher.appendReplacement(sb, r); 1789 matcher.appendTail(sb); 1790 result = sb.toString(); 1791 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1792 failCount++; 1793 } 1794 report("Append"); 1795 } 1796 1797 private static void splitTest() { 1798 Pattern pattern = Pattern.compile(":"); 1799 String[] result = pattern.split("foo:and:boo", 2); 1800 if (!result[0].equals("foo")) 1801 failCount++; 1802 if (!result[1].equals("and:boo")) 1803 failCount++; 1804 // Supplementary character test 1805 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1806 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1807 if (!result[0].equals(toSupplementaries("foo"))) 1808 failCount++; 1809 if (!result[1].equals(toSupplementaries("andXboo"))) 1810 failCount++; 1811 1812 CharBuffer cb = CharBuffer.allocate(100); 1813 cb.put("foo:and:boo"); 1814 cb.flip(); 1815 result = pattern.split(cb); 1816 if (!result[0].equals("foo")) 1817 failCount++; 1818 if (!result[1].equals("and")) 1819 failCount++; 1820 if (!result[2].equals("boo")) 1821 failCount++; 1822 1823 // Supplementary character test 1824 CharBuffer cbs = CharBuffer.allocate(100); 1825 cbs.put(toSupplementaries("fooXandXboo")); 1826 cbs.flip(); 1827 result = patternX.split(cbs); 1828 if (!result[0].equals(toSupplementaries("foo"))) 1829 failCount++; 1830 if (!result[1].equals(toSupplementaries("and"))) 1831 failCount++; 1832 if (!result[2].equals(toSupplementaries("boo"))) 1833 failCount++; 1834 1835 String source = "0123456789"; 1836 for (int limit=-2; limit<3; limit++) { 1837 for (int x=0; x<10; x++) { 1838 result = source.split(Integer.toString(x), limit); 1839 int expectedLength = limit < 1 ? 2 : limit; 1840 1841 if ((limit == 0) && (x == 9)) { 1842 // expected dropping of "" 1843 if (result.length != 1) 1844 failCount++; 1845 if (!result[0].equals("012345678")) { 1846 failCount++; 1847 } 1848 } else { 1849 if (result.length != expectedLength) { 1850 failCount++; 1851 } 1852 if (!result[0].equals(source.substring(0,x))) { 1853 if (limit != 1) { 1854 failCount++; 1855 } else { 1856 if (!result[0].equals(source.substring(0,10))) { 1857 failCount++; 1858 } 1859 } 1860 } 1861 if (expectedLength > 1) { // Check segment 2 1862 if (!result[1].equals(source.substring(x+1,10))) 1863 failCount++; 1864 } 1865 } 1866 } 1867 } 1868 // Check the case for no match found 1869 for (int limit=-2; limit<3; limit++) { 1870 result = source.split("e", limit); 1871 if (result.length != 1) 1872 failCount++; 1873 if (!result[0].equals(source)) 1874 failCount++; 1875 } 1876 // Check the case for limit == 0, source = ""; 1877 // split() now returns 0-length for empty source "" see #6559590 1878 source = ""; 1879 result = source.split("e", 0); 1880 if (result.length != 1) 1881 failCount++; 1882 if (!result[0].equals(source)) 1883 failCount++; 1884 1885 // Check both split() and splitAsStraem(), especially for zero-lenth 1886 // input and zero-lenth match cases 1887 String[][] input = new String[][] { 1888 { " ", "Abc Efg Hij" }, // normal non-zero-match 1889 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1890 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1891 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1892 { "(?=\\p{Lu})", "AbcEfg" }, 1893 { "(?=\\p{Lu})", "Abc" }, 1894 { " ", "" }, // zero-length input 1895 { ".*", "" }, 1896 1897 // some tests from PatternStreamTest.java 1898 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1899 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1900 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1901 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1902 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1903 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1904 { "\u56da", "" }, 1905 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1906 { "o", "boo:and:foo" }, 1907 { "o", "booooo:and:fooooo" }, 1908 { "o", "fooooo:" }, 1909 }; 1910 1911 String[][] expected = new String[][] { 1912 { "Abc", "Efg", "Hij" }, 1913 { "", "Abc", "Efg", "Hij" }, 1914 { "Abc", "", "Efg", "Hij" }, 1915 { "Abc", "Efg", "Hij" }, 1916 { "Abc", "Efg" }, 1917 { "Abc" }, 1918 { "" }, 1919 { "" }, 1920 1921 { "awgqwefg1fefw", "vssv1vvv1" }, 1922 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1923 { "awgqwefg", "fefw4vssv", "vvv" }, 1924 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1925 { "1", "23", "456", "7890" }, 1926 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1927 { "" }, 1928 { "This", "is", "testing", "", "with", "different", "separators" }, 1929 { "b", "", ":and:f" }, 1930 { "b", "", "", "", "", ":and:f" }, 1931 { "f", "", "", "", "", ":" }, 1932 }; 1933 for (int i = 0; i < input.length; i++) { 1934 pattern = Pattern.compile(input[i][0]); 1935 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1936 failCount++; 1937 } 1938 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1939 // array for zero-length input for now 1940 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1941 expected[i])) { 1942 failCount++; 1943 } 1944 } 1945 report("Split"); 1946 } 1947 1948 private static void negationTest() { 1949 Pattern pattern = Pattern.compile("[\\[@^]+"); 1950 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1951 if (!matcher.find()) 1952 failCount++; 1953 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1954 failCount++; 1955 pattern = Pattern.compile("[@\\[^]+"); 1956 matcher = pattern.matcher("@@@@[[[[^^^^"); 1957 if (!matcher.find()) 1958 failCount++; 1959 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1960 failCount++; 1961 pattern = Pattern.compile("[@\\[^@]+"); 1962 matcher = pattern.matcher("@@@@[[[[^^^^"); 1963 if (!matcher.find()) 1964 failCount++; 1965 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1966 failCount++; 1967 1968 pattern = Pattern.compile("\\)"); 1969 matcher = pattern.matcher("xxx)xxx"); 1970 if (!matcher.find()) 1971 failCount++; 1972 1973 report("Negation"); 1974 } 1975 1976 private static void ampersandTest() { 1977 Pattern pattern = Pattern.compile("[&@]+"); 1978 check(pattern, "@@@@&&&&", true); 1979 1980 pattern = Pattern.compile("[@&]+"); 1981 check(pattern, "@@@@&&&&", true); 1982 1983 pattern = Pattern.compile("[@\\&]+"); 1984 check(pattern, "@@@@&&&&", true); 1985 1986 report("Ampersand"); 1987 } 1988 1989 private static void octalTest() throws Exception { 1990 Pattern pattern = Pattern.compile("\\u0007"); 1991 Matcher matcher = pattern.matcher("\u0007"); 1992 if (!matcher.matches()) 1993 failCount++; 1994 pattern = Pattern.compile("\\07"); 1995 matcher = pattern.matcher("\u0007"); 1996 if (!matcher.matches()) 1997 failCount++; 1998 pattern = Pattern.compile("\\007"); 1999 matcher = pattern.matcher("\u0007"); 2000 if (!matcher.matches()) 2001 failCount++; 2002 pattern = Pattern.compile("\\0007"); 2003 matcher = pattern.matcher("\u0007"); 2004 if (!matcher.matches()) 2005 failCount++; 2006 pattern = Pattern.compile("\\040"); 2007 matcher = pattern.matcher("\u0020"); 2008 if (!matcher.matches()) 2009 failCount++; 2010 pattern = Pattern.compile("\\0403"); 2011 matcher = pattern.matcher("\u00203"); 2012 if (!matcher.matches()) 2013 failCount++; 2014 pattern = Pattern.compile("\\0103"); 2015 matcher = pattern.matcher("\u0043"); 2016 if (!matcher.matches()) 2017 failCount++; 2018 2019 report("Octal"); 2020 } 2021 2022 private static void longPatternTest() throws Exception { 2023 try { 2024 Pattern pattern = Pattern.compile( 2025 "a 32-character-long pattern xxxx"); 2026 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 2027 pattern = Pattern.compile("a thirty four character long regex"); 2028 StringBuffer patternToBe = new StringBuffer(101); 2029 for (int i=0; i<100; i++) 2030 patternToBe.append((char)(97 + i%26)); 2031 pattern = Pattern.compile(patternToBe.toString()); 2032 } catch (PatternSyntaxException e) { 2033 failCount++; 2034 } 2035 2036 // Supplementary character test 2037 try { 2038 Pattern pattern = Pattern.compile( 2039 toSupplementaries("a 32-character-long pattern xxxx")); 2040 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 2041 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 2042 StringBuffer patternToBe = new StringBuffer(101*2); 2043 for (int i=0; i<100; i++) 2044 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 2045 + 97 + i%26)); 2046 pattern = Pattern.compile(patternToBe.toString()); 2047 } catch (PatternSyntaxException e) { 2048 failCount++; 2049 } 2050 report("LongPattern"); 2051 } 2052 2053 private static void group0Test() throws Exception { 2054 Pattern pattern = Pattern.compile("(tes)ting"); 2055 Matcher matcher = pattern.matcher("testing"); 2056 check(matcher, "testing"); 2057 2058 matcher.reset("testing"); 2059 if (matcher.lookingAt()) { 2060 if (!matcher.group(0).equals("testing")) 2061 failCount++; 2062 } else { 2063 failCount++; 2064 } 2065 2066 matcher.reset("testing"); 2067 if (matcher.matches()) { 2068 if (!matcher.group(0).equals("testing")) 2069 failCount++; 2070 } else { 2071 failCount++; 2072 } 2073 2074 pattern = Pattern.compile("(tes)ting"); 2075 matcher = pattern.matcher("testing"); 2076 if (matcher.lookingAt()) { 2077 if (!matcher.group(0).equals("testing")) 2078 failCount++; 2079 } else { 2080 failCount++; 2081 } 2082 2083 pattern = Pattern.compile("^(tes)ting"); 2084 matcher = pattern.matcher("testing"); 2085 if (matcher.matches()) { 2086 if (!matcher.group(0).equals("testing")) 2087 failCount++; 2088 } else { 2089 failCount++; 2090 } 2091 2092 // Supplementary character test 2093 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2094 matcher = pattern.matcher(toSupplementaries("testing")); 2095 check(matcher, toSupplementaries("testing")); 2096 2097 matcher.reset(toSupplementaries("testing")); 2098 if (matcher.lookingAt()) { 2099 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2100 failCount++; 2101 } else { 2102 failCount++; 2103 } 2104 2105 matcher.reset(toSupplementaries("testing")); 2106 if (matcher.matches()) { 2107 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2108 failCount++; 2109 } else { 2110 failCount++; 2111 } 2112 2113 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2114 matcher = pattern.matcher(toSupplementaries("testing")); 2115 if (matcher.lookingAt()) { 2116 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2117 failCount++; 2118 } else { 2119 failCount++; 2120 } 2121 2122 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2123 matcher = pattern.matcher(toSupplementaries("testing")); 2124 if (matcher.matches()) { 2125 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2126 failCount++; 2127 } else { 2128 failCount++; 2129 } 2130 2131 report("Group0"); 2132 } 2133 2134 private static void findIntTest() throws Exception { 2135 Pattern p = Pattern.compile("blah"); 2136 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2137 boolean result = m.find(2); 2138 if (!result) 2139 failCount++; 2140 2141 p = Pattern.compile("$"); 2142 m = p.matcher("1234567890"); 2143 result = m.find(10); 2144 if (!result) 2145 failCount++; 2146 try { 2147 result = m.find(11); 2148 failCount++; 2149 } catch (IndexOutOfBoundsException e) { 2150 // correct result 2151 } 2152 2153 // Supplementary character test 2154 p = Pattern.compile(toSupplementaries("blah")); 2155 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2156 result = m.find(2); 2157 if (!result) 2158 failCount++; 2159 2160 report("FindInt"); 2161 } 2162 2163 private static void emptyPatternTest() throws Exception { 2164 Pattern p = Pattern.compile(""); 2165 Matcher m = p.matcher("foo"); 2166 2167 // Should find empty pattern at beginning of input 2168 boolean result = m.find(); 2169 if (result != true) 2170 failCount++; 2171 if (m.start() != 0) 2172 failCount++; 2173 2174 // Should not match entire input if input is not empty 2175 m.reset(); 2176 result = m.matches(); 2177 if (result == true) 2178 failCount++; 2179 2180 try { 2181 m.start(0); 2182 failCount++; 2183 } catch (IllegalStateException e) { 2184 // Correct result 2185 } 2186 2187 // Should match entire input if input is empty 2188 m.reset(""); 2189 result = m.matches(); 2190 if (result != true) 2191 failCount++; 2192 2193 result = Pattern.matches("", ""); 2194 if (result != true) 2195 failCount++; 2196 2197 result = Pattern.matches("", "foo"); 2198 if (result == true) 2199 failCount++; 2200 report("EmptyPattern"); 2201 } 2202 2203 private static void charClassTest() throws Exception { 2204 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2205 check(pattern, "blahb]blech", true); 2206 2207 pattern = Pattern.compile("[abc[def]]"); 2208 check(pattern, "b", true); 2209 2210 // Supplementary character tests 2211 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2212 check(pattern, toSupplementaries("blahb]blech"), true); 2213 2214 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2215 check(pattern, toSupplementaries("b"), true); 2216 2217 try { 2218 // u00ff when UNICODE_CASE 2219 pattern = Pattern.compile("[ab\u00ffcd]", 2220 Pattern.CASE_INSENSITIVE| 2221 Pattern.UNICODE_CASE); 2222 check(pattern, "ab\u00ffcd", true); 2223 check(pattern, "Ab\u0178Cd", true); 2224 2225 // u00b5 when UNICODE_CASE 2226 pattern = Pattern.compile("[ab\u00b5cd]", 2227 Pattern.CASE_INSENSITIVE| 2228 Pattern.UNICODE_CASE); 2229 check(pattern, "ab\u00b5cd", true); 2230 check(pattern, "Ab\u039cCd", true); 2231 } catch (Exception e) { failCount++; } 2232 2233 /* Special cases 2234 (1)LatinSmallLetterLongS u+017f 2235 (2)LatinSmallLetterDotlessI u+0131 2236 (3)LatineCapitalLetterIWithDotAbove u+0130 2237 (4)KelvinSign u+212a 2238 (5)AngstromSign u+212b 2239 */ 2240 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2241 pattern = Pattern.compile("[sik\u00c5]+", flags); 2242 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2243 failCount++; 2244 2245 report("CharClass"); 2246 } 2247 2248 private static void caretTest() throws Exception { 2249 Pattern pattern = Pattern.compile("\\w*"); 2250 Matcher matcher = pattern.matcher("a#bc#def##g"); 2251 check(matcher, "a"); 2252 check(matcher, ""); 2253 check(matcher, "bc"); 2254 check(matcher, ""); 2255 check(matcher, "def"); 2256 check(matcher, ""); 2257 check(matcher, ""); 2258 check(matcher, "g"); 2259 check(matcher, ""); 2260 if (matcher.find()) 2261 failCount++; 2262 2263 pattern = Pattern.compile("^\\w*"); 2264 matcher = pattern.matcher("a#bc#def##g"); 2265 check(matcher, "a"); 2266 if (matcher.find()) 2267 failCount++; 2268 2269 pattern = Pattern.compile("\\w"); 2270 matcher = pattern.matcher("abc##x"); 2271 check(matcher, "a"); 2272 check(matcher, "b"); 2273 check(matcher, "c"); 2274 check(matcher, "x"); 2275 if (matcher.find()) 2276 failCount++; 2277 2278 pattern = Pattern.compile("^\\w"); 2279 matcher = pattern.matcher("abc##x"); 2280 check(matcher, "a"); 2281 if (matcher.find()) 2282 failCount++; 2283 2284 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2285 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2286 check(matcher, "abc"); 2287 if (matcher.find()) 2288 failCount++; 2289 2290 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2291 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2292 check(matcher, "abc"); 2293 check(matcher, "jkl"); 2294 if (matcher.find()) 2295 failCount++; 2296 2297 pattern = Pattern.compile("^", Pattern.MULTILINE); 2298 matcher = pattern.matcher("this is some text"); 2299 String result = matcher.replaceAll("X"); 2300 if (!result.equals("Xthis is some text")) 2301 failCount++; 2302 2303 pattern = Pattern.compile("^"); 2304 matcher = pattern.matcher("this is some text"); 2305 result = matcher.replaceAll("X"); 2306 if (!result.equals("Xthis is some text")) 2307 failCount++; 2308 2309 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2310 matcher = pattern.matcher("this is some text\n"); 2311 result = matcher.replaceAll("X"); 2312 if (!result.equals("Xthis is some text\n")) 2313 failCount++; 2314 2315 report("Caret"); 2316 } 2317 2318 private static void groupCaptureTest() throws Exception { 2319 // Independent group 2320 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2321 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2322 matcher.find(); 2323 try { 2324 String blah = matcher.group(1); 2325 failCount++; 2326 } catch (IndexOutOfBoundsException ioobe) { 2327 // Good result 2328 } 2329 // Pure group 2330 pattern = Pattern.compile("x+(?:y+)z+"); 2331 matcher = pattern.matcher("xxxyyyzzz"); 2332 matcher.find(); 2333 try { 2334 String blah = matcher.group(1); 2335 failCount++; 2336 } catch (IndexOutOfBoundsException ioobe) { 2337 // Good result 2338 } 2339 2340 // Supplementary character tests 2341 // Independent group 2342 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2343 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2344 matcher.find(); 2345 try { 2346 String blah = matcher.group(1); 2347 failCount++; 2348 } catch (IndexOutOfBoundsException ioobe) { 2349 // Good result 2350 } 2351 // Pure group 2352 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2353 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2354 matcher.find(); 2355 try { 2356 String blah = matcher.group(1); 2357 failCount++; 2358 } catch (IndexOutOfBoundsException ioobe) { 2359 // Good result 2360 } 2361 2362 report("GroupCapture"); 2363 } 2364 2365 private static void backRefTest() throws Exception { 2366 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2367 check(pattern, "zzzaabcazzz", true); 2368 2369 pattern = Pattern.compile("(a*)bc\\1"); 2370 check(pattern, "zzzaabcaazzz", true); 2371 2372 pattern = Pattern.compile("(abc)(def)\\1"); 2373 check(pattern, "abcdefabc", true); 2374 2375 pattern = Pattern.compile("(abc)(def)\\3"); 2376 check(pattern, "abcdefabc", false); 2377 2378 try { 2379 for (int i = 1; i < 10; i++) { 2380 // Make sure backref 1-9 are always accepted 2381 pattern = Pattern.compile("abcdef\\" + i); 2382 // and fail to match if the target group does not exit 2383 check(pattern, "abcdef", false); 2384 } 2385 } catch(PatternSyntaxException e) { 2386 failCount++; 2387 } 2388 2389 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2390 check(pattern, "abcdefghija", false); 2391 check(pattern, "abcdefghija1", true); 2392 2393 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2394 check(pattern, "abcdefghijkk", true); 2395 2396 pattern = Pattern.compile("(a)bcdefghij\\11"); 2397 check(pattern, "abcdefghija1", true); 2398 2399 // Supplementary character tests 2400 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2401 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2402 2403 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2404 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2405 2406 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2407 check(pattern, toSupplementaries("abcdefabc"), true); 2408 2409 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2410 check(pattern, toSupplementaries("abcdefabc"), false); 2411 2412 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2413 check(pattern, toSupplementaries("abcdefghija"), false); 2414 check(pattern, toSupplementaries("abcdefghija1"), true); 2415 2416 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2417 check(pattern, toSupplementaries("abcdefghijkk"), true); 2418 2419 report("BackRef"); 2420 } 2421 2422 /** 2423 * Unicode Technical Report #18, section 2.6 End of Line 2424 * There is no empty line to be matched in the sequence \u000D\u000A 2425 * but there is an empty line in the sequence \u000A\u000D. 2426 */ 2427 private static void anchorTest() throws Exception { 2428 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2429 Matcher m = p.matcher("blah1\r\nblah2"); 2430 m.find(); 2431 m.find(); 2432 if (!m.group().equals("blah2")) 2433 failCount++; 2434 2435 m.reset("blah1\n\rblah2"); 2436 m.find(); 2437 m.find(); 2438 m.find(); 2439 if (!m.group().equals("blah2")) 2440 failCount++; 2441 2442 // Test behavior of $ with \r\n at end of input 2443 p = Pattern.compile(".+$"); 2444 m = p.matcher("blah1\r\n"); 2445 if (!m.find()) 2446 failCount++; 2447 if (!m.group().equals("blah1")) 2448 failCount++; 2449 if (m.find()) 2450 failCount++; 2451 2452 // Test behavior of $ with \r\n at end of input in multiline 2453 p = Pattern.compile(".+$", Pattern.MULTILINE); 2454 m = p.matcher("blah1\r\n"); 2455 if (!m.find()) 2456 failCount++; 2457 if (m.find()) 2458 failCount++; 2459 2460 // Test for $ recognition of \u0085 for bug 4527731 2461 p = Pattern.compile(".+$", Pattern.MULTILINE); 2462 m = p.matcher("blah1\u0085"); 2463 if (!m.find()) 2464 failCount++; 2465 2466 // Supplementary character test 2467 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2468 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2469 m.find(); 2470 m.find(); 2471 if (!m.group().equals(toSupplementaries("blah2"))) 2472 failCount++; 2473 2474 m.reset(toSupplementaries("blah1\n\rblah2")); 2475 m.find(); 2476 m.find(); 2477 m.find(); 2478 if (!m.group().equals(toSupplementaries("blah2"))) 2479 failCount++; 2480 2481 // Test behavior of $ with \r\n at end of input 2482 p = Pattern.compile(".+$"); 2483 m = p.matcher(toSupplementaries("blah1\r\n")); 2484 if (!m.find()) 2485 failCount++; 2486 if (!m.group().equals(toSupplementaries("blah1"))) 2487 failCount++; 2488 if (m.find()) 2489 failCount++; 2490 2491 // Test behavior of $ with \r\n at end of input in multiline 2492 p = Pattern.compile(".+$", Pattern.MULTILINE); 2493 m = p.matcher(toSupplementaries("blah1\r\n")); 2494 if (!m.find()) 2495 failCount++; 2496 if (m.find()) 2497 failCount++; 2498 2499 // Test for $ recognition of \u0085 for bug 4527731 2500 p = Pattern.compile(".+$", Pattern.MULTILINE); 2501 m = p.matcher(toSupplementaries("blah1\u0085")); 2502 if (!m.find()) 2503 failCount++; 2504 2505 report("Anchors"); 2506 } 2507 2508 /** 2509 * A basic sanity test of Matcher.lookingAt(). 2510 */ 2511 private static void lookingAtTest() throws Exception { 2512 Pattern p = Pattern.compile("(ab)(c*)"); 2513 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2514 2515 if (!m.lookingAt()) 2516 failCount++; 2517 2518 if (!m.group().equals(m.group(0))) 2519 failCount++; 2520 2521 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2522 if (m.lookingAt()) 2523 failCount++; 2524 2525 // Supplementary character test 2526 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2527 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2528 2529 if (!m.lookingAt()) 2530 failCount++; 2531 2532 if (!m.group().equals(m.group(0))) 2533 failCount++; 2534 2535 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2536 if (m.lookingAt()) 2537 failCount++; 2538 2539 report("Looking At"); 2540 } 2541 2542 /** 2543 * A basic sanity test of Matcher.matches(). 2544 */ 2545 private static void matchesTest() throws Exception { 2546 // matches() 2547 Pattern p = Pattern.compile("ulb(c*)"); 2548 Matcher m = p.matcher("ulbcccccc"); 2549 if (!m.matches()) 2550 failCount++; 2551 2552 // find() but not matches() 2553 m.reset("zzzulbcccccc"); 2554 if (m.matches()) 2555 failCount++; 2556 2557 // lookingAt() but not matches() 2558 m.reset("ulbccccccdef"); 2559 if (m.matches()) 2560 failCount++; 2561 2562 // matches() 2563 p = Pattern.compile("a|ad"); 2564 m = p.matcher("ad"); 2565 if (!m.matches()) 2566 failCount++; 2567 2568 // Supplementary character test 2569 // matches() 2570 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2571 m = p.matcher(toSupplementaries("ulbcccccc")); 2572 if (!m.matches()) 2573 failCount++; 2574 2575 // find() but not matches() 2576 m.reset(toSupplementaries("zzzulbcccccc")); 2577 if (m.matches()) 2578 failCount++; 2579 2580 // lookingAt() but not matches() 2581 m.reset(toSupplementaries("ulbccccccdef")); 2582 if (m.matches()) 2583 failCount++; 2584 2585 // matches() 2586 p = Pattern.compile(toSupplementaries("a|ad")); 2587 m = p.matcher(toSupplementaries("ad")); 2588 if (!m.matches()) 2589 failCount++; 2590 2591 report("Matches"); 2592 } 2593 2594 /** 2595 * A basic sanity test of Pattern.matches(). 2596 */ 2597 private static void patternMatchesTest() throws Exception { 2598 // matches() 2599 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2600 toSupplementaries("ulbcccccc"))) 2601 failCount++; 2602 2603 // find() but not matches() 2604 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2605 toSupplementaries("zzzulbcccccc"))) 2606 failCount++; 2607 2608 // lookingAt() but not matches() 2609 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2610 toSupplementaries("ulbccccccdef"))) 2611 failCount++; 2612 2613 // Supplementary character test 2614 // matches() 2615 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2616 toSupplementaries("ulbcccccc"))) 2617 failCount++; 2618 2619 // find() but not matches() 2620 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2621 toSupplementaries("zzzulbcccccc"))) 2622 failCount++; 2623 2624 // lookingAt() but not matches() 2625 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2626 toSupplementaries("ulbccccccdef"))) 2627 failCount++; 2628 2629 report("Pattern Matches"); 2630 } 2631 2632 /** 2633 * Canonical equivalence testing. Tests the ability of the engine 2634 * to match sequences that are not explicitly specified in the 2635 * pattern when they are considered equivalent by the Unicode Standard. 2636 */ 2637 private static void ceTest() throws Exception { 2638 // Decomposed char outside char classes 2639 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2640 Matcher m = p.matcher("test\u00e5"); 2641 if (!m.matches()) 2642 failCount++; 2643 2644 m.reset("testa\u030a"); 2645 if (!m.matches()) 2646 failCount++; 2647 2648 // Composed char outside char classes 2649 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2650 m = p.matcher("test\u00e5"); 2651 if (!m.matches()) 2652 failCount++; 2653 2654 m.reset("testa\u030a"); 2655 if (!m.find()) 2656 failCount++; 2657 2658 // Decomposed char inside a char class 2659 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2660 m = p.matcher("test\u00e5"); 2661 if (!m.find()) 2662 failCount++; 2663 2664 m.reset("testa\u030a"); 2665 if (!m.find()) 2666 failCount++; 2667 2668 // Composed char inside a char class 2669 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2670 m = p.matcher("test\u00e5"); 2671 if (!m.find()) 2672 failCount++; 2673 2674 m.reset("testa\u0300"); 2675 if (!m.find()) 2676 failCount++; 2677 2678 m.reset("testa\u030a"); 2679 if (!m.find()) 2680 failCount++; 2681 2682 // Marks that cannot legally change order and be equivalent 2683 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2684 check(p, "testa\u0308\u0300", true); 2685 check(p, "testa\u0300\u0308", false); 2686 2687 // Marks that can legally change order and be equivalent 2688 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2689 check(p, "testa\u0308\u0323", true); 2690 check(p, "testa\u0323\u0308", true); 2691 2692 // Test all equivalences of the sequence a\u0308\u0323\u0300 2693 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2694 check(p, "testa\u0308\u0323\u0300", true); 2695 check(p, "testa\u0323\u0308\u0300", true); 2696 check(p, "testa\u0308\u0300\u0323", true); 2697 check(p, "test\u00e4\u0323\u0300", true); 2698 check(p, "test\u00e4\u0300\u0323", true); 2699 2700 Object[][] data = new Object[][] { 2701 2702 // JDK-4867170 2703 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true }, 2704 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true }, 2705 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true }, 2706 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true }, 2707 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true }, 2708 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true }, 2709 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true }, 2710 2711 { "\\p{IsGreek}", "ab\u1f80cd", "f", true }, 2712 { "\\p{IsGreek}", "ab\u1f81cd", "f", true }, 2713 { "\\p{IsGreek}", "ab\u1f82cd", "f", true }, 2714 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true }, 2715 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true }, 2716 2717 // backtracking, force to match "\u1f80", instead of \u1f82" 2718 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true }, 2719 2720 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true }, 2721 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true }, 2722 2723 { "[^\u1f80-\u1f82]","\u1f81", "m", false }, 2724 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false }, 2725 { "[^\u1f01\u0345]", "\u1f81", "f", false }, 2726 2727 { "[^\u1f81]+", "\u1f80\u1f82", "f", true }, 2728 { "[\u1f80]", "ab\u1f80cd", "f", true }, 2729 { "\u1f80", "ab\u1f80cd", "f", true }, 2730 { "\u1f00\u0345\u0300", "\u1f82", "m", true }, 2731 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true }, 2732 { "\u1f82", "\u1f00\u0345\u0300", "m", true }, 2733 { "\u1f82", "\u1f80\u0300", "m", true }, 2734 2735 // JDK-7080302 # compile failed 2736 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true}, 2737 2738 // JDK-6728861, same cause as above one 2739 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true}, 2740 2741 // JDK-6995635 2742 { "(\u00e9)", "e\u0301", "m", true }, 2743 2744 // JDK-6736245 2745 // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc 2746 { "\u2ADC", "\u2ADC", "m", true}, // NFC 2747 { "\u2ADC", "\u2ADD\u0338", "m", true}, // NFD 2748 2749 // 4916384. 2750 // Decomposed hangul (jamos) works inside clazz 2751 { "[\u1100\u1161]", "\u1100\u1161", "m", true}, 2752 { "[\u1100\u1161]", "\uac00", "m", true}, 2753 2754 { "[\uac00]", "\u1100\u1161", "m", true}, 2755 { "[\uac00]", "\uac00", "m", true}, 2756 2757 // Decomposed hangul (jamos) 2758 { "\u1100\u1161", "\u1100\u1161", "m", true}, 2759 { "\u1100\u1161", "\uac00", "m", true}, 2760 2761 // Composed hangul 2762 { "\uac00", "\u1100\u1161", "m", true }, 2763 { "\uac00", "\uac00", "m", true }, 2764 2765 /* Need a NFDSlice to nfd the source to solve this issue 2766 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2767 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165> 2768 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2769 2770 // Decomposed supplementary outside char classes 2771 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2772 // Composed supplementary outside char classes 2773 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2774 */ 2775 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2776 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2777 2778 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true }, 2779 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2780 }; 2781 2782 int failCount = 0; 2783 for (Object[] d : data) { 2784 String pn = (String)d[0]; 2785 String tt = (String)d[1]; 2786 boolean isFind = "f".equals(((String)d[2])); 2787 boolean expected = (boolean)d[3]; 2788 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find() 2789 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches(); 2790 if (ret != expected) { 2791 failCount++; 2792 continue; 2793 } 2794 } 2795 report("Canonical Equivalence"); 2796 } 2797 2798 /** 2799 * A basic sanity test of Matcher.replaceAll(). 2800 */ 2801 private static void globalSubstitute() throws Exception { 2802 // Global substitution with a literal 2803 Pattern p = Pattern.compile("(ab)(c*)"); 2804 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2805 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2806 failCount++; 2807 2808 m.reset("zzzabccczzzabcczzzabccczzz"); 2809 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2810 failCount++; 2811 2812 // Global substitution with groups 2813 m.reset("zzzabccczzzabcczzzabccczzz"); 2814 String result = m.replaceAll("$1"); 2815 if (!result.equals("zzzabzzzabzzzabzzz")) 2816 failCount++; 2817 2818 // Supplementary character test 2819 // Global substitution with a literal 2820 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2821 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2822 if (!m.replaceAll(toSupplementaries("test")). 2823 equals(toSupplementaries("testzzztestzzztest"))) 2824 failCount++; 2825 2826 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2827 if (!m.replaceAll(toSupplementaries("test")). 2828 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2829 failCount++; 2830 2831 // Global substitution with groups 2832 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2833 result = m.replaceAll("$1"); 2834 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2835 failCount++; 2836 2837 report("Global Substitution"); 2838 } 2839 2840 /** 2841 * Tests the usage of Matcher.appendReplacement() with literal 2842 * and group substitutions. 2843 */ 2844 private static void stringbufferSubstitute() throws Exception { 2845 // SB substitution with literal 2846 String blah = "zzzblahzzz"; 2847 Pattern p = Pattern.compile("blah"); 2848 Matcher m = p.matcher(blah); 2849 StringBuffer result = new StringBuffer(); 2850 try { 2851 m.appendReplacement(result, "blech"); 2852 failCount++; 2853 } catch (IllegalStateException e) { 2854 } 2855 m.find(); 2856 m.appendReplacement(result, "blech"); 2857 if (!result.toString().equals("zzzblech")) 2858 failCount++; 2859 2860 m.appendTail(result); 2861 if (!result.toString().equals("zzzblechzzz")) 2862 failCount++; 2863 2864 // SB substitution with groups 2865 blah = "zzzabcdzzz"; 2866 p = Pattern.compile("(ab)(cd)*"); 2867 m = p.matcher(blah); 2868 result = new StringBuffer(); 2869 try { 2870 m.appendReplacement(result, "$1"); 2871 failCount++; 2872 } catch (IllegalStateException e) { 2873 } 2874 m.find(); 2875 m.appendReplacement(result, "$1"); 2876 if (!result.toString().equals("zzzab")) 2877 failCount++; 2878 2879 m.appendTail(result); 2880 if (!result.toString().equals("zzzabzzz")) 2881 failCount++; 2882 2883 // SB substitution with 3 groups 2884 blah = "zzzabcdcdefzzz"; 2885 p = Pattern.compile("(ab)(cd)*(ef)"); 2886 m = p.matcher(blah); 2887 result = new StringBuffer(); 2888 try { 2889 m.appendReplacement(result, "$1w$2w$3"); 2890 failCount++; 2891 } catch (IllegalStateException e) { 2892 } 2893 m.find(); 2894 m.appendReplacement(result, "$1w$2w$3"); 2895 if (!result.toString().equals("zzzabwcdwef")) 2896 failCount++; 2897 2898 m.appendTail(result); 2899 if (!result.toString().equals("zzzabwcdwefzzz")) 2900 failCount++; 2901 2902 // SB substitution with groups and three matches 2903 // skipping middle match 2904 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2905 p = Pattern.compile("(ab)(cd*)"); 2906 m = p.matcher(blah); 2907 result = new StringBuffer(); 2908 try { 2909 m.appendReplacement(result, "$1"); 2910 failCount++; 2911 } catch (IllegalStateException e) { 2912 } 2913 m.find(); 2914 m.appendReplacement(result, "$1"); 2915 if (!result.toString().equals("zzzab")) 2916 failCount++; 2917 2918 m.find(); 2919 m.find(); 2920 m.appendReplacement(result, "$2"); 2921 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2922 failCount++; 2923 2924 m.appendTail(result); 2925 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2926 failCount++; 2927 2928 // Check to make sure escaped $ is ignored 2929 blah = "zzzabcdcdefzzz"; 2930 p = Pattern.compile("(ab)(cd)*(ef)"); 2931 m = p.matcher(blah); 2932 result = new StringBuffer(); 2933 m.find(); 2934 m.appendReplacement(result, "$1w\\$2w$3"); 2935 if (!result.toString().equals("zzzabw$2wef")) 2936 failCount++; 2937 2938 m.appendTail(result); 2939 if (!result.toString().equals("zzzabw$2wefzzz")) 2940 failCount++; 2941 2942 // Check to make sure a reference to nonexistent group causes error 2943 blah = "zzzabcdcdefzzz"; 2944 p = Pattern.compile("(ab)(cd)*(ef)"); 2945 m = p.matcher(blah); 2946 result = new StringBuffer(); 2947 m.find(); 2948 try { 2949 m.appendReplacement(result, "$1w$5w$3"); 2950 failCount++; 2951 } catch (IndexOutOfBoundsException ioobe) { 2952 // Correct result 2953 } 2954 2955 // Check double digit group references 2956 blah = "zzz123456789101112zzz"; 2957 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2958 m = p.matcher(blah); 2959 result = new StringBuffer(); 2960 m.find(); 2961 m.appendReplacement(result, "$1w$11w$3"); 2962 if (!result.toString().equals("zzz1w11w3")) 2963 failCount++; 2964 2965 // Check to make sure it backs off $15 to $1 if only three groups 2966 blah = "zzzabcdcdefzzz"; 2967 p = Pattern.compile("(ab)(cd)*(ef)"); 2968 m = p.matcher(blah); 2969 result = new StringBuffer(); 2970 m.find(); 2971 m.appendReplacement(result, "$1w$15w$3"); 2972 if (!result.toString().equals("zzzabwab5wef")) 2973 failCount++; 2974 2975 2976 // Supplementary character test 2977 // SB substitution with literal 2978 blah = toSupplementaries("zzzblahzzz"); 2979 p = Pattern.compile(toSupplementaries("blah")); 2980 m = p.matcher(blah); 2981 result = new StringBuffer(); 2982 try { 2983 m.appendReplacement(result, toSupplementaries("blech")); 2984 failCount++; 2985 } catch (IllegalStateException e) { 2986 } 2987 m.find(); 2988 m.appendReplacement(result, toSupplementaries("blech")); 2989 if (!result.toString().equals(toSupplementaries("zzzblech"))) 2990 failCount++; 2991 2992 m.appendTail(result); 2993 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 2994 failCount++; 2995 2996 // SB substitution with groups 2997 blah = toSupplementaries("zzzabcdzzz"); 2998 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 2999 m = p.matcher(blah); 3000 result = new StringBuffer(); 3001 try { 3002 m.appendReplacement(result, "$1"); 3003 failCount++; 3004 } catch (IllegalStateException e) { 3005 } 3006 m.find(); 3007 m.appendReplacement(result, "$1"); 3008 if (!result.toString().equals(toSupplementaries("zzzab"))) 3009 failCount++; 3010 3011 m.appendTail(result); 3012 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3013 failCount++; 3014 3015 // SB substitution with 3 groups 3016 blah = toSupplementaries("zzzabcdcdefzzz"); 3017 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3018 m = p.matcher(blah); 3019 result = new StringBuffer(); 3020 try { 3021 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3022 failCount++; 3023 } catch (IllegalStateException e) { 3024 } 3025 m.find(); 3026 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3027 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3028 failCount++; 3029 3030 m.appendTail(result); 3031 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3032 failCount++; 3033 3034 // SB substitution with groups and three matches 3035 // skipping middle match 3036 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3037 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3038 m = p.matcher(blah); 3039 result = new StringBuffer(); 3040 try { 3041 m.appendReplacement(result, "$1"); 3042 failCount++; 3043 } catch (IllegalStateException e) { 3044 } 3045 m.find(); 3046 m.appendReplacement(result, "$1"); 3047 if (!result.toString().equals(toSupplementaries("zzzab"))) 3048 failCount++; 3049 3050 m.find(); 3051 m.find(); 3052 m.appendReplacement(result, "$2"); 3053 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3054 failCount++; 3055 3056 m.appendTail(result); 3057 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3058 failCount++; 3059 3060 // Check to make sure escaped $ is ignored 3061 blah = toSupplementaries("zzzabcdcdefzzz"); 3062 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3063 m = p.matcher(blah); 3064 result = new StringBuffer(); 3065 m.find(); 3066 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3067 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3068 failCount++; 3069 3070 m.appendTail(result); 3071 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3072 failCount++; 3073 3074 // Check to make sure a reference to nonexistent group causes error 3075 blah = toSupplementaries("zzzabcdcdefzzz"); 3076 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3077 m = p.matcher(blah); 3078 result = new StringBuffer(); 3079 m.find(); 3080 try { 3081 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3082 failCount++; 3083 } catch (IndexOutOfBoundsException ioobe) { 3084 // Correct result 3085 } 3086 3087 // Check double digit group references 3088 blah = toSupplementaries("zzz123456789101112zzz"); 3089 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3090 m = p.matcher(blah); 3091 result = new StringBuffer(); 3092 m.find(); 3093 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3094 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3095 failCount++; 3096 3097 // Check to make sure it backs off $15 to $1 if only three groups 3098 blah = toSupplementaries("zzzabcdcdefzzz"); 3099 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3100 m = p.matcher(blah); 3101 result = new StringBuffer(); 3102 m.find(); 3103 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3104 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3105 failCount++; 3106 3107 // Check nothing has been appended into the output buffer if 3108 // the replacement string triggers IllegalArgumentException. 3109 p = Pattern.compile("(abc)"); 3110 m = p.matcher("abcd"); 3111 result = new StringBuffer(); 3112 m.find(); 3113 try { 3114 m.appendReplacement(result, ("xyz$g")); 3115 failCount++; 3116 } catch (IllegalArgumentException iae) { 3117 if (result.length() != 0) 3118 failCount++; 3119 } 3120 3121 report("SB Substitution"); 3122 } 3123 3124 /** 3125 * Tests the usage of Matcher.appendReplacement() with literal 3126 * and group substitutions. 3127 */ 3128 private static void stringbuilderSubstitute() throws Exception { 3129 // SB substitution with literal 3130 String blah = "zzzblahzzz"; 3131 Pattern p = Pattern.compile("blah"); 3132 Matcher m = p.matcher(blah); 3133 StringBuilder result = new StringBuilder(); 3134 try { 3135 m.appendReplacement(result, "blech"); 3136 failCount++; 3137 } catch (IllegalStateException e) { 3138 } 3139 m.find(); 3140 m.appendReplacement(result, "blech"); 3141 if (!result.toString().equals("zzzblech")) 3142 failCount++; 3143 3144 m.appendTail(result); 3145 if (!result.toString().equals("zzzblechzzz")) 3146 failCount++; 3147 3148 // SB substitution with groups 3149 blah = "zzzabcdzzz"; 3150 p = Pattern.compile("(ab)(cd)*"); 3151 m = p.matcher(blah); 3152 result = new StringBuilder(); 3153 try { 3154 m.appendReplacement(result, "$1"); 3155 failCount++; 3156 } catch (IllegalStateException e) { 3157 } 3158 m.find(); 3159 m.appendReplacement(result, "$1"); 3160 if (!result.toString().equals("zzzab")) 3161 failCount++; 3162 3163 m.appendTail(result); 3164 if (!result.toString().equals("zzzabzzz")) 3165 failCount++; 3166 3167 // SB substitution with 3 groups 3168 blah = "zzzabcdcdefzzz"; 3169 p = Pattern.compile("(ab)(cd)*(ef)"); 3170 m = p.matcher(blah); 3171 result = new StringBuilder(); 3172 try { 3173 m.appendReplacement(result, "$1w$2w$3"); 3174 failCount++; 3175 } catch (IllegalStateException e) { 3176 } 3177 m.find(); 3178 m.appendReplacement(result, "$1w$2w$3"); 3179 if (!result.toString().equals("zzzabwcdwef")) 3180 failCount++; 3181 3182 m.appendTail(result); 3183 if (!result.toString().equals("zzzabwcdwefzzz")) 3184 failCount++; 3185 3186 // SB substitution with groups and three matches 3187 // skipping middle match 3188 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3189 p = Pattern.compile("(ab)(cd*)"); 3190 m = p.matcher(blah); 3191 result = new StringBuilder(); 3192 try { 3193 m.appendReplacement(result, "$1"); 3194 failCount++; 3195 } catch (IllegalStateException e) { 3196 } 3197 m.find(); 3198 m.appendReplacement(result, "$1"); 3199 if (!result.toString().equals("zzzab")) 3200 failCount++; 3201 3202 m.find(); 3203 m.find(); 3204 m.appendReplacement(result, "$2"); 3205 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3206 failCount++; 3207 3208 m.appendTail(result); 3209 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3210 failCount++; 3211 3212 // Check to make sure escaped $ is ignored 3213 blah = "zzzabcdcdefzzz"; 3214 p = Pattern.compile("(ab)(cd)*(ef)"); 3215 m = p.matcher(blah); 3216 result = new StringBuilder(); 3217 m.find(); 3218 m.appendReplacement(result, "$1w\\$2w$3"); 3219 if (!result.toString().equals("zzzabw$2wef")) 3220 failCount++; 3221 3222 m.appendTail(result); 3223 if (!result.toString().equals("zzzabw$2wefzzz")) 3224 failCount++; 3225 3226 // Check to make sure a reference to nonexistent group causes error 3227 blah = "zzzabcdcdefzzz"; 3228 p = Pattern.compile("(ab)(cd)*(ef)"); 3229 m = p.matcher(blah); 3230 result = new StringBuilder(); 3231 m.find(); 3232 try { 3233 m.appendReplacement(result, "$1w$5w$3"); 3234 failCount++; 3235 } catch (IndexOutOfBoundsException ioobe) { 3236 // Correct result 3237 } 3238 3239 // Check double digit group references 3240 blah = "zzz123456789101112zzz"; 3241 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3242 m = p.matcher(blah); 3243 result = new StringBuilder(); 3244 m.find(); 3245 m.appendReplacement(result, "$1w$11w$3"); 3246 if (!result.toString().equals("zzz1w11w3")) 3247 failCount++; 3248 3249 // Check to make sure it backs off $15 to $1 if only three groups 3250 blah = "zzzabcdcdefzzz"; 3251 p = Pattern.compile("(ab)(cd)*(ef)"); 3252 m = p.matcher(blah); 3253 result = new StringBuilder(); 3254 m.find(); 3255 m.appendReplacement(result, "$1w$15w$3"); 3256 if (!result.toString().equals("zzzabwab5wef")) 3257 failCount++; 3258 3259 3260 // Supplementary character test 3261 // SB substitution with literal 3262 blah = toSupplementaries("zzzblahzzz"); 3263 p = Pattern.compile(toSupplementaries("blah")); 3264 m = p.matcher(blah); 3265 result = new StringBuilder(); 3266 try { 3267 m.appendReplacement(result, toSupplementaries("blech")); 3268 failCount++; 3269 } catch (IllegalStateException e) { 3270 } 3271 m.find(); 3272 m.appendReplacement(result, toSupplementaries("blech")); 3273 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3274 failCount++; 3275 m.appendTail(result); 3276 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3277 failCount++; 3278 3279 // SB substitution with groups 3280 blah = toSupplementaries("zzzabcdzzz"); 3281 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3282 m = p.matcher(blah); 3283 result = new StringBuilder(); 3284 try { 3285 m.appendReplacement(result, "$1"); 3286 failCount++; 3287 } catch (IllegalStateException e) { 3288 } 3289 m.find(); 3290 m.appendReplacement(result, "$1"); 3291 if (!result.toString().equals(toSupplementaries("zzzab"))) 3292 failCount++; 3293 3294 m.appendTail(result); 3295 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3296 failCount++; 3297 3298 // SB substitution with 3 groups 3299 blah = toSupplementaries("zzzabcdcdefzzz"); 3300 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3301 m = p.matcher(blah); 3302 result = new StringBuilder(); 3303 try { 3304 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3305 failCount++; 3306 } catch (IllegalStateException e) { 3307 } 3308 m.find(); 3309 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3310 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3311 failCount++; 3312 3313 m.appendTail(result); 3314 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3315 failCount++; 3316 3317 // SB substitution with groups and three matches 3318 // skipping middle match 3319 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3320 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3321 m = p.matcher(blah); 3322 result = new StringBuilder(); 3323 try { 3324 m.appendReplacement(result, "$1"); 3325 failCount++; 3326 } catch (IllegalStateException e) { 3327 } 3328 m.find(); 3329 m.appendReplacement(result, "$1"); 3330 if (!result.toString().equals(toSupplementaries("zzzab"))) 3331 failCount++; 3332 3333 m.find(); 3334 m.find(); 3335 m.appendReplacement(result, "$2"); 3336 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3337 failCount++; 3338 3339 m.appendTail(result); 3340 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3341 failCount++; 3342 3343 // Check to make sure escaped $ is ignored 3344 blah = toSupplementaries("zzzabcdcdefzzz"); 3345 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3346 m = p.matcher(blah); 3347 result = new StringBuilder(); 3348 m.find(); 3349 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3350 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3351 failCount++; 3352 3353 m.appendTail(result); 3354 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3355 failCount++; 3356 3357 // Check to make sure a reference to nonexistent group causes error 3358 blah = toSupplementaries("zzzabcdcdefzzz"); 3359 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3360 m = p.matcher(blah); 3361 result = new StringBuilder(); 3362 m.find(); 3363 try { 3364 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3365 failCount++; 3366 } catch (IndexOutOfBoundsException ioobe) { 3367 // Correct result 3368 } 3369 // Check double digit group references 3370 blah = toSupplementaries("zzz123456789101112zzz"); 3371 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3372 m = p.matcher(blah); 3373 result = new StringBuilder(); 3374 m.find(); 3375 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3376 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3377 failCount++; 3378 3379 // Check to make sure it backs off $15 to $1 if only three groups 3380 blah = toSupplementaries("zzzabcdcdefzzz"); 3381 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3382 m = p.matcher(blah); 3383 result = new StringBuilder(); 3384 m.find(); 3385 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3386 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3387 failCount++; 3388 // Check nothing has been appended into the output buffer if 3389 // the replacement string triggers IllegalArgumentException. 3390 p = Pattern.compile("(abc)"); 3391 m = p.matcher("abcd"); 3392 result = new StringBuilder(); 3393 m.find(); 3394 try { 3395 m.appendReplacement(result, ("xyz$g")); 3396 failCount++; 3397 } catch (IllegalArgumentException iae) { 3398 if (result.length() != 0) 3399 failCount++; 3400 } 3401 report("SB Substitution 2"); 3402 } 3403 3404 /* 3405 * 5 groups of characters are created to make a substitution string. 3406 * A base string will be created including random lead chars, the 3407 * substitution string, and random trailing chars. 3408 * A pattern containing the 5 groups is searched for and replaced with: 3409 * random group + random string + random group. 3410 * The results are checked for correctness. 3411 */ 3412 private static void substitutionBasher() { 3413 for (int runs = 0; runs<1000; runs++) { 3414 // Create a base string to work in 3415 int leadingChars = generator.nextInt(10); 3416 StringBuffer baseBuffer = new StringBuffer(100); 3417 String leadingString = getRandomAlphaString(leadingChars); 3418 baseBuffer.append(leadingString); 3419 3420 // Create 5 groups of random number of random chars 3421 // Create the string to substitute 3422 // Create the pattern string to search for 3423 StringBuffer bufferToSub = new StringBuffer(25); 3424 StringBuffer bufferToPat = new StringBuffer(50); 3425 String[] groups = new String[5]; 3426 for(int i=0; i<5; i++) { 3427 int aGroupSize = generator.nextInt(5)+1; 3428 groups[i] = getRandomAlphaString(aGroupSize); 3429 bufferToSub.append(groups[i]); 3430 bufferToPat.append('('); 3431 bufferToPat.append(groups[i]); 3432 bufferToPat.append(')'); 3433 } 3434 String stringToSub = bufferToSub.toString(); 3435 String pattern = bufferToPat.toString(); 3436 3437 // Place sub string into working string at random index 3438 baseBuffer.append(stringToSub); 3439 3440 // Append random chars to end 3441 int trailingChars = generator.nextInt(10); 3442 String trailingString = getRandomAlphaString(trailingChars); 3443 baseBuffer.append(trailingString); 3444 String baseString = baseBuffer.toString(); 3445 3446 // Create test pattern and matcher 3447 Pattern p = Pattern.compile(pattern); 3448 Matcher m = p.matcher(baseString); 3449 3450 // Reject candidate if pattern happens to start early 3451 m.find(); 3452 if (m.start() < leadingChars) 3453 continue; 3454 3455 // Reject candidate if more than one match 3456 if (m.find()) 3457 continue; 3458 3459 // Construct a replacement string with : 3460 // random group + random string + random group 3461 StringBuffer bufferToRep = new StringBuffer(); 3462 int groupIndex1 = generator.nextInt(5); 3463 bufferToRep.append("$" + (groupIndex1 + 1)); 3464 String randomMidString = getRandomAlphaString(5); 3465 bufferToRep.append(randomMidString); 3466 int groupIndex2 = generator.nextInt(5); 3467 bufferToRep.append("$" + (groupIndex2 + 1)); 3468 String replacement = bufferToRep.toString(); 3469 3470 // Do the replacement 3471 String result = m.replaceAll(replacement); 3472 3473 // Construct expected result 3474 StringBuffer bufferToRes = new StringBuffer(); 3475 bufferToRes.append(leadingString); 3476 bufferToRes.append(groups[groupIndex1]); 3477 bufferToRes.append(randomMidString); 3478 bufferToRes.append(groups[groupIndex2]); 3479 bufferToRes.append(trailingString); 3480 String expectedResult = bufferToRes.toString(); 3481 3482 // Check results 3483 if (!result.equals(expectedResult)) 3484 failCount++; 3485 } 3486 3487 report("Substitution Basher"); 3488 } 3489 3490 /* 3491 * 5 groups of characters are created to make a substitution string. 3492 * A base string will be created including random lead chars, the 3493 * substitution string, and random trailing chars. 3494 * A pattern containing the 5 groups is searched for and replaced with: 3495 * random group + random string + random group. 3496 * The results are checked for correctness. 3497 */ 3498 private static void substitutionBasher2() { 3499 for (int runs = 0; runs<1000; runs++) { 3500 // Create a base string to work in 3501 int leadingChars = generator.nextInt(10); 3502 StringBuilder baseBuffer = new StringBuilder(100); 3503 String leadingString = getRandomAlphaString(leadingChars); 3504 baseBuffer.append(leadingString); 3505 3506 // Create 5 groups of random number of random chars 3507 // Create the string to substitute 3508 // Create the pattern string to search for 3509 StringBuilder bufferToSub = new StringBuilder(25); 3510 StringBuilder bufferToPat = new StringBuilder(50); 3511 String[] groups = new String[5]; 3512 for(int i=0; i<5; i++) { 3513 int aGroupSize = generator.nextInt(5)+1; 3514 groups[i] = getRandomAlphaString(aGroupSize); 3515 bufferToSub.append(groups[i]); 3516 bufferToPat.append('('); 3517 bufferToPat.append(groups[i]); 3518 bufferToPat.append(')'); 3519 } 3520 String stringToSub = bufferToSub.toString(); 3521 String pattern = bufferToPat.toString(); 3522 3523 // Place sub string into working string at random index 3524 baseBuffer.append(stringToSub); 3525 3526 // Append random chars to end 3527 int trailingChars = generator.nextInt(10); 3528 String trailingString = getRandomAlphaString(trailingChars); 3529 baseBuffer.append(trailingString); 3530 String baseString = baseBuffer.toString(); 3531 3532 // Create test pattern and matcher 3533 Pattern p = Pattern.compile(pattern); 3534 Matcher m = p.matcher(baseString); 3535 3536 // Reject candidate if pattern happens to start early 3537 m.find(); 3538 if (m.start() < leadingChars) 3539 continue; 3540 3541 // Reject candidate if more than one match 3542 if (m.find()) 3543 continue; 3544 3545 // Construct a replacement string with : 3546 // random group + random string + random group 3547 StringBuilder bufferToRep = new StringBuilder(); 3548 int groupIndex1 = generator.nextInt(5); 3549 bufferToRep.append("$" + (groupIndex1 + 1)); 3550 String randomMidString = getRandomAlphaString(5); 3551 bufferToRep.append(randomMidString); 3552 int groupIndex2 = generator.nextInt(5); 3553 bufferToRep.append("$" + (groupIndex2 + 1)); 3554 String replacement = bufferToRep.toString(); 3555 3556 // Do the replacement 3557 String result = m.replaceAll(replacement); 3558 3559 // Construct expected result 3560 StringBuilder bufferToRes = new StringBuilder(); 3561 bufferToRes.append(leadingString); 3562 bufferToRes.append(groups[groupIndex1]); 3563 bufferToRes.append(randomMidString); 3564 bufferToRes.append(groups[groupIndex2]); 3565 bufferToRes.append(trailingString); 3566 String expectedResult = bufferToRes.toString(); 3567 3568 // Check results 3569 if (!result.equals(expectedResult)) { 3570 failCount++; 3571 } 3572 } 3573 3574 report("Substitution Basher 2"); 3575 } 3576 3577 /** 3578 * Checks the handling of some escape sequences that the Pattern 3579 * class should process instead of the java compiler. These are 3580 * not in the file because the escapes should be be processed 3581 * by the Pattern class when the regex is compiled. 3582 */ 3583 private static void escapes() throws Exception { 3584 Pattern p = Pattern.compile("\\043"); 3585 Matcher m = p.matcher("#"); 3586 if (!m.find()) 3587 failCount++; 3588 3589 p = Pattern.compile("\\x23"); 3590 m = p.matcher("#"); 3591 if (!m.find()) 3592 failCount++; 3593 3594 p = Pattern.compile("\\u0023"); 3595 m = p.matcher("#"); 3596 if (!m.find()) 3597 failCount++; 3598 3599 report("Escape sequences"); 3600 } 3601 3602 /** 3603 * Checks the handling of blank input situations. These 3604 * tests are incompatible with my test file format. 3605 */ 3606 private static void blankInput() throws Exception { 3607 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3608 Matcher m = p.matcher(""); 3609 if (m.find()) 3610 failCount++; 3611 3612 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3613 m = p.matcher(""); 3614 if (!m.find()) 3615 failCount++; 3616 3617 p = Pattern.compile("abc"); 3618 m = p.matcher(""); 3619 if (m.find()) 3620 failCount++; 3621 3622 p = Pattern.compile("a*"); 3623 m = p.matcher(""); 3624 if (!m.find()) 3625 failCount++; 3626 3627 report("Blank input"); 3628 } 3629 3630 /** 3631 * Tests the Boyer-Moore pattern matching of a character sequence 3632 * on randomly generated patterns. 3633 */ 3634 private static void bm() throws Exception { 3635 doBnM('a'); 3636 report("Boyer Moore (ASCII)"); 3637 3638 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3639 report("Boyer Moore (Supplementary)"); 3640 } 3641 3642 private static void doBnM(int baseCharacter) throws Exception { 3643 int achar=0; 3644 3645 for (int i=0; i<100; i++) { 3646 // Create a short pattern to search for 3647 int patternLength = generator.nextInt(7) + 4; 3648 StringBuffer patternBuffer = new StringBuffer(patternLength); 3649 String pattern; 3650 retry: for (;;) { 3651 for (int x=0; x<patternLength; x++) { 3652 int ch = baseCharacter + generator.nextInt(26); 3653 if (Character.isSupplementaryCodePoint(ch)) { 3654 patternBuffer.append(Character.toChars(ch)); 3655 } else { 3656 patternBuffer.append((char)ch); 3657 } 3658 } 3659 pattern = patternBuffer.toString(); 3660 3661 // Avoid patterns that start and end with the same substring 3662 // See JDK-6854417 3663 for (int x=1; x < pattern.length(); x++) { 3664 if (pattern.startsWith(pattern.substring(x))) 3665 continue retry; 3666 } 3667 break; 3668 } 3669 Pattern p = Pattern.compile(pattern); 3670 3671 // Create a buffer with random ASCII chars that does 3672 // not match the sample 3673 String toSearch = null; 3674 StringBuffer s = null; 3675 Matcher m = p.matcher(""); 3676 do { 3677 s = new StringBuffer(100); 3678 for (int x=0; x<100; x++) { 3679 int ch = baseCharacter + generator.nextInt(26); 3680 if (Character.isSupplementaryCodePoint(ch)) { 3681 s.append(Character.toChars(ch)); 3682 } else { 3683 s.append((char)ch); 3684 } 3685 } 3686 toSearch = s.toString(); 3687 m.reset(toSearch); 3688 } while (m.find()); 3689 3690 // Insert the pattern at a random spot 3691 int insertIndex = generator.nextInt(99); 3692 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3693 insertIndex++; 3694 s = s.insert(insertIndex, pattern); 3695 toSearch = s.toString(); 3696 3697 // Make sure that the pattern is found 3698 m.reset(toSearch); 3699 if (!m.find()) 3700 failCount++; 3701 3702 // Make sure that the match text is the pattern 3703 if (!m.group().equals(pattern)) 3704 failCount++; 3705 3706 // Make sure match occured at insertion point 3707 if (m.start() != insertIndex) 3708 failCount++; 3709 } 3710 } 3711 3712 /** 3713 * Tests the matching of slices on randomly generated patterns. 3714 * The Boyer-Moore optimization is not done on these patterns 3715 * because it uses unicode case folding. 3716 */ 3717 private static void slice() throws Exception { 3718 doSlice(Character.MAX_VALUE); 3719 report("Slice"); 3720 3721 doSlice(Character.MAX_CODE_POINT); 3722 report("Slice (Supplementary)"); 3723 } 3724 3725 private static void doSlice(int maxCharacter) throws Exception { 3726 Random generator = new Random(); 3727 int achar=0; 3728 3729 for (int i=0; i<100; i++) { 3730 // Create a short pattern to search for 3731 int patternLength = generator.nextInt(7) + 4; 3732 StringBuffer patternBuffer = new StringBuffer(patternLength); 3733 for (int x=0; x<patternLength; x++) { 3734 int randomChar = 0; 3735 while (!Character.isLetterOrDigit(randomChar)) 3736 randomChar = generator.nextInt(maxCharacter); 3737 if (Character.isSupplementaryCodePoint(randomChar)) { 3738 patternBuffer.append(Character.toChars(randomChar)); 3739 } else { 3740 patternBuffer.append((char) randomChar); 3741 } 3742 } 3743 String pattern = patternBuffer.toString(); 3744 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3745 3746 // Create a buffer with random chars that does not match the sample 3747 String toSearch = null; 3748 StringBuffer s = null; 3749 Matcher m = p.matcher(""); 3750 do { 3751 s = new StringBuffer(100); 3752 for (int x=0; x<100; x++) { 3753 int randomChar = 0; 3754 while (!Character.isLetterOrDigit(randomChar)) 3755 randomChar = generator.nextInt(maxCharacter); 3756 if (Character.isSupplementaryCodePoint(randomChar)) { 3757 s.append(Character.toChars(randomChar)); 3758 } else { 3759 s.append((char) randomChar); 3760 } 3761 } 3762 toSearch = s.toString(); 3763 m.reset(toSearch); 3764 } while (m.find()); 3765 3766 // Insert the pattern at a random spot 3767 int insertIndex = generator.nextInt(99); 3768 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3769 insertIndex++; 3770 s = s.insert(insertIndex, pattern); 3771 toSearch = s.toString(); 3772 3773 // Make sure that the pattern is found 3774 m.reset(toSearch); 3775 if (!m.find()) 3776 failCount++; 3777 3778 // Make sure that the match text is the pattern 3779 if (!m.group().equals(pattern)) 3780 failCount++; 3781 3782 // Make sure match occured at insertion point 3783 if (m.start() != insertIndex) 3784 failCount++; 3785 } 3786 } 3787 3788 private static void explainFailure(String pattern, String data, 3789 String expected, String actual) { 3790 System.err.println("----------------------------------------"); 3791 System.err.println("Pattern = "+pattern); 3792 System.err.println("Data = "+data); 3793 System.err.println("Expected = " + expected); 3794 System.err.println("Actual = " + actual); 3795 } 3796 3797 private static void explainFailure(String pattern, String data, 3798 Throwable t) { 3799 System.err.println("----------------------------------------"); 3800 System.err.println("Pattern = "+pattern); 3801 System.err.println("Data = "+data); 3802 t.printStackTrace(System.err); 3803 } 3804 3805 // Testing examples from a file 3806 3807 /** 3808 * Goes through the file "TestCases.txt" and creates many patterns 3809 * described in the file, matching the patterns against input lines in 3810 * the file, and comparing the results against the correct results 3811 * also found in the file. The file format is described in comments 3812 * at the head of the file. 3813 */ 3814 private static void processFile(String fileName) throws Exception { 3815 File testCases = new File(System.getProperty("test.src", "."), 3816 fileName); 3817 FileInputStream in = new FileInputStream(testCases); 3818 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3819 3820 // Process next test case. 3821 String aLine; 3822 while((aLine = r.readLine()) != null) { 3823 // Read a line for pattern 3824 String patternString = grabLine(r); 3825 Pattern p = null; 3826 try { 3827 p = compileTestPattern(patternString); 3828 } catch (PatternSyntaxException e) { 3829 String dataString = grabLine(r); 3830 String expectedResult = grabLine(r); 3831 if (expectedResult.startsWith("error")) 3832 continue; 3833 explainFailure(patternString, dataString, e); 3834 failCount++; 3835 continue; 3836 } 3837 3838 // Read a line for input string 3839 String dataString = grabLine(r); 3840 Matcher m = p.matcher(dataString); 3841 StringBuffer result = new StringBuffer(); 3842 3843 // Check for IllegalStateExceptions before a match 3844 failCount += preMatchInvariants(m); 3845 3846 boolean found = m.find(); 3847 3848 if (found) 3849 failCount += postTrueMatchInvariants(m); 3850 else 3851 failCount += postFalseMatchInvariants(m); 3852 3853 if (found) { 3854 result.append("true "); 3855 result.append(m.group(0) + " "); 3856 } else { 3857 result.append("false "); 3858 } 3859 3860 result.append(m.groupCount()); 3861 3862 if (found) { 3863 for (int i=1; i<m.groupCount()+1; i++) 3864 if (m.group(i) != null) 3865 result.append(" " +m.group(i)); 3866 } 3867 3868 // Read a line for the expected result 3869 String expectedResult = grabLine(r); 3870 3871 if (!result.toString().equals(expectedResult)) { 3872 explainFailure(patternString, dataString, expectedResult, result.toString()); 3873 failCount++; 3874 } 3875 } 3876 3877 report(fileName); 3878 } 3879 3880 private static int preMatchInvariants(Matcher m) { 3881 int failCount = 0; 3882 try { 3883 m.start(); 3884 failCount++; 3885 } catch (IllegalStateException ise) {} 3886 try { 3887 m.end(); 3888 failCount++; 3889 } catch (IllegalStateException ise) {} 3890 try { 3891 m.group(); 3892 failCount++; 3893 } catch (IllegalStateException ise) {} 3894 return failCount; 3895 } 3896 3897 private static int postFalseMatchInvariants(Matcher m) { 3898 int failCount = 0; 3899 try { 3900 m.group(); 3901 failCount++; 3902 } catch (IllegalStateException ise) {} 3903 try { 3904 m.start(); 3905 failCount++; 3906 } catch (IllegalStateException ise) {} 3907 try { 3908 m.end(); 3909 failCount++; 3910 } catch (IllegalStateException ise) {} 3911 return failCount; 3912 } 3913 3914 private static int postTrueMatchInvariants(Matcher m) { 3915 int failCount = 0; 3916 //assert(m.start() = m.start(0); 3917 if (m.start() != m.start(0)) 3918 failCount++; 3919 //assert(m.end() = m.end(0); 3920 if (m.start() != m.start(0)) 3921 failCount++; 3922 //assert(m.group() = m.group(0); 3923 if (!m.group().equals(m.group(0))) 3924 failCount++; 3925 try { 3926 m.group(50); 3927 failCount++; 3928 } catch (IndexOutOfBoundsException ise) {} 3929 3930 return failCount; 3931 } 3932 3933 private static Pattern compileTestPattern(String patternString) { 3934 if (!patternString.startsWith("'")) { 3935 return Pattern.compile(patternString); 3936 } 3937 int break1 = patternString.lastIndexOf("'"); 3938 String flagString = patternString.substring( 3939 break1+1, patternString.length()); 3940 patternString = patternString.substring(1, break1); 3941 3942 if (flagString.equals("i")) 3943 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3944 3945 if (flagString.equals("m")) 3946 return Pattern.compile(patternString, Pattern.MULTILINE); 3947 3948 return Pattern.compile(patternString); 3949 } 3950 3951 /** 3952 * Reads a line from the input file. Keeps reading lines until a non 3953 * empty non comment line is read. If the line contains a \n then 3954 * these two characters are replaced by a newline char. If a \\uxxxx 3955 * sequence is read then the sequence is replaced by the unicode char. 3956 */ 3957 private static String grabLine(BufferedReader r) throws Exception { 3958 int index = 0; 3959 String line = r.readLine(); 3960 while (line.startsWith("//") || line.length() < 1) 3961 line = r.readLine(); 3962 while ((index = line.indexOf("\\n")) != -1) { 3963 StringBuffer temp = new StringBuffer(line); 3964 temp.replace(index, index+2, "\n"); 3965 line = temp.toString(); 3966 } 3967 while ((index = line.indexOf("\\u")) != -1) { 3968 StringBuffer temp = new StringBuffer(line); 3969 String value = temp.substring(index+2, index+6); 3970 char aChar = (char)Integer.parseInt(value, 16); 3971 String unicodeChar = "" + aChar; 3972 temp.replace(index, index+6, unicodeChar); 3973 line = temp.toString(); 3974 } 3975 3976 return line; 3977 } 3978 3979 private static void check(Pattern p, String s, String g, String expected) { 3980 Matcher m = p.matcher(s); 3981 m.find(); 3982 if (!m.group(g).equals(expected) || 3983 s.charAt(m.start(g)) != expected.charAt(0) || 3984 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 3985 failCount++; 3986 } 3987 3988 private static void checkReplaceFirst(String p, String s, String r, String expected) 3989 { 3990 if (!expected.equals(Pattern.compile(p) 3991 .matcher(s) 3992 .replaceFirst(r))) 3993 failCount++; 3994 } 3995 3996 private static void checkReplaceAll(String p, String s, String r, String expected) 3997 { 3998 if (!expected.equals(Pattern.compile(p) 3999 .matcher(s) 4000 .replaceAll(r))) 4001 failCount++; 4002 } 4003 4004 private static void checkExpectedFail(String p) { 4005 try { 4006 Pattern.compile(p); 4007 } catch (PatternSyntaxException pse) { 4008 //pse.printStackTrace(); 4009 return; 4010 } 4011 failCount++; 4012 } 4013 4014 private static void checkExpectedIAE(Matcher m, String g) { 4015 m.find(); 4016 try { 4017 m.group(g); 4018 } catch (IllegalArgumentException x) { 4019 //iae.printStackTrace(); 4020 try { 4021 m.start(g); 4022 } catch (IllegalArgumentException xx) { 4023 try { 4024 m.start(g); 4025 } catch (IllegalArgumentException xxx) { 4026 return; 4027 } 4028 } 4029 } 4030 failCount++; 4031 } 4032 4033 private static void checkExpectedNPE(Matcher m) { 4034 m.find(); 4035 try { 4036 m.group(null); 4037 } catch (NullPointerException x) { 4038 try { 4039 m.start(null); 4040 } catch (NullPointerException xx) { 4041 try { 4042 m.end(null); 4043 } catch (NullPointerException xxx) { 4044 return; 4045 } 4046 } 4047 } 4048 failCount++; 4049 } 4050 4051 private static void namedGroupCaptureTest() throws Exception { 4052 check(Pattern.compile("x+(?<gname>y+)z+"), 4053 "xxxyyyzzz", 4054 "gname", 4055 "yyy"); 4056 4057 check(Pattern.compile("x+(?<gname8>y+)z+"), 4058 "xxxyyyzzz", 4059 "gname8", 4060 "yyy"); 4061 4062 //backref 4063 Pattern pattern = Pattern.compile("(a*)bc\\1"); 4064 check(pattern, "zzzaabcazzz", true); // found "abca" 4065 4066 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 4067 "zzzaabcaazzz", true); 4068 4069 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 4070 "abcdefabc", true); 4071 4072 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 4073 "abcdefghijkk", true); 4074 4075 // Supplementary character tests 4076 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4077 toSupplementaries("zzzaabcazzz"), true); 4078 4079 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4080 toSupplementaries("zzzaabcaazzz"), true); 4081 4082 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 4083 toSupplementaries("abcdefabc"), true); 4084 4085 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 4086 "(?<gname>" + 4087 toSupplementaries("k)") + "\\k<gname>"), 4088 toSupplementaries("abcdefghijkk"), true); 4089 4090 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 4091 "xxxyyyzzzyyy", 4092 "gname", 4093 "yyy"); 4094 4095 //replaceFirst/All 4096 checkReplaceFirst("(?<gn>ab)(c*)", 4097 "abccczzzabcczzzabccc", 4098 "${gn}", 4099 "abzzzabcczzzabccc"); 4100 4101 checkReplaceAll("(?<gn>ab)(c*)", 4102 "abccczzzabcczzzabccc", 4103 "${gn}", 4104 "abzzzabzzzab"); 4105 4106 4107 checkReplaceFirst("(?<gn>ab)(c*)", 4108 "zzzabccczzzabcczzzabccczzz", 4109 "${gn}", 4110 "zzzabzzzabcczzzabccczzz"); 4111 4112 checkReplaceAll("(?<gn>ab)(c*)", 4113 "zzzabccczzzabcczzzabccczzz", 4114 "${gn}", 4115 "zzzabzzzabzzzabzzz"); 4116 4117 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4118 "zzzabccczzzabcczzzabccczzz", 4119 "${gn2}", 4120 "zzzccczzzabcczzzabccczzz"); 4121 4122 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4123 "zzzabccczzzabcczzzabccczzz", 4124 "${gn2}", 4125 "zzzccczzzcczzzccczzz"); 4126 4127 //toSupplementaries("(ab)(c*)")); 4128 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4129 ")(?<gn2>" + toSupplementaries("c") + "*)", 4130 toSupplementaries("abccczzzabcczzzabccc"), 4131 "${gn1}", 4132 toSupplementaries("abzzzabcczzzabccc")); 4133 4134 4135 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4136 ")(?<gn2>" + toSupplementaries("c") + "*)", 4137 toSupplementaries("abccczzzabcczzzabccc"), 4138 "${gn1}", 4139 toSupplementaries("abzzzabzzzab")); 4140 4141 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4142 ")(?<gn2>" + toSupplementaries("c") + "*)", 4143 toSupplementaries("abccczzzabcczzzabccc"), 4144 "${gn2}", 4145 toSupplementaries("ccczzzabcczzzabccc")); 4146 4147 4148 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4149 ")(?<gn2>" + toSupplementaries("c") + "*)", 4150 toSupplementaries("abccczzzabcczzzabccc"), 4151 "${gn2}", 4152 toSupplementaries("ccczzzcczzzccc")); 4153 4154 checkReplaceFirst("(?<dog>Dog)AndCat", 4155 "zzzDogAndCatzzzDogAndCatzzz", 4156 "${dog}", 4157 "zzzDogzzzDogAndCatzzz"); 4158 4159 4160 checkReplaceAll("(?<dog>Dog)AndCat", 4161 "zzzDogAndCatzzzDogAndCatzzz", 4162 "${dog}", 4163 "zzzDogzzzDogzzz"); 4164 4165 // backref in Matcher & String 4166 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") || 4167 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4168 failCount++; 4169 4170 // negative 4171 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4172 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4173 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4174 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4175 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4176 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4177 "gnameX"); 4178 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4179 report("NamedGroupCapture"); 4180 } 4181 4182 // This is for bug 6919132 4183 private static void nonBmpClassComplementTest() throws Exception { 4184 Pattern p = Pattern.compile("\\P{Lu}"); 4185 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4186 4187 if (m.find() && m.start() == 1) 4188 failCount++; 4189 4190 // from a unicode category 4191 p = Pattern.compile("\\P{Lu}"); 4192 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4193 if (m.find()) 4194 failCount++; 4195 if (!m.hitEnd()) 4196 failCount++; 4197 4198 // block 4199 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4200 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4201 if (m.find() && m.start() == 1) 4202 failCount++; 4203 4204 p = Pattern.compile("\\P{sc=GRANTHA}"); 4205 m = p.matcher(new String(new int[] {0x11350}, 0, 1)); 4206 if (m.find() && m.start() == 1) 4207 failCount++; 4208 4209 report("NonBmpClassComplement"); 4210 } 4211 4212 private static void unicodePropertiesTest() throws Exception { 4213 // different forms 4214 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4215 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4216 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4217 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4218 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4219 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4220 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4221 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4222 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4223 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4224 failCount++; 4225 4226 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4227 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4228 Matcher lastSM = common; 4229 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4230 4231 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4232 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4233 Matcher lastBM = latin; 4234 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4235 4236 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4237 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4238 continue; // only pick couple code points, they are the same 4239 } 4240 4241 // Unicode Script 4242 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4243 Matcher m; 4244 String str = new String(Character.toChars(cp)); 4245 if (script == lastScript) { 4246 m = lastSM; 4247 m.reset(str); 4248 } else { 4249 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4250 } 4251 if (!m.matches()) { 4252 failCount++; 4253 } 4254 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common; 4255 other.reset(str); 4256 if (other.matches()) { 4257 failCount++; 4258 } 4259 lastSM = m; 4260 lastScript = script; 4261 4262 // Unicode Block 4263 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4264 if (block == null) { 4265 //System.out.printf("Not a Block: cp=%x%n", cp); 4266 continue; 4267 } 4268 if (block == lastBlock) { 4269 m = lastBM; 4270 m.reset(str); 4271 } else { 4272 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4273 } 4274 if (!m.matches()) { 4275 failCount++; 4276 } 4277 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4278 other.reset(str); 4279 if (other.matches()) { 4280 failCount++; 4281 } 4282 lastBM = m; 4283 lastBlock = block; 4284 } 4285 report("unicodeProperties"); 4286 } 4287 4288 private static void unicodeHexNotationTest() throws Exception { 4289 4290 // negative 4291 checkExpectedFail("\\x{-23}"); 4292 checkExpectedFail("\\x{110000}"); 4293 checkExpectedFail("\\x{}"); 4294 checkExpectedFail("\\x{AB[ef]"); 4295 4296 // codepoint 4297 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4298 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4299 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4300 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4301 4302 // in class 4303 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4304 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4305 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4306 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4307 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4308 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4309 4310 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4311 String s = "A" + new String(Character.toChars(cp)) + "B"; 4312 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp) 4313 : String.format("\\u%04x\\u%04x", 4314 (int) Character.toChars(cp)[0], 4315 (int) Character.toChars(cp)[1]); 4316 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4317 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4318 failCount++; 4319 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4320 failCount++; 4321 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4322 failCount++; 4323 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4324 failCount++; 4325 } 4326 report("unicodeHexNotation"); 4327 } 4328 4329 private static void unicodeClassesTest() throws Exception { 4330 4331 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4332 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4333 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4334 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4335 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4336 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4337 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4338 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4339 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4340 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4341 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4342 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4343 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4344 Matcher bound = Pattern.compile("\\b").matcher(""); 4345 Matcher word = Pattern.compile("\\w++").matcher(""); 4346 // UNICODE_CHARACTER_CLASS 4347 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4348 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4349 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4350 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4351 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4352 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4353 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4354 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4355 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4356 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4357 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4358 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4359 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4360 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4361 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4362 // embedded flag (?U) 4363 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4364 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4365 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4366 4367 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4368 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4369 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4370 // properties 4371 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4372 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4373 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4374 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4375 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4376 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4377 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4378 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4379 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4380 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4381 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4382 // javaMethod 4383 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4384 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4385 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4386 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4387 // GC/C 4388 Matcher gcC = Pattern.compile("\\p{C}").matcher(""); 4389 4390 for (int cp = 1; cp < 0x30000; cp++) { 4391 String str = new String(Character.toChars(cp)); 4392 int type = Character.getType(cp); 4393 if (// lower 4394 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4395 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4396 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4397 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4398 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4399 // upper 4400 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4401 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4402 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4403 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4404 // alpha 4405 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4406 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4407 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4408 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4409 // digit 4410 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4411 Character.isDigit(cp) != digitU.reset(str).matches() || 4412 // alnum 4413 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4414 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4415 // punct 4416 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4417 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4418 // graph 4419 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4420 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4421 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4422 // blank 4423 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4424 != blank.reset(str).matches() || 4425 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4426 // print 4427 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4428 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4429 // cntrl 4430 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4431 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4432 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4433 // hexdigit 4434 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4435 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4436 // space 4437 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4438 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4439 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4440 // word 4441 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4442 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4443 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4444 // bwordb 4445 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4446 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4447 // properties 4448 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4449 Character.isLetter(cp) != letterP.reset(str).matches()|| 4450 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4451 Character.isIdeographic(cp) != ideogJ.reset(str).matches() || 4452 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 4453 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4454 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() || 4455 // gc_C 4456 (Character.CONTROL == type || Character.FORMAT == type || 4457 Character.PRIVATE_USE == type || Character.SURROGATE == type || 4458 Character.UNASSIGNED == type) 4459 != gcC.reset(str).matches()) { 4460 failCount++; 4461 } 4462 } 4463 4464 // bounds/word align 4465 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4466 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4467 failCount++; 4468 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4469 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4470 failCount++; 4471 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4472 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4473 failCount++; 4474 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4475 failCount++; 4476 report("unicodePredefinedClasses"); 4477 } 4478 4479 private static void unicodeCharacterNameTest() throws Exception { 4480 4481 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { 4482 if (!Character.isValidCodePoint(cp) || 4483 Character.getType(cp) == Character.UNASSIGNED) 4484 continue; 4485 String str = new String(Character.toChars(cp)); 4486 // single 4487 String p = "\\N{" + Character.getName(cp) + "}"; 4488 if (!Pattern.compile(p).matcher(str).matches()) { 4489 failCount++; 4490 } 4491 // class[c] 4492 p = "[\\N{" + Character.getName(cp) + "}]"; 4493 if (!Pattern.compile(p).matcher(str).matches()) { 4494 failCount++; 4495 } 4496 } 4497 4498 // range 4499 for (int i = 0; i < 10; i++) { 4500 int start = generator.nextInt(20); 4501 int end = start + generator.nextInt(200); 4502 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; 4503 String str; 4504 for (int cp = start; cp < end; cp++) { 4505 str = new String(Character.toChars(cp)); 4506 if (!Pattern.compile(p).matcher(str).matches()) { 4507 failCount++; 4508 } 4509 } 4510 str = new String(Character.toChars(end + 10)); 4511 if (Pattern.compile(p).matcher(str).matches()) { 4512 failCount++; 4513 } 4514 } 4515 4516 // slice 4517 for (int i = 0; i < 10; i++) { 4518 int n = generator.nextInt(256); 4519 int[] buf = new int[n]; 4520 StringBuffer sb = new StringBuffer(1024); 4521 for (int j = 0; j < n; j++) { 4522 int cp = generator.nextInt(1000); 4523 if (!Character.isValidCodePoint(cp) || 4524 Character.getType(cp) == Character.UNASSIGNED) 4525 cp = 0x4e00; // just use 4e00 4526 sb.append("\\N{" + Character.getName(cp) + "}"); 4527 buf[j] = cp; 4528 } 4529 String p = sb.toString(); 4530 String str = new String(buf, 0, buf.length); 4531 if (!Pattern.compile(p).matcher(str).matches()) { 4532 failCount++; 4533 } 4534 } 4535 report("unicodeCharacterName"); 4536 } 4537 4538 private static void horizontalAndVerticalWSTest() throws Exception { 4539 String hws = new String (new char[] { 4540 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4541 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4542 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4543 0x202f, 0x205f, 0x3000 }); 4544 String vws = new String (new char[] { 4545 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4546 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4547 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4548 failCount++; 4549 if (Pattern.compile("\\H").matcher(hws).find() || 4550 Pattern.compile("[\\H]").matcher(hws).find()) 4551 failCount++; 4552 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4553 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4554 failCount++; 4555 if (Pattern.compile("\\V").matcher(vws).find() || 4556 Pattern.compile("[\\V]").matcher(vws).find()) 4557 failCount++; 4558 String prefix = "abcd"; 4559 String suffix = "efgh"; 4560 String ng = "A"; 4561 for (int i = 0; i < hws.length(); i++) { 4562 String c = String.valueOf(hws.charAt(i)); 4563 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4564 if (!m.find() || !c.equals(m.group())) 4565 failCount++; 4566 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4567 if (!m.find() || !c.equals(m.group())) 4568 failCount++; 4569 4570 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4571 if (!m.find() || !ng.equals(m.group())) 4572 failCount++; 4573 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4574 if (!m.find() || !ng.equals(m.group())) 4575 failCount++; 4576 } 4577 for (int i = 0; i < vws.length(); i++) { 4578 String c = String.valueOf(vws.charAt(i)); 4579 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4580 if (!m.find() || !c.equals(m.group())) 4581 failCount++; 4582 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4583 if (!m.find() || !c.equals(m.group())) 4584 failCount++; 4585 4586 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4587 if (!m.find() || !ng.equals(m.group())) 4588 failCount++; 4589 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4590 if (!m.find() || !ng.equals(m.group())) 4591 failCount++; 4592 } 4593 // \v in range is interpreted as 0x0B. This is the undocumented behavior 4594 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4595 failCount++; 4596 report("horizontalAndVerticalWSTest"); 4597 } 4598 4599 private static void linebreakTest() throws Exception { 4600 String linebreaks = new String (new char[] { 4601 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4602 String crnl = "\r\n"; 4603 if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() && 4604 Pattern.compile("\\R").matcher(crnl).matches() && 4605 Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() && 4606 Pattern.compile("\\Rabc").matcher("\rabc").matches() && 4607 Pattern.compile("\\R\\R").matcher(crnl).matches() && // backtracking 4608 Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking 4609 !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029 4610 failCount++; 4611 } 4612 report("linebreakTest"); 4613 } 4614 4615 // #7189363 4616 private static void branchTest() throws Exception { 4617 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4618 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4619 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4620 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4621 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4622 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4623 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4624 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4625 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4626 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4627 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4628 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4629 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4630 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4631 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4632 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4633 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4634 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4635 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4636 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4637 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 4638 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4639 failCount++; 4640 report("branchTest"); 4641 } 4642 4643 // This test is for 8007395 4644 private static void groupCurlyNotFoundSuppTest() throws Exception { 4645 String input = "test this as \ud83d\ude0d"; 4646 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4647 "test(.)*(@[a-zA-Z.]+)", 4648 "test([^B])+(@[a-zA-Z.]+)", 4649 "test([^B])*(@[a-zA-Z.]+)", 4650 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4651 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4652 }) { 4653 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4654 .matcher(input); 4655 try { 4656 if (m.find()) { 4657 failCount++; 4658 } 4659 } catch (Exception x) { 4660 failCount++; 4661 } 4662 } 4663 report("GroupCurly NotFoundSupp"); 4664 } 4665 4666 // This test is for 8023647 4667 private static void groupCurlyBackoffTest() throws Exception { 4668 if (!"abc1c".matches("(\\w)+1\\1") || 4669 "abc11".matches("(\\w)+1\\1")) { 4670 failCount++; 4671 } 4672 report("GroupCurly backoff"); 4673 } 4674 4675 // This test is for 8012646 4676 private static void patternAsPredicate() throws Exception { 4677 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4678 4679 if (p.test("")) { 4680 failCount++; 4681 } 4682 if (!p.test("word")) { 4683 failCount++; 4684 } 4685 if (p.test("1234")) { 4686 failCount++; 4687 } 4688 if (!p.test("word1234")) { 4689 failCount++; 4690 } 4691 report("Pattern.asPredicate"); 4692 } 4693 4694 // This test is for 8184692 4695 private static void patternAsMatchPredicate() throws Exception { 4696 Predicate<String> p = Pattern.compile("[a-z]+").asMatchPredicate(); 4697 4698 if (p.test("")) { 4699 failCount++; 4700 } 4701 if (!p.test("word")) { 4702 failCount++; 4703 } 4704 if (p.test("1234word")) { 4705 failCount++; 4706 } 4707 if (p.test("1234")) { 4708 failCount++; 4709 } 4710 report("Pattern.asMatchPredicate"); 4711 } 4712 4713 4714 // This test is for 8035975 4715 private static void invalidFlags() throws Exception { 4716 for (int flag = 1; flag != 0; flag <<= 1) { 4717 switch (flag) { 4718 case Pattern.CASE_INSENSITIVE: 4719 case Pattern.MULTILINE: 4720 case Pattern.DOTALL: 4721 case Pattern.UNICODE_CASE: 4722 case Pattern.CANON_EQ: 4723 case Pattern.UNIX_LINES: 4724 case Pattern.LITERAL: 4725 case Pattern.UNICODE_CHARACTER_CLASS: 4726 case Pattern.COMMENTS: 4727 // valid flag, continue 4728 break; 4729 default: 4730 try { 4731 Pattern.compile(".", flag); 4732 failCount++; 4733 } catch (IllegalArgumentException expected) { 4734 } 4735 } 4736 } 4737 report("Invalid compile flags"); 4738 } 4739 4740 // This test is for 8158482 4741 private static void embeddedFlags() throws Exception { 4742 try { 4743 Pattern.compile("(?i).(?-i)."); 4744 Pattern.compile("(?m).(?-m)."); 4745 Pattern.compile("(?s).(?-s)."); 4746 Pattern.compile("(?d).(?-d)."); 4747 Pattern.compile("(?u).(?-u)."); 4748 Pattern.compile("(?c).(?-c)."); 4749 Pattern.compile("(?x).(?-x)."); 4750 Pattern.compile("(?U).(?-U)."); 4751 Pattern.compile("(?imsducxU).(?-imsducxU)."); 4752 } catch (PatternSyntaxException x) { 4753 failCount++; 4754 } 4755 report("Embedded flags"); 4756 } 4757 4758 private static void grapheme() throws Exception { 4759 Files.lines(UCDFiles.GRAPHEME_BREAK_TEST) 4760 .filter( ln -> ln.length() != 0 && !ln.startsWith("#") ) 4761 .forEach( ln -> { 4762 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); 4763 // System.out.println(str); 4764 String[] strs = ln.split("\u00f7|\u00d7"); 4765 StringBuilder src = new StringBuilder(); 4766 ArrayList<String> graphemes = new ArrayList<>(); 4767 StringBuilder buf = new StringBuilder(); 4768 int offBk = 0; 4769 for (String str : strs) { 4770 if (str.length() == 0) // first empty str 4771 continue; 4772 int cp = Integer.parseInt(str, 16); 4773 src.appendCodePoint(cp); 4774 buf.appendCodePoint(cp); 4775 offBk += (str.length() + 1); 4776 if (ln.charAt(offBk) == '\u00f7') { // DIV 4777 graphemes.add(buf.toString()); 4778 buf = new StringBuilder(); 4779 } 4780 } 4781 Pattern p = Pattern.compile("\\X"); 4782 Matcher m = p.matcher(src.toString()); 4783 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4784 for (String g : graphemes) { 4785 // System.out.printf(" grapheme:=[%s]%n", g); 4786 // (1) test \\X directly 4787 if (!m.find() || !m.group().equals(g)) { 4788 System.out.println("Failed \\X [" + ln + "] : " + g); 4789 failCount++; 4790 } 4791 // (2) test \\b{g} + \\X via Scanner 4792 boolean hasNext = s.hasNext(p); 4793 // if (!s.hasNext() || !s.next().equals(next)) { 4794 if (!s.hasNext(p) || !s.next(p).equals(g)) { 4795 System.out.println("Failed b{g} [" + ln + "] : " + g); 4796 failCount++; 4797 } 4798 } 4799 }); 4800 // some sanity checks 4801 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() || 4802 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() || 4803 !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches()) 4804 failCount++; 4805 // make sure "\b{n}" still works 4806 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches()) 4807 failCount++; 4808 report("Unicode extended grapheme cluster"); 4809 } 4810 4811 // hangup/timeout if go into exponential backtracking 4812 private static void expoBacktracking() throws Exception { 4813 4814 Object[][] patternMatchers = { 4815 // 6328855 4816 { "(.*\n*)*", 4817 "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)", 4818 false }, 4819 // 6192895 4820 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4821 "Hello World this is a test this is a test this is a test A", 4822 true }, 4823 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4824 "Hello World this is a test this is a test this is a test \u4e00 ", 4825 false }, 4826 { " *([a-z0-9]+ *)+", 4827 "hello world this is a test this is a test this is a test A", 4828 false }, 4829 // 4771934 [FIXED] #5013651? 4830 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4831 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com", 4832 true }, 4833 // 4866249 [FIXED] 4834 { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>", 4835 "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">", 4836 true }, 4837 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4838 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com", 4839 false }, 4840 // 6345469 4841 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4842 " < br/> < / p> <p> <html> <adfasfdasdf> </p>", 4843 true }, // --> matched 4844 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4845 " < br/> < / p> <p> <html> <adfasfdasdf> p </p>", 4846 false }, 4847 // 5026912 4848 { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$", 4849 "156580451111112225588087755221111111566969655555555", 4850 false}, 4851 // 6988218 4852 { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')", 4853 "'%)) order by ANGEBOT.ID", 4854 false}, // find 4855 // 6693451 4856 { "^(\\s*foo\\s*)*$", 4857 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo", 4858 true }, 4859 { "^(\\s*foo\\s*)*$", 4860 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo", 4861 false 4862 }, 4863 // 7006761 4864 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true}, 4865 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false}, 4866 // 8140212 4867 { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)", 4868 "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()", 4869 false 4870 }, 4871 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true}, 4872 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false}, 4873 4874 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true }, 4875 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4876 4877 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true}, 4878 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4879 4880 { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false}, 4881 4882 /* not fixed 4883 //8132141 ---> second level exponential backtracking 4884 { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*", 4885 "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" }, 4886 */ 4887 }; 4888 4889 for (Object[] pm : patternMatchers) { 4890 String p = (String)pm[0]; 4891 String s = (String)pm[1]; 4892 boolean r = (Boolean)pm[2]; 4893 if (r != Pattern.compile(p).matcher(s).matches()) { 4894 failCount++; 4895 } 4896 } 4897 } 4898 4899 private static void invalidGroupName() { 4900 // Invalid start of a group name 4901 for (String groupName : List.of("", ".", "0", "\u0040", "\u005b", 4902 "\u0060", "\u007b", "\u0416")) { 4903 for (String pat : List.of("(?<" + groupName + ">)", 4904 "\\k<" + groupName + ">")) { 4905 try { 4906 Pattern.compile(pat); 4907 failCount++; 4908 } catch (PatternSyntaxException e) { 4909 if (!e.getMessage().startsWith( 4910 "capturing group name does not start with a" 4911 + " Latin letter")) { 4912 failCount++; 4913 } 4914 } 4915 } 4916 } 4917 // Invalid char in a group name 4918 for (String groupName : List.of("a.", "b\u0040", "c\u005b", 4919 "d\u0060", "e\u007b", "f\u0416")) { 4920 for (String pat : List.of("(?<" + groupName + ">)", 4921 "\\k<" + groupName + ">")) { 4922 try { 4923 Pattern.compile(pat); 4924 failCount++; 4925 } catch (PatternSyntaxException e) { 4926 if (!e.getMessage().startsWith( 4927 "named capturing group is missing trailing '>'")) { 4928 failCount++; 4929 } 4930 } 4931 } 4932 } 4933 report("Invalid capturing group names"); 4934 } 4935 }