1 /* 2 * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895 37 * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706 38 * 8194667 8197462 8184692 39 * 40 * @library /test/lib 41 * @build jdk.test.lib.RandomFactory 42 * @run main RegExTest 43 * @key randomness 44 */ 45 46 import java.util.function.Function; 47 import java.util.regex.*; 48 import java.util.Random; 49 import java.util.Scanner; 50 import java.io.*; 51 import java.nio.file.*; 52 import java.util.*; 53 import java.nio.CharBuffer; 54 import java.util.function.Predicate; 55 import jdk.test.lib.RandomFactory; 56 57 /** 58 * This is a test class created to check the operation of 59 * the Pattern and Matcher classes. 60 */ 61 public class RegExTest { 62 63 private static Random generator = RandomFactory.getRandom(); 64 private static boolean failure = false; 65 private static int failCount = 0; 66 private static String firstFailure = null; 67 68 /** 69 * Main to interpret arguments and run several tests. 
/**
 * Entry point of the regex test suite.
 *
 * Runs the file-driven cases first, then each programmatic test in the
 * fixed order listed below. When all tests have run, throws a
 * RuntimeException naming the first failed test if the shared
 * {@code failure} flag is set; otherwise prints a success line to
 * System.err.
 *
 * @param args not read by this method
 * @throws Exception whatever an individual test lets escape
 */
public static void main(String[] args) throws Exception {
    // Most of the tests are in a file
    processFile("TestCases.txt");
    //processFile("PerlCases.txt");
    processFile("BMPTestCases.txt");
    processFile("SupplementaryTestCases.txt");

    // These test many randomly generated char patterns
    bm();
    slice();

    // These are hard to put into the file
    escapes();
    blankInput();

    // Substitution tests on randomly generated sequences
    globalSubstitute();
    stringbufferSubstitute();
    stringbuilderSubstitute();

    substitutionBasher();
    substitutionBasher2();

    // Canonical Equivalence
    ceTest();

    // Anchors
    anchorTest();

    // boolean match calls
    matchesTest();
    lookingAtTest();

    // Pattern API
    patternMatchesTest();

    // Misc
    lookbehindTest();
    nullArgumentTest();
    backRefTest();
    groupCaptureTest();
    caretTest();
    charClassTest();
    emptyPatternTest();
    findIntTest();
    group0Test();
    longPatternTest();
    octalTest();
    ampersandTest();
    negationTest();
    splitTest();
    appendTest();
    caseFoldingTest();
    commentsTest();
    unixLinesTest();
    replaceFirstTest();
    gTest();
    zTest();
    serializeTest();
    reluctantRepetitionTest();
    multilineDollarTest();
    dollarAtEndTest();
    caretBetweenTerminatorsTest();
    // This RFE rejected in Tiger numOccurrencesTest();
    javaCharClassTest();
    nonCaptureRepetitionTest();
    notCapturedGroupCurlyMatchTest();
    escapedSegmentTest();
    literalPatternTest();
    literalReplacementTest();
    regionTest();
    toStringTest();
    negatedCharClassTest();
    findFromTest();
    boundsTest();
    unicodeWordBoundsTest();
    caretAtEndTest();
    wordSearchTest();
    hitEndTest();
    toMatchResultTest();
    toMatchResultTest2();
    surrogatesInClassTest();
    removeQEQuotingTest();
    namedGroupCaptureTest();
    nonBmpClassComplementTest();
    unicodePropertiesTest();
    unicodeHexNotationTest();
    unicodeClassesTest();
    unicodeCharacterNameTest();
    horizontalAndVerticalWSTest();
    linebreakTest();
    branchTest();
    groupCurlyNotFoundSuppTest();
    groupCurlyBackoffTest();
    patternAsPredicate();
    patternAsMatchPredicate();
    invalidFlags();
    embeddedFlags();
    grapheme();
    expoBacktracking();
    invalidGroupName();

    // Individual tests only record failures; the run is failed here, once,
    // after every test has had a chance to execute.
    if (failure) {
        throw new
            RuntimeException("RegExTest failed, 1st failure: " +
                             firstFailure);
    } else {
        System.err.println("OKAY: All tests passed.");
    }
}
unicodePropertiesTest(); 158 unicodeHexNotationTest(); 159 unicodeClassesTest(); 160 unicodeCharacterNameTest(); 161 horizontalAndVerticalWSTest(); 162 linebreakTest(); 163 branchTest(); 164 groupCurlyNotFoundSuppTest(); 165 groupCurlyBackoffTest(); 166 patternAsPredicate(); 167 patternAsMatchPredicate(); 168 invalidFlags(); 169 embeddedFlags(); 170 grapheme(); 171 expoBacktracking(); 172 invalidGroupName(); 173 174 if (failure) { 175 throw new 176 RuntimeException("RegExTest failed, 1st failure: " + 177 firstFailure); 178 } else { 179 System.err.println("OKAY: All tests passed."); 180 } 181 } 182 183 // Utility functions 184 185 private static String getRandomAlphaString(int length) { 186 StringBuffer buf = new StringBuffer(length); 187 for (int i=0; i<length; i++) { 188 char randChar = (char)(97 + generator.nextInt(26)); 189 buf.append(randChar); 190 } 191 return buf.toString(); 192 } 193 194 private static void check(Matcher m, String expected) { 195 m.find(); 196 if (!m.group().equals(expected)) 197 failCount++; 198 } 199 200 private static void check(Matcher m, String result, boolean expected) { 201 m.find(); 202 if (m.group().equals(result) != expected) 203 failCount++; 204 } 205 206 private static void check(Pattern p, String s, boolean expected) { 207 if (p.matcher(s).find() != expected) 208 failCount++; 209 } 210 211 private static void check(String p, String s, boolean expected) { 212 Matcher matcher = Pattern.compile(p).matcher(s); 213 if (matcher.find() != expected) 214 failCount++; 215 } 216 217 private static void check(String p, char c, boolean expected) { 218 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 219 Pattern pattern = Pattern.compile(propertyPattern); 220 char[] ca = new char[1]; ca[0] = c; 221 Matcher matcher = pattern.matcher(new String(ca)); 222 if (!matcher.find()) 223 failCount++; 224 } 225 226 private static void check(String p, int codePoint, boolean expected) { 227 String propertyPattern = expected ? 
"\\p" + p : "\\P" + p; 228 Pattern pattern = Pattern.compile(propertyPattern); 229 char[] ca = Character.toChars(codePoint); 230 Matcher matcher = pattern.matcher(new String(ca)); 231 if (!matcher.find()) 232 failCount++; 233 } 234 235 private static void check(String p, int flag, String input, String s, 236 boolean expected) 237 { 238 Pattern pattern = Pattern.compile(p, flag); 239 Matcher matcher = pattern.matcher(input); 240 if (expected) 241 check(matcher, s, expected); 242 else 243 check(pattern, input, false); 244 } 245 246 private static void report(String testName) { 247 int spacesToAdd = 30 - testName.length(); 248 StringBuffer paddedNameBuffer = new StringBuffer(testName); 249 for (int i=0; i<spacesToAdd; i++) 250 paddedNameBuffer.append(" "); 251 String paddedName = paddedNameBuffer.toString(); 252 System.err.println(paddedName + ": " + 253 (failCount==0 ? "Passed":"Failed("+failCount+")")); 254 if (failCount > 0) { 255 failure = true; 256 257 if (firstFailure == null) { 258 firstFailure = testName; 259 } 260 } 261 262 failCount = 0; 263 } 264 265 /** 266 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 267 * supplementary characters. This method does NOT fully take care 268 * of the regex syntax. 
269 */ 270 private static String toSupplementaries(String s) { 271 int length = s.length(); 272 StringBuffer sb = new StringBuffer(length * 2); 273 274 for (int i = 0; i < length; ) { 275 char c = s.charAt(i++); 276 if (c == '\\') { 277 sb.append(c); 278 if (i < length) { 279 c = s.charAt(i++); 280 sb.append(c); 281 if (c == 'u') { 282 // assume no syntax error 283 sb.append(s.charAt(i++)); 284 sb.append(s.charAt(i++)); 285 sb.append(s.charAt(i++)); 286 sb.append(s.charAt(i++)); 287 } 288 } 289 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 290 sb.append('\ud800').append((char)('\udc00'+c)); 291 } else { 292 sb.append(c); 293 } 294 } 295 return sb.toString(); 296 } 297 298 // Regular expression tests 299 300 // This is for bug 6178785 301 // Test if an expected NPE gets thrown when passing in a null argument 302 private static boolean check(Runnable test) { 303 try { 304 test.run(); 305 failCount++; 306 return false; 307 } catch (NullPointerException npe) { 308 return true; 309 } 310 } 311 312 private static void nullArgumentTest() { 313 check(() -> Pattern.compile(null)); 314 check(() -> Pattern.matches(null, null)); 315 check(() -> Pattern.matches("xyz", null)); 316 check(() -> Pattern.quote(null)); 317 check(() -> Pattern.compile("xyz").split(null)); 318 check(() -> Pattern.compile("xyz").matcher(null)); 319 320 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 321 m.matches(); 322 check(() -> m.appendTail((StringBuffer) null)); 323 check(() -> m.appendTail((StringBuilder)null)); 324 check(() -> m.replaceAll((String) null)); 325 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 326 check(() -> m.replaceFirst((String)null)); 327 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 328 check(() -> m.appendReplacement((StringBuffer)null, null)); 329 check(() -> m.appendReplacement((StringBuilder)null, null)); 330 check(() -> m.reset(null)); 331 check(() -> Matcher.quoteReplacement(null)); 332 //check(() -> 
m.usePattern(null)); 333 334 report("Null Argument"); 335 } 336 337 // This is for bug6635133 338 // Test if surrogate pair in Unicode escapes can be handled correctly. 339 private static void surrogatesInClassTest() throws Exception { 340 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 341 Matcher matcher = pattern.matcher("\ud834\udd22"); 342 if (!matcher.find()) 343 failCount++; 344 345 report("Surrogate pair in Unicode escape"); 346 } 347 348 // This is for bug6990617 349 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 350 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 351 // char is an octal digit. 352 private static void removeQEQuotingTest() throws Exception { 353 Pattern pattern = 354 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 355 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 356 if (!matcher.find()) 357 failCount++; 358 359 report("Remove Q/E Quoting"); 360 } 361 362 // This is for bug 4988891 363 // Test toMatchResult to see that it is a copy of the Matcher 364 // that is not affected by subsequent operations on the original 365 private static void toMatchResultTest() throws Exception { 366 Pattern pattern = Pattern.compile("squid"); 367 Matcher matcher = pattern.matcher( 368 "agiantsquidofdestinyasmallsquidoffate"); 369 matcher.find(); 370 int matcherStart1 = matcher.start(); 371 MatchResult mr = matcher.toMatchResult(); 372 if (mr == matcher) 373 failCount++; 374 int resultStart1 = mr.start(); 375 if (matcherStart1 != resultStart1) 376 failCount++; 377 matcher.find(); 378 int matcherStart2 = matcher.start(); 379 int resultStart2 = mr.start(); 380 if (matcherStart2 == resultStart2) 381 failCount++; 382 if (resultStart1 != resultStart2) 383 failCount++; 384 MatchResult mr2 = matcher.toMatchResult(); 385 if (mr == mr2) 386 failCount++; 387 if (mr2.start() != matcherStart2) 388 failCount++; 389 report("toMatchResult is a copy"); 390 } 391 392 private 
static void checkExpectedISE(Runnable test) { 393 try { 394 test.run(); 395 failCount++; 396 } catch (IllegalStateException x) { 397 } catch (IndexOutOfBoundsException xx) { 398 failCount++; 399 } 400 } 401 402 private static void checkExpectedIOOE(Runnable test) { 403 try { 404 test.run(); 405 failCount++; 406 } catch (IndexOutOfBoundsException x) {} 407 } 408 409 // This is for bug 8074678 410 // Test the result of toMatchResult throws ISE if no match is availble 411 private static void toMatchResultTest2() throws Exception { 412 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 413 matcher.find(); 414 MatchResult mr = matcher.toMatchResult(); 415 416 checkExpectedISE(() -> mr.start()); 417 checkExpectedISE(() -> mr.start(2)); 418 checkExpectedISE(() -> mr.end()); 419 checkExpectedISE(() -> mr.end(2)); 420 checkExpectedISE(() -> mr.group()); 421 checkExpectedISE(() -> mr.group(2)); 422 423 matcher = Pattern.compile("(match)").matcher("there is a match"); 424 matcher.find(); 425 MatchResult mr2 = matcher.toMatchResult(); 426 checkExpectedIOOE(() -> mr2.start(2)); 427 checkExpectedIOOE(() -> mr2.end(2)); 428 checkExpectedIOOE(() -> mr2.group(2)); 429 430 report("toMatchResult2 appropriate exceptions"); 431 } 432 433 // This is for bug 5013885 434 // Must test a slice to see if it reports hitEnd correctly 435 private static void hitEndTest() throws Exception { 436 // Basic test of Slice node 437 Pattern p = Pattern.compile("^squidattack"); 438 Matcher m = p.matcher("squack"); 439 m.find(); 440 if (m.hitEnd()) 441 failCount++; 442 m.reset("squid"); 443 m.find(); 444 if (!m.hitEnd()) 445 failCount++; 446 447 // Test Slice, SliceA and SliceU nodes 448 for (int i=0; i<3; i++) { 449 int flags = 0; 450 if (i==1) flags = Pattern.CASE_INSENSITIVE; 451 if (i==2) flags = Pattern.UNICODE_CASE; 452 p = Pattern.compile("^abc", flags); 453 m = p.matcher("ad"); 454 m.find(); 455 if (m.hitEnd()) 456 failCount++; 457 m.reset("ab"); 458 m.find(); 459 if 
(!m.hitEnd()) 460 failCount++; 461 } 462 463 // Test Boyer-Moore node 464 p = Pattern.compile("catattack"); 465 m = p.matcher("attack"); 466 m.find(); 467 if (!m.hitEnd()) 468 failCount++; 469 470 p = Pattern.compile("catattack"); 471 m = p.matcher("attackattackattackcatatta"); 472 m.find(); 473 if (!m.hitEnd()) 474 failCount++; 475 476 // 8184706: Matching u+0d at EOL against \R should hit-end 477 p = Pattern.compile("...\\R"); 478 m = p.matcher("cat" + (char)0x0a); 479 m.find(); 480 if (m.hitEnd()) 481 failCount++; 482 483 m = p.matcher("cat" + (char)0x0d); 484 m.find(); 485 if (!m.hitEnd()) 486 failCount++; 487 488 m = p.matcher("cat" + (char)0x0d + (char)0x0a); 489 m.find(); 490 if (m.hitEnd()) 491 failCount++; 492 493 report("hitEnd"); 494 } 495 496 // This is for bug 4997476 497 // It is weird code submitted by customer demonstrating a regression 498 private static void wordSearchTest() throws Exception { 499 String testString = new String("word1 word2 word3"); 500 Pattern p = Pattern.compile("\\b"); 501 Matcher m = p.matcher(testString); 502 int position = 0; 503 int start = 0; 504 while (m.find(position)) { 505 start = m.start(); 506 if (start == testString.length()) 507 break; 508 if (m.find(start+1)) { 509 position = m.start(); 510 } else { 511 position = testString.length(); 512 } 513 if (testString.substring(start, position).equals(" ")) 514 continue; 515 if (!testString.substring(start, position-1).startsWith("word")) 516 failCount++; 517 } 518 report("Customer word search"); 519 } 520 521 // This is for bug 4994840 522 private static void caretAtEndTest() throws Exception { 523 // Problem only occurs with multiline patterns 524 // containing a beginning-of-line caret "^" followed 525 // by an expression that also matches the empty string. 
526 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 527 Matcher matcher = pattern.matcher("\r"); 528 matcher.find(); 529 matcher.find(); 530 report("Caret at end"); 531 } 532 533 // This test is for 4979006 534 // Check to see if word boundary construct properly handles unicode 535 // non spacing marks 536 private static void unicodeWordBoundsTest() throws Exception { 537 String spaces = " "; 538 String wordChar = "a"; 539 String nsm = "\u030a"; 540 541 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 542 543 Pattern pattern = Pattern.compile("\\b"); 544 Matcher matcher = pattern.matcher(""); 545 // S=other B=word character N=non spacing mark .=word boundary 546 // SS.BB.SS 547 String input = spaces + wordChar + wordChar + spaces; 548 twoFindIndexes(input, matcher, 2, 4); 549 // SS.BBN.SS 550 input = spaces + wordChar +wordChar + nsm + spaces; 551 twoFindIndexes(input, matcher, 2, 5); 552 // SS.BN.SS 553 input = spaces + wordChar + nsm + spaces; 554 twoFindIndexes(input, matcher, 2, 4); 555 // SS.BNN.SS 556 input = spaces + wordChar + nsm + nsm + spaces; 557 twoFindIndexes(input, matcher, 2, 5); 558 // SSN.BB.SS 559 input = spaces + nsm + wordChar + wordChar + spaces; 560 twoFindIndexes(input, matcher, 3, 5); 561 // SS.BNB.SS 562 input = spaces + wordChar + nsm + wordChar + spaces; 563 twoFindIndexes(input, matcher, 2, 5); 564 // SSNNSS 565 input = spaces + nsm + nsm + spaces; 566 matcher.reset(input); 567 if (matcher.find()) 568 failCount++; 569 // SSN.BBN.SS 570 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 571 twoFindIndexes(input, matcher, 3, 6); 572 573 report("Unicode word boundary"); 574 } 575 576 private static void twoFindIndexes(String input, Matcher matcher, int a, 577 int b) throws Exception 578 { 579 matcher.reset(input); 580 matcher.find(); 581 if (matcher.start() != a) 582 failCount++; 583 matcher.find(); 584 if (matcher.start() != b) 585 failCount++; 586 } 587 588 // This test is for 6284152 589 static 
void check(String regex, String input, String[] expected) { 590 List<String> result = new ArrayList<String>(); 591 Pattern p = Pattern.compile(regex); 592 Matcher m = p.matcher(input); 593 while (m.find()) { 594 result.add(m.group()); 595 } 596 if (!Arrays.asList(expected).equals(result)) 597 failCount++; 598 } 599 600 private static void lookbehindTest() throws Exception { 601 //Positive 602 check("(?<=%.{0,5})foo\\d", 603 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 604 new String[]{"foo1", "foo2", "foo3"}); 605 606 //boundary at end of the lookbehind sub-regex should work consistently 607 //with the boundary just after the lookbehind sub-regex 608 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 609 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 610 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 611 check("(?<!abc \\b)foo", "abc foo", new String[0]); 612 613 //Negative 614 check("(?<!%.{0,5})foo\\d", 615 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 616 new String[] {"foo4", "foo5"}); 617 618 //Positive greedy 619 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 620 621 //Positive reluctant 622 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 623 624 //supplementary 625 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 626 new String[] {"fo\ud800\udc00o"}); 627 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 628 new String[] {"fo\ud800\udc00o"}); 629 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 630 new String[] {"fo\ud800\udc00o"}); 631 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 632 new String[] {"fo\ud800\udc00o"}); 633 report("Lookbehind"); 634 } 635 636 // This test is for 4938995 637 // Check to see if weak region boundaries are transparent to 638 // lookahead and lookbehind constructs 639 private static void boundsTest() throws Exception { 640 String fullMessage = "catdogcat"; 641 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 642 
Matcher matcher = pattern.matcher("catdogca"); 643 matcher.useTransparentBounds(true); 644 if (matcher.find()) 645 failCount++; 646 matcher.reset("atdogcat"); 647 if (matcher.find()) 648 failCount++; 649 matcher.reset(fullMessage); 650 if (!matcher.find()) 651 failCount++; 652 matcher.reset(fullMessage); 653 matcher.region(0,9); 654 if (!matcher.find()) 655 failCount++; 656 matcher.reset(fullMessage); 657 matcher.region(0,6); 658 if (!matcher.find()) 659 failCount++; 660 matcher.reset(fullMessage); 661 matcher.region(3,6); 662 if (!matcher.find()) 663 failCount++; 664 matcher.useTransparentBounds(false); 665 if (matcher.find()) 666 failCount++; 667 668 // Negative lookahead/lookbehind 669 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 670 matcher = pattern.matcher("dogcat"); 671 matcher.useTransparentBounds(true); 672 matcher.region(0,3); 673 if (matcher.find()) 674 failCount++; 675 matcher.reset("catdog"); 676 matcher.region(3,6); 677 if (matcher.find()) 678 failCount++; 679 matcher.useTransparentBounds(false); 680 matcher.reset("dogcat"); 681 matcher.region(0,3); 682 if (!matcher.find()) 683 failCount++; 684 matcher.reset("catdog"); 685 matcher.region(3,6); 686 if (!matcher.find()) 687 failCount++; 688 689 report("Region bounds transparency"); 690 } 691 692 // This test is for 4945394 693 private static void findFromTest() throws Exception { 694 String message = "This is 40 $0 message."; 695 Pattern pat = Pattern.compile("\\$0"); 696 Matcher match = pat.matcher(message); 697 if (!match.find()) 698 failCount++; 699 if (match.find()) 700 failCount++; 701 if (match.find()) 702 failCount++; 703 report("Check for alternating find"); 704 } 705 706 // This test is for 4872664 and 4892980 707 private static void negatedCharClassTest() throws Exception { 708 Pattern pattern = Pattern.compile("[^>]"); 709 Matcher matcher = pattern.matcher("\u203A"); 710 if (!matcher.matches()) 711 failCount++; 712 pattern = Pattern.compile("[^fr]"); 713 matcher = pattern.matcher("a"); 
714 if (!matcher.find()) 715 failCount++; 716 matcher.reset("\u203A"); 717 if (!matcher.find()) 718 failCount++; 719 String s = "for"; 720 String result[] = s.split("[^fr]"); 721 if (!result[0].equals("f")) 722 failCount++; 723 if (!result[1].equals("r")) 724 failCount++; 725 s = "f\u203Ar"; 726 result = s.split("[^fr]"); 727 if (!result[0].equals("f")) 728 failCount++; 729 if (!result[1].equals("r")) 730 failCount++; 731 732 // Test adding to bits, subtracting a node, then adding to bits again 733 pattern = Pattern.compile("[^f\u203Ar]"); 734 matcher = pattern.matcher("a"); 735 if (!matcher.find()) 736 failCount++; 737 matcher.reset("f"); 738 if (matcher.find()) 739 failCount++; 740 matcher.reset("\u203A"); 741 if (matcher.find()) 742 failCount++; 743 matcher.reset("r"); 744 if (matcher.find()) 745 failCount++; 746 matcher.reset("\u203B"); 747 if (!matcher.find()) 748 failCount++; 749 750 // Test subtracting a node, adding to bits, subtracting again 751 pattern = Pattern.compile("[^\u203Ar\u203B]"); 752 matcher = pattern.matcher("a"); 753 if (!matcher.find()) 754 failCount++; 755 matcher.reset("\u203A"); 756 if (matcher.find()) 757 failCount++; 758 matcher.reset("r"); 759 if (matcher.find()) 760 failCount++; 761 matcher.reset("\u203B"); 762 if (matcher.find()) 763 failCount++; 764 matcher.reset("\u203C"); 765 if (!matcher.find()) 766 failCount++; 767 768 report("Negated Character Class"); 769 } 770 771 // This test is for 4628291 772 private static void toStringTest() throws Exception { 773 Pattern pattern = Pattern.compile("b+"); 774 if (pattern.toString() != "b+") 775 failCount++; 776 Matcher matcher = pattern.matcher("aaabbbccc"); 777 String matcherString = matcher.toString(); // unspecified 778 matcher.find(); 779 matcherString = matcher.toString(); // unspecified 780 matcher.region(0,3); 781 matcherString = matcher.toString(); // unspecified 782 matcher.reset(); 783 matcherString = matcher.toString(); // unspecified 784 report("toString"); 785 } 786 787 // 
// This test is for 4808962
// Exercises Pattern.LITERAL and Pattern.quote: metacharacters and the
// Q/E quoting escapes in the pattern text must be matched literally.
// The flags variable accumulates as the test proceeds (LITERAL, then
// MULTILINE, CASE_INSENSITIVE, DOTALL, CANON_EQ are OR-ed in one by one),
// so the statement order matters. The second half repeats the sequence
// with letters mapped to supplementary characters.
private static void literalPatternTest() throws Exception {
    int flags = Pattern.LITERAL;

    Pattern pattern = Pattern.compile("abc\\t$^", flags);
    check(pattern, "abc\\t$^", true);

    pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
    check(pattern, "abc\\t$^", true);

    // Under LITERAL the Q/E escapes are ordinary text, not quoting
    pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
    check(pattern, "\\Qa^$bcabc\\E", true);
    check(pattern, "a^$bcabc", false);

    // The next three patterns are compiled without flags
    pattern = Pattern.compile("\\\\Q\\\\E");
    check(pattern, "\\Q\\E", true);

    pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
    check(pattern, "abcefg\\Q\\Ehij", true);

    pattern = Pattern.compile("\\\\\\Q\\\\E");
    check(pattern, "\\\\\\\\", true);

    pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
    check(pattern, "\\Qa^$bcabc\\E", true);
    check(pattern, "a^$bcabc", false);

    pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
    check(pattern, "\\Qabc\\Edef", true);
    check(pattern, "abcdef", false);

    pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
    check(pattern, "abc\\Edef", true);
    check(pattern, "abcdef", false);

    pattern = Pattern.compile(Pattern.quote("\\E"));
    check(pattern, "\\E", true);

    pattern = Pattern.compile("((((abc.+?:)", flags);
    check(pattern, "((((abc.+?:)", true);

    flags |= Pattern.MULTILINE;

    pattern = Pattern.compile("^cat$", flags);
    check(pattern, "abc^cat$def", true);
    check(pattern, "cat", false);

    flags |= Pattern.CASE_INSENSITIVE;

    pattern = Pattern.compile("abcdef", flags);
    check(pattern, "ABCDEF", true);
    check(pattern, "AbCdEf", true);

    flags |= Pattern.DOTALL;

    pattern = Pattern.compile("a...b", flags);
    check(pattern, "A...b", true);
    check(pattern, "Axxxb", false);

    flags |= Pattern.CANON_EQ;

    // NOTE(review): p is compiled here but never used; the two checks
    // below still run against `pattern`, i.e. the "a...b" pattern from
    // the DOTALL step. Possibly `p` was meant to be checked -- confirm
    // the intended expectations before changing.
    Pattern p = Pattern.compile("testa\u030a", flags);
    check(pattern, "testa\u030a", false);
    check(pattern, "test\u00e5", false);

    // Supplementary character test
    flags = Pattern.LITERAL;

    pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
    check(pattern, toSupplementaries("abc\\t$^"), true);

    pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
    check(pattern, toSupplementaries("abc\\t$^"), true);

    pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
    check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
    check(pattern, toSupplementaries("a^$bcabc"), false);

    pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
    check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
    check(pattern, toSupplementaries("a^$bcabc"), false);

    pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
    check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
    check(pattern, toSupplementaries("abcdef"), false);

    pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
    check(pattern, toSupplementaries("abc\\Edef"), true);
    check(pattern, toSupplementaries("abcdef"), false);

    pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
    check(pattern, toSupplementaries("((((abc.+?:)"), true);

    flags |= Pattern.MULTILINE;

    pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
    check(pattern, toSupplementaries("abc^cat$def"), true);
    check(pattern, toSupplementaries("cat"), false);

    flags |= Pattern.DOTALL;

    // note: this is case-sensitive.
    pattern = Pattern.compile(toSupplementaries("a...b"), flags);
    check(pattern, toSupplementaries("a...b"), true);
    check(pattern, toSupplementaries("axxxb"), false);

    flags |= Pattern.CANON_EQ;

    // NOTE(review): same situation as in the first half -- p is assigned
    // but the checks below use `pattern` (the supplementary "a...b"
    // pattern). Confirm intent.
    String t = toSupplementaries("test");
    p = Pattern.compile(t + "a\u030a", flags);
    check(pattern, t + "a\u030a", false);
    check(pattern, t + "\u00e5", false);

    report("Literal pattern");
}
889 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 890 check(pattern, toSupplementaries("a...b"), true); 891 check(pattern, toSupplementaries("axxxb"), false); 892 893 flags |= Pattern.CANON_EQ; 894 895 String t = toSupplementaries("test"); 896 p = Pattern.compile(t + "a\u030a", flags); 897 check(pattern, t + "a\u030a", false); 898 check(pattern, t + "\u00e5", false); 899 900 report("Literal pattern"); 901 } 902 903 // This test is for 4803179 904 // This test is also for 4808962, replacement parts 905 private static void literalReplacementTest() throws Exception { 906 int flags = Pattern.LITERAL; 907 908 Pattern pattern = Pattern.compile("abc", flags); 909 Matcher matcher = pattern.matcher("zzzabczzz"); 910 String replaceTest = "$0"; 911 String result = matcher.replaceAll(replaceTest); 912 if (!result.equals("zzzabczzz")) 913 failCount++; 914 915 matcher.reset(); 916 String literalReplacement = matcher.quoteReplacement(replaceTest); 917 result = matcher.replaceAll(literalReplacement); 918 if (!result.equals("zzz$0zzz")) 919 failCount++; 920 921 matcher.reset(); 922 replaceTest = "\\t$\\$"; 923 literalReplacement = matcher.quoteReplacement(replaceTest); 924 result = matcher.replaceAll(literalReplacement); 925 if (!result.equals("zzz\\t$\\$zzz")) 926 failCount++; 927 928 // Supplementary character test 929 pattern = Pattern.compile(toSupplementaries("abc"), flags); 930 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 931 replaceTest = "$0"; 932 result = matcher.replaceAll(replaceTest); 933 if (!result.equals(toSupplementaries("zzzabczzz"))) 934 failCount++; 935 936 matcher.reset(); 937 literalReplacement = matcher.quoteReplacement(replaceTest); 938 result = matcher.replaceAll(literalReplacement); 939 if (!result.equals(toSupplementaries("zzz$0zzz"))) 940 failCount++; 941 942 matcher.reset(); 943 replaceTest = "\\t$\\$"; 944 literalReplacement = matcher.quoteReplacement(replaceTest); 945 result = matcher.replaceAll(literalReplacement); 946 if 
(!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 947 failCount++; 948 949 // IAE should be thrown if backslash or '$' is the last character 950 // in replacement string 951 try { 952 "\uac00".replaceAll("\uac00", "$"); 953 failCount++; 954 } catch (IllegalArgumentException iie) { 955 } catch (Exception e) { 956 failCount++; 957 } 958 try { 959 "\uac00".replaceAll("\uac00", "\\"); 960 failCount++; 961 } catch (IllegalArgumentException iie) { 962 } catch (Exception e) { 963 failCount++; 964 } 965 report("Literal replacement"); 966 } 967 968 // This test is for 4757029 969 private static void regionTest() throws Exception { 970 Pattern pattern = Pattern.compile("abc"); 971 Matcher matcher = pattern.matcher("abcdefabc"); 972 973 matcher.region(0,9); 974 if (!matcher.find()) 975 failCount++; 976 if (!matcher.find()) 977 failCount++; 978 matcher.region(0,3); 979 if (!matcher.find()) 980 failCount++; 981 matcher.region(3,6); 982 if (matcher.find()) 983 failCount++; 984 matcher.region(0,2); 985 if (matcher.find()) 986 failCount++; 987 988 expectRegionFail(matcher, 1, -1); 989 expectRegionFail(matcher, -1, -1); 990 expectRegionFail(matcher, -1, 1); 991 expectRegionFail(matcher, 5, 3); 992 expectRegionFail(matcher, 5, 12); 993 expectRegionFail(matcher, 12, 12); 994 995 pattern = Pattern.compile("^abc$"); 996 matcher = pattern.matcher("zzzabczzz"); 997 matcher.region(0,9); 998 if (matcher.find()) 999 failCount++; 1000 matcher.region(3,6); 1001 if (!matcher.find()) 1002 failCount++; 1003 matcher.region(3,6); 1004 matcher.useAnchoringBounds(false); 1005 if (matcher.find()) 1006 failCount++; 1007 1008 // Supplementary character test 1009 pattern = Pattern.compile(toSupplementaries("abc")); 1010 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 1011 matcher.region(0,9*2); 1012 if (!matcher.find()) 1013 failCount++; 1014 if (!matcher.find()) 1015 failCount++; 1016 matcher.region(0,3*2); 1017 if (!matcher.find()) 1018 failCount++; 1019 matcher.region(1,3*2); 1020 if 
(matcher.find()) 1021 failCount++; 1022 matcher.region(3*2,6*2); 1023 if (matcher.find()) 1024 failCount++; 1025 matcher.region(0,2*2); 1026 if (matcher.find()) 1027 failCount++; 1028 matcher.region(0,2*2+1); 1029 if (matcher.find()) 1030 failCount++; 1031 1032 expectRegionFail(matcher, 1*2, -1); 1033 expectRegionFail(matcher, -1, -1); 1034 expectRegionFail(matcher, -1, 1*2); 1035 expectRegionFail(matcher, 5*2, 3*2); 1036 expectRegionFail(matcher, 5*2, 12*2); 1037 expectRegionFail(matcher, 12*2, 12*2); 1038 1039 pattern = Pattern.compile(toSupplementaries("^abc$")); 1040 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1041 matcher.region(0,9*2); 1042 if (matcher.find()) 1043 failCount++; 1044 matcher.region(3*2,6*2); 1045 if (!matcher.find()) 1046 failCount++; 1047 matcher.region(3*2+1,6*2); 1048 if (matcher.find()) 1049 failCount++; 1050 matcher.region(3*2,6*2-1); 1051 if (matcher.find()) 1052 failCount++; 1053 matcher.region(3*2,6*2); 1054 matcher.useAnchoringBounds(false); 1055 if (matcher.find()) 1056 failCount++; 1057 report("Regions"); 1058 } 1059 1060 private static void expectRegionFail(Matcher matcher, int index1, 1061 int index2) 1062 { 1063 try { 1064 matcher.region(index1, index2); 1065 failCount++; 1066 } catch (IndexOutOfBoundsException ioobe) { 1067 // Correct result 1068 } catch (IllegalStateException ise) { 1069 // Correct result 1070 } 1071 } 1072 1073 // This test is for 4803197 1074 private static void escapedSegmentTest() throws Exception { 1075 1076 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1077 check(pattern, "dir1\\dir2", true); 1078 1079 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1080 check(pattern, "dir1\\dir2\\", true); 1081 1082 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1083 check(pattern, "dir1\\dir2\\", true); 1084 1085 // Supplementary character test 1086 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1087 check(pattern, toSupplementaries("dir1\\dir2"), true); 1088 1089 pattern 
= Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1090 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1091 1092 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1093 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1094 1095 report("Escaped segment"); 1096 } 1097 1098 // This test is for 4792284 1099 private static void nonCaptureRepetitionTest() throws Exception { 1100 String input = "abcdefgh;"; 1101 1102 String[] patterns = new String[] { 1103 "(?:\\w{4})+;", 1104 "(?:\\w{8})*;", 1105 "(?:\\w{2}){2,4};", 1106 "(?:\\w{4}){2,};", // only matches the 1107 ".*?(?:\\w{5})+;", // specified minimum 1108 ".*?(?:\\w{9})*;", // number of reps - OK 1109 "(?:\\w{4})+?;", // lazy repetition - OK 1110 "(?:\\w{4})++;", // possessive repetition - OK 1111 "(?:\\w{2,}?)+;", // non-deterministic - OK 1112 "(\\w{4})+;", // capturing group - OK 1113 }; 1114 1115 for (int i = 0; i < patterns.length; i++) { 1116 // Check find() 1117 check(patterns[i], 0, input, input, true); 1118 // Check matches() 1119 Pattern p = Pattern.compile(patterns[i]); 1120 Matcher m = p.matcher(input); 1121 1122 if (m.matches()) { 1123 if (!m.group(0).equals(input)) 1124 failCount++; 1125 } else { 1126 failCount++; 1127 } 1128 } 1129 1130 report("Non capturing repetition"); 1131 } 1132 1133 // This test is for 6358731 1134 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1135 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1136 Matcher matcher = pattern.matcher("abcd"); 1137 if (!matcher.matches() || 1138 matcher.group(1) != null || 1139 !matcher.group(2).equals("abcd")) { 1140 failCount++; 1141 } 1142 report("Not captured GroupCurly"); 1143 } 1144 1145 // This test is for 4706545 1146 private static void javaCharClassTest() throws Exception { 1147 for (int i=0; i<1000; i++) { 1148 char c = (char)generator.nextInt(); 1149 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1150 check("{javaUpperCase}", c, 
Character.isUpperCase(c));
            // Each check passes a java* property name, the sampled value c and
            // the answer of the corresponding java.lang.Character predicate.
            check("{javaUpperCase}+", c, Character.isUpperCase(c));
            check("{javaTitleCase}", c, Character.isTitleCase(c));
            check("{javaDigit}", c, Character.isDigit(c));
            check("{javaDefined}", c, Character.isDefined(c));
            check("{javaLetter}", c, Character.isLetter(c));
            check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
            check("{javaJavaIdentifierStart}", c,
                  Character.isJavaIdentifierStart(c));
            check("{javaJavaIdentifierPart}", c,
                  Character.isJavaIdentifierPart(c));
            check("{javaUnicodeIdentifierStart}", c,
                  Character.isUnicodeIdentifierStart(c));
            check("{javaUnicodeIdentifierPart}", c,
                  Character.isUnicodeIdentifierPart(c));
            check("{javaIdentifierIgnorable}", c,
                  Character.isIdentifierIgnorable(c));
            check("{javaSpaceChar}", c, Character.isSpaceChar(c));
            check("{javaWhitespace}", c, Character.isWhitespace(c));
            check("{javaISOControl}", c, Character.isISOControl(c));
            check("{javaMirrored}", c, Character.isMirrored(c));

        }

        // Supplementary character test
        // Same property checks, this time on 1000 int code points drawn from
        // MIN_SUPPLEMENTARY_CODE_POINT up to, but not including, MAX_CODE_POINT
        // (the bound given to nextInt is exclusive).
        for (int i=0; i<1000; i++) {
            int c = generator.nextInt(Character.MAX_CODE_POINT
                                      - Character.MIN_SUPPLEMENTARY_CODE_POINT)
                          + Character.MIN_SUPPLEMENTARY_CODE_POINT;
            check("{javaLowerCase}", c, Character.isLowerCase(c));
            check("{javaUpperCase}", c, Character.isUpperCase(c));
            check("{javaUpperCase}+", c, Character.isUpperCase(c));
            check("{javaTitleCase}", c, Character.isTitleCase(c));
            check("{javaDigit}", c, Character.isDigit(c));
            check("{javaDefined}", c, Character.isDefined(c));
            check("{javaLetter}", c, Character.isLetter(c));
            check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
            check("{javaJavaIdentifierStart}", c,
                  Character.isJavaIdentifierStart(c));
            check("{javaJavaIdentifierPart}", c,
                  Character.isJavaIdentifierPart(c));
            check("{javaUnicodeIdentifierStart}", c,
Character.isUnicodeIdentifierStart(c));
            check("{javaUnicodeIdentifierPart}", c,
                  Character.isUnicodeIdentifierPart(c));
            check("{javaIdentifierIgnorable}", c,
                  Character.isIdentifierIgnorable(c));
            check("{javaSpaceChar}", c, Character.isSpaceChar(c));
            check("{javaWhitespace}", c, Character.isWhitespace(c));
            check("{javaISOControl}", c, Character.isISOControl(c));
            check("{javaMirrored}", c, Character.isMirrored(c));
        }

        report("Java character classes");
    }

    // This test is for 4523620
    // Disabled: the whole method is commented out. It calls
    // Pattern.numOccurrences(String, boolean).
    /*
    private static void numOccurrencesTest() throws Exception {
        Pattern pattern = Pattern.compile("aaa");

        if (pattern.numOccurrences("aaaaaa", false) != 2)
            failCount++;
        if (pattern.numOccurrences("aaaaaa", true) != 4)
            failCount++;

        pattern = Pattern.compile("^");
        if (pattern.numOccurrences("aaaaaa", false) != 1)
            failCount++;
        if (pattern.numOccurrences("aaaaaa", true) != 1)
            failCount++;

        report("Number of Occurrences");
    }
    */

    // This test is for 4776374
    // Checks where '^' may match relative to line terminators under four
    // flag combinations. Each group of checks below uses one combination.
    private static void caretBetweenTerminatorsTest() throws Exception {
        int flags1 = Pattern.DOTALL;
        int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
        int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
        int flags4 = Pattern.DOTALL | Pattern.MULTILINE;

        // DOTALL only
        check("^....", flags1, "test\ntest", "test", true);
        check(".....^", flags1, "test\ntest", "test", false);
        check(".....^", flags1, "test\n", "test", false);
        check("....^", flags1, "test\r\n", "test", false);

        // DOTALL | UNIX_LINES
        check("^....", flags2, "test\ntest", "test", true);
        check("....^", flags2, "test\ntest", "test", false);
        check(".....^", flags2, "test\n", "test", false);
        check("....^", flags2, "test\r\n", "test", false);

        // DOTALL | UNIX_LINES | MULTILINE
        check("^....", flags3, "test\ntest", "test", true);
        check(".....^", flags3, "test\ntest", "test\n", true);
check(".....^", flags3, "test\u0085test", "test\u0085", false); 1246 check(".....^", flags3, "test\n", "test", false); 1247 check(".....^", flags3, "test\r\n", "test", false); 1248 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1249 1250 check("^....", flags4, "test\ntest", "test", true); 1251 check(".....^", flags3, "test\ntest", "test\n", true); 1252 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1253 check(".....^", flags4, "test\n", "test\n", false); 1254 check(".....^", flags4, "test\r\n", "test\r", false); 1255 1256 // Supplementary character test 1257 String t = toSupplementaries("test"); 1258 check("^....", flags1, t+"\n"+t, t, true); 1259 check(".....^", flags1, t+"\n"+t, t, false); 1260 check(".....^", flags1, t+"\n", t, false); 1261 check("....^", flags1, t+"\r\n", t, false); 1262 1263 check("^....", flags2, t+"\n"+t, t, true); 1264 check("....^", flags2, t+"\n"+t, t, false); 1265 check(".....^", flags2, t+"\n", t, false); 1266 check("....^", flags2, t+"\r\n", t, false); 1267 1268 check("^....", flags3, t+"\n"+t, t, true); 1269 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1270 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1271 check(".....^", flags3, t+"\n", t, false); 1272 check(".....^", flags3, t+"\r\n", t, false); 1273 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1274 1275 check("^....", flags4, t+"\n"+t, t, true); 1276 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1277 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1278 check(".....^", flags4, t+"\n", t+"\n", false); 1279 check(".....^", flags4, t+"\r\n", t+"\r", false); 1280 1281 report("Caret between terminators"); 1282 } 1283 1284 // This test is for 4727935 1285 private static void dollarAtEndTest() throws Exception { 1286 int flags1 = Pattern.DOTALL; 1287 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1288 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1289 1290 check("....$", flags1, "test\n", "test", true); 1291 
check("....$", flags1, "test\r\n", "test", true);
        check(".....$", flags1, "test\n", "test\n", true);
        check(".....$", flags1, "test\u0085", "test\u0085", true);
        check("....$", flags1, "test\u0085", "test", true);

        // flags2 adds UNIX_LINES
        check("....$", flags2, "test\n", "test", true);
        check(".....$", flags2, "test\n", "test\n", true);
        check(".....$", flags2, "test\u0085", "test\u0085", true);
        check("....$", flags2, "test\u0085", "est\u0085", true);

        // flags3 is DOTALL | MULTILINE; the last two patterns are expected
        // not to match, so their "!!!!" argument is only a placeholder.
        check("....$.blah", flags3, "test\nblah", "test\nblah", true);
        check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
        check("....$blah", flags3, "test\nblah", "!!!!", false);
        check(".....$blah", flags3, "test\nblah", "!!!!", false);

        // Supplementary character test
        // Same groups, with "test" and "blah" run through toSupplementaries().
        String t = toSupplementaries("test");
        String b = toSupplementaries("blah");
        check("....$", flags1, t+"\n", t, true);
        check("....$", flags1, t+"\r\n", t, true);
        check(".....$", flags1, t+"\n", t+"\n", true);
        check(".....$", flags1, t+"\u0085", t+"\u0085", true);
        check("....$", flags1, t+"\u0085", t, true);

        check("....$", flags2, t+"\n", t, true);
        check(".....$", flags2, t+"\n", t+"\n", true);
        check(".....$", flags2, t+"\u0085", t+"\u0085", true);
        check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);

        check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
        check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
        check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
        check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);

        report("Dollar at End");
    }

    // This test is for 4711773
    // A MULTILINE "$" must be found before the newline and again at the end
    // of the input. The results of find() are not checked here, so a failed
    // find() would surface as an exception from start(0).
    private static void multilineDollarTest() throws Exception {
        Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
        Matcher matcher = findCR.matcher("first bit\nsecond bit");
        matcher.find();
        if (matcher.start(0) != 9)
            failCount++;
        matcher.find();
        if (matcher.start(0) !=
20) 1337 failCount++; 1338 1339 // Supplementary character test 1340 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1341 matcher.find(); 1342 if (matcher.start(0) != 9*2) 1343 failCount++; 1344 matcher.find(); 1345 if (matcher.start(0) != 20*2) 1346 failCount++; 1347 1348 report("Multiline Dollar"); 1349 } 1350 1351 private static void reluctantRepetitionTest() throws Exception { 1352 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1353 check(p, "1 word word word 2", true); 1354 check(p, "1 wor wo w 2", true); 1355 check(p, "1 word word 2", true); 1356 check(p, "1 word 2", true); 1357 check(p, "1 wo w w 2", true); 1358 check(p, "1 wo w 2", true); 1359 check(p, "1 wor w 2", true); 1360 1361 p = Pattern.compile("([a-z])+?c"); 1362 Matcher m = p.matcher("ababcdefdec"); 1363 check(m, "ababc"); 1364 1365 // Supplementary character test 1366 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1367 m = p.matcher(toSupplementaries("ababcdefdec")); 1368 check(m, toSupplementaries("ababc")); 1369 1370 report("Reluctant Repetition"); 1371 } 1372 1373 private static Pattern serializedPattern(Pattern p) throws Exception { 1374 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1375 ObjectOutputStream oos = new ObjectOutputStream(baos); 1376 oos.writeObject(p); 1377 oos.close(); 1378 try (ObjectInputStream ois = new ObjectInputStream( 1379 new ByteArrayInputStream(baos.toByteArray()))) { 1380 return (Pattern)ois.readObject(); 1381 } 1382 } 1383 1384 private static void serializeTest() throws Exception { 1385 String patternStr = "(b)"; 1386 String matchStr = "b"; 1387 Pattern pattern = Pattern.compile(patternStr); 1388 Pattern serializedPattern = serializedPattern(pattern); 1389 Matcher matcher = serializedPattern.matcher(matchStr); 1390 if (!matcher.matches()) 1391 failCount++; 1392 if (matcher.groupCount() != 1) 1393 failCount++; 1394 1395 pattern = Pattern.compile("a(?-i)b", Pattern.CASE_INSENSITIVE); 1396 
serializedPattern = serializedPattern(pattern); 1397 if (!serializedPattern.matcher("Ab").matches()) 1398 failCount++; 1399 if (serializedPattern.matcher("AB").matches()) 1400 failCount++; 1401 1402 report("Serialization"); 1403 } 1404 1405 private static void gTest() { 1406 Pattern pattern = Pattern.compile("\\G\\w"); 1407 Matcher matcher = pattern.matcher("abc#x#x"); 1408 matcher.find(); 1409 matcher.find(); 1410 matcher.find(); 1411 if (matcher.find()) 1412 failCount++; 1413 1414 pattern = Pattern.compile("\\GA*"); 1415 matcher = pattern.matcher("1A2AA3"); 1416 matcher.find(); 1417 if (matcher.find()) 1418 failCount++; 1419 1420 pattern = Pattern.compile("\\GA*"); 1421 matcher = pattern.matcher("1A2AA3"); 1422 if (!matcher.find(1)) 1423 failCount++; 1424 matcher.find(); 1425 if (matcher.find()) 1426 failCount++; 1427 1428 report("\\G"); 1429 } 1430 1431 private static void zTest() { 1432 Pattern pattern = Pattern.compile("foo\\Z"); 1433 // Positives 1434 check(pattern, "foo\u0085", true); 1435 check(pattern, "foo\u2028", true); 1436 check(pattern, "foo\u2029", true); 1437 check(pattern, "foo\n", true); 1438 check(pattern, "foo\r", true); 1439 check(pattern, "foo\r\n", true); 1440 // Negatives 1441 check(pattern, "fooo", false); 1442 check(pattern, "foo\n\r", false); 1443 1444 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1445 // Positives 1446 check(pattern, "foo", true); 1447 check(pattern, "foo\n", true); 1448 // Negatives 1449 check(pattern, "foo\r", false); 1450 check(pattern, "foo\u0085", false); 1451 check(pattern, "foo\u2028", false); 1452 check(pattern, "foo\u2029", false); 1453 1454 report("\\Z"); 1455 } 1456 1457 private static void replaceFirstTest() { 1458 Pattern pattern = Pattern.compile("(ab)(c*)"); 1459 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1460 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1461 failCount++; 1462 1463 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1464 if 
(!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1465 failCount++; 1466 1467 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1468 String result = matcher.replaceFirst("$1"); 1469 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1470 failCount++; 1471 1472 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1473 result = matcher.replaceFirst("$2"); 1474 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1475 failCount++; 1476 1477 pattern = Pattern.compile("a*"); 1478 matcher = pattern.matcher("aaaaaaaaaa"); 1479 if (!matcher.replaceFirst("test").equals("test")) 1480 failCount++; 1481 1482 pattern = Pattern.compile("a+"); 1483 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1484 if (!matcher.replaceFirst("test").equals("zzztest")) 1485 failCount++; 1486 1487 // Supplementary character test 1488 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1489 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1490 if (!matcher.replaceFirst(toSupplementaries("test")) 1491 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1492 failCount++; 1493 1494 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1495 if (!matcher.replaceFirst(toSupplementaries("test")). 
1496 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1497 failCount++; 1498 1499 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1500 result = matcher.replaceFirst("$1"); 1501 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1502 failCount++; 1503 1504 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1505 result = matcher.replaceFirst("$2"); 1506 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1507 failCount++; 1508 1509 pattern = Pattern.compile(toSupplementaries("a*")); 1510 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1511 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1512 failCount++; 1513 1514 pattern = Pattern.compile(toSupplementaries("a+")); 1515 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1516 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1517 failCount++; 1518 1519 report("Replace First"); 1520 } 1521 1522 private static void unixLinesTest() { 1523 Pattern pattern = Pattern.compile(".*"); 1524 Matcher matcher = pattern.matcher("aa\u2028blah"); 1525 matcher.find(); 1526 if (!matcher.group(0).equals("aa")) 1527 failCount++; 1528 1529 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1530 matcher = pattern.matcher("aa\u2028blah"); 1531 matcher.find(); 1532 if (!matcher.group(0).equals("aa\u2028blah")) 1533 failCount++; 1534 1535 pattern = Pattern.compile("[az]$", 1536 Pattern.MULTILINE | Pattern.UNIX_LINES); 1537 matcher = pattern.matcher("aa\u2028zz"); 1538 check(matcher, "a\u2028", false); 1539 1540 // Supplementary character test 1541 pattern = Pattern.compile(".*"); 1542 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1543 matcher.find(); 1544 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1545 failCount++; 1546 1547 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1548 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 
1549 matcher.find(); 1550 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1551 failCount++; 1552 1553 pattern = Pattern.compile(toSupplementaries("[az]$"), 1554 Pattern.MULTILINE | Pattern.UNIX_LINES); 1555 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1556 check(matcher, toSupplementaries("a\u2028"), false); 1557 1558 report("Unix Lines"); 1559 } 1560 1561 private static void commentsTest() { 1562 int flags = Pattern.COMMENTS; 1563 1564 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1565 Matcher matcher = pattern.matcher("aa#aa"); 1566 if (!matcher.matches()) 1567 failCount++; 1568 1569 pattern = Pattern.compile("aa # blah", flags); 1570 matcher = pattern.matcher("aa"); 1571 if (!matcher.matches()) 1572 failCount++; 1573 1574 pattern = Pattern.compile("aa blah", flags); 1575 matcher = pattern.matcher("aablah"); 1576 if (!matcher.matches()) 1577 failCount++; 1578 1579 pattern = Pattern.compile("aa # blah blech ", flags); 1580 matcher = pattern.matcher("aa"); 1581 if (!matcher.matches()) 1582 failCount++; 1583 1584 pattern = Pattern.compile("aa # blah\n ", flags); 1585 matcher = pattern.matcher("aa"); 1586 if (!matcher.matches()) 1587 failCount++; 1588 1589 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1590 matcher = pattern.matcher("aabc"); 1591 if (!matcher.matches()) 1592 failCount++; 1593 1594 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1595 matcher = pattern.matcher("aabc"); 1596 if (!matcher.matches()) 1597 failCount++; 1598 1599 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1600 matcher = pattern.matcher("aabc#blech"); 1601 if (!matcher.matches()) 1602 failCount++; 1603 1604 // Supplementary character test 1605 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1606 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1607 if (!matcher.matches()) 1608 failCount++; 1609 1610 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1611 matcher = 
pattern.matcher(toSupplementaries("aa")); 1612 if (!matcher.matches()) 1613 failCount++; 1614 1615 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1616 matcher = pattern.matcher(toSupplementaries("aablah")); 1617 if (!matcher.matches()) 1618 failCount++; 1619 1620 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1621 matcher = pattern.matcher(toSupplementaries("aa")); 1622 if (!matcher.matches()) 1623 failCount++; 1624 1625 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1626 matcher = pattern.matcher(toSupplementaries("aa")); 1627 if (!matcher.matches()) 1628 failCount++; 1629 1630 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1631 matcher = pattern.matcher(toSupplementaries("aabc")); 1632 if (!matcher.matches()) 1633 failCount++; 1634 1635 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1636 matcher = pattern.matcher(toSupplementaries("aabc")); 1637 if (!matcher.matches()) 1638 failCount++; 1639 1640 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1641 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1642 if (!matcher.matches()) 1643 failCount++; 1644 1645 report("Comments"); 1646 } 1647 1648 private static void caseFoldingTest() { // bug 4504687 1649 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1650 Pattern pattern = Pattern.compile("aa", flags); 1651 Matcher matcher = pattern.matcher("ab"); 1652 if (matcher.matches()) 1653 failCount++; 1654 1655 pattern = Pattern.compile("aA", flags); 1656 matcher = pattern.matcher("ab"); 1657 if (matcher.matches()) 1658 failCount++; 1659 1660 pattern = Pattern.compile("aa", flags); 1661 matcher = pattern.matcher("aB"); 1662 if (matcher.matches()) 1663 failCount++; 1664 matcher = pattern.matcher("Ab"); 1665 if (matcher.matches()) 1666 failCount++; 1667 1668 // ASCII "a" 1669 // Latin-1 Supplement "a" + grave 1670 // Cyrillic "a" 1671 String[] patterns 
= new String[] { 1672 //single 1673 "a", "\u00e0", "\u0430", 1674 //slice 1675 "ab", "\u00e0\u00e1", "\u0430\u0431", 1676 //class single 1677 "[a]", "[\u00e0]", "[\u0430]", 1678 //class range 1679 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1680 //back reference 1681 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1682 }; 1683 1684 String[] texts = new String[] { 1685 "A", "\u00c0", "\u0410", 1686 "AB", "\u00c0\u00c1", "\u0410\u0411", 1687 "A", "\u00c0", "\u0410", 1688 "B", "\u00c2", "\u0411", 1689 "aA", "\u00e0\u00c0", "\u0430\u0410" 1690 }; 1691 1692 boolean[] expected = new boolean[] { 1693 true, false, false, 1694 true, false, false, 1695 true, false, false, 1696 true, false, false, 1697 true, false, false 1698 }; 1699 1700 flags = Pattern.CASE_INSENSITIVE; 1701 for (int i = 0; i < patterns.length; i++) { 1702 pattern = Pattern.compile(patterns[i], flags); 1703 matcher = pattern.matcher(texts[i]); 1704 if (matcher.matches() != expected[i]) { 1705 System.out.println("<1> Failed at " + i); 1706 failCount++; 1707 } 1708 } 1709 1710 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1711 for (int i = 0; i < patterns.length; i++) { 1712 pattern = Pattern.compile(patterns[i], flags); 1713 matcher = pattern.matcher(texts[i]); 1714 if (!matcher.matches()) { 1715 System.out.println("<2> Failed at " + i); 1716 failCount++; 1717 } 1718 } 1719 // flag unicode_case alone should do nothing 1720 flags = Pattern.UNICODE_CASE; 1721 for (int i = 0; i < patterns.length; i++) { 1722 pattern = Pattern.compile(patterns[i], flags); 1723 matcher = pattern.matcher(texts[i]); 1724 if (matcher.matches()) { 1725 System.out.println("<3> Failed at " + i); 1726 failCount++; 1727 } 1728 } 1729 1730 // Special cases: i, I, u+0131 and u+0130 1731 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1732 pattern = Pattern.compile("[h-j]+", flags); 1733 if (!pattern.matcher("\u0131\u0130").matches()) 1734 failCount++; 1735 report("Case Folding"); 1736 } 1737 1738 private static void 
appendTest() { 1739 Pattern pattern = Pattern.compile("(ab)(cd)"); 1740 Matcher matcher = pattern.matcher("abcd"); 1741 String result = matcher.replaceAll("$2$1"); 1742 if (!result.equals("cdab")) 1743 failCount++; 1744 1745 String s1 = "Swap all: first = 123, second = 456"; 1746 String s2 = "Swap one: first = 123, second = 456"; 1747 String r = "$3$2$1"; 1748 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1749 matcher = pattern.matcher(s1); 1750 1751 result = matcher.replaceAll(r); 1752 if (!result.equals("Swap all: 123 = first, 456 = second")) 1753 failCount++; 1754 1755 matcher = pattern.matcher(s2); 1756 1757 if (matcher.find()) { 1758 StringBuffer sb = new StringBuffer(); 1759 matcher.appendReplacement(sb, r); 1760 matcher.appendTail(sb); 1761 result = sb.toString(); 1762 if (!result.equals("Swap one: 123 = first, second = 456")) 1763 failCount++; 1764 } 1765 1766 // Supplementary character test 1767 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1768 matcher = pattern.matcher(toSupplementaries("abcd")); 1769 result = matcher.replaceAll("$2$1"); 1770 if (!result.equals(toSupplementaries("cdab"))) 1771 failCount++; 1772 1773 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1774 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1775 r = toSupplementaries("$3$2$1"); 1776 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1777 matcher = pattern.matcher(s1); 1778 1779 result = matcher.replaceAll(r); 1780 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1781 failCount++; 1782 1783 matcher = pattern.matcher(s2); 1784 1785 if (matcher.find()) { 1786 StringBuffer sb = new StringBuffer(); 1787 matcher.appendReplacement(sb, r); 1788 matcher.appendTail(sb); 1789 result = sb.toString(); 1790 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1791 failCount++; 1792 } 1793 report("Append"); 1794 } 1795 1796 private static void splitTest() { 1797 Pattern 
pattern = Pattern.compile(":"); 1798 String[] result = pattern.split("foo:and:boo", 2); 1799 if (!result[0].equals("foo")) 1800 failCount++; 1801 if (!result[1].equals("and:boo")) 1802 failCount++; 1803 // Supplementary character test 1804 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1805 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1806 if (!result[0].equals(toSupplementaries("foo"))) 1807 failCount++; 1808 if (!result[1].equals(toSupplementaries("andXboo"))) 1809 failCount++; 1810 1811 CharBuffer cb = CharBuffer.allocate(100); 1812 cb.put("foo:and:boo"); 1813 cb.flip(); 1814 result = pattern.split(cb); 1815 if (!result[0].equals("foo")) 1816 failCount++; 1817 if (!result[1].equals("and")) 1818 failCount++; 1819 if (!result[2].equals("boo")) 1820 failCount++; 1821 1822 // Supplementary character test 1823 CharBuffer cbs = CharBuffer.allocate(100); 1824 cbs.put(toSupplementaries("fooXandXboo")); 1825 cbs.flip(); 1826 result = patternX.split(cbs); 1827 if (!result[0].equals(toSupplementaries("foo"))) 1828 failCount++; 1829 if (!result[1].equals(toSupplementaries("and"))) 1830 failCount++; 1831 if (!result[2].equals(toSupplementaries("boo"))) 1832 failCount++; 1833 1834 String source = "0123456789"; 1835 for (int limit=-2; limit<3; limit++) { 1836 for (int x=0; x<10; x++) { 1837 result = source.split(Integer.toString(x), limit); 1838 int expectedLength = limit < 1 ? 
2 : limit; 1839 1840 if ((limit == 0) && (x == 9)) { 1841 // expected dropping of "" 1842 if (result.length != 1) 1843 failCount++; 1844 if (!result[0].equals("012345678")) { 1845 failCount++; 1846 } 1847 } else { 1848 if (result.length != expectedLength) { 1849 failCount++; 1850 } 1851 if (!result[0].equals(source.substring(0,x))) { 1852 if (limit != 1) { 1853 failCount++; 1854 } else { 1855 if (!result[0].equals(source.substring(0,10))) { 1856 failCount++; 1857 } 1858 } 1859 } 1860 if (expectedLength > 1) { // Check segment 2 1861 if (!result[1].equals(source.substring(x+1,10))) 1862 failCount++; 1863 } 1864 } 1865 } 1866 } 1867 // Check the case for no match found 1868 for (int limit=-2; limit<3; limit++) { 1869 result = source.split("e", limit); 1870 if (result.length != 1) 1871 failCount++; 1872 if (!result[0].equals(source)) 1873 failCount++; 1874 } 1875 // Check the case for limit == 0, source = ""; 1876 // split() now returns 0-length for empty source "" see #6559590 1877 source = ""; 1878 result = source.split("e", 0); 1879 if (result.length != 1) 1880 failCount++; 1881 if (!result[0].equals(source)) 1882 failCount++; 1883 1884 // Check both split() and splitAsStraem(), especially for zero-lenth 1885 // input and zero-lenth match cases 1886 String[][] input = new String[][] { 1887 { " ", "Abc Efg Hij" }, // normal non-zero-match 1888 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1889 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1890 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1891 { "(?=\\p{Lu})", "AbcEfg" }, 1892 { "(?=\\p{Lu})", "Abc" }, 1893 { " ", "" }, // zero-length input 1894 { ".*", "" }, 1895 1896 // some tests from PatternStreamTest.java 1897 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1898 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1899 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1900 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1901 { "\u56da", 
"1\u56da23\u56da456\u56da7890" }, 1902 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1903 { "\u56da", "" }, 1904 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1905 { "o", "boo:and:foo" }, 1906 { "o", "booooo:and:fooooo" }, 1907 { "o", "fooooo:" }, 1908 }; 1909 1910 String[][] expected = new String[][] { 1911 { "Abc", "Efg", "Hij" }, 1912 { "", "Abc", "Efg", "Hij" }, 1913 { "Abc", "", "Efg", "Hij" }, 1914 { "Abc", "Efg", "Hij" }, 1915 { "Abc", "Efg" }, 1916 { "Abc" }, 1917 { "" }, 1918 { "" }, 1919 1920 { "awgqwefg1fefw", "vssv1vvv1" }, 1921 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1922 { "awgqwefg", "fefw4vssv", "vvv" }, 1923 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1924 { "1", "23", "456", "7890" }, 1925 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1926 { "" }, 1927 { "This", "is", "testing", "", "with", "different", "separators" }, 1928 { "b", "", ":and:f" }, 1929 { "b", "", "", "", "", ":and:f" }, 1930 { "f", "", "", "", "", ":" }, 1931 }; 1932 for (int i = 0; i < input.length; i++) { 1933 pattern = Pattern.compile(input[i][0]); 1934 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1935 failCount++; 1936 } 1937 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1938 // array for zero-length input for now 1939 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1940 expected[i])) { 1941 failCount++; 1942 } 1943 } 1944 report("Split"); 1945 } 1946 1947 private static void negationTest() { 1948 Pattern pattern = Pattern.compile("[\\[@^]+"); 1949 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1950 if (!matcher.find()) 1951 failCount++; 1952 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1953 failCount++; 1954 pattern = Pattern.compile("[@\\[^]+"); 1955 matcher = pattern.matcher("@@@@[[[[^^^^"); 1956 if (!matcher.find()) 1957 failCount++; 1958 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1959 
failCount++;
        pattern = Pattern.compile("[@\\[^@]+");
        matcher = pattern.matcher("@@@@[[[[^^^^");
        if (!matcher.find()) {
            failCount++;
        }
        if (!matcher.group(0).equals("@@@@[[[[^^^^")) {
            failCount++;
        }

        // An escaped ')' is a literal.
        pattern = Pattern.compile("\\)");
        matcher = pattern.matcher("xxx)xxx");
        if (!matcher.find()) {
            failCount++;
        }

        report("Negation");
    }

    // A single '&' inside a character class is a literal, escaped or not.
    private static void ampersandTest() {
        String[] regexes = { "[&@]+", "[@&]+", "[@\\&]+" };
        for (String regex : regexes) {
            check(Pattern.compile(regex), "@@@@&&&&", true);
        }

        report("Ampersand");
    }

    // Each row is { regex, input }; matches() must succeed for every row.
    private static void octalTest() throws Exception {
        String[][] cases = {
            { "\\u0007", "\u0007" },
            { "\\07",    "\u0007" },
            { "\\007",   "\u0007" },
            { "\\0007",  "\u0007" },
            { "\\040",   "\u0020" },
            { "\\0403",  "\u00203" },
            { "\\0103",  "\u0043" },
        };
        for (String[] c : cases) {
            if (!Pattern.compile(c[0]).matcher(c[1]).matches()) {
                failCount++;
            }
        }

        report("Octal");
    }

    // Patterns of several lengths must compile without a
    // PatternSyntaxException.
    private static void longPatternTest() throws Exception {
        try {
            Pattern pattern = Pattern.compile(
                "a 32-character-long pattern xxxx");
            pattern = Pattern.compile("a 33-character-long pattern xxxxx");
pattern = Pattern.compile("a thirty four character long regex"); 2027 StringBuffer patternToBe = new StringBuffer(101); 2028 for (int i=0; i<100; i++) 2029 patternToBe.append((char)(97 + i%26)); 2030 pattern = Pattern.compile(patternToBe.toString()); 2031 } catch (PatternSyntaxException e) { 2032 failCount++; 2033 } 2034 2035 // Supplementary character test 2036 try { 2037 Pattern pattern = Pattern.compile( 2038 toSupplementaries("a 32-character-long pattern xxxx")); 2039 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 2040 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 2041 StringBuffer patternToBe = new StringBuffer(101*2); 2042 for (int i=0; i<100; i++) 2043 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 2044 + 97 + i%26)); 2045 pattern = Pattern.compile(patternToBe.toString()); 2046 } catch (PatternSyntaxException e) { 2047 failCount++; 2048 } 2049 report("LongPattern"); 2050 } 2051 2052 private static void group0Test() throws Exception { 2053 Pattern pattern = Pattern.compile("(tes)ting"); 2054 Matcher matcher = pattern.matcher("testing"); 2055 check(matcher, "testing"); 2056 2057 matcher.reset("testing"); 2058 if (matcher.lookingAt()) { 2059 if (!matcher.group(0).equals("testing")) 2060 failCount++; 2061 } else { 2062 failCount++; 2063 } 2064 2065 matcher.reset("testing"); 2066 if (matcher.matches()) { 2067 if (!matcher.group(0).equals("testing")) 2068 failCount++; 2069 } else { 2070 failCount++; 2071 } 2072 2073 pattern = Pattern.compile("(tes)ting"); 2074 matcher = pattern.matcher("testing"); 2075 if (matcher.lookingAt()) { 2076 if (!matcher.group(0).equals("testing")) 2077 failCount++; 2078 } else { 2079 failCount++; 2080 } 2081 2082 pattern = Pattern.compile("^(tes)ting"); 2083 matcher = pattern.matcher("testing"); 2084 if (matcher.matches()) { 2085 if (!matcher.group(0).equals("testing")) 2086 failCount++; 2087 } else { 2088 failCount++; 2089 } 2090 2091 
// Supplementary character test 2092 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2093 matcher = pattern.matcher(toSupplementaries("testing")); 2094 check(matcher, toSupplementaries("testing")); 2095 2096 matcher.reset(toSupplementaries("testing")); 2097 if (matcher.lookingAt()) { 2098 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2099 failCount++; 2100 } else { 2101 failCount++; 2102 } 2103 2104 matcher.reset(toSupplementaries("testing")); 2105 if (matcher.matches()) { 2106 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2107 failCount++; 2108 } else { 2109 failCount++; 2110 } 2111 2112 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2113 matcher = pattern.matcher(toSupplementaries("testing")); 2114 if (matcher.lookingAt()) { 2115 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2116 failCount++; 2117 } else { 2118 failCount++; 2119 } 2120 2121 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2122 matcher = pattern.matcher(toSupplementaries("testing")); 2123 if (matcher.matches()) { 2124 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2125 failCount++; 2126 } else { 2127 failCount++; 2128 } 2129 2130 report("Group0"); 2131 } 2132 2133 private static void findIntTest() throws Exception { 2134 Pattern p = Pattern.compile("blah"); 2135 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2136 boolean result = m.find(2); 2137 if (!result) 2138 failCount++; 2139 2140 p = Pattern.compile("$"); 2141 m = p.matcher("1234567890"); 2142 result = m.find(10); 2143 if (!result) 2144 failCount++; 2145 try { 2146 result = m.find(11); 2147 failCount++; 2148 } catch (IndexOutOfBoundsException e) { 2149 // correct result 2150 } 2151 2152 // Supplementary character test 2153 p = Pattern.compile(toSupplementaries("blah")); 2154 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2155 result = m.find(2); 2156 if (!result) 2157 failCount++; 2158 2159 report("FindInt"); 2160 } 2161 2162 private static void 
emptyPatternTest() throws Exception { 2163 Pattern p = Pattern.compile(""); 2164 Matcher m = p.matcher("foo"); 2165 2166 // Should find empty pattern at beginning of input 2167 boolean result = m.find(); 2168 if (result != true) 2169 failCount++; 2170 if (m.start() != 0) 2171 failCount++; 2172 2173 // Should not match entire input if input is not empty 2174 m.reset(); 2175 result = m.matches(); 2176 if (result == true) 2177 failCount++; 2178 2179 try { 2180 m.start(0); 2181 failCount++; 2182 } catch (IllegalStateException e) { 2183 // Correct result 2184 } 2185 2186 // Should match entire input if input is empty 2187 m.reset(""); 2188 result = m.matches(); 2189 if (result != true) 2190 failCount++; 2191 2192 result = Pattern.matches("", ""); 2193 if (result != true) 2194 failCount++; 2195 2196 result = Pattern.matches("", "foo"); 2197 if (result == true) 2198 failCount++; 2199 report("EmptyPattern"); 2200 } 2201 2202 private static void charClassTest() throws Exception { 2203 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2204 check(pattern, "blahb]blech", true); 2205 2206 pattern = Pattern.compile("[abc[def]]"); 2207 check(pattern, "b", true); 2208 2209 // Supplementary character tests 2210 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2211 check(pattern, toSupplementaries("blahb]blech"), true); 2212 2213 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2214 check(pattern, toSupplementaries("b"), true); 2215 2216 try { 2217 // u00ff when UNICODE_CASE 2218 pattern = Pattern.compile("[ab\u00ffcd]", 2219 Pattern.CASE_INSENSITIVE| 2220 Pattern.UNICODE_CASE); 2221 check(pattern, "ab\u00ffcd", true); 2222 check(pattern, "Ab\u0178Cd", true); 2223 2224 // u00b5 when UNICODE_CASE 2225 pattern = Pattern.compile("[ab\u00b5cd]", 2226 Pattern.CASE_INSENSITIVE| 2227 Pattern.UNICODE_CASE); 2228 check(pattern, "ab\u00b5cd", true); 2229 check(pattern, "Ab\u039cCd", true); 2230 } catch (Exception e) { failCount++; } 2231 2232 /* Special cases 2233 
(1)LatinSmallLetterLongS u+017f 2234 (2)LatinSmallLetterDotlessI u+0131 2235 (3)LatineCapitalLetterIWithDotAbove u+0130 2236 (4)KelvinSign u+212a 2237 (5)AngstromSign u+212b 2238 */ 2239 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2240 pattern = Pattern.compile("[sik\u00c5]+", flags); 2241 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2242 failCount++; 2243 2244 report("CharClass"); 2245 } 2246 2247 private static void caretTest() throws Exception { 2248 Pattern pattern = Pattern.compile("\\w*"); 2249 Matcher matcher = pattern.matcher("a#bc#def##g"); 2250 check(matcher, "a"); 2251 check(matcher, ""); 2252 check(matcher, "bc"); 2253 check(matcher, ""); 2254 check(matcher, "def"); 2255 check(matcher, ""); 2256 check(matcher, ""); 2257 check(matcher, "g"); 2258 check(matcher, ""); 2259 if (matcher.find()) 2260 failCount++; 2261 2262 pattern = Pattern.compile("^\\w*"); 2263 matcher = pattern.matcher("a#bc#def##g"); 2264 check(matcher, "a"); 2265 if (matcher.find()) 2266 failCount++; 2267 2268 pattern = Pattern.compile("\\w"); 2269 matcher = pattern.matcher("abc##x"); 2270 check(matcher, "a"); 2271 check(matcher, "b"); 2272 check(matcher, "c"); 2273 check(matcher, "x"); 2274 if (matcher.find()) 2275 failCount++; 2276 2277 pattern = Pattern.compile("^\\w"); 2278 matcher = pattern.matcher("abc##x"); 2279 check(matcher, "a"); 2280 if (matcher.find()) 2281 failCount++; 2282 2283 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2284 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2285 check(matcher, "abc"); 2286 if (matcher.find()) 2287 failCount++; 2288 2289 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2290 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2291 check(matcher, "abc"); 2292 check(matcher, "jkl"); 2293 if (matcher.find()) 2294 failCount++; 2295 2296 pattern = Pattern.compile("^", Pattern.MULTILINE); 2297 matcher = pattern.matcher("this is some text"); 2298 String result = matcher.replaceAll("X"); 2299 if 
(!result.equals("Xthis is some text")) 2300 failCount++; 2301 2302 pattern = Pattern.compile("^"); 2303 matcher = pattern.matcher("this is some text"); 2304 result = matcher.replaceAll("X"); 2305 if (!result.equals("Xthis is some text")) 2306 failCount++; 2307 2308 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2309 matcher = pattern.matcher("this is some text\n"); 2310 result = matcher.replaceAll("X"); 2311 if (!result.equals("Xthis is some text\n")) 2312 failCount++; 2313 2314 report("Caret"); 2315 } 2316 2317 private static void groupCaptureTest() throws Exception { 2318 // Independent group 2319 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2320 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2321 matcher.find(); 2322 try { 2323 String blah = matcher.group(1); 2324 failCount++; 2325 } catch (IndexOutOfBoundsException ioobe) { 2326 // Good result 2327 } 2328 // Pure group 2329 pattern = Pattern.compile("x+(?:y+)z+"); 2330 matcher = pattern.matcher("xxxyyyzzz"); 2331 matcher.find(); 2332 try { 2333 String blah = matcher.group(1); 2334 failCount++; 2335 } catch (IndexOutOfBoundsException ioobe) { 2336 // Good result 2337 } 2338 2339 // Supplementary character tests 2340 // Independent group 2341 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2342 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2343 matcher.find(); 2344 try { 2345 String blah = matcher.group(1); 2346 failCount++; 2347 } catch (IndexOutOfBoundsException ioobe) { 2348 // Good result 2349 } 2350 // Pure group 2351 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2352 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2353 matcher.find(); 2354 try { 2355 String blah = matcher.group(1); 2356 failCount++; 2357 } catch (IndexOutOfBoundsException ioobe) { 2358 // Good result 2359 } 2360 2361 report("GroupCapture"); 2362 } 2363 2364 private static void backRefTest() throws Exception { 2365 Pattern pattern = Pattern.compile("(a*)bc\\1"); 
2366 check(pattern, "zzzaabcazzz", true); 2367 2368 pattern = Pattern.compile("(a*)bc\\1"); 2369 check(pattern, "zzzaabcaazzz", true); 2370 2371 pattern = Pattern.compile("(abc)(def)\\1"); 2372 check(pattern, "abcdefabc", true); 2373 2374 pattern = Pattern.compile("(abc)(def)\\3"); 2375 check(pattern, "abcdefabc", false); 2376 2377 try { 2378 for (int i = 1; i < 10; i++) { 2379 // Make sure backref 1-9 are always accepted 2380 pattern = Pattern.compile("abcdef\\" + i); 2381 // and fail to match if the target group does not exit 2382 check(pattern, "abcdef", false); 2383 } 2384 } catch(PatternSyntaxException e) { 2385 failCount++; 2386 } 2387 2388 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2389 check(pattern, "abcdefghija", false); 2390 check(pattern, "abcdefghija1", true); 2391 2392 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2393 check(pattern, "abcdefghijkk", true); 2394 2395 pattern = Pattern.compile("(a)bcdefghij\\11"); 2396 check(pattern, "abcdefghija1", true); 2397 2398 // Supplementary character tests 2399 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2400 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2401 2402 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2403 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2404 2405 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2406 check(pattern, toSupplementaries("abcdefabc"), true); 2407 2408 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2409 check(pattern, toSupplementaries("abcdefabc"), false); 2410 2411 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2412 check(pattern, toSupplementaries("abcdefghija"), false); 2413 check(pattern, toSupplementaries("abcdefghija1"), true); 2414 2415 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2416 check(pattern, toSupplementaries("abcdefghijkk"), true); 2417 2418 report("BackRef"); 
2419 } 2420 2421 /** 2422 * Unicode Technical Report #18, section 2.6 End of Line 2423 * There is no empty line to be matched in the sequence \u000D\u000A 2424 * but there is an empty line in the sequence \u000A\u000D. 2425 */ 2426 private static void anchorTest() throws Exception { 2427 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2428 Matcher m = p.matcher("blah1\r\nblah2"); 2429 m.find(); 2430 m.find(); 2431 if (!m.group().equals("blah2")) 2432 failCount++; 2433 2434 m.reset("blah1\n\rblah2"); 2435 m.find(); 2436 m.find(); 2437 m.find(); 2438 if (!m.group().equals("blah2")) 2439 failCount++; 2440 2441 // Test behavior of $ with \r\n at end of input 2442 p = Pattern.compile(".+$"); 2443 m = p.matcher("blah1\r\n"); 2444 if (!m.find()) 2445 failCount++; 2446 if (!m.group().equals("blah1")) 2447 failCount++; 2448 if (m.find()) 2449 failCount++; 2450 2451 // Test behavior of $ with \r\n at end of input in multiline 2452 p = Pattern.compile(".+$", Pattern.MULTILINE); 2453 m = p.matcher("blah1\r\n"); 2454 if (!m.find()) 2455 failCount++; 2456 if (m.find()) 2457 failCount++; 2458 2459 // Test for $ recognition of \u0085 for bug 4527731 2460 p = Pattern.compile(".+$", Pattern.MULTILINE); 2461 m = p.matcher("blah1\u0085"); 2462 if (!m.find()) 2463 failCount++; 2464 2465 // Supplementary character test 2466 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2467 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2468 m.find(); 2469 m.find(); 2470 if (!m.group().equals(toSupplementaries("blah2"))) 2471 failCount++; 2472 2473 m.reset(toSupplementaries("blah1\n\rblah2")); 2474 m.find(); 2475 m.find(); 2476 m.find(); 2477 if (!m.group().equals(toSupplementaries("blah2"))) 2478 failCount++; 2479 2480 // Test behavior of $ with \r\n at end of input 2481 p = Pattern.compile(".+$"); 2482 m = p.matcher(toSupplementaries("blah1\r\n")); 2483 if (!m.find()) 2484 failCount++; 2485 if (!m.group().equals(toSupplementaries("blah1"))) 2486 failCount++; 2487 if (m.find()) 2488 
failCount++; 2489 2490 // Test behavior of $ with \r\n at end of input in multiline 2491 p = Pattern.compile(".+$", Pattern.MULTILINE); 2492 m = p.matcher(toSupplementaries("blah1\r\n")); 2493 if (!m.find()) 2494 failCount++; 2495 if (m.find()) 2496 failCount++; 2497 2498 // Test for $ recognition of \u0085 for bug 4527731 2499 p = Pattern.compile(".+$", Pattern.MULTILINE); 2500 m = p.matcher(toSupplementaries("blah1\u0085")); 2501 if (!m.find()) 2502 failCount++; 2503 2504 report("Anchors"); 2505 } 2506 2507 /** 2508 * A basic sanity test of Matcher.lookingAt(). 2509 */ 2510 private static void lookingAtTest() throws Exception { 2511 Pattern p = Pattern.compile("(ab)(c*)"); 2512 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2513 2514 if (!m.lookingAt()) 2515 failCount++; 2516 2517 if (!m.group().equals(m.group(0))) 2518 failCount++; 2519 2520 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2521 if (m.lookingAt()) 2522 failCount++; 2523 2524 // Supplementary character test 2525 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2526 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2527 2528 if (!m.lookingAt()) 2529 failCount++; 2530 2531 if (!m.group().equals(m.group(0))) 2532 failCount++; 2533 2534 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2535 if (m.lookingAt()) 2536 failCount++; 2537 2538 report("Looking At"); 2539 } 2540 2541 /** 2542 * A basic sanity test of Matcher.matches(). 
2543 */ 2544 private static void matchesTest() throws Exception { 2545 // matches() 2546 Pattern p = Pattern.compile("ulb(c*)"); 2547 Matcher m = p.matcher("ulbcccccc"); 2548 if (!m.matches()) 2549 failCount++; 2550 2551 // find() but not matches() 2552 m.reset("zzzulbcccccc"); 2553 if (m.matches()) 2554 failCount++; 2555 2556 // lookingAt() but not matches() 2557 m.reset("ulbccccccdef"); 2558 if (m.matches()) 2559 failCount++; 2560 2561 // matches() 2562 p = Pattern.compile("a|ad"); 2563 m = p.matcher("ad"); 2564 if (!m.matches()) 2565 failCount++; 2566 2567 // Supplementary character test 2568 // matches() 2569 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2570 m = p.matcher(toSupplementaries("ulbcccccc")); 2571 if (!m.matches()) 2572 failCount++; 2573 2574 // find() but not matches() 2575 m.reset(toSupplementaries("zzzulbcccccc")); 2576 if (m.matches()) 2577 failCount++; 2578 2579 // lookingAt() but not matches() 2580 m.reset(toSupplementaries("ulbccccccdef")); 2581 if (m.matches()) 2582 failCount++; 2583 2584 // matches() 2585 p = Pattern.compile(toSupplementaries("a|ad")); 2586 m = p.matcher(toSupplementaries("ad")); 2587 if (!m.matches()) 2588 failCount++; 2589 2590 report("Matches"); 2591 } 2592 2593 /** 2594 * A basic sanity test of Pattern.matches(). 
2595 */ 2596 private static void patternMatchesTest() throws Exception { 2597 // matches() 2598 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2599 toSupplementaries("ulbcccccc"))) 2600 failCount++; 2601 2602 // find() but not matches() 2603 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2604 toSupplementaries("zzzulbcccccc"))) 2605 failCount++; 2606 2607 // lookingAt() but not matches() 2608 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2609 toSupplementaries("ulbccccccdef"))) 2610 failCount++; 2611 2612 // Supplementary character test 2613 // matches() 2614 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2615 toSupplementaries("ulbcccccc"))) 2616 failCount++; 2617 2618 // find() but not matches() 2619 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2620 toSupplementaries("zzzulbcccccc"))) 2621 failCount++; 2622 2623 // lookingAt() but not matches() 2624 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2625 toSupplementaries("ulbccccccdef"))) 2626 failCount++; 2627 2628 report("Pattern Matches"); 2629 } 2630 2631 /** 2632 * Canonical equivalence testing. Tests the ability of the engine 2633 * to match sequences that are not explicitly specified in the 2634 * pattern when they are considered equivalent by the Unicode Standard. 
2635 */ 2636 private static void ceTest() throws Exception { 2637 // Decomposed char outside char classes 2638 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2639 Matcher m = p.matcher("test\u00e5"); 2640 if (!m.matches()) 2641 failCount++; 2642 2643 m.reset("testa\u030a"); 2644 if (!m.matches()) 2645 failCount++; 2646 2647 // Composed char outside char classes 2648 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2649 m = p.matcher("test\u00e5"); 2650 if (!m.matches()) 2651 failCount++; 2652 2653 m.reset("testa\u030a"); 2654 if (!m.find()) 2655 failCount++; 2656 2657 // Decomposed char inside a char class 2658 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2659 m = p.matcher("test\u00e5"); 2660 if (!m.find()) 2661 failCount++; 2662 2663 m.reset("testa\u030a"); 2664 if (!m.find()) 2665 failCount++; 2666 2667 // Composed char inside a char class 2668 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2669 m = p.matcher("test\u00e5"); 2670 if (!m.find()) 2671 failCount++; 2672 2673 m.reset("testa\u0300"); 2674 if (!m.find()) 2675 failCount++; 2676 2677 m.reset("testa\u030a"); 2678 if (!m.find()) 2679 failCount++; 2680 2681 // Marks that cannot legally change order and be equivalent 2682 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2683 check(p, "testa\u0308\u0300", true); 2684 check(p, "testa\u0300\u0308", false); 2685 2686 // Marks that can legally change order and be equivalent 2687 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2688 check(p, "testa\u0308\u0323", true); 2689 check(p, "testa\u0323\u0308", true); 2690 2691 // Test all equivalences of the sequence a\u0308\u0323\u0300 2692 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2693 check(p, "testa\u0308\u0323\u0300", true); 2694 check(p, "testa\u0323\u0308\u0300", true); 2695 check(p, "testa\u0308\u0300\u0323", true); 2696 check(p, "test\u00e4\u0323\u0300", true); 2697 check(p, "test\u00e4\u0300\u0323", true); 2698 
2699 Object[][] data = new Object[][] { 2700 2701 // JDK-4867170 2702 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true }, 2703 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true }, 2704 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true }, 2705 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true }, 2706 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true }, 2707 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true }, 2708 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true }, 2709 2710 { "\\p{IsGreek}", "ab\u1f80cd", "f", true }, 2711 { "\\p{IsGreek}", "ab\u1f81cd", "f", true }, 2712 { "\\p{IsGreek}", "ab\u1f82cd", "f", true }, 2713 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true }, 2714 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true }, 2715 2716 // backtracking, force to match "\u1f80", instead of \u1f82" 2717 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true }, 2718 2719 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true }, 2720 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true }, 2721 2722 { "[^\u1f80-\u1f82]","\u1f81", "m", false }, 2723 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false }, 2724 { "[^\u1f01\u0345]", "\u1f81", "f", false }, 2725 2726 { "[^\u1f81]+", "\u1f80\u1f82", "f", true }, 2727 { "[\u1f80]", "ab\u1f80cd", "f", true }, 2728 { "\u1f80", "ab\u1f80cd", "f", true }, 2729 { "\u1f00\u0345\u0300", "\u1f82", "m", true }, 2730 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true }, 2731 { "\u1f82", "\u1f00\u0345\u0300", "m", true }, 2732 { "\u1f82", "\u1f80\u0300", "m", true }, 2733 2734 // JDK-7080302 # compile failed 2735 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true}, 2736 2737 // JDK-6728861, same cause as above one 2738 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true}, 2739 2740 // JDK-6995635 2741 { "(\u00e9)", "e\u0301", "m", true }, 2742 2743 // JDK-6736245 2744 // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc 2745 { "\u2ADC", "\u2ADC", "m", true}, // NFC 2746 { "\u2ADC", "\u2ADD\u0338", "m", 
true}, // NFD 2747 2748 // 4916384. 2749 // Decomposed hangul (jamos) works inside clazz 2750 { "[\u1100\u1161]", "\u1100\u1161", "m", true}, 2751 { "[\u1100\u1161]", "\uac00", "m", true}, 2752 2753 { "[\uac00]", "\u1100\u1161", "m", true}, 2754 { "[\uac00]", "\uac00", "m", true}, 2755 2756 // Decomposed hangul (jamos) 2757 { "\u1100\u1161", "\u1100\u1161", "m", true}, 2758 { "\u1100\u1161", "\uac00", "m", true}, 2759 2760 // Composed hangul 2761 { "\uac00", "\u1100\u1161", "m", true }, 2762 { "\uac00", "\uac00", "m", true }, 2763 2764 /* Need a NFDSlice to nfd the source to solve this issue 2765 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2766 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165> 2767 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2768 2769 // Decomposed supplementary outside char classes 2770 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2771 // Composed supplementary outside char classes 2772 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2773 */ 2774 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2775 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2776 2777 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true }, 2778 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2779 }; 2780 2781 int failCount = 0; 2782 for (Object[] d : data) { 2783 String pn = (String)d[0]; 2784 String tt = (String)d[1]; 2785 boolean isFind = "f".equals(((String)d[2])); 2786 boolean expected = (boolean)d[3]; 2787 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find() 2788 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches(); 2789 if (ret != expected) { 2790 failCount++; 2791 continue; 2792 } 2793 } 2794 report("Canonical Equivalence"); 2795 } 2796 2797 /** 2798 * A basic sanity test of Matcher.replaceAll(). 
2799 */ 2800 private static void globalSubstitute() throws Exception { 2801 // Global substitution with a literal 2802 Pattern p = Pattern.compile("(ab)(c*)"); 2803 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2804 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2805 failCount++; 2806 2807 m.reset("zzzabccczzzabcczzzabccczzz"); 2808 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2809 failCount++; 2810 2811 // Global substitution with groups 2812 m.reset("zzzabccczzzabcczzzabccczzz"); 2813 String result = m.replaceAll("$1"); 2814 if (!result.equals("zzzabzzzabzzzabzzz")) 2815 failCount++; 2816 2817 // Supplementary character test 2818 // Global substitution with a literal 2819 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2820 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2821 if (!m.replaceAll(toSupplementaries("test")). 2822 equals(toSupplementaries("testzzztestzzztest"))) 2823 failCount++; 2824 2825 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2826 if (!m.replaceAll(toSupplementaries("test")). 2827 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2828 failCount++; 2829 2830 // Global substitution with groups 2831 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2832 result = m.replaceAll("$1"); 2833 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2834 failCount++; 2835 2836 report("Global Substitution"); 2837 } 2838 2839 /** 2840 * Tests the usage of Matcher.appendReplacement() with literal 2841 * and group substitutions. 
2842 */ 2843 private static void stringbufferSubstitute() throws Exception { 2844 // SB substitution with literal 2845 String blah = "zzzblahzzz"; 2846 Pattern p = Pattern.compile("blah"); 2847 Matcher m = p.matcher(blah); 2848 StringBuffer result = new StringBuffer(); 2849 try { 2850 m.appendReplacement(result, "blech"); 2851 failCount++; 2852 } catch (IllegalStateException e) { 2853 } 2854 m.find(); 2855 m.appendReplacement(result, "blech"); 2856 if (!result.toString().equals("zzzblech")) 2857 failCount++; 2858 2859 m.appendTail(result); 2860 if (!result.toString().equals("zzzblechzzz")) 2861 failCount++; 2862 2863 // SB substitution with groups 2864 blah = "zzzabcdzzz"; 2865 p = Pattern.compile("(ab)(cd)*"); 2866 m = p.matcher(blah); 2867 result = new StringBuffer(); 2868 try { 2869 m.appendReplacement(result, "$1"); 2870 failCount++; 2871 } catch (IllegalStateException e) { 2872 } 2873 m.find(); 2874 m.appendReplacement(result, "$1"); 2875 if (!result.toString().equals("zzzab")) 2876 failCount++; 2877 2878 m.appendTail(result); 2879 if (!result.toString().equals("zzzabzzz")) 2880 failCount++; 2881 2882 // SB substitution with 3 groups 2883 blah = "zzzabcdcdefzzz"; 2884 p = Pattern.compile("(ab)(cd)*(ef)"); 2885 m = p.matcher(blah); 2886 result = new StringBuffer(); 2887 try { 2888 m.appendReplacement(result, "$1w$2w$3"); 2889 failCount++; 2890 } catch (IllegalStateException e) { 2891 } 2892 m.find(); 2893 m.appendReplacement(result, "$1w$2w$3"); 2894 if (!result.toString().equals("zzzabwcdwef")) 2895 failCount++; 2896 2897 m.appendTail(result); 2898 if (!result.toString().equals("zzzabwcdwefzzz")) 2899 failCount++; 2900 2901 // SB substitution with groups and three matches 2902 // skipping middle match 2903 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2904 p = Pattern.compile("(ab)(cd*)"); 2905 m = p.matcher(blah); 2906 result = new StringBuffer(); 2907 try { 2908 m.appendReplacement(result, "$1"); 2909 failCount++; 2910 } catch (IllegalStateException e) { 2911 } 2912 
m.find(); 2913 m.appendReplacement(result, "$1"); 2914 if (!result.toString().equals("zzzab")) 2915 failCount++; 2916 2917 m.find(); 2918 m.find(); 2919 m.appendReplacement(result, "$2"); 2920 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2921 failCount++; 2922 2923 m.appendTail(result); 2924 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2925 failCount++; 2926 2927 // Check to make sure escaped $ is ignored 2928 blah = "zzzabcdcdefzzz"; 2929 p = Pattern.compile("(ab)(cd)*(ef)"); 2930 m = p.matcher(blah); 2931 result = new StringBuffer(); 2932 m.find(); 2933 m.appendReplacement(result, "$1w\\$2w$3"); 2934 if (!result.toString().equals("zzzabw$2wef")) 2935 failCount++; 2936 2937 m.appendTail(result); 2938 if (!result.toString().equals("zzzabw$2wefzzz")) 2939 failCount++; 2940 2941 // Check to make sure a reference to nonexistent group causes error 2942 blah = "zzzabcdcdefzzz"; 2943 p = Pattern.compile("(ab)(cd)*(ef)"); 2944 m = p.matcher(blah); 2945 result = new StringBuffer(); 2946 m.find(); 2947 try { 2948 m.appendReplacement(result, "$1w$5w$3"); 2949 failCount++; 2950 } catch (IndexOutOfBoundsException ioobe) { 2951 // Correct result 2952 } 2953 2954 // Check double digit group references 2955 blah = "zzz123456789101112zzz"; 2956 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2957 m = p.matcher(blah); 2958 result = new StringBuffer(); 2959 m.find(); 2960 m.appendReplacement(result, "$1w$11w$3"); 2961 if (!result.toString().equals("zzz1w11w3")) 2962 failCount++; 2963 2964 // Check to make sure it backs off $15 to $1 if only three groups 2965 blah = "zzzabcdcdefzzz"; 2966 p = Pattern.compile("(ab)(cd)*(ef)"); 2967 m = p.matcher(blah); 2968 result = new StringBuffer(); 2969 m.find(); 2970 m.appendReplacement(result, "$1w$15w$3"); 2971 if (!result.toString().equals("zzzabwab5wef")) 2972 failCount++; 2973 2974 2975 // Supplementary character test 2976 // SB substitution with literal 2977 blah = toSupplementaries("zzzblahzzz"); 2978 p = 
Pattern.compile(toSupplementaries("blah")); 2979 m = p.matcher(blah); 2980 result = new StringBuffer(); 2981 try { 2982 m.appendReplacement(result, toSupplementaries("blech")); 2983 failCount++; 2984 } catch (IllegalStateException e) { 2985 } 2986 m.find(); 2987 m.appendReplacement(result, toSupplementaries("blech")); 2988 if (!result.toString().equals(toSupplementaries("zzzblech"))) 2989 failCount++; 2990 2991 m.appendTail(result); 2992 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 2993 failCount++; 2994 2995 // SB substitution with groups 2996 blah = toSupplementaries("zzzabcdzzz"); 2997 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 2998 m = p.matcher(blah); 2999 result = new StringBuffer(); 3000 try { 3001 m.appendReplacement(result, "$1"); 3002 failCount++; 3003 } catch (IllegalStateException e) { 3004 } 3005 m.find(); 3006 m.appendReplacement(result, "$1"); 3007 if (!result.toString().equals(toSupplementaries("zzzab"))) 3008 failCount++; 3009 3010 m.appendTail(result); 3011 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3012 failCount++; 3013 3014 // SB substitution with 3 groups 3015 blah = toSupplementaries("zzzabcdcdefzzz"); 3016 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3017 m = p.matcher(blah); 3018 result = new StringBuffer(); 3019 try { 3020 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3021 failCount++; 3022 } catch (IllegalStateException e) { 3023 } 3024 m.find(); 3025 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3026 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3027 failCount++; 3028 3029 m.appendTail(result); 3030 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3031 failCount++; 3032 3033 // SB substitution with groups and three matches 3034 // skipping middle match 3035 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3036 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3037 m = p.matcher(blah); 3038 result = new 
StringBuffer(); 3039 try { 3040 m.appendReplacement(result, "$1"); 3041 failCount++; 3042 } catch (IllegalStateException e) { 3043 } 3044 m.find(); 3045 m.appendReplacement(result, "$1"); 3046 if (!result.toString().equals(toSupplementaries("zzzab"))) 3047 failCount++; 3048 3049 m.find(); 3050 m.find(); 3051 m.appendReplacement(result, "$2"); 3052 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3053 failCount++; 3054 3055 m.appendTail(result); 3056 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3057 failCount++; 3058 3059 // Check to make sure escaped $ is ignored 3060 blah = toSupplementaries("zzzabcdcdefzzz"); 3061 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3062 m = p.matcher(blah); 3063 result = new StringBuffer(); 3064 m.find(); 3065 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3066 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3067 failCount++; 3068 3069 m.appendTail(result); 3070 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3071 failCount++; 3072 3073 // Check to make sure a reference to nonexistent group causes error 3074 blah = toSupplementaries("zzzabcdcdefzzz"); 3075 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3076 m = p.matcher(blah); 3077 result = new StringBuffer(); 3078 m.find(); 3079 try { 3080 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3081 failCount++; 3082 } catch (IndexOutOfBoundsException ioobe) { 3083 // Correct result 3084 } 3085 3086 // Check double digit group references 3087 blah = toSupplementaries("zzz123456789101112zzz"); 3088 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3089 m = p.matcher(blah); 3090 result = new StringBuffer(); 3091 m.find(); 3092 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3093 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3094 failCount++; 3095 3096 // Check to make sure it backs off $15 to $1 if only three groups 
3097 blah = toSupplementaries("zzzabcdcdefzzz"); 3098 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3099 m = p.matcher(blah); 3100 result = new StringBuffer(); 3101 m.find(); 3102 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3103 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3104 failCount++; 3105 3106 // Check nothing has been appended into the output buffer if 3107 // the replacement string triggers IllegalArgumentException. 3108 p = Pattern.compile("(abc)"); 3109 m = p.matcher("abcd"); 3110 result = new StringBuffer(); 3111 m.find(); 3112 try { 3113 m.appendReplacement(result, ("xyz$g")); 3114 failCount++; 3115 } catch (IllegalArgumentException iae) { 3116 if (result.length() != 0) 3117 failCount++; 3118 } 3119 3120 report("SB Substitution"); 3121 } 3122 3123 /** 3124 * Tests the usage of Matcher.appendReplacement() with literal 3125 * and group substitutions. 3126 */ 3127 private static void stringbuilderSubstitute() throws Exception { 3128 // SB substitution with literal 3129 String blah = "zzzblahzzz"; 3130 Pattern p = Pattern.compile("blah"); 3131 Matcher m = p.matcher(blah); 3132 StringBuilder result = new StringBuilder(); 3133 try { 3134 m.appendReplacement(result, "blech"); 3135 failCount++; 3136 } catch (IllegalStateException e) { 3137 } 3138 m.find(); 3139 m.appendReplacement(result, "blech"); 3140 if (!result.toString().equals("zzzblech")) 3141 failCount++; 3142 3143 m.appendTail(result); 3144 if (!result.toString().equals("zzzblechzzz")) 3145 failCount++; 3146 3147 // SB substitution with groups 3148 blah = "zzzabcdzzz"; 3149 p = Pattern.compile("(ab)(cd)*"); 3150 m = p.matcher(blah); 3151 result = new StringBuilder(); 3152 try { 3153 m.appendReplacement(result, "$1"); 3154 failCount++; 3155 } catch (IllegalStateException e) { 3156 } 3157 m.find(); 3158 m.appendReplacement(result, "$1"); 3159 if (!result.toString().equals("zzzab")) 3160 failCount++; 3161 3162 m.appendTail(result); 3163 if 
(!result.toString().equals("zzzabzzz")) 3164 failCount++; 3165 3166 // SB substitution with 3 groups 3167 blah = "zzzabcdcdefzzz"; 3168 p = Pattern.compile("(ab)(cd)*(ef)"); 3169 m = p.matcher(blah); 3170 result = new StringBuilder(); 3171 try { 3172 m.appendReplacement(result, "$1w$2w$3"); 3173 failCount++; 3174 } catch (IllegalStateException e) { 3175 } 3176 m.find(); 3177 m.appendReplacement(result, "$1w$2w$3"); 3178 if (!result.toString().equals("zzzabwcdwef")) 3179 failCount++; 3180 3181 m.appendTail(result); 3182 if (!result.toString().equals("zzzabwcdwefzzz")) 3183 failCount++; 3184 3185 // SB substitution with groups and three matches 3186 // skipping middle match 3187 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3188 p = Pattern.compile("(ab)(cd*)"); 3189 m = p.matcher(blah); 3190 result = new StringBuilder(); 3191 try { 3192 m.appendReplacement(result, "$1"); 3193 failCount++; 3194 } catch (IllegalStateException e) { 3195 } 3196 m.find(); 3197 m.appendReplacement(result, "$1"); 3198 if (!result.toString().equals("zzzab")) 3199 failCount++; 3200 3201 m.find(); 3202 m.find(); 3203 m.appendReplacement(result, "$2"); 3204 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3205 failCount++; 3206 3207 m.appendTail(result); 3208 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3209 failCount++; 3210 3211 // Check to make sure escaped $ is ignored 3212 blah = "zzzabcdcdefzzz"; 3213 p = Pattern.compile("(ab)(cd)*(ef)"); 3214 m = p.matcher(blah); 3215 result = new StringBuilder(); 3216 m.find(); 3217 m.appendReplacement(result, "$1w\\$2w$3"); 3218 if (!result.toString().equals("zzzabw$2wef")) 3219 failCount++; 3220 3221 m.appendTail(result); 3222 if (!result.toString().equals("zzzabw$2wefzzz")) 3223 failCount++; 3224 3225 // Check to make sure a reference to nonexistent group causes error 3226 blah = "zzzabcdcdefzzz"; 3227 p = Pattern.compile("(ab)(cd)*(ef)"); 3228 m = p.matcher(blah); 3229 result = new StringBuilder(); 3230 m.find(); 3231 try { 3232 
m.appendReplacement(result, "$1w$5w$3"); 3233 failCount++; 3234 } catch (IndexOutOfBoundsException ioobe) { 3235 // Correct result 3236 } 3237 3238 // Check double digit group references 3239 blah = "zzz123456789101112zzz"; 3240 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3241 m = p.matcher(blah); 3242 result = new StringBuilder(); 3243 m.find(); 3244 m.appendReplacement(result, "$1w$11w$3"); 3245 if (!result.toString().equals("zzz1w11w3")) 3246 failCount++; 3247 3248 // Check to make sure it backs off $15 to $1 if only three groups 3249 blah = "zzzabcdcdefzzz"; 3250 p = Pattern.compile("(ab)(cd)*(ef)"); 3251 m = p.matcher(blah); 3252 result = new StringBuilder(); 3253 m.find(); 3254 m.appendReplacement(result, "$1w$15w$3"); 3255 if (!result.toString().equals("zzzabwab5wef")) 3256 failCount++; 3257 3258 3259 // Supplementary character test 3260 // SB substitution with literal 3261 blah = toSupplementaries("zzzblahzzz"); 3262 p = Pattern.compile(toSupplementaries("blah")); 3263 m = p.matcher(blah); 3264 result = new StringBuilder(); 3265 try { 3266 m.appendReplacement(result, toSupplementaries("blech")); 3267 failCount++; 3268 } catch (IllegalStateException e) { 3269 } 3270 m.find(); 3271 m.appendReplacement(result, toSupplementaries("blech")); 3272 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3273 failCount++; 3274 m.appendTail(result); 3275 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3276 failCount++; 3277 3278 // SB substitution with groups 3279 blah = toSupplementaries("zzzabcdzzz"); 3280 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3281 m = p.matcher(blah); 3282 result = new StringBuilder(); 3283 try { 3284 m.appendReplacement(result, "$1"); 3285 failCount++; 3286 } catch (IllegalStateException e) { 3287 } 3288 m.find(); 3289 m.appendReplacement(result, "$1"); 3290 if (!result.toString().equals(toSupplementaries("zzzab"))) 3291 failCount++; 3292 3293 m.appendTail(result); 3294 if 
(!result.toString().equals(toSupplementaries("zzzabzzz"))) 3295 failCount++; 3296 3297 // SB substitution with 3 groups 3298 blah = toSupplementaries("zzzabcdcdefzzz"); 3299 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3300 m = p.matcher(blah); 3301 result = new StringBuilder(); 3302 try { 3303 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3304 failCount++; 3305 } catch (IllegalStateException e) { 3306 } 3307 m.find(); 3308 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3309 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3310 failCount++; 3311 3312 m.appendTail(result); 3313 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3314 failCount++; 3315 3316 // SB substitution with groups and three matches 3317 // skipping middle match 3318 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3319 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3320 m = p.matcher(blah); 3321 result = new StringBuilder(); 3322 try { 3323 m.appendReplacement(result, "$1"); 3324 failCount++; 3325 } catch (IllegalStateException e) { 3326 } 3327 m.find(); 3328 m.appendReplacement(result, "$1"); 3329 if (!result.toString().equals(toSupplementaries("zzzab"))) 3330 failCount++; 3331 3332 m.find(); 3333 m.find(); 3334 m.appendReplacement(result, "$2"); 3335 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3336 failCount++; 3337 3338 m.appendTail(result); 3339 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3340 failCount++; 3341 3342 // Check to make sure escaped $ is ignored 3343 blah = toSupplementaries("zzzabcdcdefzzz"); 3344 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3345 m = p.matcher(blah); 3346 result = new StringBuilder(); 3347 m.find(); 3348 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3349 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3350 failCount++; 3351 3352 m.appendTail(result); 3353 if 
(!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3354 failCount++; 3355 3356 // Check to make sure a reference to nonexistent group causes error 3357 blah = toSupplementaries("zzzabcdcdefzzz"); 3358 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3359 m = p.matcher(blah); 3360 result = new StringBuilder(); 3361 m.find(); 3362 try { 3363 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3364 failCount++; 3365 } catch (IndexOutOfBoundsException ioobe) { 3366 // Correct result 3367 } 3368 // Check double digit group references 3369 blah = toSupplementaries("zzz123456789101112zzz"); 3370 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3371 m = p.matcher(blah); 3372 result = new StringBuilder(); 3373 m.find(); 3374 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3375 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3376 failCount++; 3377 3378 // Check to make sure it backs off $15 to $1 if only three groups 3379 blah = toSupplementaries("zzzabcdcdefzzz"); 3380 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3381 m = p.matcher(blah); 3382 result = new StringBuilder(); 3383 m.find(); 3384 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3385 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3386 failCount++; 3387 // Check nothing has been appended into the output buffer if 3388 // the replacement string triggers IllegalArgumentException. 3389 p = Pattern.compile("(abc)"); 3390 m = p.matcher("abcd"); 3391 result = new StringBuilder(); 3392 m.find(); 3393 try { 3394 m.appendReplacement(result, ("xyz$g")); 3395 failCount++; 3396 } catch (IllegalArgumentException iae) { 3397 if (result.length() != 0) 3398 failCount++; 3399 } 3400 report("SB Substitution 2"); 3401 } 3402 3403 /* 3404 * 5 groups of characters are created to make a substitution string. 
3405 * A base string will be created including random lead chars, the 3406 * substitution string, and random trailing chars. 3407 * A pattern containing the 5 groups is searched for and replaced with: 3408 * random group + random string + random group. 3409 * The results are checked for correctness. 3410 */ 3411 private static void substitutionBasher() { 3412 for (int runs = 0; runs<1000; runs++) { 3413 // Create a base string to work in 3414 int leadingChars = generator.nextInt(10); 3415 StringBuffer baseBuffer = new StringBuffer(100); 3416 String leadingString = getRandomAlphaString(leadingChars); 3417 baseBuffer.append(leadingString); 3418 3419 // Create 5 groups of random number of random chars 3420 // Create the string to substitute 3421 // Create the pattern string to search for 3422 StringBuffer bufferToSub = new StringBuffer(25); 3423 StringBuffer bufferToPat = new StringBuffer(50); 3424 String[] groups = new String[5]; 3425 for(int i=0; i<5; i++) { 3426 int aGroupSize = generator.nextInt(5)+1; 3427 groups[i] = getRandomAlphaString(aGroupSize); 3428 bufferToSub.append(groups[i]); 3429 bufferToPat.append('('); 3430 bufferToPat.append(groups[i]); 3431 bufferToPat.append(')'); 3432 } 3433 String stringToSub = bufferToSub.toString(); 3434 String pattern = bufferToPat.toString(); 3435 3436 // Place sub string into working string at random index 3437 baseBuffer.append(stringToSub); 3438 3439 // Append random chars to end 3440 int trailingChars = generator.nextInt(10); 3441 String trailingString = getRandomAlphaString(trailingChars); 3442 baseBuffer.append(trailingString); 3443 String baseString = baseBuffer.toString(); 3444 3445 // Create test pattern and matcher 3446 Pattern p = Pattern.compile(pattern); 3447 Matcher m = p.matcher(baseString); 3448 3449 // Reject candidate if pattern happens to start early 3450 m.find(); 3451 if (m.start() < leadingChars) 3452 continue; 3453 3454 // Reject candidate if more than one match 3455 if (m.find()) 3456 continue; 3457 
3458 // Construct a replacement string with : 3459 // random group + random string + random group 3460 StringBuffer bufferToRep = new StringBuffer(); 3461 int groupIndex1 = generator.nextInt(5); 3462 bufferToRep.append("$" + (groupIndex1 + 1)); 3463 String randomMidString = getRandomAlphaString(5); 3464 bufferToRep.append(randomMidString); 3465 int groupIndex2 = generator.nextInt(5); 3466 bufferToRep.append("$" + (groupIndex2 + 1)); 3467 String replacement = bufferToRep.toString(); 3468 3469 // Do the replacement 3470 String result = m.replaceAll(replacement); 3471 3472 // Construct expected result 3473 StringBuffer bufferToRes = new StringBuffer(); 3474 bufferToRes.append(leadingString); 3475 bufferToRes.append(groups[groupIndex1]); 3476 bufferToRes.append(randomMidString); 3477 bufferToRes.append(groups[groupIndex2]); 3478 bufferToRes.append(trailingString); 3479 String expectedResult = bufferToRes.toString(); 3480 3481 // Check results 3482 if (!result.equals(expectedResult)) 3483 failCount++; 3484 } 3485 3486 report("Substitution Basher"); 3487 } 3488 3489 /* 3490 * 5 groups of characters are created to make a substitution string. 3491 * A base string will be created including random lead chars, the 3492 * substitution string, and random trailing chars. 3493 * A pattern containing the 5 groups is searched for and replaced with: 3494 * random group + random string + random group. 3495 * The results are checked for correctness. 
3496 */ 3497 private static void substitutionBasher2() { 3498 for (int runs = 0; runs<1000; runs++) { 3499 // Create a base string to work in 3500 int leadingChars = generator.nextInt(10); 3501 StringBuilder baseBuffer = new StringBuilder(100); 3502 String leadingString = getRandomAlphaString(leadingChars); 3503 baseBuffer.append(leadingString); 3504 3505 // Create 5 groups of random number of random chars 3506 // Create the string to substitute 3507 // Create the pattern string to search for 3508 StringBuilder bufferToSub = new StringBuilder(25); 3509 StringBuilder bufferToPat = new StringBuilder(50); 3510 String[] groups = new String[5]; 3511 for(int i=0; i<5; i++) { 3512 int aGroupSize = generator.nextInt(5)+1; 3513 groups[i] = getRandomAlphaString(aGroupSize); 3514 bufferToSub.append(groups[i]); 3515 bufferToPat.append('('); 3516 bufferToPat.append(groups[i]); 3517 bufferToPat.append(')'); 3518 } 3519 String stringToSub = bufferToSub.toString(); 3520 String pattern = bufferToPat.toString(); 3521 3522 // Place sub string into working string at random index 3523 baseBuffer.append(stringToSub); 3524 3525 // Append random chars to end 3526 int trailingChars = generator.nextInt(10); 3527 String trailingString = getRandomAlphaString(trailingChars); 3528 baseBuffer.append(trailingString); 3529 String baseString = baseBuffer.toString(); 3530 3531 // Create test pattern and matcher 3532 Pattern p = Pattern.compile(pattern); 3533 Matcher m = p.matcher(baseString); 3534 3535 // Reject candidate if pattern happens to start early 3536 m.find(); 3537 if (m.start() < leadingChars) 3538 continue; 3539 3540 // Reject candidate if more than one match 3541 if (m.find()) 3542 continue; 3543 3544 // Construct a replacement string with : 3545 // random group + random string + random group 3546 StringBuilder bufferToRep = new StringBuilder(); 3547 int groupIndex1 = generator.nextInt(5); 3548 bufferToRep.append("$" + (groupIndex1 + 1)); 3549 String randomMidString = 
getRandomAlphaString(5); 3550 bufferToRep.append(randomMidString); 3551 int groupIndex2 = generator.nextInt(5); 3552 bufferToRep.append("$" + (groupIndex2 + 1)); 3553 String replacement = bufferToRep.toString(); 3554 3555 // Do the replacement 3556 String result = m.replaceAll(replacement); 3557 3558 // Construct expected result 3559 StringBuilder bufferToRes = new StringBuilder(); 3560 bufferToRes.append(leadingString); 3561 bufferToRes.append(groups[groupIndex1]); 3562 bufferToRes.append(randomMidString); 3563 bufferToRes.append(groups[groupIndex2]); 3564 bufferToRes.append(trailingString); 3565 String expectedResult = bufferToRes.toString(); 3566 3567 // Check results 3568 if (!result.equals(expectedResult)) { 3569 failCount++; 3570 } 3571 } 3572 3573 report("Substitution Basher 2"); 3574 } 3575 3576 /** 3577 * Checks the handling of some escape sequences that the Pattern 3578 * class should process instead of the java compiler. These are 3579 * not in the file because the escapes should be be processed 3580 * by the Pattern class when the regex is compiled. 3581 */ 3582 private static void escapes() throws Exception { 3583 Pattern p = Pattern.compile("\\043"); 3584 Matcher m = p.matcher("#"); 3585 if (!m.find()) 3586 failCount++; 3587 3588 p = Pattern.compile("\\x23"); 3589 m = p.matcher("#"); 3590 if (!m.find()) 3591 failCount++; 3592 3593 p = Pattern.compile("\\u0023"); 3594 m = p.matcher("#"); 3595 if (!m.find()) 3596 failCount++; 3597 3598 report("Escape sequences"); 3599 } 3600 3601 /** 3602 * Checks the handling of blank input situations. These 3603 * tests are incompatible with my test file format. 
3604 */ 3605 private static void blankInput() throws Exception { 3606 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3607 Matcher m = p.matcher(""); 3608 if (m.find()) 3609 failCount++; 3610 3611 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3612 m = p.matcher(""); 3613 if (!m.find()) 3614 failCount++; 3615 3616 p = Pattern.compile("abc"); 3617 m = p.matcher(""); 3618 if (m.find()) 3619 failCount++; 3620 3621 p = Pattern.compile("a*"); 3622 m = p.matcher(""); 3623 if (!m.find()) 3624 failCount++; 3625 3626 report("Blank input"); 3627 } 3628 3629 /** 3630 * Tests the Boyer-Moore pattern matching of a character sequence 3631 * on randomly generated patterns. 3632 */ 3633 private static void bm() throws Exception { 3634 doBnM('a'); 3635 report("Boyer Moore (ASCII)"); 3636 3637 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3638 report("Boyer Moore (Supplementary)"); 3639 } 3640 3641 private static void doBnM(int baseCharacter) throws Exception { 3642 int achar=0; 3643 3644 for (int i=0; i<100; i++) { 3645 // Create a short pattern to search for 3646 int patternLength = generator.nextInt(7) + 4; 3647 StringBuffer patternBuffer = new StringBuffer(patternLength); 3648 String pattern; 3649 retry: for (;;) { 3650 for (int x=0; x<patternLength; x++) { 3651 int ch = baseCharacter + generator.nextInt(26); 3652 if (Character.isSupplementaryCodePoint(ch)) { 3653 patternBuffer.append(Character.toChars(ch)); 3654 } else { 3655 patternBuffer.append((char)ch); 3656 } 3657 } 3658 pattern = patternBuffer.toString(); 3659 3660 // Avoid patterns that start and end with the same substring 3661 // See JDK-6854417 3662 for (int x=1; x < pattern.length(); x++) { 3663 if (pattern.startsWith(pattern.substring(x))) 3664 continue retry; 3665 } 3666 break; 3667 } 3668 Pattern p = Pattern.compile(pattern); 3669 3670 // Create a buffer with random ASCII chars that does 3671 // not match the sample 3672 String toSearch = null; 3673 StringBuffer s = null; 3674 Matcher m = 
p.matcher(""); 3675 do { 3676 s = new StringBuffer(100); 3677 for (int x=0; x<100; x++) { 3678 int ch = baseCharacter + generator.nextInt(26); 3679 if (Character.isSupplementaryCodePoint(ch)) { 3680 s.append(Character.toChars(ch)); 3681 } else { 3682 s.append((char)ch); 3683 } 3684 } 3685 toSearch = s.toString(); 3686 m.reset(toSearch); 3687 } while (m.find()); 3688 3689 // Insert the pattern at a random spot 3690 int insertIndex = generator.nextInt(99); 3691 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3692 insertIndex++; 3693 s = s.insert(insertIndex, pattern); 3694 toSearch = s.toString(); 3695 3696 // Make sure that the pattern is found 3697 m.reset(toSearch); 3698 if (!m.find()) 3699 failCount++; 3700 3701 // Make sure that the match text is the pattern 3702 if (!m.group().equals(pattern)) 3703 failCount++; 3704 3705 // Make sure match occured at insertion point 3706 if (m.start() != insertIndex) 3707 failCount++; 3708 } 3709 } 3710 3711 /** 3712 * Tests the matching of slices on randomly generated patterns. 3713 * The Boyer-Moore optimization is not done on these patterns 3714 * because it uses unicode case folding. 
3715 */ 3716 private static void slice() throws Exception { 3717 doSlice(Character.MAX_VALUE); 3718 report("Slice"); 3719 3720 doSlice(Character.MAX_CODE_POINT); 3721 report("Slice (Supplementary)"); 3722 } 3723 3724 private static void doSlice(int maxCharacter) throws Exception { 3725 Random generator = new Random(); 3726 int achar=0; 3727 3728 for (int i=0; i<100; i++) { 3729 // Create a short pattern to search for 3730 int patternLength = generator.nextInt(7) + 4; 3731 StringBuffer patternBuffer = new StringBuffer(patternLength); 3732 for (int x=0; x<patternLength; x++) { 3733 int randomChar = 0; 3734 while (!Character.isLetterOrDigit(randomChar)) 3735 randomChar = generator.nextInt(maxCharacter); 3736 if (Character.isSupplementaryCodePoint(randomChar)) { 3737 patternBuffer.append(Character.toChars(randomChar)); 3738 } else { 3739 patternBuffer.append((char) randomChar); 3740 } 3741 } 3742 String pattern = patternBuffer.toString(); 3743 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3744 3745 // Create a buffer with random chars that does not match the sample 3746 String toSearch = null; 3747 StringBuffer s = null; 3748 Matcher m = p.matcher(""); 3749 do { 3750 s = new StringBuffer(100); 3751 for (int x=0; x<100; x++) { 3752 int randomChar = 0; 3753 while (!Character.isLetterOrDigit(randomChar)) 3754 randomChar = generator.nextInt(maxCharacter); 3755 if (Character.isSupplementaryCodePoint(randomChar)) { 3756 s.append(Character.toChars(randomChar)); 3757 } else { 3758 s.append((char) randomChar); 3759 } 3760 } 3761 toSearch = s.toString(); 3762 m.reset(toSearch); 3763 } while (m.find()); 3764 3765 // Insert the pattern at a random spot 3766 int insertIndex = generator.nextInt(99); 3767 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3768 insertIndex++; 3769 s = s.insert(insertIndex, pattern); 3770 toSearch = s.toString(); 3771 3772 // Make sure that the pattern is found 3773 m.reset(toSearch); 3774 if (!m.find()) 3775 failCount++; 3776 3777 // 
Make sure that the match text is the pattern 3778 if (!m.group().equals(pattern)) 3779 failCount++; 3780 3781 // Make sure match occured at insertion point 3782 if (m.start() != insertIndex) 3783 failCount++; 3784 } 3785 } 3786 3787 private static void explainFailure(String pattern, String data, 3788 String expected, String actual) { 3789 System.err.println("----------------------------------------"); 3790 System.err.println("Pattern = "+pattern); 3791 System.err.println("Data = "+data); 3792 System.err.println("Expected = " + expected); 3793 System.err.println("Actual = " + actual); 3794 } 3795 3796 private static void explainFailure(String pattern, String data, 3797 Throwable t) { 3798 System.err.println("----------------------------------------"); 3799 System.err.println("Pattern = "+pattern); 3800 System.err.println("Data = "+data); 3801 t.printStackTrace(System.err); 3802 } 3803 3804 // Testing examples from a file 3805 3806 /** 3807 * Goes through the file "TestCases.txt" and creates many patterns 3808 * described in the file, matching the patterns against input lines in 3809 * the file, and comparing the results against the correct results 3810 * also found in the file. The file format is described in comments 3811 * at the head of the file. 3812 */ 3813 private static void processFile(String fileName) throws Exception { 3814 File testCases = new File(System.getProperty("test.src", "."), 3815 fileName); 3816 FileInputStream in = new FileInputStream(testCases); 3817 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3818 3819 // Process next test case. 
3820 String aLine; 3821 while((aLine = r.readLine()) != null) { 3822 // Read a line for pattern 3823 String patternString = grabLine(r); 3824 Pattern p = null; 3825 try { 3826 p = compileTestPattern(patternString); 3827 } catch (PatternSyntaxException e) { 3828 String dataString = grabLine(r); 3829 String expectedResult = grabLine(r); 3830 if (expectedResult.startsWith("error")) 3831 continue; 3832 explainFailure(patternString, dataString, e); 3833 failCount++; 3834 continue; 3835 } 3836 3837 // Read a line for input string 3838 String dataString = grabLine(r); 3839 Matcher m = p.matcher(dataString); 3840 StringBuffer result = new StringBuffer(); 3841 3842 // Check for IllegalStateExceptions before a match 3843 failCount += preMatchInvariants(m); 3844 3845 boolean found = m.find(); 3846 3847 if (found) 3848 failCount += postTrueMatchInvariants(m); 3849 else 3850 failCount += postFalseMatchInvariants(m); 3851 3852 if (found) { 3853 result.append("true "); 3854 result.append(m.group(0) + " "); 3855 } else { 3856 result.append("false "); 3857 } 3858 3859 result.append(m.groupCount()); 3860 3861 if (found) { 3862 for (int i=1; i<m.groupCount()+1; i++) 3863 if (m.group(i) != null) 3864 result.append(" " +m.group(i)); 3865 } 3866 3867 // Read a line for the expected result 3868 String expectedResult = grabLine(r); 3869 3870 if (!result.toString().equals(expectedResult)) { 3871 explainFailure(patternString, dataString, expectedResult, result.toString()); 3872 failCount++; 3873 } 3874 } 3875 3876 report(fileName); 3877 } 3878 3879 private static int preMatchInvariants(Matcher m) { 3880 int failCount = 0; 3881 try { 3882 m.start(); 3883 failCount++; 3884 } catch (IllegalStateException ise) {} 3885 try { 3886 m.end(); 3887 failCount++; 3888 } catch (IllegalStateException ise) {} 3889 try { 3890 m.group(); 3891 failCount++; 3892 } catch (IllegalStateException ise) {} 3893 return failCount; 3894 } 3895 3896 private static int postFalseMatchInvariants(Matcher m) { 3897 int 
failCount = 0; 3898 try { 3899 m.group(); 3900 failCount++; 3901 } catch (IllegalStateException ise) {} 3902 try { 3903 m.start(); 3904 failCount++; 3905 } catch (IllegalStateException ise) {} 3906 try { 3907 m.end(); 3908 failCount++; 3909 } catch (IllegalStateException ise) {} 3910 return failCount; 3911 } 3912 3913 private static int postTrueMatchInvariants(Matcher m) { 3914 int failCount = 0; 3915 //assert(m.start() = m.start(0); 3916 if (m.start() != m.start(0)) 3917 failCount++; 3918 //assert(m.end() = m.end(0); 3919 if (m.start() != m.start(0)) 3920 failCount++; 3921 //assert(m.group() = m.group(0); 3922 if (!m.group().equals(m.group(0))) 3923 failCount++; 3924 try { 3925 m.group(50); 3926 failCount++; 3927 } catch (IndexOutOfBoundsException ise) {} 3928 3929 return failCount; 3930 } 3931 3932 private static Pattern compileTestPattern(String patternString) { 3933 if (!patternString.startsWith("'")) { 3934 return Pattern.compile(patternString); 3935 } 3936 int break1 = patternString.lastIndexOf("'"); 3937 String flagString = patternString.substring( 3938 break1+1, patternString.length()); 3939 patternString = patternString.substring(1, break1); 3940 3941 if (flagString.equals("i")) 3942 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3943 3944 if (flagString.equals("m")) 3945 return Pattern.compile(patternString, Pattern.MULTILINE); 3946 3947 return Pattern.compile(patternString); 3948 } 3949 3950 /** 3951 * Reads a line from the input file. Keeps reading lines until a non 3952 * empty non comment line is read. If the line contains a \n then 3953 * these two characters are replaced by a newline char. If a \\uxxxx 3954 * sequence is read then the sequence is replaced by the unicode char. 
3955 */ 3956 private static String grabLine(BufferedReader r) throws Exception { 3957 int index = 0; 3958 String line = r.readLine(); 3959 while (line.startsWith("//") || line.length() < 1) 3960 line = r.readLine(); 3961 while ((index = line.indexOf("\\n")) != -1) { 3962 StringBuffer temp = new StringBuffer(line); 3963 temp.replace(index, index+2, "\n"); 3964 line = temp.toString(); 3965 } 3966 while ((index = line.indexOf("\\u")) != -1) { 3967 StringBuffer temp = new StringBuffer(line); 3968 String value = temp.substring(index+2, index+6); 3969 char aChar = (char)Integer.parseInt(value, 16); 3970 String unicodeChar = "" + aChar; 3971 temp.replace(index, index+6, unicodeChar); 3972 line = temp.toString(); 3973 } 3974 3975 return line; 3976 } 3977 3978 private static void check(Pattern p, String s, String g, String expected) { 3979 Matcher m = p.matcher(s); 3980 m.find(); 3981 if (!m.group(g).equals(expected) || 3982 s.charAt(m.start(g)) != expected.charAt(0) || 3983 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 3984 failCount++; 3985 } 3986 3987 private static void checkReplaceFirst(String p, String s, String r, String expected) 3988 { 3989 if (!expected.equals(Pattern.compile(p) 3990 .matcher(s) 3991 .replaceFirst(r))) 3992 failCount++; 3993 } 3994 3995 private static void checkReplaceAll(String p, String s, String r, String expected) 3996 { 3997 if (!expected.equals(Pattern.compile(p) 3998 .matcher(s) 3999 .replaceAll(r))) 4000 failCount++; 4001 } 4002 4003 private static void checkExpectedFail(String p) { 4004 try { 4005 Pattern.compile(p); 4006 } catch (PatternSyntaxException pse) { 4007 //pse.printStackTrace(); 4008 return; 4009 } 4010 failCount++; 4011 } 4012 4013 private static void checkExpectedIAE(Matcher m, String g) { 4014 m.find(); 4015 try { 4016 m.group(g); 4017 } catch (IllegalArgumentException x) { 4018 //iae.printStackTrace(); 4019 try { 4020 m.start(g); 4021 } catch (IllegalArgumentException xx) { 4022 try { 4023 m.start(g); 
4024 } catch (IllegalArgumentException xxx) { 4025 return; 4026 } 4027 } 4028 } 4029 failCount++; 4030 } 4031 4032 private static void checkExpectedNPE(Matcher m) { 4033 m.find(); 4034 try { 4035 m.group(null); 4036 } catch (NullPointerException x) { 4037 try { 4038 m.start(null); 4039 } catch (NullPointerException xx) { 4040 try { 4041 m.end(null); 4042 } catch (NullPointerException xxx) { 4043 return; 4044 } 4045 } 4046 } 4047 failCount++; 4048 } 4049 4050 private static void namedGroupCaptureTest() throws Exception { 4051 check(Pattern.compile("x+(?<gname>y+)z+"), 4052 "xxxyyyzzz", 4053 "gname", 4054 "yyy"); 4055 4056 check(Pattern.compile("x+(?<gname8>y+)z+"), 4057 "xxxyyyzzz", 4058 "gname8", 4059 "yyy"); 4060 4061 //backref 4062 Pattern pattern = Pattern.compile("(a*)bc\\1"); 4063 check(pattern, "zzzaabcazzz", true); // found "abca" 4064 4065 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 4066 "zzzaabcaazzz", true); 4067 4068 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 4069 "abcdefabc", true); 4070 4071 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 4072 "abcdefghijkk", true); 4073 4074 // Supplementary character tests 4075 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4076 toSupplementaries("zzzaabcazzz"), true); 4077 4078 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4079 toSupplementaries("zzzaabcaazzz"), true); 4080 4081 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 4082 toSupplementaries("abcdefabc"), true); 4083 4084 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 4085 "(?<gname>" + 4086 toSupplementaries("k)") + "\\k<gname>"), 4087 toSupplementaries("abcdefghijkk"), true); 4088 4089 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 4090 "xxxyyyzzzyyy", 4091 "gname", 4092 "yyy"); 4093 4094 //replaceFirst/All 4095 checkReplaceFirst("(?<gn>ab)(c*)", 4096 
"abccczzzabcczzzabccc", 4097 "${gn}", 4098 "abzzzabcczzzabccc"); 4099 4100 checkReplaceAll("(?<gn>ab)(c*)", 4101 "abccczzzabcczzzabccc", 4102 "${gn}", 4103 "abzzzabzzzab"); 4104 4105 4106 checkReplaceFirst("(?<gn>ab)(c*)", 4107 "zzzabccczzzabcczzzabccczzz", 4108 "${gn}", 4109 "zzzabzzzabcczzzabccczzz"); 4110 4111 checkReplaceAll("(?<gn>ab)(c*)", 4112 "zzzabccczzzabcczzzabccczzz", 4113 "${gn}", 4114 "zzzabzzzabzzzabzzz"); 4115 4116 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4117 "zzzabccczzzabcczzzabccczzz", 4118 "${gn2}", 4119 "zzzccczzzabcczzzabccczzz"); 4120 4121 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4122 "zzzabccczzzabcczzzabccczzz", 4123 "${gn2}", 4124 "zzzccczzzcczzzccczzz"); 4125 4126 //toSupplementaries("(ab)(c*)")); 4127 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4128 ")(?<gn2>" + toSupplementaries("c") + "*)", 4129 toSupplementaries("abccczzzabcczzzabccc"), 4130 "${gn1}", 4131 toSupplementaries("abzzzabcczzzabccc")); 4132 4133 4134 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4135 ")(?<gn2>" + toSupplementaries("c") + "*)", 4136 toSupplementaries("abccczzzabcczzzabccc"), 4137 "${gn1}", 4138 toSupplementaries("abzzzabzzzab")); 4139 4140 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4141 ")(?<gn2>" + toSupplementaries("c") + "*)", 4142 toSupplementaries("abccczzzabcczzzabccc"), 4143 "${gn2}", 4144 toSupplementaries("ccczzzabcczzzabccc")); 4145 4146 4147 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4148 ")(?<gn2>" + toSupplementaries("c") + "*)", 4149 toSupplementaries("abccczzzabcczzzabccc"), 4150 "${gn2}", 4151 toSupplementaries("ccczzzcczzzccc")); 4152 4153 checkReplaceFirst("(?<dog>Dog)AndCat", 4154 "zzzDogAndCatzzzDogAndCatzzz", 4155 "${dog}", 4156 "zzzDogzzzDogAndCatzzz"); 4157 4158 4159 checkReplaceAll("(?<dog>Dog)AndCat", 4160 "zzzDogAndCatzzzDogAndCatzzz", 4161 "${dog}", 4162 "zzzDogzzzDogzzz"); 4163 4164 // backref in Matcher & String 4165 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", 
"${gn}").equals("abefij") || 4166 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4167 failCount++; 4168 4169 // negative 4170 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4171 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4172 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4173 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4174 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4175 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4176 "gnameX"); 4177 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4178 report("NamedGroupCapture"); 4179 } 4180 4181 // This is for bug 6919132 4182 private static void nonBmpClassComplementTest() throws Exception { 4183 Pattern p = Pattern.compile("\\P{Lu}"); 4184 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4185 4186 if (m.find() && m.start() == 1) 4187 failCount++; 4188 4189 // from a unicode category 4190 p = Pattern.compile("\\P{Lu}"); 4191 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4192 if (m.find()) 4193 failCount++; 4194 if (!m.hitEnd()) 4195 failCount++; 4196 4197 // block 4198 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4199 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4200 if (m.find() && m.start() == 1) 4201 failCount++; 4202 4203 p = Pattern.compile("\\P{sc=GRANTHA}"); 4204 m = p.matcher(new String(new int[] {0x11350}, 0, 1)); 4205 if (m.find() && m.start() == 1) 4206 failCount++; 4207 4208 report("NonBmpClassComplement"); 4209 } 4210 4211 private static void unicodePropertiesTest() throws Exception { 4212 // different forms 4213 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4214 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4215 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4216 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4217 
!Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4218 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4219 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4220 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4221 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4222 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4223 failCount++; 4224 4225 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4226 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4227 Matcher lastSM = common; 4228 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4229 4230 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4231 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4232 Matcher lastBM = latin; 4233 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4234 4235 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4236 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4237 continue; // only pick couple code points, they are the same 4238 } 4239 4240 // Unicode Script 4241 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4242 Matcher m; 4243 String str = new String(Character.toChars(cp)); 4244 if (script == lastScript) { 4245 m = lastSM; 4246 m.reset(str); 4247 } else { 4248 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4249 } 4250 if (!m.matches()) { 4251 failCount++; 4252 } 4253 Matcher other = (script == Character.UnicodeScript.COMMON)? 
unknown : common; 4254 other.reset(str); 4255 if (other.matches()) { 4256 failCount++; 4257 } 4258 lastSM = m; 4259 lastScript = script; 4260 4261 // Unicode Block 4262 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4263 if (block == null) { 4264 //System.out.printf("Not a Block: cp=%x%n", cp); 4265 continue; 4266 } 4267 if (block == lastBlock) { 4268 m = lastBM; 4269 m.reset(str); 4270 } else { 4271 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4272 } 4273 if (!m.matches()) { 4274 failCount++; 4275 } 4276 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4277 other.reset(str); 4278 if (other.matches()) { 4279 failCount++; 4280 } 4281 lastBM = m; 4282 lastBlock = block; 4283 } 4284 report("unicodeProperties"); 4285 } 4286 4287 private static void unicodeHexNotationTest() throws Exception { 4288 4289 // negative 4290 checkExpectedFail("\\x{-23}"); 4291 checkExpectedFail("\\x{110000}"); 4292 checkExpectedFail("\\x{}"); 4293 checkExpectedFail("\\x{AB[ef]"); 4294 4295 // codepoint 4296 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4297 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4298 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4299 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4300 4301 // in class 4302 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4303 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4304 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4305 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4306 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4307 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4308 4309 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4310 String s = "A" + new String(Character.toChars(cp)) + "B"; 4311 String hexUTF16 = (cp <= 0xFFFF)? 
String.format("\\u%04x", cp) 4312 : String.format("\\u%04x\\u%04x", 4313 (int) Character.toChars(cp)[0], 4314 (int) Character.toChars(cp)[1]); 4315 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4316 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4317 failCount++; 4318 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4319 failCount++; 4320 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4321 failCount++; 4322 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4323 failCount++; 4324 } 4325 report("unicodeHexNotation"); 4326 } 4327 4328 private static void unicodeClassesTest() throws Exception { 4329 4330 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4331 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4332 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4333 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4334 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4335 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4336 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4337 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4338 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4339 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4340 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4341 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4342 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4343 Matcher bound = Pattern.compile("\\b").matcher(""); 4344 Matcher word = Pattern.compile("\\w++").matcher(""); 4345 // UNICODE_CHARACTER_CLASS 4346 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4347 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4348 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4349 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 
4350 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4351 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4352 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4353 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4354 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4355 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4356 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4357 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4358 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4359 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4360 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4361 // embedded flag (?U) 4362 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4363 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4364 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4365 4366 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4367 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4368 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4369 // properties 4370 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4371 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4372 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4373 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4374 Matcher alphaP = 
Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4375 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4376 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4377 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4378 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4379 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4380 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4381 // javaMethod 4382 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4383 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4384 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4385 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4386 // GC/C 4387 Matcher gcC = Pattern.compile("\\p{C}").matcher(""); 4388 4389 for (int cp = 1; cp < 0x30000; cp++) { 4390 String str = new String(Character.toChars(cp)); 4391 int type = Character.getType(cp); 4392 if (// lower 4393 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4394 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4395 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4396 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4397 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4398 // upper 4399 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4400 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4401 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4402 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4403 // alpha 4404 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4405 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4406 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4407 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4408 // digit 4409 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4410 
Character.isDigit(cp) != digitU.reset(str).matches() || 4411 // alnum 4412 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4413 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4414 // punct 4415 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4416 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4417 // graph 4418 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4419 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4420 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4421 // blank 4422 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4423 != blank.reset(str).matches() || 4424 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4425 // print 4426 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4427 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4428 // cntrl 4429 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4430 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4431 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4432 // hexdigit 4433 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4434 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4435 // space 4436 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4437 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4438 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4439 // word 4440 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4441 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4442 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4443 // bwordb 4444 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4445 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4446 // properties 4447 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4448 Character.isLetter(cp) != letterP.reset(str).matches()|| 4449 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4450 Character.isIdeographic(cp) != 
ideogJ.reset(str).matches() || 4451 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 4452 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4453 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() || 4454 // gc_C 4455 (Character.CONTROL == type || Character.FORMAT == type || 4456 Character.PRIVATE_USE == type || Character.SURROGATE == type || 4457 Character.UNASSIGNED == type) 4458 != gcC.reset(str).matches()) { 4459 failCount++; 4460 } 4461 } 4462 4463 // bounds/word align 4464 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4465 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4466 failCount++; 4467 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4468 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4469 failCount++; 4470 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4471 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4472 failCount++; 4473 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4474 failCount++; 4475 report("unicodePredefinedClasses"); 4476 } 4477 4478 private static void unicodeCharacterNameTest() throws Exception { 4479 4480 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { 4481 if (!Character.isValidCodePoint(cp) || 4482 Character.getType(cp) == Character.UNASSIGNED) 4483 continue; 4484 String str = new String(Character.toChars(cp)); 4485 // single 4486 String p = "\\N{" + Character.getName(cp) + "}"; 4487 if (!Pattern.compile(p).matcher(str).matches()) { 4488 failCount++; 4489 } 4490 // class[c] 4491 p = "[\\N{" + Character.getName(cp) + "}]"; 4492 if (!Pattern.compile(p).matcher(str).matches()) { 4493 failCount++; 4494 } 4495 } 4496 4497 // range 4498 for (int i = 0; i < 10; i++) { 4499 int start = generator.nextInt(20); 4500 int end = start + generator.nextInt(200); 4501 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; 4502 String str; 4503 for (int cp = start; cp < end; cp++) { 4504 str = new 
String(Character.toChars(cp)); 4505 if (!Pattern.compile(p).matcher(str).matches()) { 4506 failCount++; 4507 } 4508 } 4509 str = new String(Character.toChars(end + 10)); 4510 if (Pattern.compile(p).matcher(str).matches()) { 4511 failCount++; 4512 } 4513 } 4514 4515 // slice 4516 for (int i = 0; i < 10; i++) { 4517 int n = generator.nextInt(256); 4518 int[] buf = new int[n]; 4519 StringBuffer sb = new StringBuffer(1024); 4520 for (int j = 0; j < n; j++) { 4521 int cp = generator.nextInt(1000); 4522 if (!Character.isValidCodePoint(cp) || 4523 Character.getType(cp) == Character.UNASSIGNED) 4524 cp = 0x4e00; // just use 4e00 4525 sb.append("\\N{" + Character.getName(cp) + "}"); 4526 buf[j] = cp; 4527 } 4528 String p = sb.toString(); 4529 String str = new String(buf, 0, buf.length); 4530 if (!Pattern.compile(p).matcher(str).matches()) { 4531 failCount++; 4532 } 4533 } 4534 report("unicodeCharacterName"); 4535 } 4536 4537 private static void horizontalAndVerticalWSTest() throws Exception { 4538 String hws = new String (new char[] { 4539 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4540 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4541 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4542 0x202f, 0x205f, 0x3000 }); 4543 String vws = new String (new char[] { 4544 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4545 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4546 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4547 failCount++; 4548 if (Pattern.compile("\\H").matcher(hws).find() || 4549 Pattern.compile("[\\H]").matcher(hws).find()) 4550 failCount++; 4551 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4552 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4553 failCount++; 4554 if (Pattern.compile("\\V").matcher(vws).find() || 4555 Pattern.compile("[\\V]").matcher(vws).find()) 4556 failCount++; 4557 String prefix = "abcd"; 4558 String suffix = "efgh"; 4559 String ng = "A"; 4560 for (int i = 0; i < hws.length(); i++) { 4561 String c = String.valueOf(hws.charAt(i)); 
4562 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4563 if (!m.find() || !c.equals(m.group())) 4564 failCount++; 4565 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4566 if (!m.find() || !c.equals(m.group())) 4567 failCount++; 4568 4569 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4570 if (!m.find() || !ng.equals(m.group())) 4571 failCount++; 4572 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4573 if (!m.find() || !ng.equals(m.group())) 4574 failCount++; 4575 } 4576 for (int i = 0; i < vws.length(); i++) { 4577 String c = String.valueOf(vws.charAt(i)); 4578 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4579 if (!m.find() || !c.equals(m.group())) 4580 failCount++; 4581 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4582 if (!m.find() || !c.equals(m.group())) 4583 failCount++; 4584 4585 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4586 if (!m.find() || !ng.equals(m.group())) 4587 failCount++; 4588 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4589 if (!m.find() || !ng.equals(m.group())) 4590 failCount++; 4591 } 4592 // \v in range is interpreted as 0x0B. 
This is the undocumented behavior 4593 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4594 failCount++; 4595 report("horizontalAndVerticalWSTest"); 4596 } 4597 4598 private static void linebreakTest() throws Exception { 4599 String linebreaks = new String (new char[] { 4600 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4601 String crnl = "\r\n"; 4602 if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() && 4603 Pattern.compile("\\R").matcher(crnl).matches() && 4604 Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() && 4605 Pattern.compile("\\Rabc").matcher("\rabc").matches() && 4606 Pattern.compile("\\R\\R").matcher(crnl).matches() && // backtracking 4607 Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking 4608 !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029 4609 failCount++; 4610 } 4611 report("linebreakTest"); 4612 } 4613 4614 // #7189363 4615 private static void branchTest() throws Exception { 4616 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4617 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4618 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4619 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4620 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4621 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4622 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4623 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4624 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4625 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4626 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4627 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4628 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4629 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4630 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4631 
!Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4632 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4633 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4634 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4635 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4636 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 4637 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4638 failCount++; 4639 report("branchTest"); 4640 } 4641 4642 // This test is for 8007395 4643 private static void groupCurlyNotFoundSuppTest() throws Exception { 4644 String input = "test this as \ud83d\ude0d"; 4645 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4646 "test(.)*(@[a-zA-Z.]+)", 4647 "test([^B])+(@[a-zA-Z.]+)", 4648 "test([^B])*(@[a-zA-Z.]+)", 4649 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4650 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4651 }) { 4652 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4653 .matcher(input); 4654 try { 4655 if (m.find()) { 4656 failCount++; 4657 } 4658 } catch (Exception x) { 4659 failCount++; 4660 } 4661 } 4662 report("GroupCurly NotFoundSupp"); 4663 } 4664 4665 // This test is for 8023647 4666 private static void groupCurlyBackoffTest() throws Exception { 4667 if (!"abc1c".matches("(\\w)+1\\1") || 4668 "abc11".matches("(\\w)+1\\1")) { 4669 failCount++; 4670 } 4671 report("GroupCurly backoff"); 4672 } 4673 4674 // This test is for 8012646 4675 private static void patternAsPredicate() throws Exception { 4676 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4677 4678 if (p.test("")) { 4679 failCount++; 4680 } 4681 if (!p.test("word")) { 4682 failCount++; 4683 } 4684 if (p.test("1234")) { 4685 failCount++; 4686 } 4687 if (!p.test("word1234")) { 4688 failCount++; 4689 } 4690 report("Pattern.asPredicate"); 4691 } 4692 4693 // This test is for 8184692 4694 private static void patternAsMatchPredicate() throws Exception { 4695 Predicate<String> p = 
Pattern.compile("[a-z]+").asMatchPredicate(); 4696 4697 if (p.test("")) { 4698 failCount++; 4699 } 4700 if (!p.test("word")) { 4701 failCount++; 4702 } 4703 if (p.test("1234word")) { 4704 failCount++; 4705 } 4706 if (p.test("1234")) { 4707 failCount++; 4708 } 4709 report("Pattern.asMatchPredicate"); 4710 } 4711 4712 4713 // This test is for 8035975 4714 private static void invalidFlags() throws Exception { 4715 for (int flag = 1; flag != 0; flag <<= 1) { 4716 switch (flag) { 4717 case Pattern.CASE_INSENSITIVE: 4718 case Pattern.MULTILINE: 4719 case Pattern.DOTALL: 4720 case Pattern.UNICODE_CASE: 4721 case Pattern.CANON_EQ: 4722 case Pattern.UNIX_LINES: 4723 case Pattern.LITERAL: 4724 case Pattern.UNICODE_CHARACTER_CLASS: 4725 case Pattern.COMMENTS: 4726 // valid flag, continue 4727 break; 4728 default: 4729 try { 4730 Pattern.compile(".", flag); 4731 failCount++; 4732 } catch (IllegalArgumentException expected) { 4733 } 4734 } 4735 } 4736 report("Invalid compile flags"); 4737 } 4738 4739 // This test is for 8158482 4740 private static void embeddedFlags() throws Exception { 4741 try { 4742 Pattern.compile("(?i).(?-i)."); 4743 Pattern.compile("(?m).(?-m)."); 4744 Pattern.compile("(?s).(?-s)."); 4745 Pattern.compile("(?d).(?-d)."); 4746 Pattern.compile("(?u).(?-u)."); 4747 Pattern.compile("(?c).(?-c)."); 4748 Pattern.compile("(?x).(?-x)."); 4749 Pattern.compile("(?U).(?-U)."); 4750 Pattern.compile("(?imsducxU).(?-imsducxU)."); 4751 } catch (PatternSyntaxException x) { 4752 failCount++; 4753 } 4754 report("Embedded flags"); 4755 } 4756 4757 private static void grapheme() throws Exception { 4758 Files.lines(Paths.get(System.getProperty("test.src", "."), 4759 "GraphemeBreakTest.txt")) 4760 .filter( ln -> ln.length() != 0 && !ln.startsWith("#") ) 4761 .forEach( ln -> { 4762 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); 4763 // System.out.println(str); 4764 String[] strs = ln.split("\u00f7|\u00d7"); 4765 StringBuilder src = new StringBuilder(); 4766 
ArrayList<String> graphemes = new ArrayList<>(); 4767 StringBuilder buf = new StringBuilder(); 4768 int offBk = 0; 4769 for (String str : strs) { 4770 if (str.length() == 0) // first empty str 4771 continue; 4772 int cp = Integer.parseInt(str, 16); 4773 src.appendCodePoint(cp); 4774 buf.appendCodePoint(cp); 4775 offBk += (str.length() + 1); 4776 if (ln.charAt(offBk) == '\u00f7') { // DIV 4777 graphemes.add(buf.toString()); 4778 buf = new StringBuilder(); 4779 } 4780 } 4781 Pattern p = Pattern.compile("\\X"); 4782 Matcher m = p.matcher(src.toString()); 4783 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4784 for (String g : graphemes) { 4785 // System.out.printf(" grapheme:=[%s]%n", g); 4786 // (1) test \\X directly 4787 if (!m.find() || !m.group().equals(g)) { 4788 System.out.println("Failed \\X [" + ln + "] : " + g); 4789 failCount++; 4790 } 4791 // (2) test \\b{g} + \\X via Scanner 4792 boolean hasNext = s.hasNext(p); 4793 // if (!s.hasNext() || !s.next().equals(next)) { 4794 if (!s.hasNext(p) || !s.next(p).equals(g)) { 4795 System.out.println("Failed b{g} [" + ln + "] : " + g); 4796 failCount++; 4797 } 4798 } 4799 }); 4800 // some sanity checks 4801 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() || 4802 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() || 4803 !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches()) 4804 failCount++; 4805 // make sure "\b{n}" still works 4806 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches()) 4807 failCount++; 4808 report("Unicode extended grapheme cluster"); 4809 } 4810 4811 // hangup/timeout if go into exponential backtracking 4812 private static void expoBacktracking() throws Exception { 4813 4814 Object[][] patternMatchers = { 4815 // 6328855 4816 { "(.*\n*)*", 4817 "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work 
again)", 4818 false }, 4819 // 6192895 4820 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4821 "Hello World this is a test this is a test this is a test A", 4822 true }, 4823 { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+", 4824 "Hello World this is a test this is a test this is a test \u4e00 ", 4825 false }, 4826 { " *([a-z0-9]+ *)+", 4827 "hello world this is a test this is a test this is a test A", 4828 false }, 4829 // 4771934 [FIXED] #5013651? 4830 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4831 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com", 4832 true }, 4833 // 4866249 [FIXED] 4834 { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>", 4835 "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">", 4836 true }, 4837 { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$", 4838 "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com", 4839 false }, 4840 // 6345469 4841 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4842 " < br/> < / p> <p> <html> <adfasfdasdf> </p>", 4843 true }, // --> matched 4844 { "((<[^>]+>)?(((\\s)?)*(\\ )?)*((\\s)?)*)+", 4845 " < br/> < / p> <p> <html> <adfasfdasdf> p </p>", 4846 false }, 4847 // 5026912 4848 { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$", 4849 "156580451111112225588087755221111111566969655555555", 4850 false}, 4851 // 6988218 4852 { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')", 4853 "'%)) order by ANGEBOT.ID", 4854 false}, // find 4855 // 6693451 4856 { "^(\\s*foo\\s*)*$", 4857 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo", 4858 true }, 4859 { "^(\\s*foo\\s*)*$", 4860 "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo 
foo foo foo foo foo foo foo foo foo foo fo", 4861 false 4862 }, 4863 // 7006761 4864 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true}, 4865 { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false}, 4866 // 8140212 4867 { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)", 4868 "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()", 4869 false 4870 }, 4871 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true}, 4872 { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false}, 4873 4874 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true }, 4875 { "(x+)*y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4876 4877 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true}, 4878 { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false}, 4879 4880 { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false}, 4881 4882 /* not fixed 4883 //8132141 ---> second level exponential backtracking 4884 { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*", 4885 "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" }, 4886 */ 4887 }; 4888 4889 for (Object[] pm : patternMatchers) { 4890 String p = (String)pm[0]; 4891 String s = (String)pm[1]; 4892 boolean r = (Boolean)pm[2]; 4893 if (r != Pattern.compile(p).matcher(s).matches()) { 4894 failCount++; 4895 } 4896 } 4897 } 4898 4899 private static void invalidGroupName() { 4900 // Invalid start of a group name 4901 for (String groupName : List.of("", ".", "0", "\u0040", "\u005b", 4902 "\u0060", "\u007b", "\u0416")) { 4903 
for (String pat : List.of("(?<" + groupName + ">)", 4904 "\\k<" + groupName + ">")) { 4905 try { 4906 Pattern.compile(pat); 4907 failCount++; 4908 } catch (PatternSyntaxException e) { 4909 if (!e.getMessage().startsWith( 4910 "capturing group name does not start with a" 4911 + " Latin letter")) { 4912 failCount++; 4913 } 4914 } 4915 } 4916 } 4917 // Invalid char in a group name 4918 for (String groupName : List.of("a.", "b\u0040", "c\u005b", 4919 "d\u0060", "e\u007b", "f\u0416")) { 4920 for (String pat : List.of("(?<" + groupName + ">)", 4921 "\\k<" + groupName + ">")) { 4922 try { 4923 Pattern.compile(pat); 4924 failCount++; 4925 } catch (PatternSyntaxException e) { 4926 if (!e.getMessage().startsWith( 4927 "named capturing group is missing trailing '>'")) { 4928 failCount++; 4929 } 4930 } 4931 } 4932 } 4933 report("Invalid capturing group names"); 4934 } 4935 }