/*
 * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 36 * 8151481 37 * @library /lib/testlibrary 38 * @build jdk.testlibrary.* 39 * @run main RegExTest 40 * @key randomness 41 */ 42 43 import java.util.function.Function; 44 import java.util.regex.*; 45 import java.util.Random; 46 import java.util.Scanner; 47 import java.io.*; 48 import java.nio.file.*; 49 import java.util.*; 50 import java.nio.CharBuffer; 51 import java.util.function.Predicate; 52 import jdk.testlibrary.RandomFactory; 53 54 /** 55 * This is a test class created to check the operation of 56 * the Pattern and Matcher classes. 57 */ 58 public class RegExTest { 59 60 private static Random generator = RandomFactory.getRandom(); 61 private static boolean failure = false; 62 private static int failCount = 0; 63 private static String firstFailure = null; 64 65 /** 66 * Main to interpret arguments and run several tests. 
*
     */
    public static void main(String[] args) throws Exception {
        // Most of the tests are in a file
        processFile("TestCases.txt");
        //processFile("PerlCases.txt");
        processFile("BMPTestCases.txt");
        processFile("SupplementaryTestCases.txt");

        // These test many randomly generated char patterns
        bm();
        slice();

        // These are hard to put into the file
        escapes();
        blankInput();

        // Substitution tests on randomly generated sequences
        globalSubstitute();
        stringbufferSubstitute();
        stringbuilderSubstitute();

        substitutionBasher();
        substitutionBasher2();

        // Canonical Equivalence
        ceTest();

        // Anchors
        anchorTest();

        // boolean match calls
        matchesTest();
        lookingAtTest();

        // Pattern API
        patternMatchesTest();

        // Misc
        lookbehindTest();
        nullArgumentTest();
        backRefTest();
        groupCaptureTest();
        caretTest();
        charClassTest();
        emptyPatternTest();
        findIntTest();
        group0Test();
        longPatternTest();
        octalTest();
        ampersandTest();
        negationTest();
        splitTest();
        appendTest();
        caseFoldingTest();
        commentsTest();
        unixLinesTest();
        replaceFirstTest();
        gTest();
        zTest();
        serializeTest();
        reluctantRepetitionTest();
        multilineDollarTest();
        dollarAtEndTest();
        caretBetweenTerminatorsTest();
        // This RFE rejected in Tiger numOccurrencesTest();
        javaCharClassTest();
        nonCaptureRepetitionTest();
        notCapturedGroupCurlyMatchTest();
        escapedSegmentTest();
        literalPatternTest();
        literalReplacementTest();
        regionTest();
        toStringTest();
        negatedCharClassTest();
        findFromTest();
        boundsTest();
        unicodeWordBoundsTest();
        caretAtEndTest();
        wordSearchTest();
        hitEndTest();
        toMatchResultTest();
        toMatchResultTest2();
        surrogatesInClassTest();
        removeQEQuotingTest();
        namedGroupCaptureTest();
        nonBmpClassComplementTest();
unicodePropertiesTest(); 155 unicodeHexNotationTest(); 156 unicodeClassesTest(); 157 unicodeCharacterNameTest(); 158 horizontalAndVerticalWSTest(); 159 linebreakTest(); 160 branchTest(); 161 groupCurlyNotFoundSuppTest(); 162 groupCurlyBackoffTest(); 163 patternAsPredicate(); 164 invalidFlags(); 165 grapheme(); 166 167 if (failure) { 168 throw new 169 RuntimeException("RegExTest failed, 1st failure: " + 170 firstFailure); 171 } else { 172 System.err.println("OKAY: All tests passed."); 173 } 174 } 175 176 // Utility functions 177 178 private static String getRandomAlphaString(int length) { 179 StringBuffer buf = new StringBuffer(length); 180 for (int i=0; i<length; i++) { 181 char randChar = (char)(97 + generator.nextInt(26)); 182 buf.append(randChar); 183 } 184 return buf.toString(); 185 } 186 187 private static void check(Matcher m, String expected) { 188 m.find(); 189 if (!m.group().equals(expected)) 190 failCount++; 191 } 192 193 private static void check(Matcher m, String result, boolean expected) { 194 m.find(); 195 if (m.group().equals(result) != expected) 196 failCount++; 197 } 198 199 private static void check(Pattern p, String s, boolean expected) { 200 if (p.matcher(s).find() != expected) 201 failCount++; 202 } 203 204 private static void check(String p, String s, boolean expected) { 205 Matcher matcher = Pattern.compile(p).matcher(s); 206 if (matcher.find() != expected) 207 failCount++; 208 } 209 210 private static void check(String p, char c, boolean expected) { 211 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 212 Pattern pattern = Pattern.compile(propertyPattern); 213 char[] ca = new char[1]; ca[0] = c; 214 Matcher matcher = pattern.matcher(new String(ca)); 215 if (!matcher.find()) 216 failCount++; 217 } 218 219 private static void check(String p, int codePoint, boolean expected) { 220 String propertyPattern = expected ? 
"\\p" + p : "\\P" + p; 221 Pattern pattern = Pattern.compile(propertyPattern); 222 char[] ca = Character.toChars(codePoint); 223 Matcher matcher = pattern.matcher(new String(ca)); 224 if (!matcher.find()) 225 failCount++; 226 } 227 228 private static void check(String p, int flag, String input, String s, 229 boolean expected) 230 { 231 Pattern pattern = Pattern.compile(p, flag); 232 Matcher matcher = pattern.matcher(input); 233 if (expected) 234 check(matcher, s, expected); 235 else 236 check(pattern, input, false); 237 } 238 239 private static void report(String testName) { 240 int spacesToAdd = 30 - testName.length(); 241 StringBuffer paddedNameBuffer = new StringBuffer(testName); 242 for (int i=0; i<spacesToAdd; i++) 243 paddedNameBuffer.append(" "); 244 String paddedName = paddedNameBuffer.toString(); 245 System.err.println(paddedName + ": " + 246 (failCount==0 ? "Passed":"Failed("+failCount+")")); 247 if (failCount > 0) { 248 failure = true; 249 250 if (firstFailure == null) { 251 firstFailure = testName; 252 } 253 } 254 255 failCount = 0; 256 } 257 258 /** 259 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 260 * supplementary characters. This method does NOT fully take care 261 * of the regex syntax. 
262 */ 263 private static String toSupplementaries(String s) { 264 int length = s.length(); 265 StringBuffer sb = new StringBuffer(length * 2); 266 267 for (int i = 0; i < length; ) { 268 char c = s.charAt(i++); 269 if (c == '\\') { 270 sb.append(c); 271 if (i < length) { 272 c = s.charAt(i++); 273 sb.append(c); 274 if (c == 'u') { 275 // assume no syntax error 276 sb.append(s.charAt(i++)); 277 sb.append(s.charAt(i++)); 278 sb.append(s.charAt(i++)); 279 sb.append(s.charAt(i++)); 280 } 281 } 282 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 283 sb.append('\ud800').append((char)('\udc00'+c)); 284 } else { 285 sb.append(c); 286 } 287 } 288 return sb.toString(); 289 } 290 291 // Regular expression tests 292 293 // This is for bug 6178785 294 // Test if an expected NPE gets thrown when passing in a null argument 295 private static boolean check(Runnable test) { 296 try { 297 test.run(); 298 failCount++; 299 return false; 300 } catch (NullPointerException npe) { 301 return true; 302 } 303 } 304 305 private static void nullArgumentTest() { 306 check(() -> Pattern.compile(null)); 307 check(() -> Pattern.matches(null, null)); 308 check(() -> Pattern.matches("xyz", null)); 309 check(() -> Pattern.quote(null)); 310 check(() -> Pattern.compile("xyz").split(null)); 311 check(() -> Pattern.compile("xyz").matcher(null)); 312 313 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 314 m.matches(); 315 check(() -> m.appendTail((StringBuffer) null)); 316 check(() -> m.appendTail((StringBuilder)null)); 317 check(() -> m.replaceAll((String) null)); 318 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 319 check(() -> m.replaceFirst((String)null)); 320 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 321 check(() -> m.appendReplacement((StringBuffer)null, null)); 322 check(() -> m.appendReplacement((StringBuilder)null, null)); 323 check(() -> m.reset(null)); 324 check(() -> Matcher.quoteReplacement(null)); 325 //check(() -> 
m.usePattern(null)); 326 327 report("Null Argument"); 328 } 329 330 // This is for bug6635133 331 // Test if surrogate pair in Unicode escapes can be handled correctly. 332 private static void surrogatesInClassTest() throws Exception { 333 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 334 Matcher matcher = pattern.matcher("\ud834\udd22"); 335 if (!matcher.find()) 336 failCount++; 337 338 report("Surrogate pair in Unicode escape"); 339 } 340 341 // This is for bug6990617 342 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 343 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 344 // char is an octal digit. 345 private static void removeQEQuotingTest() throws Exception { 346 Pattern pattern = 347 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 348 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 349 if (!matcher.find()) 350 failCount++; 351 352 report("Remove Q/E Quoting"); 353 } 354 355 // This is for bug 4988891 356 // Test toMatchResult to see that it is a copy of the Matcher 357 // that is not affected by subsequent operations on the original 358 private static void toMatchResultTest() throws Exception { 359 Pattern pattern = Pattern.compile("squid"); 360 Matcher matcher = pattern.matcher( 361 "agiantsquidofdestinyasmallsquidoffate"); 362 matcher.find(); 363 int matcherStart1 = matcher.start(); 364 MatchResult mr = matcher.toMatchResult(); 365 if (mr == matcher) 366 failCount++; 367 int resultStart1 = mr.start(); 368 if (matcherStart1 != resultStart1) 369 failCount++; 370 matcher.find(); 371 int matcherStart2 = matcher.start(); 372 int resultStart2 = mr.start(); 373 if (matcherStart2 == resultStart2) 374 failCount++; 375 if (resultStart1 != resultStart2) 376 failCount++; 377 MatchResult mr2 = matcher.toMatchResult(); 378 if (mr == mr2) 379 failCount++; 380 if (mr2.start() != matcherStart2) 381 failCount++; 382 report("toMatchResult is a copy"); 383 } 384 385 private 
static void checkExpectedISE(Runnable test) { 386 try { 387 test.run(); 388 failCount++; 389 } catch (IllegalStateException x) { 390 } catch (IndexOutOfBoundsException xx) { 391 failCount++; 392 } 393 } 394 395 private static void checkExpectedIOOE(Runnable test) { 396 try { 397 test.run(); 398 failCount++; 399 } catch (IndexOutOfBoundsException x) {} 400 } 401 402 // This is for bug 8074678 403 // Test the result of toMatchResult throws ISE if no match is availble 404 private static void toMatchResultTest2() throws Exception { 405 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 406 matcher.find(); 407 MatchResult mr = matcher.toMatchResult(); 408 409 checkExpectedISE(() -> mr.start()); 410 checkExpectedISE(() -> mr.start(2)); 411 checkExpectedISE(() -> mr.end()); 412 checkExpectedISE(() -> mr.end(2)); 413 checkExpectedISE(() -> mr.group()); 414 checkExpectedISE(() -> mr.group(2)); 415 416 matcher = Pattern.compile("(match)").matcher("there is a match"); 417 matcher.find(); 418 MatchResult mr2 = matcher.toMatchResult(); 419 checkExpectedIOOE(() -> mr2.start(2)); 420 checkExpectedIOOE(() -> mr2.end(2)); 421 checkExpectedIOOE(() -> mr2.group(2)); 422 423 report("toMatchResult2 appropriate exceptions"); 424 } 425 426 // This is for bug 5013885 427 // Must test a slice to see if it reports hitEnd correctly 428 private static void hitEndTest() throws Exception { 429 // Basic test of Slice node 430 Pattern p = Pattern.compile("^squidattack"); 431 Matcher m = p.matcher("squack"); 432 m.find(); 433 if (m.hitEnd()) 434 failCount++; 435 m.reset("squid"); 436 m.find(); 437 if (!m.hitEnd()) 438 failCount++; 439 440 // Test Slice, SliceA and SliceU nodes 441 for (int i=0; i<3; i++) { 442 int flags = 0; 443 if (i==1) flags = Pattern.CASE_INSENSITIVE; 444 if (i==2) flags = Pattern.UNICODE_CASE; 445 p = Pattern.compile("^abc", flags); 446 m = p.matcher("ad"); 447 m.find(); 448 if (m.hitEnd()) 449 failCount++; 450 m.reset("ab"); 451 m.find(); 452 if 
(!m.hitEnd()) 453 failCount++; 454 } 455 456 // Test Boyer-Moore node 457 p = Pattern.compile("catattack"); 458 m = p.matcher("attack"); 459 m.find(); 460 if (!m.hitEnd()) 461 failCount++; 462 463 p = Pattern.compile("catattack"); 464 m = p.matcher("attackattackattackcatatta"); 465 m.find(); 466 if (!m.hitEnd()) 467 failCount++; 468 report("hitEnd from a Slice"); 469 } 470 471 // This is for bug 4997476 472 // It is weird code submitted by customer demonstrating a regression 473 private static void wordSearchTest() throws Exception { 474 String testString = new String("word1 word2 word3"); 475 Pattern p = Pattern.compile("\\b"); 476 Matcher m = p.matcher(testString); 477 int position = 0; 478 int start = 0; 479 while (m.find(position)) { 480 start = m.start(); 481 if (start == testString.length()) 482 break; 483 if (m.find(start+1)) { 484 position = m.start(); 485 } else { 486 position = testString.length(); 487 } 488 if (testString.substring(start, position).equals(" ")) 489 continue; 490 if (!testString.substring(start, position-1).startsWith("word")) 491 failCount++; 492 } 493 report("Customer word search"); 494 } 495 496 // This is for bug 4994840 497 private static void caretAtEndTest() throws Exception { 498 // Problem only occurs with multiline patterns 499 // containing a beginning-of-line caret "^" followed 500 // by an expression that also matches the empty string. 
501 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 502 Matcher matcher = pattern.matcher("\r"); 503 matcher.find(); 504 matcher.find(); 505 report("Caret at end"); 506 } 507 508 // This test is for 4979006 509 // Check to see if word boundary construct properly handles unicode 510 // non spacing marks 511 private static void unicodeWordBoundsTest() throws Exception { 512 String spaces = " "; 513 String wordChar = "a"; 514 String nsm = "\u030a"; 515 516 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 517 518 Pattern pattern = Pattern.compile("\\b"); 519 Matcher matcher = pattern.matcher(""); 520 // S=other B=word character N=non spacing mark .=word boundary 521 // SS.BB.SS 522 String input = spaces + wordChar + wordChar + spaces; 523 twoFindIndexes(input, matcher, 2, 4); 524 // SS.BBN.SS 525 input = spaces + wordChar +wordChar + nsm + spaces; 526 twoFindIndexes(input, matcher, 2, 5); 527 // SS.BN.SS 528 input = spaces + wordChar + nsm + spaces; 529 twoFindIndexes(input, matcher, 2, 4); 530 // SS.BNN.SS 531 input = spaces + wordChar + nsm + nsm + spaces; 532 twoFindIndexes(input, matcher, 2, 5); 533 // SSN.BB.SS 534 input = spaces + nsm + wordChar + wordChar + spaces; 535 twoFindIndexes(input, matcher, 3, 5); 536 // SS.BNB.SS 537 input = spaces + wordChar + nsm + wordChar + spaces; 538 twoFindIndexes(input, matcher, 2, 5); 539 // SSNNSS 540 input = spaces + nsm + nsm + spaces; 541 matcher.reset(input); 542 if (matcher.find()) 543 failCount++; 544 // SSN.BBN.SS 545 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 546 twoFindIndexes(input, matcher, 3, 6); 547 548 report("Unicode word boundary"); 549 } 550 551 private static void twoFindIndexes(String input, Matcher matcher, int a, 552 int b) throws Exception 553 { 554 matcher.reset(input); 555 matcher.find(); 556 if (matcher.start() != a) 557 failCount++; 558 matcher.find(); 559 if (matcher.start() != b) 560 failCount++; 561 } 562 563 // This test is for 6284152 564 static 
void check(String regex, String input, String[] expected) { 565 List<String> result = new ArrayList<String>(); 566 Pattern p = Pattern.compile(regex); 567 Matcher m = p.matcher(input); 568 while (m.find()) { 569 result.add(m.group()); 570 } 571 if (!Arrays.asList(expected).equals(result)) 572 failCount++; 573 } 574 575 private static void lookbehindTest() throws Exception { 576 //Positive 577 check("(?<=%.{0,5})foo\\d", 578 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 579 new String[]{"foo1", "foo2", "foo3"}); 580 581 //boundary at end of the lookbehind sub-regex should work consistently 582 //with the boundary just after the lookbehind sub-regex 583 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 584 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 585 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 586 check("(?<!abc \\b)foo", "abc foo", new String[0]); 587 588 //Negative 589 check("(?<!%.{0,5})foo\\d", 590 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 591 new String[] {"foo4", "foo5"}); 592 593 //Positive greedy 594 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 595 596 //Positive reluctant 597 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 598 599 //supplementary 600 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 601 new String[] {"fo\ud800\udc00o"}); 602 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 603 new String[] {"fo\ud800\udc00o"}); 604 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 605 new String[] {"fo\ud800\udc00o"}); 606 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 607 new String[] {"fo\ud800\udc00o"}); 608 report("Lookbehind"); 609 } 610 611 // This test is for 4938995 612 // Check to see if weak region boundaries are transparent to 613 // lookahead and lookbehind constructs 614 private static void boundsTest() throws Exception { 615 String fullMessage = "catdogcat"; 616 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 617 
Matcher matcher = pattern.matcher("catdogca");
        matcher.useTransparentBounds(true);
        if (matcher.find())
            failCount++;
        matcher.reset("atdogcat");
        if (matcher.find())
            failCount++;
        matcher.reset(fullMessage);
        if (!matcher.find())
            failCount++;
        matcher.reset(fullMessage);
        matcher.region(0,9);
        if (!matcher.find())
            failCount++;
        matcher.reset(fullMessage);
        matcher.region(0,6);
        if (!matcher.find())
            failCount++;
        matcher.reset(fullMessage);
        matcher.region(3,6);
        if (!matcher.find())
            failCount++;
        matcher.useTransparentBounds(false);
        if (matcher.find())
            failCount++;

        // Negative lookahead/lookbehind
        pattern = Pattern.compile("(?<!cat)dog(?!cat)");
        matcher = pattern.matcher("dogcat");
        matcher.useTransparentBounds(true);
        matcher.region(0,3);
        if (matcher.find())
            failCount++;
        matcher.reset("catdog");
        matcher.region(3,6);
        if (matcher.find())
            failCount++;
        matcher.useTransparentBounds(false);
        matcher.reset("dogcat");
        matcher.region(0,3);
        if (!matcher.find())
            failCount++;
        matcher.reset("catdog");
        matcher.region(3,6);
        if (!matcher.find())
            failCount++;

        report("Region bounds transparency");
    }

    // This test is for 4945394
    private static void findFromTest() throws Exception {
        String message = "This is 40 $0 message.";
        Pattern pat = Pattern.compile("\\$0");
        Matcher match = pat.matcher(message);
        // The literal "$0" occurs once, so only the first find may succeed
        if (!match.find())
            failCount++;
        if (match.find())
            failCount++;
        if (match.find())
            failCount++;
        report("Check for alternating find");
    }

    // This test is for 4872664 and 4892980
    private static void negatedCharClassTest() throws Exception {
        Pattern pattern = Pattern.compile("[^>]");
        Matcher matcher = pattern.matcher("\u203A");
        if (!matcher.matches())
            failCount++;
        pattern = Pattern.compile("[^fr]");
        matcher = pattern.matcher("a");
689 if (!matcher.find()) 690 failCount++; 691 matcher.reset("\u203A"); 692 if (!matcher.find()) 693 failCount++; 694 String s = "for"; 695 String result[] = s.split("[^fr]"); 696 if (!result[0].equals("f")) 697 failCount++; 698 if (!result[1].equals("r")) 699 failCount++; 700 s = "f\u203Ar"; 701 result = s.split("[^fr]"); 702 if (!result[0].equals("f")) 703 failCount++; 704 if (!result[1].equals("r")) 705 failCount++; 706 707 // Test adding to bits, subtracting a node, then adding to bits again 708 pattern = Pattern.compile("[^f\u203Ar]"); 709 matcher = pattern.matcher("a"); 710 if (!matcher.find()) 711 failCount++; 712 matcher.reset("f"); 713 if (matcher.find()) 714 failCount++; 715 matcher.reset("\u203A"); 716 if (matcher.find()) 717 failCount++; 718 matcher.reset("r"); 719 if (matcher.find()) 720 failCount++; 721 matcher.reset("\u203B"); 722 if (!matcher.find()) 723 failCount++; 724 725 // Test subtracting a node, adding to bits, subtracting again 726 pattern = Pattern.compile("[^\u203Ar\u203B]"); 727 matcher = pattern.matcher("a"); 728 if (!matcher.find()) 729 failCount++; 730 matcher.reset("\u203A"); 731 if (matcher.find()) 732 failCount++; 733 matcher.reset("r"); 734 if (matcher.find()) 735 failCount++; 736 matcher.reset("\u203B"); 737 if (matcher.find()) 738 failCount++; 739 matcher.reset("\u203C"); 740 if (!matcher.find()) 741 failCount++; 742 743 report("Negated Character Class"); 744 } 745 746 // This test is for 4628291 747 private static void toStringTest() throws Exception { 748 Pattern pattern = Pattern.compile("b+"); 749 if (pattern.toString() != "b+") 750 failCount++; 751 Matcher matcher = pattern.matcher("aaabbbccc"); 752 String matcherString = matcher.toString(); // unspecified 753 matcher.find(); 754 matcherString = matcher.toString(); // unspecified 755 matcher.region(0,3); 756 matcherString = matcher.toString(); // unspecified 757 matcher.reset(); 758 matcherString = matcher.toString(); // unspecified 759 report("toString"); 760 } 761 762 // 
This test is for 4808962 763 private static void literalPatternTest() throws Exception { 764 int flags = Pattern.LITERAL; 765 766 Pattern pattern = Pattern.compile("abc\\t$^", flags); 767 check(pattern, "abc\\t$^", true); 768 769 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 770 check(pattern, "abc\\t$^", true); 771 772 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 773 check(pattern, "\\Qa^$bcabc\\E", true); 774 check(pattern, "a^$bcabc", false); 775 776 pattern = Pattern.compile("\\\\Q\\\\E"); 777 check(pattern, "\\Q\\E", true); 778 779 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 780 check(pattern, "abcefg\\Q\\Ehij", true); 781 782 pattern = Pattern.compile("\\\\\\Q\\\\E"); 783 check(pattern, "\\\\\\\\", true); 784 785 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 786 check(pattern, "\\Qa^$bcabc\\E", true); 787 check(pattern, "a^$bcabc", false); 788 789 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 790 check(pattern, "\\Qabc\\Edef", true); 791 check(pattern, "abcdef", false); 792 793 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 794 check(pattern, "abc\\Edef", true); 795 check(pattern, "abcdef", false); 796 797 pattern = Pattern.compile(Pattern.quote("\\E")); 798 check(pattern, "\\E", true); 799 800 pattern = Pattern.compile("((((abc.+?:)", flags); 801 check(pattern, "((((abc.+?:)", true); 802 803 flags |= Pattern.MULTILINE; 804 805 pattern = Pattern.compile("^cat$", flags); 806 check(pattern, "abc^cat$def", true); 807 check(pattern, "cat", false); 808 809 flags |= Pattern.CASE_INSENSITIVE; 810 811 pattern = Pattern.compile("abcdef", flags); 812 check(pattern, "ABCDEF", true); 813 check(pattern, "AbCdEf", true); 814 815 flags |= Pattern.DOTALL; 816 817 pattern = Pattern.compile("a...b", flags); 818 check(pattern, "A...b", true); 819 check(pattern, "Axxxb", false); 820 821 flags |= Pattern.CANON_EQ; 822 823 Pattern p = Pattern.compile("testa\u030a", flags); 824 check(pattern, "testa\u030a", false); 825 
check(pattern, "test\u00e5", false);

        // Supplementary character test
        flags = Pattern.LITERAL;

        pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
        check(pattern, toSupplementaries("abc\\t$^"), true);

        pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
        check(pattern, toSupplementaries("abc\\t$^"), true);

        pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
        check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
        check(pattern, toSupplementaries("a^$bcabc"), false);

        pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
        check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
        check(pattern, toSupplementaries("a^$bcabc"), false);

        pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
        check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
        check(pattern, toSupplementaries("abcdef"), false);

        pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
        check(pattern, toSupplementaries("abc\\Edef"), true);
        check(pattern, toSupplementaries("abcdef"), false);

        pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
        check(pattern, toSupplementaries("((((abc.+?:)"), true);

        flags |= Pattern.MULTILINE;

        pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
        check(pattern, toSupplementaries("abc^cat$def"), true);
        check(pattern, toSupplementaries("cat"), false);

        flags |= Pattern.DOTALL;

        // note: this is case-sensitive.
864 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 865 check(pattern, toSupplementaries("a...b"), true); 866 check(pattern, toSupplementaries("axxxb"), false); 867 868 flags |= Pattern.CANON_EQ; 869 870 String t = toSupplementaries("test"); 871 p = Pattern.compile(t + "a\u030a", flags); 872 check(pattern, t + "a\u030a", false); 873 check(pattern, t + "\u00e5", false); 874 875 report("Literal pattern"); 876 } 877 878 // This test is for 4803179 879 // This test is also for 4808962, replacement parts 880 private static void literalReplacementTest() throws Exception { 881 int flags = Pattern.LITERAL; 882 883 Pattern pattern = Pattern.compile("abc", flags); 884 Matcher matcher = pattern.matcher("zzzabczzz"); 885 String replaceTest = "$0"; 886 String result = matcher.replaceAll(replaceTest); 887 if (!result.equals("zzzabczzz")) 888 failCount++; 889 890 matcher.reset(); 891 String literalReplacement = matcher.quoteReplacement(replaceTest); 892 result = matcher.replaceAll(literalReplacement); 893 if (!result.equals("zzz$0zzz")) 894 failCount++; 895 896 matcher.reset(); 897 replaceTest = "\\t$\\$"; 898 literalReplacement = matcher.quoteReplacement(replaceTest); 899 result = matcher.replaceAll(literalReplacement); 900 if (!result.equals("zzz\\t$\\$zzz")) 901 failCount++; 902 903 // Supplementary character test 904 pattern = Pattern.compile(toSupplementaries("abc"), flags); 905 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 906 replaceTest = "$0"; 907 result = matcher.replaceAll(replaceTest); 908 if (!result.equals(toSupplementaries("zzzabczzz"))) 909 failCount++; 910 911 matcher.reset(); 912 literalReplacement = matcher.quoteReplacement(replaceTest); 913 result = matcher.replaceAll(literalReplacement); 914 if (!result.equals(toSupplementaries("zzz$0zzz"))) 915 failCount++; 916 917 matcher.reset(); 918 replaceTest = "\\t$\\$"; 919 literalReplacement = matcher.quoteReplacement(replaceTest); 920 result = matcher.replaceAll(literalReplacement); 921 if 
(!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 922 failCount++; 923 924 // IAE should be thrown if backslash or '$' is the last character 925 // in replacement string 926 try { 927 "\uac00".replaceAll("\uac00", "$"); 928 failCount++; 929 } catch (IllegalArgumentException iie) { 930 } catch (Exception e) { 931 failCount++; 932 } 933 try { 934 "\uac00".replaceAll("\uac00", "\\"); 935 failCount++; 936 } catch (IllegalArgumentException iie) { 937 } catch (Exception e) { 938 failCount++; 939 } 940 report("Literal replacement"); 941 } 942 943 // This test is for 4757029 944 private static void regionTest() throws Exception { 945 Pattern pattern = Pattern.compile("abc"); 946 Matcher matcher = pattern.matcher("abcdefabc"); 947 948 matcher.region(0,9); 949 if (!matcher.find()) 950 failCount++; 951 if (!matcher.find()) 952 failCount++; 953 matcher.region(0,3); 954 if (!matcher.find()) 955 failCount++; 956 matcher.region(3,6); 957 if (matcher.find()) 958 failCount++; 959 matcher.region(0,2); 960 if (matcher.find()) 961 failCount++; 962 963 expectRegionFail(matcher, 1, -1); 964 expectRegionFail(matcher, -1, -1); 965 expectRegionFail(matcher, -1, 1); 966 expectRegionFail(matcher, 5, 3); 967 expectRegionFail(matcher, 5, 12); 968 expectRegionFail(matcher, 12, 12); 969 970 pattern = Pattern.compile("^abc$"); 971 matcher = pattern.matcher("zzzabczzz"); 972 matcher.region(0,9); 973 if (matcher.find()) 974 failCount++; 975 matcher.region(3,6); 976 if (!matcher.find()) 977 failCount++; 978 matcher.region(3,6); 979 matcher.useAnchoringBounds(false); 980 if (matcher.find()) 981 failCount++; 982 983 // Supplementary character test 984 pattern = Pattern.compile(toSupplementaries("abc")); 985 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 986 matcher.region(0,9*2); 987 if (!matcher.find()) 988 failCount++; 989 if (!matcher.find()) 990 failCount++; 991 matcher.region(0,3*2); 992 if (!matcher.find()) 993 failCount++; 994 matcher.region(1,3*2); 995 if (matcher.find()) 996 
failCount++;
        matcher.region(3*2,6*2);
        if (matcher.find())
           failCount++;
        matcher.region(0,2*2);
        if (matcher.find())
           failCount++;
        matcher.region(0,2*2+1);
        if (matcher.find())
           failCount++;

        expectRegionFail(matcher, 1*2, -1);
        expectRegionFail(matcher, -1, -1);
        expectRegionFail(matcher, -1, 1*2);
        expectRegionFail(matcher, 5*2, 3*2);
        expectRegionFail(matcher, 5*2, 12*2);
        expectRegionFail(matcher, 12*2, 12*2);

        pattern = Pattern.compile(toSupplementaries("^abc$"));
        matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
        matcher.region(0,9*2);
        if (matcher.find())
            failCount++;
        matcher.region(3*2,6*2);
        if (!matcher.find())
           failCount++;
        matcher.region(3*2+1,6*2);
        if (matcher.find())
           failCount++;
        matcher.region(3*2,6*2-1);
        if (matcher.find())
           failCount++;
        matcher.region(3*2,6*2);
        matcher.useAnchoringBounds(false);
        if (matcher.find())
           failCount++;
        report("Regions");
    }

    /**
     * Records a failure unless setting the region of {@code matcher} to
     * [{@code index1}, {@code index2}) is rejected with an
     * IndexOutOfBoundsException or an IllegalStateException.
     */
    private static void expectRegionFail(Matcher matcher, int index1,
                                         int index2)
    {
        try {
            matcher.region(index1, index2);
            failCount++;
        } catch (IndexOutOfBoundsException | IllegalStateException expected) {
            // Correct result
        }
    }

    // This test is for 4803197
    private static void escapedSegmentTest() throws Exception {

        Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
        check(pattern, "dir1\\dir2", true);

        pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
        check(pattern, "dir1\\dir2\\", true);

        pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
        check(pattern, "dir1\\dir2\\", true);

        // Supplementary character test
        pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
        check(pattern, toSupplementaries("dir1\\dir2"), true);

        pattern =
Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1065 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1066 1067 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1068 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1069 1070 report("Escaped segment"); 1071 } 1072 1073 // This test is for 4792284 1074 private static void nonCaptureRepetitionTest() throws Exception { 1075 String input = "abcdefgh;"; 1076 1077 String[] patterns = new String[] { 1078 "(?:\\w{4})+;", 1079 "(?:\\w{8})*;", 1080 "(?:\\w{2}){2,4};", 1081 "(?:\\w{4}){2,};", // only matches the 1082 ".*?(?:\\w{5})+;", // specified minimum 1083 ".*?(?:\\w{9})*;", // number of reps - OK 1084 "(?:\\w{4})+?;", // lazy repetition - OK 1085 "(?:\\w{4})++;", // possessive repetition - OK 1086 "(?:\\w{2,}?)+;", // non-deterministic - OK 1087 "(\\w{4})+;", // capturing group - OK 1088 }; 1089 1090 for (int i = 0; i < patterns.length; i++) { 1091 // Check find() 1092 check(patterns[i], 0, input, input, true); 1093 // Check matches() 1094 Pattern p = Pattern.compile(patterns[i]); 1095 Matcher m = p.matcher(input); 1096 1097 if (m.matches()) { 1098 if (!m.group(0).equals(input)) 1099 failCount++; 1100 } else { 1101 failCount++; 1102 } 1103 } 1104 1105 report("Non capturing repetition"); 1106 } 1107 1108 // This test is for 6358731 1109 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1110 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1111 Matcher matcher = pattern.matcher("abcd"); 1112 if (!matcher.matches() || 1113 matcher.group(1) != null || 1114 !matcher.group(2).equals("abcd")) { 1115 failCount++; 1116 } 1117 report("Not captured GroupCurly"); 1118 } 1119 1120 // This test is for 4706545 1121 private static void javaCharClassTest() throws Exception { 1122 for (int i=0; i<1000; i++) { 1123 char c = (char)generator.nextInt(); 1124 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1125 check("{javaUpperCase}", c, 
Character.isUpperCase(c)); 1126 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1127 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1128 check("{javaDigit}", c, Character.isDigit(c)); 1129 check("{javaDefined}", c, Character.isDefined(c)); 1130 check("{javaLetter}", c, Character.isLetter(c)); 1131 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1132 check("{javaJavaIdentifierStart}", c, 1133 Character.isJavaIdentifierStart(c)); 1134 check("{javaJavaIdentifierPart}", c, 1135 Character.isJavaIdentifierPart(c)); 1136 check("{javaUnicodeIdentifierStart}", c, 1137 Character.isUnicodeIdentifierStart(c)); 1138 check("{javaUnicodeIdentifierPart}", c, 1139 Character.isUnicodeIdentifierPart(c)); 1140 check("{javaIdentifierIgnorable}", c, 1141 Character.isIdentifierIgnorable(c)); 1142 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1143 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1144 check("{javaISOControl}", c, Character.isISOControl(c)); 1145 check("{javaMirrored}", c, Character.isMirrored(c)); 1146 1147 } 1148 1149 // Supplementary character test 1150 for (int i=0; i<1000; i++) { 1151 int c = generator.nextInt(Character.MAX_CODE_POINT 1152 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1153 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1154 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1155 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1156 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1157 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1158 check("{javaDigit}", c, Character.isDigit(c)); 1159 check("{javaDefined}", c, Character.isDefined(c)); 1160 check("{javaLetter}", c, Character.isLetter(c)); 1161 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1162 check("{javaJavaIdentifierStart}", c, 1163 Character.isJavaIdentifierStart(c)); 1164 check("{javaJavaIdentifierPart}", c, 1165 Character.isJavaIdentifierPart(c)); 1166 check("{javaUnicodeIdentifierStart}", c, 1167 
Character.isUnicodeIdentifierStart(c)); 1168 check("{javaUnicodeIdentifierPart}", c, 1169 Character.isUnicodeIdentifierPart(c)); 1170 check("{javaIdentifierIgnorable}", c, 1171 Character.isIdentifierIgnorable(c)); 1172 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1173 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1174 check("{javaISOControl}", c, Character.isISOControl(c)); 1175 check("{javaMirrored}", c, Character.isMirrored(c)); 1176 } 1177 1178 report("Java character classes"); 1179 } 1180 1181 // This test is for 4523620 1182 /* 1183 private static void numOccurrencesTest() throws Exception { 1184 Pattern pattern = Pattern.compile("aaa"); 1185 1186 if (pattern.numOccurrences("aaaaaa", false) != 2) 1187 failCount++; 1188 if (pattern.numOccurrences("aaaaaa", true) != 4) 1189 failCount++; 1190 1191 pattern = Pattern.compile("^"); 1192 if (pattern.numOccurrences("aaaaaa", false) != 1) 1193 failCount++; 1194 if (pattern.numOccurrences("aaaaaa", true) != 1) 1195 failCount++; 1196 1197 report("Number of Occurrences"); 1198 } 1199 */ 1200 1201 // This test is for 4776374 1202 private static void caretBetweenTerminatorsTest() throws Exception { 1203 int flags1 = Pattern.DOTALL; 1204 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1205 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1206 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1207 1208 check("^....", flags1, "test\ntest", "test", true); 1209 check(".....^", flags1, "test\ntest", "test", false); 1210 check(".....^", flags1, "test\n", "test", false); 1211 check("....^", flags1, "test\r\n", "test", false); 1212 1213 check("^....", flags2, "test\ntest", "test", true); 1214 check("....^", flags2, "test\ntest", "test", false); 1215 check(".....^", flags2, "test\n", "test", false); 1216 check("....^", flags2, "test\r\n", "test", false); 1217 1218 check("^....", flags3, "test\ntest", "test", true); 1219 check(".....^", flags3, "test\ntest", "test\n", true); 1220 
check(".....^", flags3, "test\u0085test", "test\u0085", false); 1221 check(".....^", flags3, "test\n", "test", false); 1222 check(".....^", flags3, "test\r\n", "test", false); 1223 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1224 1225 check("^....", flags4, "test\ntest", "test", true); 1226 check(".....^", flags3, "test\ntest", "test\n", true); 1227 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1228 check(".....^", flags4, "test\n", "test\n", false); 1229 check(".....^", flags4, "test\r\n", "test\r", false); 1230 1231 // Supplementary character test 1232 String t = toSupplementaries("test"); 1233 check("^....", flags1, t+"\n"+t, t, true); 1234 check(".....^", flags1, t+"\n"+t, t, false); 1235 check(".....^", flags1, t+"\n", t, false); 1236 check("....^", flags1, t+"\r\n", t, false); 1237 1238 check("^....", flags2, t+"\n"+t, t, true); 1239 check("....^", flags2, t+"\n"+t, t, false); 1240 check(".....^", flags2, t+"\n", t, false); 1241 check("....^", flags2, t+"\r\n", t, false); 1242 1243 check("^....", flags3, t+"\n"+t, t, true); 1244 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1245 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1246 check(".....^", flags3, t+"\n", t, false); 1247 check(".....^", flags3, t+"\r\n", t, false); 1248 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1249 1250 check("^....", flags4, t+"\n"+t, t, true); 1251 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1252 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1253 check(".....^", flags4, t+"\n", t+"\n", false); 1254 check(".....^", flags4, t+"\r\n", t+"\r", false); 1255 1256 report("Caret between terminators"); 1257 } 1258 1259 // This test is for 4727935 1260 private static void dollarAtEndTest() throws Exception { 1261 int flags1 = Pattern.DOTALL; 1262 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1263 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1264 1265 check("....$", flags1, "test\n", "test", true); 1266 
check("....$", flags1, "test\r\n", "test", true); 1267 check(".....$", flags1, "test\n", "test\n", true); 1268 check(".....$", flags1, "test\u0085", "test\u0085", true); 1269 check("....$", flags1, "test\u0085", "test", true); 1270 1271 check("....$", flags2, "test\n", "test", true); 1272 check(".....$", flags2, "test\n", "test\n", true); 1273 check(".....$", flags2, "test\u0085", "test\u0085", true); 1274 check("....$", flags2, "test\u0085", "est\u0085", true); 1275 1276 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1277 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1278 check("....$blah", flags3, "test\nblah", "!!!!", false); 1279 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1280 1281 // Supplementary character test 1282 String t = toSupplementaries("test"); 1283 String b = toSupplementaries("blah"); 1284 check("....$", flags1, t+"\n", t, true); 1285 check("....$", flags1, t+"\r\n", t, true); 1286 check(".....$", flags1, t+"\n", t+"\n", true); 1287 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1288 check("....$", flags1, t+"\u0085", t, true); 1289 1290 check("....$", flags2, t+"\n", t, true); 1291 check(".....$", flags2, t+"\n", t+"\n", true); 1292 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1293 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1294 1295 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1296 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1297 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1298 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1299 1300 report("Dollar at End"); 1301 } 1302 1303 // This test is for 4711773 1304 private static void multilineDollarTest() throws Exception { 1305 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1306 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1307 matcher.find(); 1308 if (matcher.start(0) != 9) 1309 failCount++; 1310 matcher.find(); 1311 if (matcher.start(0) != 
20) 1312 failCount++; 1313 1314 // Supplementary character test 1315 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1316 matcher.find(); 1317 if (matcher.start(0) != 9*2) 1318 failCount++; 1319 matcher.find(); 1320 if (matcher.start(0) != 20*2) 1321 failCount++; 1322 1323 report("Multiline Dollar"); 1324 } 1325 1326 private static void reluctantRepetitionTest() throws Exception { 1327 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1328 check(p, "1 word word word 2", true); 1329 check(p, "1 wor wo w 2", true); 1330 check(p, "1 word word 2", true); 1331 check(p, "1 word 2", true); 1332 check(p, "1 wo w w 2", true); 1333 check(p, "1 wo w 2", true); 1334 check(p, "1 wor w 2", true); 1335 1336 p = Pattern.compile("([a-z])+?c"); 1337 Matcher m = p.matcher("ababcdefdec"); 1338 check(m, "ababc"); 1339 1340 // Supplementary character test 1341 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1342 m = p.matcher(toSupplementaries("ababcdefdec")); 1343 check(m, toSupplementaries("ababc")); 1344 1345 report("Reluctant Repetition"); 1346 } 1347 1348 private static void serializeTest() throws Exception { 1349 String patternStr = "(b)"; 1350 String matchStr = "b"; 1351 Pattern pattern = Pattern.compile(patternStr); 1352 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1353 ObjectOutputStream oos = new ObjectOutputStream(baos); 1354 oos.writeObject(pattern); 1355 oos.close(); 1356 ObjectInputStream ois = new ObjectInputStream( 1357 new ByteArrayInputStream(baos.toByteArray())); 1358 Pattern serializedPattern = (Pattern)ois.readObject(); 1359 ois.close(); 1360 Matcher matcher = serializedPattern.matcher(matchStr); 1361 if (!matcher.matches()) 1362 failCount++; 1363 if (matcher.groupCount() != 1) 1364 failCount++; 1365 1366 report("Serialization"); 1367 } 1368 1369 private static void gTest() { 1370 Pattern pattern = Pattern.compile("\\G\\w"); 1371 Matcher matcher = pattern.matcher("abc#x#x"); 1372 matcher.find(); 
1373 matcher.find(); 1374 matcher.find(); 1375 if (matcher.find()) 1376 failCount++; 1377 1378 pattern = Pattern.compile("\\GA*"); 1379 matcher = pattern.matcher("1A2AA3"); 1380 matcher.find(); 1381 if (matcher.find()) 1382 failCount++; 1383 1384 pattern = Pattern.compile("\\GA*"); 1385 matcher = pattern.matcher("1A2AA3"); 1386 if (!matcher.find(1)) 1387 failCount++; 1388 matcher.find(); 1389 if (matcher.find()) 1390 failCount++; 1391 1392 report("\\G"); 1393 } 1394 1395 private static void zTest() { 1396 Pattern pattern = Pattern.compile("foo\\Z"); 1397 // Positives 1398 check(pattern, "foo\u0085", true); 1399 check(pattern, "foo\u2028", true); 1400 check(pattern, "foo\u2029", true); 1401 check(pattern, "foo\n", true); 1402 check(pattern, "foo\r", true); 1403 check(pattern, "foo\r\n", true); 1404 // Negatives 1405 check(pattern, "fooo", false); 1406 check(pattern, "foo\n\r", false); 1407 1408 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1409 // Positives 1410 check(pattern, "foo", true); 1411 check(pattern, "foo\n", true); 1412 // Negatives 1413 check(pattern, "foo\r", false); 1414 check(pattern, "foo\u0085", false); 1415 check(pattern, "foo\u2028", false); 1416 check(pattern, "foo\u2029", false); 1417 1418 report("\\Z"); 1419 } 1420 1421 private static void replaceFirstTest() { 1422 Pattern pattern = Pattern.compile("(ab)(c*)"); 1423 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1424 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1425 failCount++; 1426 1427 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1428 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1429 failCount++; 1430 1431 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1432 String result = matcher.replaceFirst("$1"); 1433 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1434 failCount++; 1435 1436 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1437 result = matcher.replaceFirst("$2"); 1438 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1439 
failCount++; 1440 1441 pattern = Pattern.compile("a*"); 1442 matcher = pattern.matcher("aaaaaaaaaa"); 1443 if (!matcher.replaceFirst("test").equals("test")) 1444 failCount++; 1445 1446 pattern = Pattern.compile("a+"); 1447 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1448 if (!matcher.replaceFirst("test").equals("zzztest")) 1449 failCount++; 1450 1451 // Supplementary character test 1452 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1453 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1454 if (!matcher.replaceFirst(toSupplementaries("test")) 1455 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1456 failCount++; 1457 1458 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1459 if (!matcher.replaceFirst(toSupplementaries("test")). 1460 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1461 failCount++; 1462 1463 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1464 result = matcher.replaceFirst("$1"); 1465 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1466 failCount++; 1467 1468 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1469 result = matcher.replaceFirst("$2"); 1470 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1471 failCount++; 1472 1473 pattern = Pattern.compile(toSupplementaries("a*")); 1474 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1475 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1476 failCount++; 1477 1478 pattern = Pattern.compile(toSupplementaries("a+")); 1479 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1480 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1481 failCount++; 1482 1483 report("Replace First"); 1484 } 1485 1486 private static void unixLinesTest() { 1487 Pattern pattern = Pattern.compile(".*"); 1488 Matcher matcher = pattern.matcher("aa\u2028blah"); 1489 matcher.find(); 1490 if 
(!matcher.group(0).equals("aa")) 1491 failCount++; 1492 1493 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1494 matcher = pattern.matcher("aa\u2028blah"); 1495 matcher.find(); 1496 if (!matcher.group(0).equals("aa\u2028blah")) 1497 failCount++; 1498 1499 pattern = Pattern.compile("[az]$", 1500 Pattern.MULTILINE | Pattern.UNIX_LINES); 1501 matcher = pattern.matcher("aa\u2028zz"); 1502 check(matcher, "a\u2028", false); 1503 1504 // Supplementary character test 1505 pattern = Pattern.compile(".*"); 1506 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1507 matcher.find(); 1508 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1509 failCount++; 1510 1511 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1512 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1513 matcher.find(); 1514 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1515 failCount++; 1516 1517 pattern = Pattern.compile(toSupplementaries("[az]$"), 1518 Pattern.MULTILINE | Pattern.UNIX_LINES); 1519 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1520 check(matcher, toSupplementaries("a\u2028"), false); 1521 1522 report("Unix Lines"); 1523 } 1524 1525 private static void commentsTest() { 1526 int flags = Pattern.COMMENTS; 1527 1528 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1529 Matcher matcher = pattern.matcher("aa#aa"); 1530 if (!matcher.matches()) 1531 failCount++; 1532 1533 pattern = Pattern.compile("aa # blah", flags); 1534 matcher = pattern.matcher("aa"); 1535 if (!matcher.matches()) 1536 failCount++; 1537 1538 pattern = Pattern.compile("aa blah", flags); 1539 matcher = pattern.matcher("aablah"); 1540 if (!matcher.matches()) 1541 failCount++; 1542 1543 pattern = Pattern.compile("aa # blah blech ", flags); 1544 matcher = pattern.matcher("aa"); 1545 if (!matcher.matches()) 1546 failCount++; 1547 1548 pattern = Pattern.compile("aa # blah\n ", flags); 1549 matcher = pattern.matcher("aa"); 1550 if (!matcher.matches()) 1551 
failCount++; 1552 1553 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1554 matcher = pattern.matcher("aabc"); 1555 if (!matcher.matches()) 1556 failCount++; 1557 1558 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1559 matcher = pattern.matcher("aabc"); 1560 if (!matcher.matches()) 1561 failCount++; 1562 1563 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1564 matcher = pattern.matcher("aabc#blech"); 1565 if (!matcher.matches()) 1566 failCount++; 1567 1568 // Supplementary character test 1569 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1570 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1571 if (!matcher.matches()) 1572 failCount++; 1573 1574 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1575 matcher = pattern.matcher(toSupplementaries("aa")); 1576 if (!matcher.matches()) 1577 failCount++; 1578 1579 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1580 matcher = pattern.matcher(toSupplementaries("aablah")); 1581 if (!matcher.matches()) 1582 failCount++; 1583 1584 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1585 matcher = pattern.matcher(toSupplementaries("aa")); 1586 if (!matcher.matches()) 1587 failCount++; 1588 1589 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1590 matcher = pattern.matcher(toSupplementaries("aa")); 1591 if (!matcher.matches()) 1592 failCount++; 1593 1594 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1595 matcher = pattern.matcher(toSupplementaries("aabc")); 1596 if (!matcher.matches()) 1597 failCount++; 1598 1599 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1600 matcher = pattern.matcher(toSupplementaries("aabc")); 1601 if (!matcher.matches()) 1602 failCount++; 1603 1604 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1605 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1606 if 
(!matcher.matches()) 1607 failCount++; 1608 1609 report("Comments"); 1610 } 1611 1612 private static void caseFoldingTest() { // bug 4504687 1613 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1614 Pattern pattern = Pattern.compile("aa", flags); 1615 Matcher matcher = pattern.matcher("ab"); 1616 if (matcher.matches()) 1617 failCount++; 1618 1619 pattern = Pattern.compile("aA", flags); 1620 matcher = pattern.matcher("ab"); 1621 if (matcher.matches()) 1622 failCount++; 1623 1624 pattern = Pattern.compile("aa", flags); 1625 matcher = pattern.matcher("aB"); 1626 if (matcher.matches()) 1627 failCount++; 1628 matcher = pattern.matcher("Ab"); 1629 if (matcher.matches()) 1630 failCount++; 1631 1632 // ASCII "a" 1633 // Latin-1 Supplement "a" + grave 1634 // Cyrillic "a" 1635 String[] patterns = new String[] { 1636 //single 1637 "a", "\u00e0", "\u0430", 1638 //slice 1639 "ab", "\u00e0\u00e1", "\u0430\u0431", 1640 //class single 1641 "[a]", "[\u00e0]", "[\u0430]", 1642 //class range 1643 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1644 //back reference 1645 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1646 }; 1647 1648 String[] texts = new String[] { 1649 "A", "\u00c0", "\u0410", 1650 "AB", "\u00c0\u00c1", "\u0410\u0411", 1651 "A", "\u00c0", "\u0410", 1652 "B", "\u00c2", "\u0411", 1653 "aA", "\u00e0\u00c0", "\u0430\u0410" 1654 }; 1655 1656 boolean[] expected = new boolean[] { 1657 true, false, false, 1658 true, false, false, 1659 true, false, false, 1660 true, false, false, 1661 true, false, false 1662 }; 1663 1664 flags = Pattern.CASE_INSENSITIVE; 1665 for (int i = 0; i < patterns.length; i++) { 1666 pattern = Pattern.compile(patterns[i], flags); 1667 matcher = pattern.matcher(texts[i]); 1668 if (matcher.matches() != expected[i]) { 1669 System.out.println("<1> Failed at " + i); 1670 failCount++; 1671 } 1672 } 1673 1674 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1675 for (int i = 0; i < patterns.length; i++) { 1676 pattern = 
Pattern.compile(patterns[i], flags); 1677 matcher = pattern.matcher(texts[i]); 1678 if (!matcher.matches()) { 1679 System.out.println("<2> Failed at " + i); 1680 failCount++; 1681 } 1682 } 1683 // flag unicode_case alone should do nothing 1684 flags = Pattern.UNICODE_CASE; 1685 for (int i = 0; i < patterns.length; i++) { 1686 pattern = Pattern.compile(patterns[i], flags); 1687 matcher = pattern.matcher(texts[i]); 1688 if (matcher.matches()) { 1689 System.out.println("<3> Failed at " + i); 1690 failCount++; 1691 } 1692 } 1693 1694 // Special cases: i, I, u+0131 and u+0130 1695 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1696 pattern = Pattern.compile("[h-j]+", flags); 1697 if (!pattern.matcher("\u0131\u0130").matches()) 1698 failCount++; 1699 report("Case Folding"); 1700 } 1701 1702 private static void appendTest() { 1703 Pattern pattern = Pattern.compile("(ab)(cd)"); 1704 Matcher matcher = pattern.matcher("abcd"); 1705 String result = matcher.replaceAll("$2$1"); 1706 if (!result.equals("cdab")) 1707 failCount++; 1708 1709 String s1 = "Swap all: first = 123, second = 456"; 1710 String s2 = "Swap one: first = 123, second = 456"; 1711 String r = "$3$2$1"; 1712 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1713 matcher = pattern.matcher(s1); 1714 1715 result = matcher.replaceAll(r); 1716 if (!result.equals("Swap all: 123 = first, 456 = second")) 1717 failCount++; 1718 1719 matcher = pattern.matcher(s2); 1720 1721 if (matcher.find()) { 1722 StringBuffer sb = new StringBuffer(); 1723 matcher.appendReplacement(sb, r); 1724 matcher.appendTail(sb); 1725 result = sb.toString(); 1726 if (!result.equals("Swap one: 123 = first, second = 456")) 1727 failCount++; 1728 } 1729 1730 // Supplementary character test 1731 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1732 matcher = pattern.matcher(toSupplementaries("abcd")); 1733 result = matcher.replaceAll("$2$1"); 1734 if (!result.equals(toSupplementaries("cdab"))) 1735 failCount++; 1736 1737 s1 = 
toSupplementaries("Swap all: first = 123, second = 456"); 1738 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1739 r = toSupplementaries("$3$2$1"); 1740 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1741 matcher = pattern.matcher(s1); 1742 1743 result = matcher.replaceAll(r); 1744 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1745 failCount++; 1746 1747 matcher = pattern.matcher(s2); 1748 1749 if (matcher.find()) { 1750 StringBuffer sb = new StringBuffer(); 1751 matcher.appendReplacement(sb, r); 1752 matcher.appendTail(sb); 1753 result = sb.toString(); 1754 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1755 failCount++; 1756 } 1757 report("Append"); 1758 } 1759 1760 private static void splitTest() { 1761 Pattern pattern = Pattern.compile(":"); 1762 String[] result = pattern.split("foo:and:boo", 2); 1763 if (!result[0].equals("foo")) 1764 failCount++; 1765 if (!result[1].equals("and:boo")) 1766 failCount++; 1767 // Supplementary character test 1768 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1769 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1770 if (!result[0].equals(toSupplementaries("foo"))) 1771 failCount++; 1772 if (!result[1].equals(toSupplementaries("andXboo"))) 1773 failCount++; 1774 1775 CharBuffer cb = CharBuffer.allocate(100); 1776 cb.put("foo:and:boo"); 1777 cb.flip(); 1778 result = pattern.split(cb); 1779 if (!result[0].equals("foo")) 1780 failCount++; 1781 if (!result[1].equals("and")) 1782 failCount++; 1783 if (!result[2].equals("boo")) 1784 failCount++; 1785 1786 // Supplementary character test 1787 CharBuffer cbs = CharBuffer.allocate(100); 1788 cbs.put(toSupplementaries("fooXandXboo")); 1789 cbs.flip(); 1790 result = patternX.split(cbs); 1791 if (!result[0].equals(toSupplementaries("foo"))) 1792 failCount++; 1793 if (!result[1].equals(toSupplementaries("and"))) 1794 failCount++; 1795 if 
(!result[2].equals(toSupplementaries("boo"))) 1796 failCount++; 1797 1798 String source = "0123456789"; 1799 for (int limit=-2; limit<3; limit++) { 1800 for (int x=0; x<10; x++) { 1801 result = source.split(Integer.toString(x), limit); 1802 int expectedLength = limit < 1 ? 2 : limit; 1803 1804 if ((limit == 0) && (x == 9)) { 1805 // expected dropping of "" 1806 if (result.length != 1) 1807 failCount++; 1808 if (!result[0].equals("012345678")) { 1809 failCount++; 1810 } 1811 } else { 1812 if (result.length != expectedLength) { 1813 failCount++; 1814 } 1815 if (!result[0].equals(source.substring(0,x))) { 1816 if (limit != 1) { 1817 failCount++; 1818 } else { 1819 if (!result[0].equals(source.substring(0,10))) { 1820 failCount++; 1821 } 1822 } 1823 } 1824 if (expectedLength > 1) { // Check segment 2 1825 if (!result[1].equals(source.substring(x+1,10))) 1826 failCount++; 1827 } 1828 } 1829 } 1830 } 1831 // Check the case for no match found 1832 for (int limit=-2; limit<3; limit++) { 1833 result = source.split("e", limit); 1834 if (result.length != 1) 1835 failCount++; 1836 if (!result[0].equals(source)) 1837 failCount++; 1838 } 1839 // Check the case for limit == 0, source = ""; 1840 // split() now returns 0-length for empty source "" see #6559590 1841 source = ""; 1842 result = source.split("e", 0); 1843 if (result.length != 1) 1844 failCount++; 1845 if (!result[0].equals(source)) 1846 failCount++; 1847 1848 // Check both split() and splitAsStraem(), especially for zero-lenth 1849 // input and zero-lenth match cases 1850 String[][] input = new String[][] { 1851 { " ", "Abc Efg Hij" }, // normal non-zero-match 1852 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1853 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1854 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1855 { "(?=\\p{Lu})", "AbcEfg" }, 1856 { "(?=\\p{Lu})", "Abc" }, 1857 { " ", "" }, // zero-length input 1858 { ".*", "" }, 1859 1860 // some tests from 
PatternStreamTest.java 1861 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1862 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1863 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1864 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1865 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1866 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1867 { "\u56da", "" }, 1868 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1869 { "o", "boo:and:foo" }, 1870 { "o", "booooo:and:fooooo" }, 1871 { "o", "fooooo:" }, 1872 }; 1873 1874 String[][] expected = new String[][] { 1875 { "Abc", "Efg", "Hij" }, 1876 { "", "Abc", "Efg", "Hij" }, 1877 { "Abc", "", "Efg", "Hij" }, 1878 { "Abc", "Efg", "Hij" }, 1879 { "Abc", "Efg" }, 1880 { "Abc" }, 1881 { "" }, 1882 { "" }, 1883 1884 { "awgqwefg1fefw", "vssv1vvv1" }, 1885 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1886 { "awgqwefg", "fefw4vssv", "vvv" }, 1887 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1888 { "1", "23", "456", "7890" }, 1889 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1890 { "" }, 1891 { "This", "is", "testing", "", "with", "different", "separators" }, 1892 { "b", "", ":and:f" }, 1893 { "b", "", "", "", "", ":and:f" }, 1894 { "f", "", "", "", "", ":" }, 1895 }; 1896 for (int i = 0; i < input.length; i++) { 1897 pattern = Pattern.compile(input[i][0]); 1898 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1899 failCount++; 1900 } 1901 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1902 // array for zero-length input for now 1903 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1904 expected[i])) { 1905 failCount++; 1906 } 1907 } 1908 report("Split"); 1909 } 1910 1911 private static void negationTest() { 1912 Pattern pattern = Pattern.compile("[\\[@^]+"); 1913 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1914 if (!matcher.find()) 1915 failCount++; 
1916 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1917 failCount++; 1918 pattern = Pattern.compile("[@\\[^]+"); 1919 matcher = pattern.matcher("@@@@[[[[^^^^"); 1920 if (!matcher.find()) 1921 failCount++; 1922 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1923 failCount++; 1924 pattern = Pattern.compile("[@\\[^@]+"); 1925 matcher = pattern.matcher("@@@@[[[[^^^^"); 1926 if (!matcher.find()) 1927 failCount++; 1928 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1929 failCount++; 1930 1931 pattern = Pattern.compile("\\)"); 1932 matcher = pattern.matcher("xxx)xxx"); 1933 if (!matcher.find()) 1934 failCount++; 1935 1936 report("Negation"); 1937 } 1938 1939 private static void ampersandTest() { 1940 Pattern pattern = Pattern.compile("[&@]+"); 1941 check(pattern, "@@@@&&&&", true); 1942 1943 pattern = Pattern.compile("[@&]+"); 1944 check(pattern, "@@@@&&&&", true); 1945 1946 pattern = Pattern.compile("[@\\&]+"); 1947 check(pattern, "@@@@&&&&", true); 1948 1949 report("Ampersand"); 1950 } 1951 1952 private static void octalTest() throws Exception { 1953 Pattern pattern = Pattern.compile("\\u0007"); 1954 Matcher matcher = pattern.matcher("\u0007"); 1955 if (!matcher.matches()) 1956 failCount++; 1957 pattern = Pattern.compile("\\07"); 1958 matcher = pattern.matcher("\u0007"); 1959 if (!matcher.matches()) 1960 failCount++; 1961 pattern = Pattern.compile("\\007"); 1962 matcher = pattern.matcher("\u0007"); 1963 if (!matcher.matches()) 1964 failCount++; 1965 pattern = Pattern.compile("\\0007"); 1966 matcher = pattern.matcher("\u0007"); 1967 if (!matcher.matches()) 1968 failCount++; 1969 pattern = Pattern.compile("\\040"); 1970 matcher = pattern.matcher("\u0020"); 1971 if (!matcher.matches()) 1972 failCount++; 1973 pattern = Pattern.compile("\\0403"); 1974 matcher = pattern.matcher("\u00203"); 1975 if (!matcher.matches()) 1976 failCount++; 1977 pattern = Pattern.compile("\\0103"); 1978 matcher = pattern.matcher("\u0043"); 1979 if (!matcher.matches()) 1980 failCount++; 1981 1982 
report("Octal"); 1983 } 1984 1985 private static void longPatternTest() throws Exception { 1986 try { 1987 Pattern pattern = Pattern.compile( 1988 "a 32-character-long pattern xxxx"); 1989 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 1990 pattern = Pattern.compile("a thirty four character long regex"); 1991 StringBuffer patternToBe = new StringBuffer(101); 1992 for (int i=0; i<100; i++) 1993 patternToBe.append((char)(97 + i%26)); 1994 pattern = Pattern.compile(patternToBe.toString()); 1995 } catch (PatternSyntaxException e) { 1996 failCount++; 1997 } 1998 1999 // Supplementary character test 2000 try { 2001 Pattern pattern = Pattern.compile( 2002 toSupplementaries("a 32-character-long pattern xxxx")); 2003 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 2004 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 2005 StringBuffer patternToBe = new StringBuffer(101*2); 2006 for (int i=0; i<100; i++) 2007 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 2008 + 97 + i%26)); 2009 pattern = Pattern.compile(patternToBe.toString()); 2010 } catch (PatternSyntaxException e) { 2011 failCount++; 2012 } 2013 report("LongPattern"); 2014 } 2015 2016 private static void group0Test() throws Exception { 2017 Pattern pattern = Pattern.compile("(tes)ting"); 2018 Matcher matcher = pattern.matcher("testing"); 2019 check(matcher, "testing"); 2020 2021 matcher.reset("testing"); 2022 if (matcher.lookingAt()) { 2023 if (!matcher.group(0).equals("testing")) 2024 failCount++; 2025 } else { 2026 failCount++; 2027 } 2028 2029 matcher.reset("testing"); 2030 if (matcher.matches()) { 2031 if (!matcher.group(0).equals("testing")) 2032 failCount++; 2033 } else { 2034 failCount++; 2035 } 2036 2037 pattern = Pattern.compile("(tes)ting"); 2038 matcher = pattern.matcher("testing"); 2039 if (matcher.lookingAt()) { 2040 if (!matcher.group(0).equals("testing")) 2041 failCount++; 2042 } else { 2043 
failCount++; 2044 } 2045 2046 pattern = Pattern.compile("^(tes)ting"); 2047 matcher = pattern.matcher("testing"); 2048 if (matcher.matches()) { 2049 if (!matcher.group(0).equals("testing")) 2050 failCount++; 2051 } else { 2052 failCount++; 2053 } 2054 2055 // Supplementary character test 2056 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2057 matcher = pattern.matcher(toSupplementaries("testing")); 2058 check(matcher, toSupplementaries("testing")); 2059 2060 matcher.reset(toSupplementaries("testing")); 2061 if (matcher.lookingAt()) { 2062 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2063 failCount++; 2064 } else { 2065 failCount++; 2066 } 2067 2068 matcher.reset(toSupplementaries("testing")); 2069 if (matcher.matches()) { 2070 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2071 failCount++; 2072 } else { 2073 failCount++; 2074 } 2075 2076 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2077 matcher = pattern.matcher(toSupplementaries("testing")); 2078 if (matcher.lookingAt()) { 2079 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2080 failCount++; 2081 } else { 2082 failCount++; 2083 } 2084 2085 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2086 matcher = pattern.matcher(toSupplementaries("testing")); 2087 if (matcher.matches()) { 2088 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2089 failCount++; 2090 } else { 2091 failCount++; 2092 } 2093 2094 report("Group0"); 2095 } 2096 2097 private static void findIntTest() throws Exception { 2098 Pattern p = Pattern.compile("blah"); 2099 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2100 boolean result = m.find(2); 2101 if (!result) 2102 failCount++; 2103 2104 p = Pattern.compile("$"); 2105 m = p.matcher("1234567890"); 2106 result = m.find(10); 2107 if (!result) 2108 failCount++; 2109 try { 2110 result = m.find(11); 2111 failCount++; 2112 } catch (IndexOutOfBoundsException e) { 2113 // correct result 2114 } 2115 2116 // Supplementary 
character test 2117 p = Pattern.compile(toSupplementaries("blah")); 2118 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2119 result = m.find(2); 2120 if (!result) 2121 failCount++; 2122 2123 report("FindInt"); 2124 } 2125 2126 private static void emptyPatternTest() throws Exception { 2127 Pattern p = Pattern.compile(""); 2128 Matcher m = p.matcher("foo"); 2129 2130 // Should find empty pattern at beginning of input 2131 boolean result = m.find(); 2132 if (result != true) 2133 failCount++; 2134 if (m.start() != 0) 2135 failCount++; 2136 2137 // Should not match entire input if input is not empty 2138 m.reset(); 2139 result = m.matches(); 2140 if (result == true) 2141 failCount++; 2142 2143 try { 2144 m.start(0); 2145 failCount++; 2146 } catch (IllegalStateException e) { 2147 // Correct result 2148 } 2149 2150 // Should match entire input if input is empty 2151 m.reset(""); 2152 result = m.matches(); 2153 if (result != true) 2154 failCount++; 2155 2156 result = Pattern.matches("", ""); 2157 if (result != true) 2158 failCount++; 2159 2160 result = Pattern.matches("", "foo"); 2161 if (result == true) 2162 failCount++; 2163 report("EmptyPattern"); 2164 } 2165 2166 private static void charClassTest() throws Exception { 2167 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2168 check(pattern, "blahb]blech", true); 2169 2170 pattern = Pattern.compile("[abc[def]]"); 2171 check(pattern, "b", true); 2172 2173 // Supplementary character tests 2174 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2175 check(pattern, toSupplementaries("blahb]blech"), true); 2176 2177 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2178 check(pattern, toSupplementaries("b"), true); 2179 2180 try { 2181 // u00ff when UNICODE_CASE 2182 pattern = Pattern.compile("[ab\u00ffcd]", 2183 Pattern.CASE_INSENSITIVE| 2184 Pattern.UNICODE_CASE); 2185 check(pattern, "ab\u00ffcd", true); 2186 check(pattern, "Ab\u0178Cd", true); 2187 2188 // u00b5 when UNICODE_CASE 2189 
pattern = Pattern.compile("[ab\u00b5cd]", 2190 Pattern.CASE_INSENSITIVE| 2191 Pattern.UNICODE_CASE); 2192 check(pattern, "ab\u00b5cd", true); 2193 check(pattern, "Ab\u039cCd", true); 2194 } catch (Exception e) { failCount++; } 2195 2196 /* Special cases 2197 (1)LatinSmallLetterLongS u+017f 2198 (2)LatinSmallLetterDotlessI u+0131 2199 (3)LatineCapitalLetterIWithDotAbove u+0130 2200 (4)KelvinSign u+212a 2201 (5)AngstromSign u+212b 2202 */ 2203 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2204 pattern = Pattern.compile("[sik\u00c5]+", flags); 2205 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2206 failCount++; 2207 2208 report("CharClass"); 2209 } 2210 2211 private static void caretTest() throws Exception { 2212 Pattern pattern = Pattern.compile("\\w*"); 2213 Matcher matcher = pattern.matcher("a#bc#def##g"); 2214 check(matcher, "a"); 2215 check(matcher, ""); 2216 check(matcher, "bc"); 2217 check(matcher, ""); 2218 check(matcher, "def"); 2219 check(matcher, ""); 2220 check(matcher, ""); 2221 check(matcher, "g"); 2222 check(matcher, ""); 2223 if (matcher.find()) 2224 failCount++; 2225 2226 pattern = Pattern.compile("^\\w*"); 2227 matcher = pattern.matcher("a#bc#def##g"); 2228 check(matcher, "a"); 2229 if (matcher.find()) 2230 failCount++; 2231 2232 pattern = Pattern.compile("\\w"); 2233 matcher = pattern.matcher("abc##x"); 2234 check(matcher, "a"); 2235 check(matcher, "b"); 2236 check(matcher, "c"); 2237 check(matcher, "x"); 2238 if (matcher.find()) 2239 failCount++; 2240 2241 pattern = Pattern.compile("^\\w"); 2242 matcher = pattern.matcher("abc##x"); 2243 check(matcher, "a"); 2244 if (matcher.find()) 2245 failCount++; 2246 2247 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2248 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2249 check(matcher, "abc"); 2250 if (matcher.find()) 2251 failCount++; 2252 2253 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2254 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2255 
check(matcher, "abc"); 2256 check(matcher, "jkl"); 2257 if (matcher.find()) 2258 failCount++; 2259 2260 pattern = Pattern.compile("^", Pattern.MULTILINE); 2261 matcher = pattern.matcher("this is some text"); 2262 String result = matcher.replaceAll("X"); 2263 if (!result.equals("Xthis is some text")) 2264 failCount++; 2265 2266 pattern = Pattern.compile("^"); 2267 matcher = pattern.matcher("this is some text"); 2268 result = matcher.replaceAll("X"); 2269 if (!result.equals("Xthis is some text")) 2270 failCount++; 2271 2272 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2273 matcher = pattern.matcher("this is some text\n"); 2274 result = matcher.replaceAll("X"); 2275 if (!result.equals("Xthis is some text\n")) 2276 failCount++; 2277 2278 report("Caret"); 2279 } 2280 2281 private static void groupCaptureTest() throws Exception { 2282 // Independent group 2283 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2284 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2285 matcher.find(); 2286 try { 2287 String blah = matcher.group(1); 2288 failCount++; 2289 } catch (IndexOutOfBoundsException ioobe) { 2290 // Good result 2291 } 2292 // Pure group 2293 pattern = Pattern.compile("x+(?:y+)z+"); 2294 matcher = pattern.matcher("xxxyyyzzz"); 2295 matcher.find(); 2296 try { 2297 String blah = matcher.group(1); 2298 failCount++; 2299 } catch (IndexOutOfBoundsException ioobe) { 2300 // Good result 2301 } 2302 2303 // Supplementary character tests 2304 // Independent group 2305 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2306 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2307 matcher.find(); 2308 try { 2309 String blah = matcher.group(1); 2310 failCount++; 2311 } catch (IndexOutOfBoundsException ioobe) { 2312 // Good result 2313 } 2314 // Pure group 2315 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2316 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2317 matcher.find(); 2318 try { 2319 String blah = 
matcher.group(1); 2320 failCount++; 2321 } catch (IndexOutOfBoundsException ioobe) { 2322 // Good result 2323 } 2324 2325 report("GroupCapture"); 2326 } 2327 2328 private static void backRefTest() throws Exception { 2329 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2330 check(pattern, "zzzaabcazzz", true); 2331 2332 pattern = Pattern.compile("(a*)bc\\1"); 2333 check(pattern, "zzzaabcaazzz", true); 2334 2335 pattern = Pattern.compile("(abc)(def)\\1"); 2336 check(pattern, "abcdefabc", true); 2337 2338 pattern = Pattern.compile("(abc)(def)\\3"); 2339 check(pattern, "abcdefabc", false); 2340 2341 try { 2342 for (int i = 1; i < 10; i++) { 2343 // Make sure backref 1-9 are always accepted 2344 pattern = Pattern.compile("abcdef\\" + i); 2345 // and fail to match if the target group does not exit 2346 check(pattern, "abcdef", false); 2347 } 2348 } catch(PatternSyntaxException e) { 2349 failCount++; 2350 } 2351 2352 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2353 check(pattern, "abcdefghija", false); 2354 check(pattern, "abcdefghija1", true); 2355 2356 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2357 check(pattern, "abcdefghijkk", true); 2358 2359 pattern = Pattern.compile("(a)bcdefghij\\11"); 2360 check(pattern, "abcdefghija1", true); 2361 2362 // Supplementary character tests 2363 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2364 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2365 2366 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2367 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2368 2369 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2370 check(pattern, toSupplementaries("abcdefabc"), true); 2371 2372 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2373 check(pattern, toSupplementaries("abcdefabc"), false); 2374 2375 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2376 check(pattern, 
toSupplementaries("abcdefghija"), false); 2377 check(pattern, toSupplementaries("abcdefghija1"), true); 2378 2379 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2380 check(pattern, toSupplementaries("abcdefghijkk"), true); 2381 2382 report("BackRef"); 2383 } 2384 2385 /** 2386 * Unicode Technical Report #18, section 2.6 End of Line 2387 * There is no empty line to be matched in the sequence \u000D\u000A 2388 * but there is an empty line in the sequence \u000A\u000D. 2389 */ 2390 private static void anchorTest() throws Exception { 2391 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2392 Matcher m = p.matcher("blah1\r\nblah2"); 2393 m.find(); 2394 m.find(); 2395 if (!m.group().equals("blah2")) 2396 failCount++; 2397 2398 m.reset("blah1\n\rblah2"); 2399 m.find(); 2400 m.find(); 2401 m.find(); 2402 if (!m.group().equals("blah2")) 2403 failCount++; 2404 2405 // Test behavior of $ with \r\n at end of input 2406 p = Pattern.compile(".+$"); 2407 m = p.matcher("blah1\r\n"); 2408 if (!m.find()) 2409 failCount++; 2410 if (!m.group().equals("blah1")) 2411 failCount++; 2412 if (m.find()) 2413 failCount++; 2414 2415 // Test behavior of $ with \r\n at end of input in multiline 2416 p = Pattern.compile(".+$", Pattern.MULTILINE); 2417 m = p.matcher("blah1\r\n"); 2418 if (!m.find()) 2419 failCount++; 2420 if (m.find()) 2421 failCount++; 2422 2423 // Test for $ recognition of \u0085 for bug 4527731 2424 p = Pattern.compile(".+$", Pattern.MULTILINE); 2425 m = p.matcher("blah1\u0085"); 2426 if (!m.find()) 2427 failCount++; 2428 2429 // Supplementary character test 2430 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2431 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2432 m.find(); 2433 m.find(); 2434 if (!m.group().equals(toSupplementaries("blah2"))) 2435 failCount++; 2436 2437 m.reset(toSupplementaries("blah1\n\rblah2")); 2438 m.find(); 2439 m.find(); 2440 m.find(); 2441 if (!m.group().equals(toSupplementaries("blah2"))) 2442 
failCount++; 2443 2444 // Test behavior of $ with \r\n at end of input 2445 p = Pattern.compile(".+$"); 2446 m = p.matcher(toSupplementaries("blah1\r\n")); 2447 if (!m.find()) 2448 failCount++; 2449 if (!m.group().equals(toSupplementaries("blah1"))) 2450 failCount++; 2451 if (m.find()) 2452 failCount++; 2453 2454 // Test behavior of $ with \r\n at end of input in multiline 2455 p = Pattern.compile(".+$", Pattern.MULTILINE); 2456 m = p.matcher(toSupplementaries("blah1\r\n")); 2457 if (!m.find()) 2458 failCount++; 2459 if (m.find()) 2460 failCount++; 2461 2462 // Test for $ recognition of \u0085 for bug 4527731 2463 p = Pattern.compile(".+$", Pattern.MULTILINE); 2464 m = p.matcher(toSupplementaries("blah1\u0085")); 2465 if (!m.find()) 2466 failCount++; 2467 2468 report("Anchors"); 2469 } 2470 2471 /** 2472 * A basic sanity test of Matcher.lookingAt(). 2473 */ 2474 private static void lookingAtTest() throws Exception { 2475 Pattern p = Pattern.compile("(ab)(c*)"); 2476 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2477 2478 if (!m.lookingAt()) 2479 failCount++; 2480 2481 if (!m.group().equals(m.group(0))) 2482 failCount++; 2483 2484 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2485 if (m.lookingAt()) 2486 failCount++; 2487 2488 // Supplementary character test 2489 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2490 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2491 2492 if (!m.lookingAt()) 2493 failCount++; 2494 2495 if (!m.group().equals(m.group(0))) 2496 failCount++; 2497 2498 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2499 if (m.lookingAt()) 2500 failCount++; 2501 2502 report("Looking At"); 2503 } 2504 2505 /** 2506 * A basic sanity test of Matcher.matches(). 
2507 */ 2508 private static void matchesTest() throws Exception { 2509 // matches() 2510 Pattern p = Pattern.compile("ulb(c*)"); 2511 Matcher m = p.matcher("ulbcccccc"); 2512 if (!m.matches()) 2513 failCount++; 2514 2515 // find() but not matches() 2516 m.reset("zzzulbcccccc"); 2517 if (m.matches()) 2518 failCount++; 2519 2520 // lookingAt() but not matches() 2521 m.reset("ulbccccccdef"); 2522 if (m.matches()) 2523 failCount++; 2524 2525 // matches() 2526 p = Pattern.compile("a|ad"); 2527 m = p.matcher("ad"); 2528 if (!m.matches()) 2529 failCount++; 2530 2531 // Supplementary character test 2532 // matches() 2533 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2534 m = p.matcher(toSupplementaries("ulbcccccc")); 2535 if (!m.matches()) 2536 failCount++; 2537 2538 // find() but not matches() 2539 m.reset(toSupplementaries("zzzulbcccccc")); 2540 if (m.matches()) 2541 failCount++; 2542 2543 // lookingAt() but not matches() 2544 m.reset(toSupplementaries("ulbccccccdef")); 2545 if (m.matches()) 2546 failCount++; 2547 2548 // matches() 2549 p = Pattern.compile(toSupplementaries("a|ad")); 2550 m = p.matcher(toSupplementaries("ad")); 2551 if (!m.matches()) 2552 failCount++; 2553 2554 report("Matches"); 2555 } 2556 2557 /** 2558 * A basic sanity test of Pattern.matches(). 
2559 */ 2560 private static void patternMatchesTest() throws Exception { 2561 // matches() 2562 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2563 toSupplementaries("ulbcccccc"))) 2564 failCount++; 2565 2566 // find() but not matches() 2567 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2568 toSupplementaries("zzzulbcccccc"))) 2569 failCount++; 2570 2571 // lookingAt() but not matches() 2572 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2573 toSupplementaries("ulbccccccdef"))) 2574 failCount++; 2575 2576 // Supplementary character test 2577 // matches() 2578 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2579 toSupplementaries("ulbcccccc"))) 2580 failCount++; 2581 2582 // find() but not matches() 2583 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2584 toSupplementaries("zzzulbcccccc"))) 2585 failCount++; 2586 2587 // lookingAt() but not matches() 2588 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2589 toSupplementaries("ulbccccccdef"))) 2590 failCount++; 2591 2592 report("Pattern Matches"); 2593 } 2594 2595 /** 2596 * Canonical equivalence testing. Tests the ability of the engine 2597 * to match sequences that are not explicitly specified in the 2598 * pattern when they are considered equivalent by the Unicode Standard. 
2599 */ 2600 private static void ceTest() throws Exception { 2601 // Decomposed char outside char classes 2602 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2603 Matcher m = p.matcher("test\u00e5"); 2604 if (!m.matches()) 2605 failCount++; 2606 2607 m.reset("testa\u030a"); 2608 if (!m.matches()) 2609 failCount++; 2610 2611 // Composed char outside char classes 2612 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2613 m = p.matcher("test\u00e5"); 2614 if (!m.matches()) 2615 failCount++; 2616 2617 m.reset("testa\u030a"); 2618 if (!m.find()) 2619 failCount++; 2620 2621 // Decomposed char inside a char class 2622 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2623 m = p.matcher("test\u00e5"); 2624 if (!m.find()) 2625 failCount++; 2626 2627 m.reset("testa\u030a"); 2628 if (!m.find()) 2629 failCount++; 2630 2631 // Composed char inside a char class 2632 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2633 m = p.matcher("test\u00e5"); 2634 if (!m.find()) 2635 failCount++; 2636 2637 m.reset("testa\u0300"); 2638 if (!m.find()) 2639 failCount++; 2640 2641 m.reset("testa\u030a"); 2642 if (!m.find()) 2643 failCount++; 2644 2645 // Marks that cannot legally change order and be equivalent 2646 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2647 check(p, "testa\u0308\u0300", true); 2648 check(p, "testa\u0300\u0308", false); 2649 2650 // Marks that can legally change order and be equivalent 2651 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2652 check(p, "testa\u0308\u0323", true); 2653 check(p, "testa\u0323\u0308", true); 2654 2655 // Test all equivalences of the sequence a\u0308\u0323\u0300 2656 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2657 check(p, "testa\u0308\u0323\u0300", true); 2658 check(p, "testa\u0323\u0308\u0300", true); 2659 check(p, "testa\u0308\u0300\u0323", true); 2660 check(p, "test\u00e4\u0323\u0300", true); 2661 check(p, "test\u00e4\u0300\u0323", true); 2662 
2663 /* 2664 * The following canonical equivalence tests don't work. Bug id: 4916384. 2665 * 2666 // Decomposed hangul (jamos) 2667 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ); 2668 m = p.matcher("\u1100\u1161"); 2669 if (!m.matches()) 2670 failCount++; 2671 2672 m.reset("\uac00"); 2673 if (!m.matches()) 2674 failCount++; 2675 2676 // Composed hangul 2677 p = Pattern.compile("\uac00", Pattern.CANON_EQ); 2678 m = p.matcher("\u1100\u1161"); 2679 if (!m.matches()) 2680 failCount++; 2681 2682 m.reset("\uac00"); 2683 if (!m.matches()) 2684 failCount++; 2685 2686 // Decomposed supplementary outside char classes 2687 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ); 2688 m = p.matcher("test\ud834\uddc0"); 2689 if (!m.matches()) 2690 failCount++; 2691 2692 m.reset("test\ud834\uddbc\ud834\udd6f"); 2693 if (!m.matches()) 2694 failCount++; 2695 2696 // Composed supplementary outside char classes 2697 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ); 2698 m.reset("test\ud834\uddbc\ud834\udd6f"); 2699 if (!m.matches()) 2700 failCount++; 2701 2702 m = p.matcher("test\ud834\uddc0"); 2703 if (!m.matches()) 2704 failCount++; 2705 2706 */ 2707 2708 report("Canonical Equivalence"); 2709 } 2710 2711 /** 2712 * A basic sanity test of Matcher.replaceAll(). 
2713 */ 2714 private static void globalSubstitute() throws Exception { 2715 // Global substitution with a literal 2716 Pattern p = Pattern.compile("(ab)(c*)"); 2717 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2718 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2719 failCount++; 2720 2721 m.reset("zzzabccczzzabcczzzabccczzz"); 2722 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2723 failCount++; 2724 2725 // Global substitution with groups 2726 m.reset("zzzabccczzzabcczzzabccczzz"); 2727 String result = m.replaceAll("$1"); 2728 if (!result.equals("zzzabzzzabzzzabzzz")) 2729 failCount++; 2730 2731 // Supplementary character test 2732 // Global substitution with a literal 2733 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2734 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2735 if (!m.replaceAll(toSupplementaries("test")). 2736 equals(toSupplementaries("testzzztestzzztest"))) 2737 failCount++; 2738 2739 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2740 if (!m.replaceAll(toSupplementaries("test")). 2741 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2742 failCount++; 2743 2744 // Global substitution with groups 2745 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2746 result = m.replaceAll("$1"); 2747 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2748 failCount++; 2749 2750 report("Global Substitution"); 2751 } 2752 2753 /** 2754 * Tests the usage of Matcher.appendReplacement() with literal 2755 * and group substitutions. 
2756 */ 2757 private static void stringbufferSubstitute() throws Exception { 2758 // SB substitution with literal 2759 String blah = "zzzblahzzz"; 2760 Pattern p = Pattern.compile("blah"); 2761 Matcher m = p.matcher(blah); 2762 StringBuffer result = new StringBuffer(); 2763 try { 2764 m.appendReplacement(result, "blech"); 2765 failCount++; 2766 } catch (IllegalStateException e) { 2767 } 2768 m.find(); 2769 m.appendReplacement(result, "blech"); 2770 if (!result.toString().equals("zzzblech")) 2771 failCount++; 2772 2773 m.appendTail(result); 2774 if (!result.toString().equals("zzzblechzzz")) 2775 failCount++; 2776 2777 // SB substitution with groups 2778 blah = "zzzabcdzzz"; 2779 p = Pattern.compile("(ab)(cd)*"); 2780 m = p.matcher(blah); 2781 result = new StringBuffer(); 2782 try { 2783 m.appendReplacement(result, "$1"); 2784 failCount++; 2785 } catch (IllegalStateException e) { 2786 } 2787 m.find(); 2788 m.appendReplacement(result, "$1"); 2789 if (!result.toString().equals("zzzab")) 2790 failCount++; 2791 2792 m.appendTail(result); 2793 if (!result.toString().equals("zzzabzzz")) 2794 failCount++; 2795 2796 // SB substitution with 3 groups 2797 blah = "zzzabcdcdefzzz"; 2798 p = Pattern.compile("(ab)(cd)*(ef)"); 2799 m = p.matcher(blah); 2800 result = new StringBuffer(); 2801 try { 2802 m.appendReplacement(result, "$1w$2w$3"); 2803 failCount++; 2804 } catch (IllegalStateException e) { 2805 } 2806 m.find(); 2807 m.appendReplacement(result, "$1w$2w$3"); 2808 if (!result.toString().equals("zzzabwcdwef")) 2809 failCount++; 2810 2811 m.appendTail(result); 2812 if (!result.toString().equals("zzzabwcdwefzzz")) 2813 failCount++; 2814 2815 // SB substitution with groups and three matches 2816 // skipping middle match 2817 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2818 p = Pattern.compile("(ab)(cd*)"); 2819 m = p.matcher(blah); 2820 result = new StringBuffer(); 2821 try { 2822 m.appendReplacement(result, "$1"); 2823 failCount++; 2824 } catch (IllegalStateException e) { 2825 } 2826 
m.find(); 2827 m.appendReplacement(result, "$1"); 2828 if (!result.toString().equals("zzzab")) 2829 failCount++; 2830 2831 m.find(); 2832 m.find(); 2833 m.appendReplacement(result, "$2"); 2834 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2835 failCount++; 2836 2837 m.appendTail(result); 2838 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2839 failCount++; 2840 2841 // Check to make sure escaped $ is ignored 2842 blah = "zzzabcdcdefzzz"; 2843 p = Pattern.compile("(ab)(cd)*(ef)"); 2844 m = p.matcher(blah); 2845 result = new StringBuffer(); 2846 m.find(); 2847 m.appendReplacement(result, "$1w\\$2w$3"); 2848 if (!result.toString().equals("zzzabw$2wef")) 2849 failCount++; 2850 2851 m.appendTail(result); 2852 if (!result.toString().equals("zzzabw$2wefzzz")) 2853 failCount++; 2854 2855 // Check to make sure a reference to nonexistent group causes error 2856 blah = "zzzabcdcdefzzz"; 2857 p = Pattern.compile("(ab)(cd)*(ef)"); 2858 m = p.matcher(blah); 2859 result = new StringBuffer(); 2860 m.find(); 2861 try { 2862 m.appendReplacement(result, "$1w$5w$3"); 2863 failCount++; 2864 } catch (IndexOutOfBoundsException ioobe) { 2865 // Correct result 2866 } 2867 2868 // Check double digit group references 2869 blah = "zzz123456789101112zzz"; 2870 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2871 m = p.matcher(blah); 2872 result = new StringBuffer(); 2873 m.find(); 2874 m.appendReplacement(result, "$1w$11w$3"); 2875 if (!result.toString().equals("zzz1w11w3")) 2876 failCount++; 2877 2878 // Check to make sure it backs off $15 to $1 if only three groups 2879 blah = "zzzabcdcdefzzz"; 2880 p = Pattern.compile("(ab)(cd)*(ef)"); 2881 m = p.matcher(blah); 2882 result = new StringBuffer(); 2883 m.find(); 2884 m.appendReplacement(result, "$1w$15w$3"); 2885 if (!result.toString().equals("zzzabwab5wef")) 2886 failCount++; 2887 2888 2889 // Supplementary character test 2890 // SB substitution with literal 2891 blah = toSupplementaries("zzzblahzzz"); 2892 p = 
Pattern.compile(toSupplementaries("blah")); 2893 m = p.matcher(blah); 2894 result = new StringBuffer(); 2895 try { 2896 m.appendReplacement(result, toSupplementaries("blech")); 2897 failCount++; 2898 } catch (IllegalStateException e) { 2899 } 2900 m.find(); 2901 m.appendReplacement(result, toSupplementaries("blech")); 2902 if (!result.toString().equals(toSupplementaries("zzzblech"))) 2903 failCount++; 2904 2905 m.appendTail(result); 2906 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 2907 failCount++; 2908 2909 // SB substitution with groups 2910 blah = toSupplementaries("zzzabcdzzz"); 2911 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 2912 m = p.matcher(blah); 2913 result = new StringBuffer(); 2914 try { 2915 m.appendReplacement(result, "$1"); 2916 failCount++; 2917 } catch (IllegalStateException e) { 2918 } 2919 m.find(); 2920 m.appendReplacement(result, "$1"); 2921 if (!result.toString().equals(toSupplementaries("zzzab"))) 2922 failCount++; 2923 2924 m.appendTail(result); 2925 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 2926 failCount++; 2927 2928 // SB substitution with 3 groups 2929 blah = toSupplementaries("zzzabcdcdefzzz"); 2930 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2931 m = p.matcher(blah); 2932 result = new StringBuffer(); 2933 try { 2934 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2935 failCount++; 2936 } catch (IllegalStateException e) { 2937 } 2938 m.find(); 2939 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2940 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 2941 failCount++; 2942 2943 m.appendTail(result); 2944 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 2945 failCount++; 2946 2947 // SB substitution with groups and three matches 2948 // skipping middle match 2949 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 2950 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 2951 m = p.matcher(blah); 2952 result = new 
StringBuffer(); 2953 try { 2954 m.appendReplacement(result, "$1"); 2955 failCount++; 2956 } catch (IllegalStateException e) { 2957 } 2958 m.find(); 2959 m.appendReplacement(result, "$1"); 2960 if (!result.toString().equals(toSupplementaries("zzzab"))) 2961 failCount++; 2962 2963 m.find(); 2964 m.find(); 2965 m.appendReplacement(result, "$2"); 2966 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 2967 failCount++; 2968 2969 m.appendTail(result); 2970 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 2971 failCount++; 2972 2973 // Check to make sure escaped $ is ignored 2974 blah = toSupplementaries("zzzabcdcdefzzz"); 2975 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2976 m = p.matcher(blah); 2977 result = new StringBuffer(); 2978 m.find(); 2979 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 2980 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 2981 failCount++; 2982 2983 m.appendTail(result); 2984 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 2985 failCount++; 2986 2987 // Check to make sure a reference to nonexistent group causes error 2988 blah = toSupplementaries("zzzabcdcdefzzz"); 2989 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2990 m = p.matcher(blah); 2991 result = new StringBuffer(); 2992 m.find(); 2993 try { 2994 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 2995 failCount++; 2996 } catch (IndexOutOfBoundsException ioobe) { 2997 // Correct result 2998 } 2999 3000 // Check double digit group references 3001 blah = toSupplementaries("zzz123456789101112zzz"); 3002 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3003 m = p.matcher(blah); 3004 result = new StringBuffer(); 3005 m.find(); 3006 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3007 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3008 failCount++; 3009 3010 // Check to make sure it backs off $15 to $1 if only three groups 
3011 blah = toSupplementaries("zzzabcdcdefzzz"); 3012 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3013 m = p.matcher(blah); 3014 result = new StringBuffer(); 3015 m.find(); 3016 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3017 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3018 failCount++; 3019 3020 // Check nothing has been appended into the output buffer if 3021 // the replacement string triggers IllegalArgumentException. 3022 p = Pattern.compile("(abc)"); 3023 m = p.matcher("abcd"); 3024 result = new StringBuffer(); 3025 m.find(); 3026 try { 3027 m.appendReplacement(result, ("xyz$g")); 3028 failCount++; 3029 } catch (IllegalArgumentException iae) { 3030 if (result.length() != 0) 3031 failCount++; 3032 } 3033 3034 report("SB Substitution"); 3035 } 3036 3037 /** 3038 * Tests the usage of Matcher.appendReplacement() with literal 3039 * and group substitutions. 3040 */ 3041 private static void stringbuilderSubstitute() throws Exception { 3042 // SB substitution with literal 3043 String blah = "zzzblahzzz"; 3044 Pattern p = Pattern.compile("blah"); 3045 Matcher m = p.matcher(blah); 3046 StringBuilder result = new StringBuilder(); 3047 try { 3048 m.appendReplacement(result, "blech"); 3049 failCount++; 3050 } catch (IllegalStateException e) { 3051 } 3052 m.find(); 3053 m.appendReplacement(result, "blech"); 3054 if (!result.toString().equals("zzzblech")) 3055 failCount++; 3056 3057 m.appendTail(result); 3058 if (!result.toString().equals("zzzblechzzz")) 3059 failCount++; 3060 3061 // SB substitution with groups 3062 blah = "zzzabcdzzz"; 3063 p = Pattern.compile("(ab)(cd)*"); 3064 m = p.matcher(blah); 3065 result = new StringBuilder(); 3066 try { 3067 m.appendReplacement(result, "$1"); 3068 failCount++; 3069 } catch (IllegalStateException e) { 3070 } 3071 m.find(); 3072 m.appendReplacement(result, "$1"); 3073 if (!result.toString().equals("zzzab")) 3074 failCount++; 3075 3076 m.appendTail(result); 3077 if 
(!result.toString().equals("zzzabzzz")) 3078 failCount++; 3079 3080 // SB substitution with 3 groups 3081 blah = "zzzabcdcdefzzz"; 3082 p = Pattern.compile("(ab)(cd)*(ef)"); 3083 m = p.matcher(blah); 3084 result = new StringBuilder(); 3085 try { 3086 m.appendReplacement(result, "$1w$2w$3"); 3087 failCount++; 3088 } catch (IllegalStateException e) { 3089 } 3090 m.find(); 3091 m.appendReplacement(result, "$1w$2w$3"); 3092 if (!result.toString().equals("zzzabwcdwef")) 3093 failCount++; 3094 3095 m.appendTail(result); 3096 if (!result.toString().equals("zzzabwcdwefzzz")) 3097 failCount++; 3098 3099 // SB substitution with groups and three matches 3100 // skipping middle match 3101 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3102 p = Pattern.compile("(ab)(cd*)"); 3103 m = p.matcher(blah); 3104 result = new StringBuilder(); 3105 try { 3106 m.appendReplacement(result, "$1"); 3107 failCount++; 3108 } catch (IllegalStateException e) { 3109 } 3110 m.find(); 3111 m.appendReplacement(result, "$1"); 3112 if (!result.toString().equals("zzzab")) 3113 failCount++; 3114 3115 m.find(); 3116 m.find(); 3117 m.appendReplacement(result, "$2"); 3118 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3119 failCount++; 3120 3121 m.appendTail(result); 3122 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3123 failCount++; 3124 3125 // Check to make sure escaped $ is ignored 3126 blah = "zzzabcdcdefzzz"; 3127 p = Pattern.compile("(ab)(cd)*(ef)"); 3128 m = p.matcher(blah); 3129 result = new StringBuilder(); 3130 m.find(); 3131 m.appendReplacement(result, "$1w\\$2w$3"); 3132 if (!result.toString().equals("zzzabw$2wef")) 3133 failCount++; 3134 3135 m.appendTail(result); 3136 if (!result.toString().equals("zzzabw$2wefzzz")) 3137 failCount++; 3138 3139 // Check to make sure a reference to nonexistent group causes error 3140 blah = "zzzabcdcdefzzz"; 3141 p = Pattern.compile("(ab)(cd)*(ef)"); 3142 m = p.matcher(blah); 3143 result = new StringBuilder(); 3144 m.find(); 3145 try { 3146 
m.appendReplacement(result, "$1w$5w$3"); 3147 failCount++; 3148 } catch (IndexOutOfBoundsException ioobe) { 3149 // Correct result 3150 } 3151 3152 // Check double digit group references 3153 blah = "zzz123456789101112zzz"; 3154 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3155 m = p.matcher(blah); 3156 result = new StringBuilder(); 3157 m.find(); 3158 m.appendReplacement(result, "$1w$11w$3"); 3159 if (!result.toString().equals("zzz1w11w3")) 3160 failCount++; 3161 3162 // Check to make sure it backs off $15 to $1 if only three groups 3163 blah = "zzzabcdcdefzzz"; 3164 p = Pattern.compile("(ab)(cd)*(ef)"); 3165 m = p.matcher(blah); 3166 result = new StringBuilder(); 3167 m.find(); 3168 m.appendReplacement(result, "$1w$15w$3"); 3169 if (!result.toString().equals("zzzabwab5wef")) 3170 failCount++; 3171 3172 3173 // Supplementary character test 3174 // SB substitution with literal 3175 blah = toSupplementaries("zzzblahzzz"); 3176 p = Pattern.compile(toSupplementaries("blah")); 3177 m = p.matcher(blah); 3178 result = new StringBuilder(); 3179 try { 3180 m.appendReplacement(result, toSupplementaries("blech")); 3181 failCount++; 3182 } catch (IllegalStateException e) { 3183 } 3184 m.find(); 3185 m.appendReplacement(result, toSupplementaries("blech")); 3186 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3187 failCount++; 3188 m.appendTail(result); 3189 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3190 failCount++; 3191 3192 // SB substitution with groups 3193 blah = toSupplementaries("zzzabcdzzz"); 3194 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3195 m = p.matcher(blah); 3196 result = new StringBuilder(); 3197 try { 3198 m.appendReplacement(result, "$1"); 3199 failCount++; 3200 } catch (IllegalStateException e) { 3201 } 3202 m.find(); 3203 m.appendReplacement(result, "$1"); 3204 if (!result.toString().equals(toSupplementaries("zzzab"))) 3205 failCount++; 3206 3207 m.appendTail(result); 3208 if 
(!result.toString().equals(toSupplementaries("zzzabzzz"))) 3209 failCount++; 3210 3211 // SB substitution with 3 groups 3212 blah = toSupplementaries("zzzabcdcdefzzz"); 3213 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3214 m = p.matcher(blah); 3215 result = new StringBuilder(); 3216 try { 3217 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3218 failCount++; 3219 } catch (IllegalStateException e) { 3220 } 3221 m.find(); 3222 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3223 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3224 failCount++; 3225 3226 m.appendTail(result); 3227 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3228 failCount++; 3229 3230 // SB substitution with groups and three matches 3231 // skipping middle match 3232 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3233 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3234 m = p.matcher(blah); 3235 result = new StringBuilder(); 3236 try { 3237 m.appendReplacement(result, "$1"); 3238 failCount++; 3239 } catch (IllegalStateException e) { 3240 } 3241 m.find(); 3242 m.appendReplacement(result, "$1"); 3243 if (!result.toString().equals(toSupplementaries("zzzab"))) 3244 failCount++; 3245 3246 m.find(); 3247 m.find(); 3248 m.appendReplacement(result, "$2"); 3249 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3250 failCount++; 3251 3252 m.appendTail(result); 3253 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3254 failCount++; 3255 3256 // Check to make sure escaped $ is ignored 3257 blah = toSupplementaries("zzzabcdcdefzzz"); 3258 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3259 m = p.matcher(blah); 3260 result = new StringBuilder(); 3261 m.find(); 3262 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3263 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3264 failCount++; 3265 3266 m.appendTail(result); 3267 if 
(!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3268 failCount++; 3269 3270 // Check to make sure a reference to nonexistent group causes error 3271 blah = toSupplementaries("zzzabcdcdefzzz"); 3272 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3273 m = p.matcher(blah); 3274 result = new StringBuilder(); 3275 m.find(); 3276 try { 3277 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3278 failCount++; 3279 } catch (IndexOutOfBoundsException ioobe) { 3280 // Correct result 3281 } 3282 // Check double digit group references 3283 blah = toSupplementaries("zzz123456789101112zzz"); 3284 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3285 m = p.matcher(blah); 3286 result = new StringBuilder(); 3287 m.find(); 3288 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3289 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3290 failCount++; 3291 3292 // Check to make sure it backs off $15 to $1 if only three groups 3293 blah = toSupplementaries("zzzabcdcdefzzz"); 3294 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3295 m = p.matcher(blah); 3296 result = new StringBuilder(); 3297 m.find(); 3298 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3299 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3300 failCount++; 3301 // Check nothing has been appended into the output buffer if 3302 // the replacement string triggers IllegalArgumentException. 3303 p = Pattern.compile("(abc)"); 3304 m = p.matcher("abcd"); 3305 result = new StringBuilder(); 3306 m.find(); 3307 try { 3308 m.appendReplacement(result, ("xyz$g")); 3309 failCount++; 3310 } catch (IllegalArgumentException iae) { 3311 if (result.length() != 0) 3312 failCount++; 3313 } 3314 report("SB Substitution 2"); 3315 } 3316 3317 /* 3318 * 5 groups of characters are created to make a substitution string. 
3319 * A base string will be created including random lead chars, the 3320 * substitution string, and random trailing chars. 3321 * A pattern containing the 5 groups is searched for and replaced with: 3322 * random group + random string + random group. 3323 * The results are checked for correctness. 3324 */ 3325 private static void substitutionBasher() { 3326 for (int runs = 0; runs<1000; runs++) { 3327 // Create a base string to work in 3328 int leadingChars = generator.nextInt(10); 3329 StringBuffer baseBuffer = new StringBuffer(100); 3330 String leadingString = getRandomAlphaString(leadingChars); 3331 baseBuffer.append(leadingString); 3332 3333 // Create 5 groups of random number of random chars 3334 // Create the string to substitute 3335 // Create the pattern string to search for 3336 StringBuffer bufferToSub = new StringBuffer(25); 3337 StringBuffer bufferToPat = new StringBuffer(50); 3338 String[] groups = new String[5]; 3339 for(int i=0; i<5; i++) { 3340 int aGroupSize = generator.nextInt(5)+1; 3341 groups[i] = getRandomAlphaString(aGroupSize); 3342 bufferToSub.append(groups[i]); 3343 bufferToPat.append('('); 3344 bufferToPat.append(groups[i]); 3345 bufferToPat.append(')'); 3346 } 3347 String stringToSub = bufferToSub.toString(); 3348 String pattern = bufferToPat.toString(); 3349 3350 // Place sub string into working string at random index 3351 baseBuffer.append(stringToSub); 3352 3353 // Append random chars to end 3354 int trailingChars = generator.nextInt(10); 3355 String trailingString = getRandomAlphaString(trailingChars); 3356 baseBuffer.append(trailingString); 3357 String baseString = baseBuffer.toString(); 3358 3359 // Create test pattern and matcher 3360 Pattern p = Pattern.compile(pattern); 3361 Matcher m = p.matcher(baseString); 3362 3363 // Reject candidate if pattern happens to start early 3364 m.find(); 3365 if (m.start() < leadingChars) 3366 continue; 3367 3368 // Reject candidate if more than one match 3369 if (m.find()) 3370 continue; 3371 
3372 // Construct a replacement string with : 3373 // random group + random string + random group 3374 StringBuffer bufferToRep = new StringBuffer(); 3375 int groupIndex1 = generator.nextInt(5); 3376 bufferToRep.append("$" + (groupIndex1 + 1)); 3377 String randomMidString = getRandomAlphaString(5); 3378 bufferToRep.append(randomMidString); 3379 int groupIndex2 = generator.nextInt(5); 3380 bufferToRep.append("$" + (groupIndex2 + 1)); 3381 String replacement = bufferToRep.toString(); 3382 3383 // Do the replacement 3384 String result = m.replaceAll(replacement); 3385 3386 // Construct expected result 3387 StringBuffer bufferToRes = new StringBuffer(); 3388 bufferToRes.append(leadingString); 3389 bufferToRes.append(groups[groupIndex1]); 3390 bufferToRes.append(randomMidString); 3391 bufferToRes.append(groups[groupIndex2]); 3392 bufferToRes.append(trailingString); 3393 String expectedResult = bufferToRes.toString(); 3394 3395 // Check results 3396 if (!result.equals(expectedResult)) 3397 failCount++; 3398 } 3399 3400 report("Substitution Basher"); 3401 } 3402 3403 /* 3404 * 5 groups of characters are created to make a substitution string. 3405 * A base string will be created including random lead chars, the 3406 * substitution string, and random trailing chars. 3407 * A pattern containing the 5 groups is searched for and replaced with: 3408 * random group + random string + random group. 3409 * The results are checked for correctness. 
3410 */ 3411 private static void substitutionBasher2() { 3412 for (int runs = 0; runs<1000; runs++) { 3413 // Create a base string to work in 3414 int leadingChars = generator.nextInt(10); 3415 StringBuilder baseBuffer = new StringBuilder(100); 3416 String leadingString = getRandomAlphaString(leadingChars); 3417 baseBuffer.append(leadingString); 3418 3419 // Create 5 groups of random number of random chars 3420 // Create the string to substitute 3421 // Create the pattern string to search for 3422 StringBuilder bufferToSub = new StringBuilder(25); 3423 StringBuilder bufferToPat = new StringBuilder(50); 3424 String[] groups = new String[5]; 3425 for(int i=0; i<5; i++) { 3426 int aGroupSize = generator.nextInt(5)+1; 3427 groups[i] = getRandomAlphaString(aGroupSize); 3428 bufferToSub.append(groups[i]); 3429 bufferToPat.append('('); 3430 bufferToPat.append(groups[i]); 3431 bufferToPat.append(')'); 3432 } 3433 String stringToSub = bufferToSub.toString(); 3434 String pattern = bufferToPat.toString(); 3435 3436 // Place sub string into working string at random index 3437 baseBuffer.append(stringToSub); 3438 3439 // Append random chars to end 3440 int trailingChars = generator.nextInt(10); 3441 String trailingString = getRandomAlphaString(trailingChars); 3442 baseBuffer.append(trailingString); 3443 String baseString = baseBuffer.toString(); 3444 3445 // Create test pattern and matcher 3446 Pattern p = Pattern.compile(pattern); 3447 Matcher m = p.matcher(baseString); 3448 3449 // Reject candidate if pattern happens to start early 3450 m.find(); 3451 if (m.start() < leadingChars) 3452 continue; 3453 3454 // Reject candidate if more than one match 3455 if (m.find()) 3456 continue; 3457 3458 // Construct a replacement string with : 3459 // random group + random string + random group 3460 StringBuilder bufferToRep = new StringBuilder(); 3461 int groupIndex1 = generator.nextInt(5); 3462 bufferToRep.append("$" + (groupIndex1 + 1)); 3463 String randomMidString = 
getRandomAlphaString(5); 3464 bufferToRep.append(randomMidString); 3465 int groupIndex2 = generator.nextInt(5); 3466 bufferToRep.append("$" + (groupIndex2 + 1)); 3467 String replacement = bufferToRep.toString(); 3468 3469 // Do the replacement 3470 String result = m.replaceAll(replacement); 3471 3472 // Construct expected result 3473 StringBuilder bufferToRes = new StringBuilder(); 3474 bufferToRes.append(leadingString); 3475 bufferToRes.append(groups[groupIndex1]); 3476 bufferToRes.append(randomMidString); 3477 bufferToRes.append(groups[groupIndex2]); 3478 bufferToRes.append(trailingString); 3479 String expectedResult = bufferToRes.toString(); 3480 3481 // Check results 3482 if (!result.equals(expectedResult)) { 3483 failCount++; 3484 } 3485 } 3486 3487 report("Substitution Basher 2"); 3488 } 3489 3490 /** 3491 * Checks the handling of some escape sequences that the Pattern 3492 * class should process instead of the java compiler. These are 3493 * not in the file because the escapes should be be processed 3494 * by the Pattern class when the regex is compiled. 3495 */ 3496 private static void escapes() throws Exception { 3497 Pattern p = Pattern.compile("\\043"); 3498 Matcher m = p.matcher("#"); 3499 if (!m.find()) 3500 failCount++; 3501 3502 p = Pattern.compile("\\x23"); 3503 m = p.matcher("#"); 3504 if (!m.find()) 3505 failCount++; 3506 3507 p = Pattern.compile("\\u0023"); 3508 m = p.matcher("#"); 3509 if (!m.find()) 3510 failCount++; 3511 3512 report("Escape sequences"); 3513 } 3514 3515 /** 3516 * Checks the handling of blank input situations. These 3517 * tests are incompatible with my test file format. 
3518 */ 3519 private static void blankInput() throws Exception { 3520 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3521 Matcher m = p.matcher(""); 3522 if (m.find()) 3523 failCount++; 3524 3525 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3526 m = p.matcher(""); 3527 if (!m.find()) 3528 failCount++; 3529 3530 p = Pattern.compile("abc"); 3531 m = p.matcher(""); 3532 if (m.find()) 3533 failCount++; 3534 3535 p = Pattern.compile("a*"); 3536 m = p.matcher(""); 3537 if (!m.find()) 3538 failCount++; 3539 3540 report("Blank input"); 3541 } 3542 3543 /** 3544 * Tests the Boyer-Moore pattern matching of a character sequence 3545 * on randomly generated patterns. 3546 */ 3547 private static void bm() throws Exception { 3548 doBnM('a'); 3549 report("Boyer Moore (ASCII)"); 3550 3551 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3552 report("Boyer Moore (Supplementary)"); 3553 } 3554 3555 private static void doBnM(int baseCharacter) throws Exception { 3556 int achar=0; 3557 3558 for (int i=0; i<100; i++) { 3559 // Create a short pattern to search for 3560 int patternLength = generator.nextInt(7) + 4; 3561 StringBuffer patternBuffer = new StringBuffer(patternLength); 3562 String pattern; 3563 retry: for (;;) { 3564 for (int x=0; x<patternLength; x++) { 3565 int ch = baseCharacter + generator.nextInt(26); 3566 if (Character.isSupplementaryCodePoint(ch)) { 3567 patternBuffer.append(Character.toChars(ch)); 3568 } else { 3569 patternBuffer.append((char)ch); 3570 } 3571 } 3572 pattern = patternBuffer.toString(); 3573 3574 // Avoid patterns that start and end with the same substring 3575 // See JDK-6854417 3576 for (int x=1; x < pattern.length(); x++) { 3577 if (pattern.startsWith(pattern.substring(x))) 3578 continue retry; 3579 } 3580 break; 3581 } 3582 Pattern p = Pattern.compile(pattern); 3583 3584 // Create a buffer with random ASCII chars that does 3585 // not match the sample 3586 String toSearch = null; 3587 StringBuffer s = null; 3588 Matcher m = 
p.matcher(""); 3589 do { 3590 s = new StringBuffer(100); 3591 for (int x=0; x<100; x++) { 3592 int ch = baseCharacter + generator.nextInt(26); 3593 if (Character.isSupplementaryCodePoint(ch)) { 3594 s.append(Character.toChars(ch)); 3595 } else { 3596 s.append((char)ch); 3597 } 3598 } 3599 toSearch = s.toString(); 3600 m.reset(toSearch); 3601 } while (m.find()); 3602 3603 // Insert the pattern at a random spot 3604 int insertIndex = generator.nextInt(99); 3605 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3606 insertIndex++; 3607 s = s.insert(insertIndex, pattern); 3608 toSearch = s.toString(); 3609 3610 // Make sure that the pattern is found 3611 m.reset(toSearch); 3612 if (!m.find()) 3613 failCount++; 3614 3615 // Make sure that the match text is the pattern 3616 if (!m.group().equals(pattern)) 3617 failCount++; 3618 3619 // Make sure match occured at insertion point 3620 if (m.start() != insertIndex) 3621 failCount++; 3622 } 3623 } 3624 3625 /** 3626 * Tests the matching of slices on randomly generated patterns. 3627 * The Boyer-Moore optimization is not done on these patterns 3628 * because it uses unicode case folding. 
3629 */ 3630 private static void slice() throws Exception { 3631 doSlice(Character.MAX_VALUE); 3632 report("Slice"); 3633 3634 doSlice(Character.MAX_CODE_POINT); 3635 report("Slice (Supplementary)"); 3636 } 3637 3638 private static void doSlice(int maxCharacter) throws Exception { 3639 Random generator = new Random(); 3640 int achar=0; 3641 3642 for (int i=0; i<100; i++) { 3643 // Create a short pattern to search for 3644 int patternLength = generator.nextInt(7) + 4; 3645 StringBuffer patternBuffer = new StringBuffer(patternLength); 3646 for (int x=0; x<patternLength; x++) { 3647 int randomChar = 0; 3648 while (!Character.isLetterOrDigit(randomChar)) 3649 randomChar = generator.nextInt(maxCharacter); 3650 if (Character.isSupplementaryCodePoint(randomChar)) { 3651 patternBuffer.append(Character.toChars(randomChar)); 3652 } else { 3653 patternBuffer.append((char) randomChar); 3654 } 3655 } 3656 String pattern = patternBuffer.toString(); 3657 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3658 3659 // Create a buffer with random chars that does not match the sample 3660 String toSearch = null; 3661 StringBuffer s = null; 3662 Matcher m = p.matcher(""); 3663 do { 3664 s = new StringBuffer(100); 3665 for (int x=0; x<100; x++) { 3666 int randomChar = 0; 3667 while (!Character.isLetterOrDigit(randomChar)) 3668 randomChar = generator.nextInt(maxCharacter); 3669 if (Character.isSupplementaryCodePoint(randomChar)) { 3670 s.append(Character.toChars(randomChar)); 3671 } else { 3672 s.append((char) randomChar); 3673 } 3674 } 3675 toSearch = s.toString(); 3676 m.reset(toSearch); 3677 } while (m.find()); 3678 3679 // Insert the pattern at a random spot 3680 int insertIndex = generator.nextInt(99); 3681 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3682 insertIndex++; 3683 s = s.insert(insertIndex, pattern); 3684 toSearch = s.toString(); 3685 3686 // Make sure that the pattern is found 3687 m.reset(toSearch); 3688 if (!m.find()) 3689 failCount++; 3690 3691 // 
Make sure that the match text is the pattern 3692 if (!m.group().equals(pattern)) 3693 failCount++; 3694 3695 // Make sure match occured at insertion point 3696 if (m.start() != insertIndex) 3697 failCount++; 3698 } 3699 } 3700 3701 private static void explainFailure(String pattern, String data, 3702 String expected, String actual) { 3703 System.err.println("----------------------------------------"); 3704 System.err.println("Pattern = "+pattern); 3705 System.err.println("Data = "+data); 3706 System.err.println("Expected = " + expected); 3707 System.err.println("Actual = " + actual); 3708 } 3709 3710 private static void explainFailure(String pattern, String data, 3711 Throwable t) { 3712 System.err.println("----------------------------------------"); 3713 System.err.println("Pattern = "+pattern); 3714 System.err.println("Data = "+data); 3715 t.printStackTrace(System.err); 3716 } 3717 3718 // Testing examples from a file 3719 3720 /** 3721 * Goes through the file "TestCases.txt" and creates many patterns 3722 * described in the file, matching the patterns against input lines in 3723 * the file, and comparing the results against the correct results 3724 * also found in the file. The file format is described in comments 3725 * at the head of the file. 3726 */ 3727 private static void processFile(String fileName) throws Exception { 3728 File testCases = new File(System.getProperty("test.src", "."), 3729 fileName); 3730 FileInputStream in = new FileInputStream(testCases); 3731 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3732 3733 // Process next test case. 
3734 String aLine; 3735 while((aLine = r.readLine()) != null) { 3736 // Read a line for pattern 3737 String patternString = grabLine(r); 3738 Pattern p = null; 3739 try { 3740 p = compileTestPattern(patternString); 3741 } catch (PatternSyntaxException e) { 3742 String dataString = grabLine(r); 3743 String expectedResult = grabLine(r); 3744 if (expectedResult.startsWith("error")) 3745 continue; 3746 explainFailure(patternString, dataString, e); 3747 failCount++; 3748 continue; 3749 } 3750 3751 // Read a line for input string 3752 String dataString = grabLine(r); 3753 Matcher m = p.matcher(dataString); 3754 StringBuffer result = new StringBuffer(); 3755 3756 // Check for IllegalStateExceptions before a match 3757 failCount += preMatchInvariants(m); 3758 3759 boolean found = m.find(); 3760 3761 if (found) 3762 failCount += postTrueMatchInvariants(m); 3763 else 3764 failCount += postFalseMatchInvariants(m); 3765 3766 if (found) { 3767 result.append("true "); 3768 result.append(m.group(0) + " "); 3769 } else { 3770 result.append("false "); 3771 } 3772 3773 result.append(m.groupCount()); 3774 3775 if (found) { 3776 for (int i=1; i<m.groupCount()+1; i++) 3777 if (m.group(i) != null) 3778 result.append(" " +m.group(i)); 3779 } 3780 3781 // Read a line for the expected result 3782 String expectedResult = grabLine(r); 3783 3784 if (!result.toString().equals(expectedResult)) { 3785 explainFailure(patternString, dataString, expectedResult, result.toString()); 3786 failCount++; 3787 } 3788 } 3789 3790 report(fileName); 3791 } 3792 3793 private static int preMatchInvariants(Matcher m) { 3794 int failCount = 0; 3795 try { 3796 m.start(); 3797 failCount++; 3798 } catch (IllegalStateException ise) {} 3799 try { 3800 m.end(); 3801 failCount++; 3802 } catch (IllegalStateException ise) {} 3803 try { 3804 m.group(); 3805 failCount++; 3806 } catch (IllegalStateException ise) {} 3807 return failCount; 3808 } 3809 3810 private static int postFalseMatchInvariants(Matcher m) { 3811 int 
failCount = 0; 3812 try { 3813 m.group(); 3814 failCount++; 3815 } catch (IllegalStateException ise) {} 3816 try { 3817 m.start(); 3818 failCount++; 3819 } catch (IllegalStateException ise) {} 3820 try { 3821 m.end(); 3822 failCount++; 3823 } catch (IllegalStateException ise) {} 3824 return failCount; 3825 } 3826 3827 private static int postTrueMatchInvariants(Matcher m) { 3828 int failCount = 0; 3829 //assert(m.start() = m.start(0); 3830 if (m.start() != m.start(0)) 3831 failCount++; 3832 //assert(m.end() = m.end(0); 3833 if (m.start() != m.start(0)) 3834 failCount++; 3835 //assert(m.group() = m.group(0); 3836 if (!m.group().equals(m.group(0))) 3837 failCount++; 3838 try { 3839 m.group(50); 3840 failCount++; 3841 } catch (IndexOutOfBoundsException ise) {} 3842 3843 return failCount; 3844 } 3845 3846 private static Pattern compileTestPattern(String patternString) { 3847 if (!patternString.startsWith("'")) { 3848 return Pattern.compile(patternString); 3849 } 3850 3851 int break1 = patternString.lastIndexOf("'"); 3852 String flagString = patternString.substring( 3853 break1+1, patternString.length()); 3854 patternString = patternString.substring(1, break1); 3855 3856 if (flagString.equals("i")) 3857 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3858 3859 if (flagString.equals("m")) 3860 return Pattern.compile(patternString, Pattern.MULTILINE); 3861 3862 return Pattern.compile(patternString); 3863 } 3864 3865 /** 3866 * Reads a line from the input file. Keeps reading lines until a non 3867 * empty non comment line is read. If the line contains a \n then 3868 * these two characters are replaced by a newline char. If a \\uxxxx 3869 * sequence is read then the sequence is replaced by the unicode char. 
3870 */ 3871 private static String grabLine(BufferedReader r) throws Exception { 3872 int index = 0; 3873 String line = r.readLine(); 3874 while (line.startsWith("//") || line.length() < 1) 3875 line = r.readLine(); 3876 while ((index = line.indexOf("\\n")) != -1) { 3877 StringBuffer temp = new StringBuffer(line); 3878 temp.replace(index, index+2, "\n"); 3879 line = temp.toString(); 3880 } 3881 while ((index = line.indexOf("\\u")) != -1) { 3882 StringBuffer temp = new StringBuffer(line); 3883 String value = temp.substring(index+2, index+6); 3884 char aChar = (char)Integer.parseInt(value, 16); 3885 String unicodeChar = "" + aChar; 3886 temp.replace(index, index+6, unicodeChar); 3887 line = temp.toString(); 3888 } 3889 3890 return line; 3891 } 3892 3893 private static void check(Pattern p, String s, String g, String expected) { 3894 Matcher m = p.matcher(s); 3895 m.find(); 3896 if (!m.group(g).equals(expected) || 3897 s.charAt(m.start(g)) != expected.charAt(0) || 3898 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 3899 failCount++; 3900 } 3901 3902 private static void checkReplaceFirst(String p, String s, String r, String expected) 3903 { 3904 if (!expected.equals(Pattern.compile(p) 3905 .matcher(s) 3906 .replaceFirst(r))) 3907 failCount++; 3908 } 3909 3910 private static void checkReplaceAll(String p, String s, String r, String expected) 3911 { 3912 if (!expected.equals(Pattern.compile(p) 3913 .matcher(s) 3914 .replaceAll(r))) 3915 failCount++; 3916 } 3917 3918 private static void checkExpectedFail(String p) { 3919 try { 3920 Pattern.compile(p); 3921 } catch (PatternSyntaxException pse) { 3922 //pse.printStackTrace(); 3923 return; 3924 } 3925 failCount++; 3926 } 3927 3928 private static void checkExpectedIAE(Matcher m, String g) { 3929 m.find(); 3930 try { 3931 m.group(g); 3932 } catch (IllegalArgumentException x) { 3933 //iae.printStackTrace(); 3934 try { 3935 m.start(g); 3936 } catch (IllegalArgumentException xx) { 3937 try { 3938 m.start(g); 
3939 } catch (IllegalArgumentException xxx) { 3940 return; 3941 } 3942 } 3943 } 3944 failCount++; 3945 } 3946 3947 private static void checkExpectedNPE(Matcher m) { 3948 m.find(); 3949 try { 3950 m.group(null); 3951 } catch (NullPointerException x) { 3952 try { 3953 m.start(null); 3954 } catch (NullPointerException xx) { 3955 try { 3956 m.end(null); 3957 } catch (NullPointerException xxx) { 3958 return; 3959 } 3960 } 3961 } 3962 failCount++; 3963 } 3964 3965 private static void namedGroupCaptureTest() throws Exception { 3966 check(Pattern.compile("x+(?<gname>y+)z+"), 3967 "xxxyyyzzz", 3968 "gname", 3969 "yyy"); 3970 3971 check(Pattern.compile("x+(?<gname8>y+)z+"), 3972 "xxxyyyzzz", 3973 "gname8", 3974 "yyy"); 3975 3976 //backref 3977 Pattern pattern = Pattern.compile("(a*)bc\\1"); 3978 check(pattern, "zzzaabcazzz", true); // found "abca" 3979 3980 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 3981 "zzzaabcaazzz", true); 3982 3983 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 3984 "abcdefabc", true); 3985 3986 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 3987 "abcdefghijkk", true); 3988 3989 // Supplementary character tests 3990 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3991 toSupplementaries("zzzaabcazzz"), true); 3992 3993 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3994 toSupplementaries("zzzaabcaazzz"), true); 3995 3996 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 3997 toSupplementaries("abcdefabc"), true); 3998 3999 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 4000 "(?<gname>" + 4001 toSupplementaries("k)") + "\\k<gname>"), 4002 toSupplementaries("abcdefghijkk"), true); 4003 4004 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 4005 "xxxyyyzzzyyy", 4006 "gname", 4007 "yyy"); 4008 4009 //replaceFirst/All 4010 checkReplaceFirst("(?<gn>ab)(c*)", 4011 
"abccczzzabcczzzabccc", 4012 "${gn}", 4013 "abzzzabcczzzabccc"); 4014 4015 checkReplaceAll("(?<gn>ab)(c*)", 4016 "abccczzzabcczzzabccc", 4017 "${gn}", 4018 "abzzzabzzzab"); 4019 4020 4021 checkReplaceFirst("(?<gn>ab)(c*)", 4022 "zzzabccczzzabcczzzabccczzz", 4023 "${gn}", 4024 "zzzabzzzabcczzzabccczzz"); 4025 4026 checkReplaceAll("(?<gn>ab)(c*)", 4027 "zzzabccczzzabcczzzabccczzz", 4028 "${gn}", 4029 "zzzabzzzabzzzabzzz"); 4030 4031 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4032 "zzzabccczzzabcczzzabccczzz", 4033 "${gn2}", 4034 "zzzccczzzabcczzzabccczzz"); 4035 4036 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4037 "zzzabccczzzabcczzzabccczzz", 4038 "${gn2}", 4039 "zzzccczzzcczzzccczzz"); 4040 4041 //toSupplementaries("(ab)(c*)")); 4042 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4043 ")(?<gn2>" + toSupplementaries("c") + "*)", 4044 toSupplementaries("abccczzzabcczzzabccc"), 4045 "${gn1}", 4046 toSupplementaries("abzzzabcczzzabccc")); 4047 4048 4049 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4050 ")(?<gn2>" + toSupplementaries("c") + "*)", 4051 toSupplementaries("abccczzzabcczzzabccc"), 4052 "${gn1}", 4053 toSupplementaries("abzzzabzzzab")); 4054 4055 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4056 ")(?<gn2>" + toSupplementaries("c") + "*)", 4057 toSupplementaries("abccczzzabcczzzabccc"), 4058 "${gn2}", 4059 toSupplementaries("ccczzzabcczzzabccc")); 4060 4061 4062 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4063 ")(?<gn2>" + toSupplementaries("c") + "*)", 4064 toSupplementaries("abccczzzabcczzzabccc"), 4065 "${gn2}", 4066 toSupplementaries("ccczzzcczzzccc")); 4067 4068 checkReplaceFirst("(?<dog>Dog)AndCat", 4069 "zzzDogAndCatzzzDogAndCatzzz", 4070 "${dog}", 4071 "zzzDogzzzDogAndCatzzz"); 4072 4073 4074 checkReplaceAll("(?<dog>Dog)AndCat", 4075 "zzzDogAndCatzzzDogAndCatzzz", 4076 "${dog}", 4077 "zzzDogzzzDogzzz"); 4078 4079 // backref in Matcher & String 4080 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", 
"${gn}").equals("abefij") || 4081 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4082 failCount++; 4083 4084 // negative 4085 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4086 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4087 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4088 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4089 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4090 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4091 "gnameX"); 4092 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4093 report("NamedGroupCapture"); 4094 } 4095 4096 // This is for bug 6919132 4097 private static void nonBmpClassComplementTest() throws Exception { 4098 Pattern p = Pattern.compile("\\P{Lu}"); 4099 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4100 4101 if (m.find() && m.start() == 1) 4102 failCount++; 4103 4104 // from a unicode category 4105 p = Pattern.compile("\\P{Lu}"); 4106 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4107 if (m.find()) 4108 failCount++; 4109 if (!m.hitEnd()) 4110 failCount++; 4111 4112 // block 4113 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4114 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4115 if (m.find() && m.start() == 1) 4116 failCount++; 4117 4118 p = Pattern.compile("\\P{sc=GRANTHA}"); 4119 m = p.matcher(new String(new int[] {0x11350}, 0, 1)); 4120 if (m.find() && m.start() == 1) 4121 failCount++; 4122 4123 report("NonBmpClassComplement"); 4124 } 4125 4126 private static void unicodePropertiesTest() throws Exception { 4127 // different forms 4128 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4129 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4130 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4131 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4132 
!Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4133 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4134 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4135 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4136 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4137 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4138 failCount++; 4139 4140 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4141 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4142 Matcher lastSM = common; 4143 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4144 4145 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4146 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4147 Matcher lastBM = latin; 4148 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4149 4150 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4151 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4152 continue; // only pick couple code points, they are the same 4153 } 4154 4155 // Unicode Script 4156 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4157 Matcher m; 4158 String str = new String(Character.toChars(cp)); 4159 if (script == lastScript) { 4160 m = lastSM; 4161 m.reset(str); 4162 } else { 4163 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4164 } 4165 if (!m.matches()) { 4166 failCount++; 4167 } 4168 Matcher other = (script == Character.UnicodeScript.COMMON)? 
unknown : common; 4169 other.reset(str); 4170 if (other.matches()) { 4171 failCount++; 4172 } 4173 lastSM = m; 4174 lastScript = script; 4175 4176 // Unicode Block 4177 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4178 if (block == null) { 4179 //System.out.printf("Not a Block: cp=%x%n", cp); 4180 continue; 4181 } 4182 if (block == lastBlock) { 4183 m = lastBM; 4184 m.reset(str); 4185 } else { 4186 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4187 } 4188 if (!m.matches()) { 4189 failCount++; 4190 } 4191 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4192 other.reset(str); 4193 if (other.matches()) { 4194 failCount++; 4195 } 4196 lastBM = m; 4197 lastBlock = block; 4198 } 4199 report("unicodeProperties"); 4200 } 4201 4202 private static void unicodeHexNotationTest() throws Exception { 4203 4204 // negative 4205 checkExpectedFail("\\x{-23}"); 4206 checkExpectedFail("\\x{110000}"); 4207 checkExpectedFail("\\x{}"); 4208 checkExpectedFail("\\x{AB[ef]"); 4209 4210 // codepoint 4211 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4212 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4213 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4214 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4215 4216 // in class 4217 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4218 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4219 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4220 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4221 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4222 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4223 4224 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4225 String s = "A" + new String(Character.toChars(cp)) + "B"; 4226 String hexUTF16 = (cp <= 0xFFFF)? 
String.format("\\u%04x", cp) 4227 : String.format("\\u%04x\\u%04x", 4228 (int) Character.toChars(cp)[0], 4229 (int) Character.toChars(cp)[1]); 4230 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4231 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4232 failCount++; 4233 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4234 failCount++; 4235 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4236 failCount++; 4237 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4238 failCount++; 4239 } 4240 report("unicodeHexNotation"); 4241 } 4242 4243 private static void unicodeClassesTest() throws Exception { 4244 4245 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4246 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4247 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4248 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4249 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4250 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4251 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4252 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4253 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4254 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4255 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4256 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4257 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4258 Matcher bound = Pattern.compile("\\b").matcher(""); 4259 Matcher word = Pattern.compile("\\w++").matcher(""); 4260 // UNICODE_CHARACTER_CLASS 4261 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4262 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4263 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4264 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 
4265 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4266 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4267 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4268 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4269 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4270 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4271 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4272 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4273 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4274 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4275 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4276 // embedded flag (?U) 4277 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4278 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4279 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4280 4281 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4282 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4283 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4284 // properties 4285 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4286 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4287 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4288 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4289 Matcher alphaP = 
Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4290 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4291 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4292 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4293 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4294 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4295 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4296 4297 // javaMethod 4298 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4299 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4300 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4301 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4302 4303 for (int cp = 1; cp < 0x30000; cp++) { 4304 String str = new String(Character.toChars(cp)); 4305 int type = Character.getType(cp); 4306 if (// lower 4307 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4308 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4309 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4310 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4311 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4312 // upper 4313 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4314 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4315 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4316 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4317 // alpha 4318 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4319 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4320 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4321 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4322 // digit 4323 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4324 Character.isDigit(cp) != digitU.reset(str).matches() || 4325 // alnum 4326 
POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4327 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4328 // punct 4329 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4330 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4331 // graph 4332 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4333 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4334 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4335 // blank 4336 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4337 != blank.reset(str).matches() || 4338 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4339 // print 4340 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4341 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4342 // cntrl 4343 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4344 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4345 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4346 // hexdigit 4347 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4348 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4349 // space 4350 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4351 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4352 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4353 // word 4354 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4355 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4356 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4357 // bwordb 4358 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4359 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4360 // properties 4361 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4362 Character.isLetter(cp) != letterP.reset(str).matches()|| 4363 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4364 Character.isIdeographic(cp) != ideogJ.reset(str).matches() || 4365 (Character.UNASSIGNED == type) == 
definedP.reset(str).matches() || 4366 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4367 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches()) 4368 failCount++; 4369 } 4370 4371 // bounds/word align 4372 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4373 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4374 failCount++; 4375 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4376 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4377 failCount++; 4378 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4379 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4380 failCount++; 4381 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4382 failCount++; 4383 report("unicodePredefinedClasses"); 4384 } 4385 4386 private static void unicodeCharacterNameTest() throws Exception { 4387 4388 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { 4389 if (!Character.isValidCodePoint(cp) || 4390 Character.getType(cp) == Character.UNASSIGNED) 4391 continue; 4392 String str = new String(Character.toChars(cp)); 4393 // single 4394 String p = "\\N{" + Character.getName(cp) + "}"; 4395 if (!Pattern.compile(p).matcher(str).matches()) { 4396 failCount++; 4397 } 4398 // class[c] 4399 p = "[\\N{" + Character.getName(cp) + "}]"; 4400 if (!Pattern.compile(p).matcher(str).matches()) { 4401 failCount++; 4402 } 4403 } 4404 4405 // range 4406 for (int i = 0; i < 10; i++) { 4407 int start = generator.nextInt(20); 4408 int end = start + generator.nextInt(200); 4409 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; 4410 String str; 4411 for (int cp = start; cp < end; cp++) { 4412 str = new String(Character.toChars(cp)); 4413 if (!Pattern.compile(p).matcher(str).matches()) { 4414 failCount++; 4415 } 4416 } 4417 str = new String(Character.toChars(end + 10)); 4418 if (Pattern.compile(p).matcher(str).matches()) { 4419 failCount++; 4420 } 4421 } 4422 4423 // slice 4424 for (int i = 0; i < 10; 
i++) { 4425 int n = generator.nextInt(256); 4426 int[] buf = new int[n]; 4427 StringBuffer sb = new StringBuffer(1024); 4428 for (int j = 0; j < n; j++) { 4429 int cp = generator.nextInt(1000); 4430 if (!Character.isValidCodePoint(cp) || 4431 Character.getType(cp) == Character.UNASSIGNED) 4432 cp = 0x4e00; // just use 4e00 4433 sb.append("\\N{" + Character.getName(cp) + "}"); 4434 buf[j] = cp; 4435 } 4436 String p = sb.toString(); 4437 String str = new String(buf, 0, buf.length); 4438 if (!Pattern.compile(p).matcher(str).matches()) { 4439 failCount++; 4440 } 4441 } 4442 report("unicodeCharacterName"); 4443 } 4444 4445 private static void horizontalAndVerticalWSTest() throws Exception { 4446 String hws = new String (new char[] { 4447 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4448 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4449 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4450 0x202f, 0x205f, 0x3000 }); 4451 String vws = new String (new char[] { 4452 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4453 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4454 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4455 failCount++; 4456 if (Pattern.compile("\\H").matcher(hws).find() || 4457 Pattern.compile("[\\H]").matcher(hws).find()) 4458 failCount++; 4459 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4460 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4461 failCount++; 4462 if (Pattern.compile("\\V").matcher(vws).find() || 4463 Pattern.compile("[\\V]").matcher(vws).find()) 4464 failCount++; 4465 String prefix = "abcd"; 4466 String suffix = "efgh"; 4467 String ng = "A"; 4468 for (int i = 0; i < hws.length(); i++) { 4469 String c = String.valueOf(hws.charAt(i)); 4470 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4471 if (!m.find() || !c.equals(m.group())) 4472 failCount++; 4473 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4474 if (!m.find() || !c.equals(m.group())) 4475 failCount++; 4476 4477 m = 
Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4478 if (!m.find() || !ng.equals(m.group())) 4479 failCount++; 4480 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4481 if (!m.find() || !ng.equals(m.group())) 4482 failCount++; 4483 } 4484 for (int i = 0; i < vws.length(); i++) { 4485 String c = String.valueOf(vws.charAt(i)); 4486 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4487 if (!m.find() || !c.equals(m.group())) 4488 failCount++; 4489 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4490 if (!m.find() || !c.equals(m.group())) 4491 failCount++; 4492 4493 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4494 if (!m.find() || !ng.equals(m.group())) 4495 failCount++; 4496 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4497 if (!m.find() || !ng.equals(m.group())) 4498 failCount++; 4499 } 4500 // \v in range is interpreted as 0x0B. 
This is the undocumented behavior 4501 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4502 failCount++; 4503 report("horizontalAndVerticalWSTest"); 4504 } 4505 4506 private static void linebreakTest() throws Exception { 4507 String linebreaks = new String (new char[] { 4508 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4509 String crnl = "\r\n"; 4510 if (!Pattern.compile("\\R+").matcher(linebreaks).matches() || 4511 !Pattern.compile("\\R").matcher(crnl).matches() || 4512 Pattern.compile("\\R\\R").matcher(crnl).matches()) 4513 failCount++; 4514 report("linebreakTest"); 4515 } 4516 4517 // #7189363 4518 private static void branchTest() throws Exception { 4519 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4520 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4521 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4522 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4523 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4524 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4525 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4526 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4527 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4528 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4529 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4530 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4531 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4532 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4533 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4534 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4535 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4536 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4537 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4538 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4539 
!Pattern.compile("(a)?bc|de").matcher("de").matches() || 4540 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4541 failCount++; 4542 report("branchTest"); 4543 } 4544 4545 // This test is for 8007395 4546 private static void groupCurlyNotFoundSuppTest() throws Exception { 4547 String input = "test this as \ud83d\ude0d"; 4548 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4549 "test(.)*(@[a-zA-Z.]+)", 4550 "test([^B])+(@[a-zA-Z.]+)", 4551 "test([^B])*(@[a-zA-Z.]+)", 4552 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4553 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4554 }) { 4555 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4556 .matcher(input); 4557 try { 4558 if (m.find()) { 4559 failCount++; 4560 } 4561 } catch (Exception x) { 4562 failCount++; 4563 } 4564 } 4565 report("GroupCurly NotFoundSupp"); 4566 } 4567 4568 // This test is for 8023647 4569 private static void groupCurlyBackoffTest() throws Exception { 4570 if (!"abc1c".matches("(\\w)+1\\1") || 4571 "abc11".matches("(\\w)+1\\1")) { 4572 failCount++; 4573 } 4574 report("GroupCurly backoff"); 4575 } 4576 4577 // This test is for 8012646 4578 private static void patternAsPredicate() throws Exception { 4579 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4580 4581 if (p.test("")) { 4582 failCount++; 4583 } 4584 if (!p.test("word")) { 4585 failCount++; 4586 } 4587 if (p.test("1234")) { 4588 failCount++; 4589 } 4590 report("Pattern.asPredicate"); 4591 } 4592 4593 // This test is for 8035975 4594 private static void invalidFlags() throws Exception { 4595 for (int flag = 1; flag != 0; flag <<= 1) { 4596 switch (flag) { 4597 case Pattern.CASE_INSENSITIVE: 4598 case Pattern.MULTILINE: 4599 case Pattern.DOTALL: 4600 case Pattern.UNICODE_CASE: 4601 case Pattern.CANON_EQ: 4602 case Pattern.UNIX_LINES: 4603 case Pattern.LITERAL: 4604 case Pattern.UNICODE_CHARACTER_CLASS: 4605 case Pattern.COMMENTS: 4606 // valid flag, continue 4607 break; 4608 default: 4609 try { 4610 
Pattern.compile(".", flag); 4611 failCount++; 4612 } catch (IllegalArgumentException expected) { 4613 } 4614 } 4615 } 4616 report("Invalid compile flags"); 4617 } 4618 4619 private static void grapheme() throws Exception { 4620 Files.lines(Paths.get(System.getProperty("test.src", "."), 4621 "GraphemeBreakTest.txt")) 4622 .filter( ln -> ln.length() != 0 && !ln.startsWith("#") ) 4623 .forEach( ln -> { 4624 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); 4625 // System.out.println(str); 4626 String[] strs = ln.split("\u00f7|\u00d7"); 4627 StringBuilder src = new StringBuilder(); 4628 ArrayList<String> graphemes = new ArrayList<>(); 4629 StringBuilder buf = new StringBuilder(); 4630 int offBk = 0; 4631 for (String str : strs) { 4632 if (str.length() == 0) // first empty str 4633 continue; 4634 int cp = Integer.parseInt(str, 16); 4635 src.appendCodePoint(cp); 4636 buf.appendCodePoint(cp); 4637 offBk += (str.length() + 1); 4638 if (ln.charAt(offBk) == '\u00f7') { // DIV 4639 graphemes.add(buf.toString()); 4640 buf = new StringBuilder(); 4641 } 4642 } 4643 Pattern p = Pattern.compile("\\X"); 4644 Matcher m = p.matcher(src.toString()); 4645 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4646 for (String g : graphemes) { 4647 // System.out.printf(" grapheme:=[%s]%n", g); 4648 // (1) test \\X directly 4649 if (!m.find() || !m.group().equals(g)) { 4650 System.out.println("Failed \\X [" + ln + "] : " + g); 4651 failCount++; 4652 } 4653 // (2) test \\b{g} + \\X via Scanner 4654 boolean hasNext = s.hasNext(p); 4655 // if (!s.hasNext() || !s.next().equals(next)) { 4656 if (!s.hasNext(p) || !s.next(p).equals(g)) { 4657 System.out.println("Failed b{g} [" + ln + "] : " + g); 4658 failCount++; 4659 } 4660 } 4661 }); 4662 // some sanity checks 4663 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() || 4664 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() || 4665 
!Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches()) 4666 failCount++; 4667 // make sure "\b{n}" still works 4668 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches()) 4669 failCount++; 4670 report("Unicode extended grapheme cluster"); 4671 } 4672 }