/*
 * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 36 * @library /lib/testlibrary 37 * @build jdk.testlibrary.* 38 * @run main RegExTest 39 * @key randomness 40 */ 41 42 import java.util.function.Function; 43 import java.util.regex.*; 44 import java.util.Random; 45 import java.util.Scanner; 46 import java.io.*; 47 import java.nio.file.*; 48 import java.util.*; 49 import java.nio.CharBuffer; 50 import java.util.function.Predicate; 51 import jdk.testlibrary.RandomFactory; 52 53 /** 54 * This is a test class created to check the operation of 55 * the Pattern and Matcher classes. 56 */ 57 public class RegExTest { 58 59 private static Random generator = RandomFactory.getRandom(); 60 private static boolean failure = false; 61 private static int failCount = 0; 62 private static String firstFailure = null; 63 64 /** 65 * Main to interpret arguments and run several tests. 
*
     */
    public static void main(String[] args) throws Exception {
        // Most of the tests are in a file
        processFile("TestCases.txt");
        //processFile("PerlCases.txt");
        processFile("BMPTestCases.txt");
        processFile("SupplementaryTestCases.txt");

        // These test many randomly generated char patterns
        bm();
        slice();

        // These are hard to put into the file
        escapes();
        blankInput();

        // Substitution tests on randomly generated sequences
        globalSubstitute();
        stringbufferSubstitute();
        stringbuilderSubstitute();

        substitutionBasher();
        substitutionBasher2();

        // Canonical Equivalence
        ceTest();

        // Anchors
        anchorTest();

        // boolean match calls
        matchesTest();
        lookingAtTest();

        // Pattern API
        patternMatchesTest();

        // Misc
        lookbehindTest();
        nullArgumentTest();
        backRefTest();
        groupCaptureTest();
        caretTest();
        charClassTest();
        emptyPatternTest();
        findIntTest();
        group0Test();
        longPatternTest();
        octalTest();
        ampersandTest();
        negationTest();
        splitTest();
        appendTest();
        caseFoldingTest();
        commentsTest();
        unixLinesTest();
        replaceFirstTest();
        gTest();
        zTest();
        serializeTest();
        reluctantRepetitionTest();
        multilineDollarTest();
        dollarAtEndTest();
        caretBetweenTerminatorsTest();
        // This RFE rejected in Tiger numOccurrencesTest();
        javaCharClassTest();
        nonCaptureRepetitionTest();
        notCapturedGroupCurlyMatchTest();
        escapedSegmentTest();
        literalPatternTest();
        literalReplacementTest();
        regionTest();
        toStringTest();
        negatedCharClassTest();
        findFromTest();
        boundsTest();
        unicodeWordBoundsTest();
        caretAtEndTest();
        wordSearchTest();
        hitEndTest();
        toMatchResultTest();
        toMatchResultTest2();
        surrogatesInClassTest();
        removeQEQuotingTest();
        namedGroupCaptureTest();
        nonBmpClassComplementTest();
unicodePropertiesTest(); 154 unicodeHexNotationTest(); 155 unicodeClassesTest(); 156 unicodeCharacterNameTest(); 157 horizontalAndVerticalWSTest(); 158 linebreakTest(); 159 branchTest(); 160 groupCurlyNotFoundSuppTest(); 161 groupCurlyBackoffTest(); 162 patternAsPredicate(); 163 invalidFlags(); 164 grapheme(); 165 166 if (failure) { 167 throw new 168 RuntimeException("RegExTest failed, 1st failure: " + 169 firstFailure); 170 } else { 171 System.err.println("OKAY: All tests passed."); 172 } 173 } 174 175 // Utility functions 176 177 private static String getRandomAlphaString(int length) { 178 StringBuffer buf = new StringBuffer(length); 179 for (int i=0; i<length; i++) { 180 char randChar = (char)(97 + generator.nextInt(26)); 181 buf.append(randChar); 182 } 183 return buf.toString(); 184 } 185 186 private static void check(Matcher m, String expected) { 187 m.find(); 188 if (!m.group().equals(expected)) 189 failCount++; 190 } 191 192 private static void check(Matcher m, String result, boolean expected) { 193 m.find(); 194 if (m.group().equals(result) != expected) 195 failCount++; 196 } 197 198 private static void check(Pattern p, String s, boolean expected) { 199 if (p.matcher(s).find() != expected) 200 failCount++; 201 } 202 203 private static void check(String p, String s, boolean expected) { 204 Matcher matcher = Pattern.compile(p).matcher(s); 205 if (matcher.find() != expected) 206 failCount++; 207 } 208 209 private static void check(String p, char c, boolean expected) { 210 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 211 Pattern pattern = Pattern.compile(propertyPattern); 212 char[] ca = new char[1]; ca[0] = c; 213 Matcher matcher = pattern.matcher(new String(ca)); 214 if (!matcher.find()) 215 failCount++; 216 } 217 218 private static void check(String p, int codePoint, boolean expected) { 219 String propertyPattern = expected ? 
"\\p" + p : "\\P" + p; 220 Pattern pattern = Pattern.compile(propertyPattern); 221 char[] ca = Character.toChars(codePoint); 222 Matcher matcher = pattern.matcher(new String(ca)); 223 if (!matcher.find()) 224 failCount++; 225 } 226 227 private static void check(String p, int flag, String input, String s, 228 boolean expected) 229 { 230 Pattern pattern = Pattern.compile(p, flag); 231 Matcher matcher = pattern.matcher(input); 232 if (expected) 233 check(matcher, s, expected); 234 else 235 check(pattern, input, false); 236 } 237 238 private static void report(String testName) { 239 int spacesToAdd = 30 - testName.length(); 240 StringBuffer paddedNameBuffer = new StringBuffer(testName); 241 for (int i=0; i<spacesToAdd; i++) 242 paddedNameBuffer.append(" "); 243 String paddedName = paddedNameBuffer.toString(); 244 System.err.println(paddedName + ": " + 245 (failCount==0 ? "Passed":"Failed("+failCount+")")); 246 if (failCount > 0) { 247 failure = true; 248 249 if (firstFailure == null) { 250 firstFailure = testName; 251 } 252 } 253 254 failCount = 0; 255 } 256 257 /** 258 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 259 * supplementary characters. This method does NOT fully take care 260 * of the regex syntax. 
261 */ 262 private static String toSupplementaries(String s) { 263 int length = s.length(); 264 StringBuffer sb = new StringBuffer(length * 2); 265 266 for (int i = 0; i < length; ) { 267 char c = s.charAt(i++); 268 if (c == '\\') { 269 sb.append(c); 270 if (i < length) { 271 c = s.charAt(i++); 272 sb.append(c); 273 if (c == 'u') { 274 // assume no syntax error 275 sb.append(s.charAt(i++)); 276 sb.append(s.charAt(i++)); 277 sb.append(s.charAt(i++)); 278 sb.append(s.charAt(i++)); 279 } 280 } 281 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 282 sb.append('\ud800').append((char)('\udc00'+c)); 283 } else { 284 sb.append(c); 285 } 286 } 287 return sb.toString(); 288 } 289 290 // Regular expression tests 291 292 // This is for bug 6178785 293 // Test if an expected NPE gets thrown when passing in a null argument 294 private static boolean check(Runnable test) { 295 try { 296 test.run(); 297 failCount++; 298 return false; 299 } catch (NullPointerException npe) { 300 return true; 301 } 302 } 303 304 private static void nullArgumentTest() { 305 check(() -> Pattern.compile(null)); 306 check(() -> Pattern.matches(null, null)); 307 check(() -> Pattern.matches("xyz", null)); 308 check(() -> Pattern.quote(null)); 309 check(() -> Pattern.compile("xyz").split(null)); 310 check(() -> Pattern.compile("xyz").matcher(null)); 311 312 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 313 m.matches(); 314 check(() -> m.appendTail((StringBuffer) null)); 315 check(() -> m.appendTail((StringBuilder)null)); 316 check(() -> m.replaceAll((String) null)); 317 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 318 check(() -> m.replaceFirst((String)null)); 319 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 320 check(() -> m.appendReplacement((StringBuffer)null, null)); 321 check(() -> m.appendReplacement((StringBuilder)null, null)); 322 check(() -> m.reset(null)); 323 check(() -> Matcher.quoteReplacement(null)); 324 //check(() -> 
m.usePattern(null)); 325 326 report("Null Argument"); 327 } 328 329 // This is for bug6635133 330 // Test if surrogate pair in Unicode escapes can be handled correctly. 331 private static void surrogatesInClassTest() throws Exception { 332 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 333 Matcher matcher = pattern.matcher("\ud834\udd22"); 334 if (!matcher.find()) 335 failCount++; 336 337 report("Surrogate pair in Unicode escape"); 338 } 339 340 // This is for bug6990617 341 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 342 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 343 // char is an octal digit. 344 private static void removeQEQuotingTest() throws Exception { 345 Pattern pattern = 346 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 347 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 348 if (!matcher.find()) 349 failCount++; 350 351 report("Remove Q/E Quoting"); 352 } 353 354 // This is for bug 4988891 355 // Test toMatchResult to see that it is a copy of the Matcher 356 // that is not affected by subsequent operations on the original 357 private static void toMatchResultTest() throws Exception { 358 Pattern pattern = Pattern.compile("squid"); 359 Matcher matcher = pattern.matcher( 360 "agiantsquidofdestinyasmallsquidoffate"); 361 matcher.find(); 362 int matcherStart1 = matcher.start(); 363 MatchResult mr = matcher.toMatchResult(); 364 if (mr == matcher) 365 failCount++; 366 int resultStart1 = mr.start(); 367 if (matcherStart1 != resultStart1) 368 failCount++; 369 matcher.find(); 370 int matcherStart2 = matcher.start(); 371 int resultStart2 = mr.start(); 372 if (matcherStart2 == resultStart2) 373 failCount++; 374 if (resultStart1 != resultStart2) 375 failCount++; 376 MatchResult mr2 = matcher.toMatchResult(); 377 if (mr == mr2) 378 failCount++; 379 if (mr2.start() != matcherStart2) 380 failCount++; 381 report("toMatchResult is a copy"); 382 } 383 384 private 
static void checkExpectedISE(Runnable test) { 385 try { 386 test.run(); 387 failCount++; 388 } catch (IllegalStateException x) { 389 } catch (IndexOutOfBoundsException xx) { 390 failCount++; 391 } 392 } 393 394 private static void checkExpectedIOOE(Runnable test) { 395 try { 396 test.run(); 397 failCount++; 398 } catch (IndexOutOfBoundsException x) {} 399 } 400 401 // This is for bug 8074678 402 // Test the result of toMatchResult throws ISE if no match is availble 403 private static void toMatchResultTest2() throws Exception { 404 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 405 matcher.find(); 406 MatchResult mr = matcher.toMatchResult(); 407 408 checkExpectedISE(() -> mr.start()); 409 checkExpectedISE(() -> mr.start(2)); 410 checkExpectedISE(() -> mr.end()); 411 checkExpectedISE(() -> mr.end(2)); 412 checkExpectedISE(() -> mr.group()); 413 checkExpectedISE(() -> mr.group(2)); 414 415 matcher = Pattern.compile("(match)").matcher("there is a match"); 416 matcher.find(); 417 MatchResult mr2 = matcher.toMatchResult(); 418 checkExpectedIOOE(() -> mr2.start(2)); 419 checkExpectedIOOE(() -> mr2.end(2)); 420 checkExpectedIOOE(() -> mr2.group(2)); 421 422 report("toMatchResult2 appropriate exceptions"); 423 } 424 425 // This is for bug 5013885 426 // Must test a slice to see if it reports hitEnd correctly 427 private static void hitEndTest() throws Exception { 428 // Basic test of Slice node 429 Pattern p = Pattern.compile("^squidattack"); 430 Matcher m = p.matcher("squack"); 431 m.find(); 432 if (m.hitEnd()) 433 failCount++; 434 m.reset("squid"); 435 m.find(); 436 if (!m.hitEnd()) 437 failCount++; 438 439 // Test Slice, SliceA and SliceU nodes 440 for (int i=0; i<3; i++) { 441 int flags = 0; 442 if (i==1) flags = Pattern.CASE_INSENSITIVE; 443 if (i==2) flags = Pattern.UNICODE_CASE; 444 p = Pattern.compile("^abc", flags); 445 m = p.matcher("ad"); 446 m.find(); 447 if (m.hitEnd()) 448 failCount++; 449 m.reset("ab"); 450 m.find(); 451 if 
(!m.hitEnd()) 452 failCount++; 453 } 454 455 // Test Boyer-Moore node 456 p = Pattern.compile("catattack"); 457 m = p.matcher("attack"); 458 m.find(); 459 if (!m.hitEnd()) 460 failCount++; 461 462 p = Pattern.compile("catattack"); 463 m = p.matcher("attackattackattackcatatta"); 464 m.find(); 465 if (!m.hitEnd()) 466 failCount++; 467 report("hitEnd from a Slice"); 468 } 469 470 // This is for bug 4997476 471 // It is weird code submitted by customer demonstrating a regression 472 private static void wordSearchTest() throws Exception { 473 String testString = new String("word1 word2 word3"); 474 Pattern p = Pattern.compile("\\b"); 475 Matcher m = p.matcher(testString); 476 int position = 0; 477 int start = 0; 478 while (m.find(position)) { 479 start = m.start(); 480 if (start == testString.length()) 481 break; 482 if (m.find(start+1)) { 483 position = m.start(); 484 } else { 485 position = testString.length(); 486 } 487 if (testString.substring(start, position).equals(" ")) 488 continue; 489 if (!testString.substring(start, position-1).startsWith("word")) 490 failCount++; 491 } 492 report("Customer word search"); 493 } 494 495 // This is for bug 4994840 496 private static void caretAtEndTest() throws Exception { 497 // Problem only occurs with multiline patterns 498 // containing a beginning-of-line caret "^" followed 499 // by an expression that also matches the empty string. 
500 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 501 Matcher matcher = pattern.matcher("\r"); 502 matcher.find(); 503 matcher.find(); 504 report("Caret at end"); 505 } 506 507 // This test is for 4979006 508 // Check to see if word boundary construct properly handles unicode 509 // non spacing marks 510 private static void unicodeWordBoundsTest() throws Exception { 511 String spaces = " "; 512 String wordChar = "a"; 513 String nsm = "\u030a"; 514 515 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 516 517 Pattern pattern = Pattern.compile("\\b"); 518 Matcher matcher = pattern.matcher(""); 519 // S=other B=word character N=non spacing mark .=word boundary 520 // SS.BB.SS 521 String input = spaces + wordChar + wordChar + spaces; 522 twoFindIndexes(input, matcher, 2, 4); 523 // SS.BBN.SS 524 input = spaces + wordChar +wordChar + nsm + spaces; 525 twoFindIndexes(input, matcher, 2, 5); 526 // SS.BN.SS 527 input = spaces + wordChar + nsm + spaces; 528 twoFindIndexes(input, matcher, 2, 4); 529 // SS.BNN.SS 530 input = spaces + wordChar + nsm + nsm + spaces; 531 twoFindIndexes(input, matcher, 2, 5); 532 // SSN.BB.SS 533 input = spaces + nsm + wordChar + wordChar + spaces; 534 twoFindIndexes(input, matcher, 3, 5); 535 // SS.BNB.SS 536 input = spaces + wordChar + nsm + wordChar + spaces; 537 twoFindIndexes(input, matcher, 2, 5); 538 // SSNNSS 539 input = spaces + nsm + nsm + spaces; 540 matcher.reset(input); 541 if (matcher.find()) 542 failCount++; 543 // SSN.BBN.SS 544 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 545 twoFindIndexes(input, matcher, 3, 6); 546 547 report("Unicode word boundary"); 548 } 549 550 private static void twoFindIndexes(String input, Matcher matcher, int a, 551 int b) throws Exception 552 { 553 matcher.reset(input); 554 matcher.find(); 555 if (matcher.start() != a) 556 failCount++; 557 matcher.find(); 558 if (matcher.start() != b) 559 failCount++; 560 } 561 562 // This test is for 6284152 563 static 
void check(String regex, String input, String[] expected) { 564 List<String> result = new ArrayList<String>(); 565 Pattern p = Pattern.compile(regex); 566 Matcher m = p.matcher(input); 567 while (m.find()) { 568 result.add(m.group()); 569 } 570 if (!Arrays.asList(expected).equals(result)) 571 failCount++; 572 } 573 574 private static void lookbehindTest() throws Exception { 575 //Positive 576 check("(?<=%.{0,5})foo\\d", 577 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 578 new String[]{"foo1", "foo2", "foo3"}); 579 580 //boundary at end of the lookbehind sub-regex should work consistently 581 //with the boundary just after the lookbehind sub-regex 582 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 583 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 584 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 585 check("(?<!abc \\b)foo", "abc foo", new String[0]); 586 587 //Negative 588 check("(?<!%.{0,5})foo\\d", 589 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 590 new String[] {"foo4", "foo5"}); 591 592 //Positive greedy 593 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 594 595 //Positive reluctant 596 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 597 598 //supplementary 599 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 600 new String[] {"fo\ud800\udc00o"}); 601 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 602 new String[] {"fo\ud800\udc00o"}); 603 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 604 new String[] {"fo\ud800\udc00o"}); 605 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 606 new String[] {"fo\ud800\udc00o"}); 607 report("Lookbehind"); 608 } 609 610 // This test is for 4938995 611 // Check to see if weak region boundaries are transparent to 612 // lookahead and lookbehind constructs 613 private static void boundsTest() throws Exception { 614 String fullMessage = "catdogcat"; 615 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 616 
Matcher matcher = pattern.matcher("catdogca");
        matcher.useTransparentBounds(true);
        if (matcher.find())
            failCount++;
        matcher.reset("atdogcat");
        if (matcher.find())
            failCount++;
        matcher.reset(fullMessage);
        if (!matcher.find())
            failCount++;
        matcher.reset(fullMessage);
        matcher.region(0,9);
        if (!matcher.find())
            failCount++;
        matcher.reset(fullMessage);
        matcher.region(0,6);
        if (!matcher.find())
            failCount++;
        matcher.reset(fullMessage);
        matcher.region(3,6);
        if (!matcher.find())
            failCount++;
        matcher.useTransparentBounds(false);
        if (matcher.find())
            failCount++;

        // Negative lookahead/lookbehind
        pattern = Pattern.compile("(?<!cat)dog(?!cat)");
        matcher = pattern.matcher("dogcat");
        matcher.useTransparentBounds(true);
        matcher.region(0,3);
        if (matcher.find())
            failCount++;
        matcher.reset("catdog");
        matcher.region(3,6);
        if (matcher.find())
            failCount++;
        matcher.useTransparentBounds(false);
        matcher.reset("dogcat");
        matcher.region(0,3);
        if (!matcher.find())
            failCount++;
        matcher.reset("catdog");
        matcher.region(3,6);
        if (!matcher.find())
            failCount++;

        report("Region bounds transparency");
    }

    // This test is for 4945394
    // The input holds a single "$0": the first find must succeed and
    // every later find must fail.
    private static void findFromTest() throws Exception {
        String message = "This is 40 $0 message.";
        Pattern pat = Pattern.compile("\\$0");
        Matcher match = pat.matcher(message);
        if (!match.find())
            failCount++;
        if (match.find())
            failCount++;
        if (match.find())
            failCount++;
        report("Check for alternating find");
    }

    // This test is for 4872664 and 4892980
    private static void negatedCharClassTest() throws Exception {
        Pattern pattern = Pattern.compile("[^>]");
        Matcher matcher = pattern.matcher("\u203A");
        if (!matcher.matches())
            failCount++;
        pattern = Pattern.compile("[^fr]");
        matcher = pattern.matcher("a");
688 if (!matcher.find()) 689 failCount++; 690 matcher.reset("\u203A"); 691 if (!matcher.find()) 692 failCount++; 693 String s = "for"; 694 String result[] = s.split("[^fr]"); 695 if (!result[0].equals("f")) 696 failCount++; 697 if (!result[1].equals("r")) 698 failCount++; 699 s = "f\u203Ar"; 700 result = s.split("[^fr]"); 701 if (!result[0].equals("f")) 702 failCount++; 703 if (!result[1].equals("r")) 704 failCount++; 705 706 // Test adding to bits, subtracting a node, then adding to bits again 707 pattern = Pattern.compile("[^f\u203Ar]"); 708 matcher = pattern.matcher("a"); 709 if (!matcher.find()) 710 failCount++; 711 matcher.reset("f"); 712 if (matcher.find()) 713 failCount++; 714 matcher.reset("\u203A"); 715 if (matcher.find()) 716 failCount++; 717 matcher.reset("r"); 718 if (matcher.find()) 719 failCount++; 720 matcher.reset("\u203B"); 721 if (!matcher.find()) 722 failCount++; 723 724 // Test subtracting a node, adding to bits, subtracting again 725 pattern = Pattern.compile("[^\u203Ar\u203B]"); 726 matcher = pattern.matcher("a"); 727 if (!matcher.find()) 728 failCount++; 729 matcher.reset("\u203A"); 730 if (matcher.find()) 731 failCount++; 732 matcher.reset("r"); 733 if (matcher.find()) 734 failCount++; 735 matcher.reset("\u203B"); 736 if (matcher.find()) 737 failCount++; 738 matcher.reset("\u203C"); 739 if (!matcher.find()) 740 failCount++; 741 742 report("Negated Character Class"); 743 } 744 745 // This test is for 4628291 746 private static void toStringTest() throws Exception { 747 Pattern pattern = Pattern.compile("b+"); 748 if (pattern.toString() != "b+") 749 failCount++; 750 Matcher matcher = pattern.matcher("aaabbbccc"); 751 String matcherString = matcher.toString(); // unspecified 752 matcher.find(); 753 matcherString = matcher.toString(); // unspecified 754 matcher.region(0,3); 755 matcherString = matcher.toString(); // unspecified 756 matcher.reset(); 757 matcherString = matcher.toString(); // unspecified 758 report("toString"); 759 } 760 761 // 
This test is for 4808962 762 private static void literalPatternTest() throws Exception { 763 int flags = Pattern.LITERAL; 764 765 Pattern pattern = Pattern.compile("abc\\t$^", flags); 766 check(pattern, "abc\\t$^", true); 767 768 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 769 check(pattern, "abc\\t$^", true); 770 771 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 772 check(pattern, "\\Qa^$bcabc\\E", true); 773 check(pattern, "a^$bcabc", false); 774 775 pattern = Pattern.compile("\\\\Q\\\\E"); 776 check(pattern, "\\Q\\E", true); 777 778 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 779 check(pattern, "abcefg\\Q\\Ehij", true); 780 781 pattern = Pattern.compile("\\\\\\Q\\\\E"); 782 check(pattern, "\\\\\\\\", true); 783 784 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 785 check(pattern, "\\Qa^$bcabc\\E", true); 786 check(pattern, "a^$bcabc", false); 787 788 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 789 check(pattern, "\\Qabc\\Edef", true); 790 check(pattern, "abcdef", false); 791 792 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 793 check(pattern, "abc\\Edef", true); 794 check(pattern, "abcdef", false); 795 796 pattern = Pattern.compile(Pattern.quote("\\E")); 797 check(pattern, "\\E", true); 798 799 pattern = Pattern.compile("((((abc.+?:)", flags); 800 check(pattern, "((((abc.+?:)", true); 801 802 flags |= Pattern.MULTILINE; 803 804 pattern = Pattern.compile("^cat$", flags); 805 check(pattern, "abc^cat$def", true); 806 check(pattern, "cat", false); 807 808 flags |= Pattern.CASE_INSENSITIVE; 809 810 pattern = Pattern.compile("abcdef", flags); 811 check(pattern, "ABCDEF", true); 812 check(pattern, "AbCdEf", true); 813 814 flags |= Pattern.DOTALL; 815 816 pattern = Pattern.compile("a...b", flags); 817 check(pattern, "A...b", true); 818 check(pattern, "Axxxb", false); 819 820 flags |= Pattern.CANON_EQ; 821 822 Pattern p = Pattern.compile("testa\u030a", flags); 823 check(pattern, "testa\u030a", false); 824 
check(pattern, "test\u00e5", false);

        // Supplementary character test
        flags = Pattern.LITERAL;

        pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
        check(pattern, toSupplementaries("abc\\t$^"), true);

        pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
        check(pattern, toSupplementaries("abc\\t$^"), true);

        pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
        check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
        check(pattern, toSupplementaries("a^$bcabc"), false);

        pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
        check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
        check(pattern, toSupplementaries("a^$bcabc"), false);

        pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
        check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
        check(pattern, toSupplementaries("abcdef"), false);

        pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
        check(pattern, toSupplementaries("abc\\Edef"), true);
        check(pattern, toSupplementaries("abcdef"), false);

        pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
        check(pattern, toSupplementaries("((((abc.+?:)"), true);

        flags |= Pattern.MULTILINE;

        pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
        check(pattern, toSupplementaries("abc^cat$def"), true);
        check(pattern, toSupplementaries("cat"), false);

        flags |= Pattern.DOTALL;

        // note: this is case-sensitive.
863 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 864 check(pattern, toSupplementaries("a...b"), true); 865 check(pattern, toSupplementaries("axxxb"), false); 866 867 flags |= Pattern.CANON_EQ; 868 869 String t = toSupplementaries("test"); 870 p = Pattern.compile(t + "a\u030a", flags); 871 check(pattern, t + "a\u030a", false); 872 check(pattern, t + "\u00e5", false); 873 874 report("Literal pattern"); 875 } 876 877 // This test is for 4803179 878 // This test is also for 4808962, replacement parts 879 private static void literalReplacementTest() throws Exception { 880 int flags = Pattern.LITERAL; 881 882 Pattern pattern = Pattern.compile("abc", flags); 883 Matcher matcher = pattern.matcher("zzzabczzz"); 884 String replaceTest = "$0"; 885 String result = matcher.replaceAll(replaceTest); 886 if (!result.equals("zzzabczzz")) 887 failCount++; 888 889 matcher.reset(); 890 String literalReplacement = matcher.quoteReplacement(replaceTest); 891 result = matcher.replaceAll(literalReplacement); 892 if (!result.equals("zzz$0zzz")) 893 failCount++; 894 895 matcher.reset(); 896 replaceTest = "\\t$\\$"; 897 literalReplacement = matcher.quoteReplacement(replaceTest); 898 result = matcher.replaceAll(literalReplacement); 899 if (!result.equals("zzz\\t$\\$zzz")) 900 failCount++; 901 902 // Supplementary character test 903 pattern = Pattern.compile(toSupplementaries("abc"), flags); 904 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 905 replaceTest = "$0"; 906 result = matcher.replaceAll(replaceTest); 907 if (!result.equals(toSupplementaries("zzzabczzz"))) 908 failCount++; 909 910 matcher.reset(); 911 literalReplacement = matcher.quoteReplacement(replaceTest); 912 result = matcher.replaceAll(literalReplacement); 913 if (!result.equals(toSupplementaries("zzz$0zzz"))) 914 failCount++; 915 916 matcher.reset(); 917 replaceTest = "\\t$\\$"; 918 literalReplacement = matcher.quoteReplacement(replaceTest); 919 result = matcher.replaceAll(literalReplacement); 920 if 
(!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 921 failCount++; 922 923 // IAE should be thrown if backslash or '$' is the last character 924 // in replacement string 925 try { 926 "\uac00".replaceAll("\uac00", "$"); 927 failCount++; 928 } catch (IllegalArgumentException iie) { 929 } catch (Exception e) { 930 failCount++; 931 } 932 try { 933 "\uac00".replaceAll("\uac00", "\\"); 934 failCount++; 935 } catch (IllegalArgumentException iie) { 936 } catch (Exception e) { 937 failCount++; 938 } 939 report("Literal replacement"); 940 } 941 942 // This test is for 4757029 943 private static void regionTest() throws Exception { 944 Pattern pattern = Pattern.compile("abc"); 945 Matcher matcher = pattern.matcher("abcdefabc"); 946 947 matcher.region(0,9); 948 if (!matcher.find()) 949 failCount++; 950 if (!matcher.find()) 951 failCount++; 952 matcher.region(0,3); 953 if (!matcher.find()) 954 failCount++; 955 matcher.region(3,6); 956 if (matcher.find()) 957 failCount++; 958 matcher.region(0,2); 959 if (matcher.find()) 960 failCount++; 961 962 expectRegionFail(matcher, 1, -1); 963 expectRegionFail(matcher, -1, -1); 964 expectRegionFail(matcher, -1, 1); 965 expectRegionFail(matcher, 5, 3); 966 expectRegionFail(matcher, 5, 12); 967 expectRegionFail(matcher, 12, 12); 968 969 pattern = Pattern.compile("^abc$"); 970 matcher = pattern.matcher("zzzabczzz"); 971 matcher.region(0,9); 972 if (matcher.find()) 973 failCount++; 974 matcher.region(3,6); 975 if (!matcher.find()) 976 failCount++; 977 matcher.region(3,6); 978 matcher.useAnchoringBounds(false); 979 if (matcher.find()) 980 failCount++; 981 982 // Supplementary character test 983 pattern = Pattern.compile(toSupplementaries("abc")); 984 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 985 matcher.region(0,9*2); 986 if (!matcher.find()) 987 failCount++; 988 if (!matcher.find()) 989 failCount++; 990 matcher.region(0,3*2); 991 if (!matcher.find()) 992 failCount++; 993 matcher.region(1,3*2); 994 if (matcher.find()) 995 
failCount++; 996 matcher.region(3*2,6*2); 997 if (matcher.find()) 998 failCount++; 999 matcher.region(0,2*2); 1000 if (matcher.find()) 1001 failCount++; 1002 matcher.region(0,2*2+1); 1003 if (matcher.find()) 1004 failCount++; 1005 1006 expectRegionFail(matcher, 1*2, -1); 1007 expectRegionFail(matcher, -1, -1); 1008 expectRegionFail(matcher, -1, 1*2); 1009 expectRegionFail(matcher, 5*2, 3*2); 1010 expectRegionFail(matcher, 5*2, 12*2); 1011 expectRegionFail(matcher, 12*2, 12*2); 1012 1013 pattern = Pattern.compile(toSupplementaries("^abc$")); 1014 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1015 matcher.region(0,9*2); 1016 if (matcher.find()) 1017 failCount++; 1018 matcher.region(3*2,6*2); 1019 if (!matcher.find()) 1020 failCount++; 1021 matcher.region(3*2+1,6*2); 1022 if (matcher.find()) 1023 failCount++; 1024 matcher.region(3*2,6*2-1); 1025 if (matcher.find()) 1026 failCount++; 1027 matcher.region(3*2,6*2); 1028 matcher.useAnchoringBounds(false); 1029 if (matcher.find()) 1030 failCount++; 1031 report("Regions"); 1032 } 1033 1034 private static void expectRegionFail(Matcher matcher, int index1, 1035 int index2) 1036 { 1037 try { 1038 matcher.region(index1, index2); 1039 failCount++; 1040 } catch (IndexOutOfBoundsException ioobe) { 1041 // Correct result 1042 } catch (IllegalStateException ise) { 1043 // Correct result 1044 } 1045 } 1046 1047 // This test is for 4803197 1048 private static void escapedSegmentTest() throws Exception { 1049 1050 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1051 check(pattern, "dir1\\dir2", true); 1052 1053 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1054 check(pattern, "dir1\\dir2\\", true); 1055 1056 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1057 check(pattern, "dir1\\dir2\\", true); 1058 1059 // Supplementary character test 1060 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1061 check(pattern, toSupplementaries("dir1\\dir2"), true); 1062 1063 pattern = 
Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1064 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1065 1066 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1067 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1068 1069 report("Escaped segment"); 1070 } 1071 1072 // This test is for 4792284 1073 private static void nonCaptureRepetitionTest() throws Exception { 1074 String input = "abcdefgh;"; 1075 1076 String[] patterns = new String[] { 1077 "(?:\\w{4})+;", 1078 "(?:\\w{8})*;", 1079 "(?:\\w{2}){2,4};", 1080 "(?:\\w{4}){2,};", // only matches the 1081 ".*?(?:\\w{5})+;", // specified minimum 1082 ".*?(?:\\w{9})*;", // number of reps - OK 1083 "(?:\\w{4})+?;", // lazy repetition - OK 1084 "(?:\\w{4})++;", // possessive repetition - OK 1085 "(?:\\w{2,}?)+;", // non-deterministic - OK 1086 "(\\w{4})+;", // capturing group - OK 1087 }; 1088 1089 for (int i = 0; i < patterns.length; i++) { 1090 // Check find() 1091 check(patterns[i], 0, input, input, true); 1092 // Check matches() 1093 Pattern p = Pattern.compile(patterns[i]); 1094 Matcher m = p.matcher(input); 1095 1096 if (m.matches()) { 1097 if (!m.group(0).equals(input)) 1098 failCount++; 1099 } else { 1100 failCount++; 1101 } 1102 } 1103 1104 report("Non capturing repetition"); 1105 } 1106 1107 // This test is for 6358731 1108 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1109 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1110 Matcher matcher = pattern.matcher("abcd"); 1111 if (!matcher.matches() || 1112 matcher.group(1) != null || 1113 !matcher.group(2).equals("abcd")) { 1114 failCount++; 1115 } 1116 report("Not captured GroupCurly"); 1117 } 1118 1119 // This test is for 4706545 1120 private static void javaCharClassTest() throws Exception { 1121 for (int i=0; i<1000; i++) { 1122 char c = (char)generator.nextInt(); 1123 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1124 check("{javaUpperCase}", c, 
Character.isUpperCase(c));
            check("{javaUpperCase}+", c, Character.isUpperCase(c));
            check("{javaTitleCase}", c, Character.isTitleCase(c));
            check("{javaDigit}", c, Character.isDigit(c));
            check("{javaDefined}", c, Character.isDefined(c));
            check("{javaLetter}", c, Character.isLetter(c));
            check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
            check("{javaJavaIdentifierStart}", c,
                  Character.isJavaIdentifierStart(c));
            check("{javaJavaIdentifierPart}", c,
                  Character.isJavaIdentifierPart(c));
            check("{javaUnicodeIdentifierStart}", c,
                  Character.isUnicodeIdentifierStart(c));
            check("{javaUnicodeIdentifierPart}", c,
                  Character.isUnicodeIdentifierPart(c));
            check("{javaIdentifierIgnorable}", c,
                  Character.isIdentifierIgnorable(c));
            check("{javaSpaceChar}", c, Character.isSpaceChar(c));
            check("{javaWhitespace}", c, Character.isWhitespace(c));
            check("{javaISOControl}", c, Character.isISOControl(c));
            check("{javaMirrored}", c, Character.isMirrored(c));

        }

        // Supplementary character test
        for (int i=0; i<1000; i++) {
            // nextInt(bound) is exclusive of bound, so c ranges from
            // MIN_SUPPLEMENTARY_CODE_POINT up to MAX_CODE_POINT - 1.
            int c = generator.nextInt(Character.MAX_CODE_POINT
                                      - Character.MIN_SUPPLEMENTARY_CODE_POINT)
                        + Character.MIN_SUPPLEMENTARY_CODE_POINT;
            check("{javaLowerCase}", c, Character.isLowerCase(c));
            check("{javaUpperCase}", c, Character.isUpperCase(c));
            check("{javaUpperCase}+", c, Character.isUpperCase(c));
            check("{javaTitleCase}", c, Character.isTitleCase(c));
            check("{javaDigit}", c, Character.isDigit(c));
            check("{javaDefined}", c, Character.isDefined(c));
            check("{javaLetter}", c, Character.isLetter(c));
            check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
            check("{javaJavaIdentifierStart}", c,
                  Character.isJavaIdentifierStart(c));
            check("{javaJavaIdentifierPart}", c,
                  Character.isJavaIdentifierPart(c));
            check("{javaUnicodeIdentifierStart}", c,
Character.isUnicodeIdentifierStart(c)); 1167 check("{javaUnicodeIdentifierPart}", c, 1168 Character.isUnicodeIdentifierPart(c)); 1169 check("{javaIdentifierIgnorable}", c, 1170 Character.isIdentifierIgnorable(c)); 1171 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1172 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1173 check("{javaISOControl}", c, Character.isISOControl(c)); 1174 check("{javaMirrored}", c, Character.isMirrored(c)); 1175 } 1176 1177 report("Java character classes"); 1178 } 1179 1180 // This test is for 4523620 1181 /* 1182 private static void numOccurrencesTest() throws Exception { 1183 Pattern pattern = Pattern.compile("aaa"); 1184 1185 if (pattern.numOccurrences("aaaaaa", false) != 2) 1186 failCount++; 1187 if (pattern.numOccurrences("aaaaaa", true) != 4) 1188 failCount++; 1189 1190 pattern = Pattern.compile("^"); 1191 if (pattern.numOccurrences("aaaaaa", false) != 1) 1192 failCount++; 1193 if (pattern.numOccurrences("aaaaaa", true) != 1) 1194 failCount++; 1195 1196 report("Number of Occurrences"); 1197 } 1198 */ 1199 1200 // This test is for 4776374 1201 private static void caretBetweenTerminatorsTest() throws Exception { 1202 int flags1 = Pattern.DOTALL; 1203 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1204 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1205 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1206 1207 check("^....", flags1, "test\ntest", "test", true); 1208 check(".....^", flags1, "test\ntest", "test", false); 1209 check(".....^", flags1, "test\n", "test", false); 1210 check("....^", flags1, "test\r\n", "test", false); 1211 1212 check("^....", flags2, "test\ntest", "test", true); 1213 check("....^", flags2, "test\ntest", "test", false); 1214 check(".....^", flags2, "test\n", "test", false); 1215 check("....^", flags2, "test\r\n", "test", false); 1216 1217 check("^....", flags3, "test\ntest", "test", true); 1218 check(".....^", flags3, "test\ntest", "test\n", true); 1219 
check(".....^", flags3, "test\u0085test", "test\u0085", false); 1220 check(".....^", flags3, "test\n", "test", false); 1221 check(".....^", flags3, "test\r\n", "test", false); 1222 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1223 1224 check("^....", flags4, "test\ntest", "test", true); 1225 check(".....^", flags3, "test\ntest", "test\n", true); 1226 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1227 check(".....^", flags4, "test\n", "test\n", false); 1228 check(".....^", flags4, "test\r\n", "test\r", false); 1229 1230 // Supplementary character test 1231 String t = toSupplementaries("test"); 1232 check("^....", flags1, t+"\n"+t, t, true); 1233 check(".....^", flags1, t+"\n"+t, t, false); 1234 check(".....^", flags1, t+"\n", t, false); 1235 check("....^", flags1, t+"\r\n", t, false); 1236 1237 check("^....", flags2, t+"\n"+t, t, true); 1238 check("....^", flags2, t+"\n"+t, t, false); 1239 check(".....^", flags2, t+"\n", t, false); 1240 check("....^", flags2, t+"\r\n", t, false); 1241 1242 check("^....", flags3, t+"\n"+t, t, true); 1243 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1244 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1245 check(".....^", flags3, t+"\n", t, false); 1246 check(".....^", flags3, t+"\r\n", t, false); 1247 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1248 1249 check("^....", flags4, t+"\n"+t, t, true); 1250 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1251 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1252 check(".....^", flags4, t+"\n", t+"\n", false); 1253 check(".....^", flags4, t+"\r\n", t+"\r", false); 1254 1255 report("Caret between terminators"); 1256 } 1257 1258 // This test is for 4727935 1259 private static void dollarAtEndTest() throws Exception { 1260 int flags1 = Pattern.DOTALL; 1261 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1262 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1263 1264 check("....$", flags1, "test\n", "test", true); 1265 
check("....$", flags1, "test\r\n", "test", true); 1266 check(".....$", flags1, "test\n", "test\n", true); 1267 check(".....$", flags1, "test\u0085", "test\u0085", true); 1268 check("....$", flags1, "test\u0085", "test", true); 1269 1270 check("....$", flags2, "test\n", "test", true); 1271 check(".....$", flags2, "test\n", "test\n", true); 1272 check(".....$", flags2, "test\u0085", "test\u0085", true); 1273 check("....$", flags2, "test\u0085", "est\u0085", true); 1274 1275 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1276 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1277 check("....$blah", flags3, "test\nblah", "!!!!", false); 1278 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1279 1280 // Supplementary character test 1281 String t = toSupplementaries("test"); 1282 String b = toSupplementaries("blah"); 1283 check("....$", flags1, t+"\n", t, true); 1284 check("....$", flags1, t+"\r\n", t, true); 1285 check(".....$", flags1, t+"\n", t+"\n", true); 1286 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1287 check("....$", flags1, t+"\u0085", t, true); 1288 1289 check("....$", flags2, t+"\n", t, true); 1290 check(".....$", flags2, t+"\n", t+"\n", true); 1291 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1292 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1293 1294 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1295 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1296 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1297 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1298 1299 report("Dollar at End"); 1300 } 1301 1302 // This test is for 4711773 1303 private static void multilineDollarTest() throws Exception { 1304 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1305 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1306 matcher.find(); 1307 if (matcher.start(0) != 9) 1308 failCount++; 1309 matcher.find(); 1310 if (matcher.start(0) != 
20) 1311 failCount++; 1312 1313 // Supplementary character test 1314 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1315 matcher.find(); 1316 if (matcher.start(0) != 9*2) 1317 failCount++; 1318 matcher.find(); 1319 if (matcher.start(0) != 20*2) 1320 failCount++; 1321 1322 report("Multiline Dollar"); 1323 } 1324 1325 private static void reluctantRepetitionTest() throws Exception { 1326 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1327 check(p, "1 word word word 2", true); 1328 check(p, "1 wor wo w 2", true); 1329 check(p, "1 word word 2", true); 1330 check(p, "1 word 2", true); 1331 check(p, "1 wo w w 2", true); 1332 check(p, "1 wo w 2", true); 1333 check(p, "1 wor w 2", true); 1334 1335 p = Pattern.compile("([a-z])+?c"); 1336 Matcher m = p.matcher("ababcdefdec"); 1337 check(m, "ababc"); 1338 1339 // Supplementary character test 1340 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1341 m = p.matcher(toSupplementaries("ababcdefdec")); 1342 check(m, toSupplementaries("ababc")); 1343 1344 report("Reluctant Repetition"); 1345 } 1346 1347 private static void serializeTest() throws Exception { 1348 String patternStr = "(b)"; 1349 String matchStr = "b"; 1350 Pattern pattern = Pattern.compile(patternStr); 1351 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1352 ObjectOutputStream oos = new ObjectOutputStream(baos); 1353 oos.writeObject(pattern); 1354 oos.close(); 1355 ObjectInputStream ois = new ObjectInputStream( 1356 new ByteArrayInputStream(baos.toByteArray())); 1357 Pattern serializedPattern = (Pattern)ois.readObject(); 1358 ois.close(); 1359 Matcher matcher = serializedPattern.matcher(matchStr); 1360 if (!matcher.matches()) 1361 failCount++; 1362 if (matcher.groupCount() != 1) 1363 failCount++; 1364 1365 report("Serialization"); 1366 } 1367 1368 private static void gTest() { 1369 Pattern pattern = Pattern.compile("\\G\\w"); 1370 Matcher matcher = pattern.matcher("abc#x#x"); 1371 matcher.find(); 
1372 matcher.find(); 1373 matcher.find(); 1374 if (matcher.find()) 1375 failCount++; 1376 1377 pattern = Pattern.compile("\\GA*"); 1378 matcher = pattern.matcher("1A2AA3"); 1379 matcher.find(); 1380 if (matcher.find()) 1381 failCount++; 1382 1383 pattern = Pattern.compile("\\GA*"); 1384 matcher = pattern.matcher("1A2AA3"); 1385 if (!matcher.find(1)) 1386 failCount++; 1387 matcher.find(); 1388 if (matcher.find()) 1389 failCount++; 1390 1391 report("\\G"); 1392 } 1393 1394 private static void zTest() { 1395 Pattern pattern = Pattern.compile("foo\\Z"); 1396 // Positives 1397 check(pattern, "foo\u0085", true); 1398 check(pattern, "foo\u2028", true); 1399 check(pattern, "foo\u2029", true); 1400 check(pattern, "foo\n", true); 1401 check(pattern, "foo\r", true); 1402 check(pattern, "foo\r\n", true); 1403 // Negatives 1404 check(pattern, "fooo", false); 1405 check(pattern, "foo\n\r", false); 1406 1407 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1408 // Positives 1409 check(pattern, "foo", true); 1410 check(pattern, "foo\n", true); 1411 // Negatives 1412 check(pattern, "foo\r", false); 1413 check(pattern, "foo\u0085", false); 1414 check(pattern, "foo\u2028", false); 1415 check(pattern, "foo\u2029", false); 1416 1417 report("\\Z"); 1418 } 1419 1420 private static void replaceFirstTest() { 1421 Pattern pattern = Pattern.compile("(ab)(c*)"); 1422 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1423 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1424 failCount++; 1425 1426 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1427 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1428 failCount++; 1429 1430 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1431 String result = matcher.replaceFirst("$1"); 1432 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1433 failCount++; 1434 1435 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1436 result = matcher.replaceFirst("$2"); 1437 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1438 
failCount++; 1439 1440 pattern = Pattern.compile("a*"); 1441 matcher = pattern.matcher("aaaaaaaaaa"); 1442 if (!matcher.replaceFirst("test").equals("test")) 1443 failCount++; 1444 1445 pattern = Pattern.compile("a+"); 1446 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1447 if (!matcher.replaceFirst("test").equals("zzztest")) 1448 failCount++; 1449 1450 // Supplementary character test 1451 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1452 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1453 if (!matcher.replaceFirst(toSupplementaries("test")) 1454 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1455 failCount++; 1456 1457 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1458 if (!matcher.replaceFirst(toSupplementaries("test")). 1459 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1460 failCount++; 1461 1462 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1463 result = matcher.replaceFirst("$1"); 1464 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1465 failCount++; 1466 1467 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1468 result = matcher.replaceFirst("$2"); 1469 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1470 failCount++; 1471 1472 pattern = Pattern.compile(toSupplementaries("a*")); 1473 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1474 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1475 failCount++; 1476 1477 pattern = Pattern.compile(toSupplementaries("a+")); 1478 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1479 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1480 failCount++; 1481 1482 report("Replace First"); 1483 } 1484 1485 private static void unixLinesTest() { 1486 Pattern pattern = Pattern.compile(".*"); 1487 Matcher matcher = pattern.matcher("aa\u2028blah"); 1488 matcher.find(); 1489 if 
(!matcher.group(0).equals("aa")) 1490 failCount++; 1491 1492 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1493 matcher = pattern.matcher("aa\u2028blah"); 1494 matcher.find(); 1495 if (!matcher.group(0).equals("aa\u2028blah")) 1496 failCount++; 1497 1498 pattern = Pattern.compile("[az]$", 1499 Pattern.MULTILINE | Pattern.UNIX_LINES); 1500 matcher = pattern.matcher("aa\u2028zz"); 1501 check(matcher, "a\u2028", false); 1502 1503 // Supplementary character test 1504 pattern = Pattern.compile(".*"); 1505 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1506 matcher.find(); 1507 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1508 failCount++; 1509 1510 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1511 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1512 matcher.find(); 1513 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1514 failCount++; 1515 1516 pattern = Pattern.compile(toSupplementaries("[az]$"), 1517 Pattern.MULTILINE | Pattern.UNIX_LINES); 1518 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1519 check(matcher, toSupplementaries("a\u2028"), false); 1520 1521 report("Unix Lines"); 1522 } 1523 1524 private static void commentsTest() { 1525 int flags = Pattern.COMMENTS; 1526 1527 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1528 Matcher matcher = pattern.matcher("aa#aa"); 1529 if (!matcher.matches()) 1530 failCount++; 1531 1532 pattern = Pattern.compile("aa # blah", flags); 1533 matcher = pattern.matcher("aa"); 1534 if (!matcher.matches()) 1535 failCount++; 1536 1537 pattern = Pattern.compile("aa blah", flags); 1538 matcher = pattern.matcher("aablah"); 1539 if (!matcher.matches()) 1540 failCount++; 1541 1542 pattern = Pattern.compile("aa # blah blech ", flags); 1543 matcher = pattern.matcher("aa"); 1544 if (!matcher.matches()) 1545 failCount++; 1546 1547 pattern = Pattern.compile("aa # blah\n ", flags); 1548 matcher = pattern.matcher("aa"); 1549 if (!matcher.matches()) 1550 
failCount++;

        pattern = Pattern.compile("aa # blah\nbc # blech", flags);
        matcher = pattern.matcher("aabc");
        if (!matcher.matches())
            failCount++;

        pattern = Pattern.compile("aa # blah\nbc# blech", flags);
        matcher = pattern.matcher("aabc");
        if (!matcher.matches())
            failCount++;

        pattern = Pattern.compile("aa # blah\nbc\\# blech", flags);
        matcher = pattern.matcher("aabc#blech");
        if (!matcher.matches())
            failCount++;

        // Supplementary character test
        pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
        matcher = pattern.matcher(toSupplementaries("aa#aa"));
        if (!matcher.matches())
            failCount++;

        pattern = Pattern.compile(toSupplementaries("aa # blah"), flags);
        matcher = pattern.matcher(toSupplementaries("aa"));
        if (!matcher.matches())
            failCount++;

        pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
        matcher = pattern.matcher(toSupplementaries("aablah"));
        if (!matcher.matches())
            failCount++;

        pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags);
        matcher = pattern.matcher(toSupplementaries("aa"));
        if (!matcher.matches())
            failCount++;

        pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags);
        matcher = pattern.matcher(toSupplementaries("aa"));
        if (!matcher.matches())
            failCount++;

        pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags);
        matcher = pattern.matcher(toSupplementaries("aabc"));
        if (!matcher.matches())
            failCount++;

        pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags);
        matcher = pattern.matcher(toSupplementaries("aabc"));
        if (!matcher.matches())
            failCount++;

        pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags);
        matcher = pattern.matcher(toSupplementaries("aabc#blech"));
        if
(!matcher.matches()) 1606 failCount++; 1607 1608 report("Comments"); 1609 } 1610 1611 private static void caseFoldingTest() { // bug 4504687 1612 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1613 Pattern pattern = Pattern.compile("aa", flags); 1614 Matcher matcher = pattern.matcher("ab"); 1615 if (matcher.matches()) 1616 failCount++; 1617 1618 pattern = Pattern.compile("aA", flags); 1619 matcher = pattern.matcher("ab"); 1620 if (matcher.matches()) 1621 failCount++; 1622 1623 pattern = Pattern.compile("aa", flags); 1624 matcher = pattern.matcher("aB"); 1625 if (matcher.matches()) 1626 failCount++; 1627 matcher = pattern.matcher("Ab"); 1628 if (matcher.matches()) 1629 failCount++; 1630 1631 // ASCII "a" 1632 // Latin-1 Supplement "a" + grave 1633 // Cyrillic "a" 1634 String[] patterns = new String[] { 1635 //single 1636 "a", "\u00e0", "\u0430", 1637 //slice 1638 "ab", "\u00e0\u00e1", "\u0430\u0431", 1639 //class single 1640 "[a]", "[\u00e0]", "[\u0430]", 1641 //class range 1642 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1643 //back reference 1644 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1645 }; 1646 1647 String[] texts = new String[] { 1648 "A", "\u00c0", "\u0410", 1649 "AB", "\u00c0\u00c1", "\u0410\u0411", 1650 "A", "\u00c0", "\u0410", 1651 "B", "\u00c2", "\u0411", 1652 "aA", "\u00e0\u00c0", "\u0430\u0410" 1653 }; 1654 1655 boolean[] expected = new boolean[] { 1656 true, false, false, 1657 true, false, false, 1658 true, false, false, 1659 true, false, false, 1660 true, false, false 1661 }; 1662 1663 flags = Pattern.CASE_INSENSITIVE; 1664 for (int i = 0; i < patterns.length; i++) { 1665 pattern = Pattern.compile(patterns[i], flags); 1666 matcher = pattern.matcher(texts[i]); 1667 if (matcher.matches() != expected[i]) { 1668 System.out.println("<1> Failed at " + i); 1669 failCount++; 1670 } 1671 } 1672 1673 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1674 for (int i = 0; i < patterns.length; i++) { 1675 pattern = 
Pattern.compile(patterns[i], flags); 1676 matcher = pattern.matcher(texts[i]); 1677 if (!matcher.matches()) { 1678 System.out.println("<2> Failed at " + i); 1679 failCount++; 1680 } 1681 } 1682 // flag unicode_case alone should do nothing 1683 flags = Pattern.UNICODE_CASE; 1684 for (int i = 0; i < patterns.length; i++) { 1685 pattern = Pattern.compile(patterns[i], flags); 1686 matcher = pattern.matcher(texts[i]); 1687 if (matcher.matches()) { 1688 System.out.println("<3> Failed at " + i); 1689 failCount++; 1690 } 1691 } 1692 1693 // Special cases: i, I, u+0131 and u+0130 1694 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1695 pattern = Pattern.compile("[h-j]+", flags); 1696 if (!pattern.matcher("\u0131\u0130").matches()) 1697 failCount++; 1698 report("Case Folding"); 1699 } 1700 1701 private static void appendTest() { 1702 Pattern pattern = Pattern.compile("(ab)(cd)"); 1703 Matcher matcher = pattern.matcher("abcd"); 1704 String result = matcher.replaceAll("$2$1"); 1705 if (!result.equals("cdab")) 1706 failCount++; 1707 1708 String s1 = "Swap all: first = 123, second = 456"; 1709 String s2 = "Swap one: first = 123, second = 456"; 1710 String r = "$3$2$1"; 1711 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1712 matcher = pattern.matcher(s1); 1713 1714 result = matcher.replaceAll(r); 1715 if (!result.equals("Swap all: 123 = first, 456 = second")) 1716 failCount++; 1717 1718 matcher = pattern.matcher(s2); 1719 1720 if (matcher.find()) { 1721 StringBuffer sb = new StringBuffer(); 1722 matcher.appendReplacement(sb, r); 1723 matcher.appendTail(sb); 1724 result = sb.toString(); 1725 if (!result.equals("Swap one: 123 = first, second = 456")) 1726 failCount++; 1727 } 1728 1729 // Supplementary character test 1730 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1731 matcher = pattern.matcher(toSupplementaries("abcd")); 1732 result = matcher.replaceAll("$2$1"); 1733 if (!result.equals(toSupplementaries("cdab"))) 1734 failCount++; 1735 1736 s1 = 
toSupplementaries("Swap all: first = 123, second = 456"); 1737 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1738 r = toSupplementaries("$3$2$1"); 1739 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1740 matcher = pattern.matcher(s1); 1741 1742 result = matcher.replaceAll(r); 1743 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1744 failCount++; 1745 1746 matcher = pattern.matcher(s2); 1747 1748 if (matcher.find()) { 1749 StringBuffer sb = new StringBuffer(); 1750 matcher.appendReplacement(sb, r); 1751 matcher.appendTail(sb); 1752 result = sb.toString(); 1753 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1754 failCount++; 1755 } 1756 report("Append"); 1757 } 1758 1759 private static void splitTest() { 1760 Pattern pattern = Pattern.compile(":"); 1761 String[] result = pattern.split("foo:and:boo", 2); 1762 if (!result[0].equals("foo")) 1763 failCount++; 1764 if (!result[1].equals("and:boo")) 1765 failCount++; 1766 // Supplementary character test 1767 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1768 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1769 if (!result[0].equals(toSupplementaries("foo"))) 1770 failCount++; 1771 if (!result[1].equals(toSupplementaries("andXboo"))) 1772 failCount++; 1773 1774 CharBuffer cb = CharBuffer.allocate(100); 1775 cb.put("foo:and:boo"); 1776 cb.flip(); 1777 result = pattern.split(cb); 1778 if (!result[0].equals("foo")) 1779 failCount++; 1780 if (!result[1].equals("and")) 1781 failCount++; 1782 if (!result[2].equals("boo")) 1783 failCount++; 1784 1785 // Supplementary character test 1786 CharBuffer cbs = CharBuffer.allocate(100); 1787 cbs.put(toSupplementaries("fooXandXboo")); 1788 cbs.flip(); 1789 result = patternX.split(cbs); 1790 if (!result[0].equals(toSupplementaries("foo"))) 1791 failCount++; 1792 if (!result[1].equals(toSupplementaries("and"))) 1793 failCount++; 1794 if 
(!result[2].equals(toSupplementaries("boo"))) 1795 failCount++; 1796 1797 String source = "0123456789"; 1798 for (int limit=-2; limit<3; limit++) { 1799 for (int x=0; x<10; x++) { 1800 result = source.split(Integer.toString(x), limit); 1801 int expectedLength = limit < 1 ? 2 : limit; 1802 1803 if ((limit == 0) && (x == 9)) { 1804 // expected dropping of "" 1805 if (result.length != 1) 1806 failCount++; 1807 if (!result[0].equals("012345678")) { 1808 failCount++; 1809 } 1810 } else { 1811 if (result.length != expectedLength) { 1812 failCount++; 1813 } 1814 if (!result[0].equals(source.substring(0,x))) { 1815 if (limit != 1) { 1816 failCount++; 1817 } else { 1818 if (!result[0].equals(source.substring(0,10))) { 1819 failCount++; 1820 } 1821 } 1822 } 1823 if (expectedLength > 1) { // Check segment 2 1824 if (!result[1].equals(source.substring(x+1,10))) 1825 failCount++; 1826 } 1827 } 1828 } 1829 } 1830 // Check the case for no match found 1831 for (int limit=-2; limit<3; limit++) { 1832 result = source.split("e", limit); 1833 if (result.length != 1) 1834 failCount++; 1835 if (!result[0].equals(source)) 1836 failCount++; 1837 } 1838 // Check the case for limit == 0, source = ""; 1839 // split() now returns 0-length for empty source "" see #6559590 1840 source = ""; 1841 result = source.split("e", 0); 1842 if (result.length != 1) 1843 failCount++; 1844 if (!result[0].equals(source)) 1845 failCount++; 1846 1847 // Check both split() and splitAsStraem(), especially for zero-lenth 1848 // input and zero-lenth match cases 1849 String[][] input = new String[][] { 1850 { " ", "Abc Efg Hij" }, // normal non-zero-match 1851 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1852 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1853 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1854 { "(?=\\p{Lu})", "AbcEfg" }, 1855 { "(?=\\p{Lu})", "Abc" }, 1856 { " ", "" }, // zero-length input 1857 { ".*", "" }, 1858 1859 // some tests from 
PatternStreamTest.java 1860 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1861 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1862 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1863 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1864 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1865 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1866 { "\u56da", "" }, 1867 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1868 { "o", "boo:and:foo" }, 1869 { "o", "booooo:and:fooooo" }, 1870 { "o", "fooooo:" }, 1871 }; 1872 1873 String[][] expected = new String[][] { 1874 { "Abc", "Efg", "Hij" }, 1875 { "", "Abc", "Efg", "Hij" }, 1876 { "Abc", "", "Efg", "Hij" }, 1877 { "Abc", "Efg", "Hij" }, 1878 { "Abc", "Efg" }, 1879 { "Abc" }, 1880 { "" }, 1881 { "" }, 1882 1883 { "awgqwefg1fefw", "vssv1vvv1" }, 1884 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1885 { "awgqwefg", "fefw4vssv", "vvv" }, 1886 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1887 { "1", "23", "456", "7890" }, 1888 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1889 { "" }, 1890 { "This", "is", "testing", "", "with", "different", "separators" }, 1891 { "b", "", ":and:f" }, 1892 { "b", "", "", "", "", ":and:f" }, 1893 { "f", "", "", "", "", ":" }, 1894 }; 1895 for (int i = 0; i < input.length; i++) { 1896 pattern = Pattern.compile(input[i][0]); 1897 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1898 failCount++; 1899 } 1900 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1901 // array for zero-length input for now 1902 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1903 expected[i])) { 1904 failCount++; 1905 } 1906 } 1907 report("Split"); 1908 } 1909 1910 private static void negationTest() { 1911 Pattern pattern = Pattern.compile("[\\[@^]+"); 1912 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1913 if (!matcher.find()) 1914 failCount++; 
1915 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1916 failCount++; 1917 pattern = Pattern.compile("[@\\[^]+"); 1918 matcher = pattern.matcher("@@@@[[[[^^^^"); 1919 if (!matcher.find()) 1920 failCount++; 1921 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1922 failCount++; 1923 pattern = Pattern.compile("[@\\[^@]+"); 1924 matcher = pattern.matcher("@@@@[[[[^^^^"); 1925 if (!matcher.find()) 1926 failCount++; 1927 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1928 failCount++; 1929 1930 pattern = Pattern.compile("\\)"); 1931 matcher = pattern.matcher("xxx)xxx"); 1932 if (!matcher.find()) 1933 failCount++; 1934 1935 report("Negation"); 1936 } 1937 1938 private static void ampersandTest() { 1939 Pattern pattern = Pattern.compile("[&@]+"); 1940 check(pattern, "@@@@&&&&", true); 1941 1942 pattern = Pattern.compile("[@&]+"); 1943 check(pattern, "@@@@&&&&", true); 1944 1945 pattern = Pattern.compile("[@\\&]+"); 1946 check(pattern, "@@@@&&&&", true); 1947 1948 report("Ampersand"); 1949 } 1950 1951 private static void octalTest() throws Exception { 1952 Pattern pattern = Pattern.compile("\\u0007"); 1953 Matcher matcher = pattern.matcher("\u0007"); 1954 if (!matcher.matches()) 1955 failCount++; 1956 pattern = Pattern.compile("\\07"); 1957 matcher = pattern.matcher("\u0007"); 1958 if (!matcher.matches()) 1959 failCount++; 1960 pattern = Pattern.compile("\\007"); 1961 matcher = pattern.matcher("\u0007"); 1962 if (!matcher.matches()) 1963 failCount++; 1964 pattern = Pattern.compile("\\0007"); 1965 matcher = pattern.matcher("\u0007"); 1966 if (!matcher.matches()) 1967 failCount++; 1968 pattern = Pattern.compile("\\040"); 1969 matcher = pattern.matcher("\u0020"); 1970 if (!matcher.matches()) 1971 failCount++; 1972 pattern = Pattern.compile("\\0403"); 1973 matcher = pattern.matcher("\u00203"); 1974 if (!matcher.matches()) 1975 failCount++; 1976 pattern = Pattern.compile("\\0103"); 1977 matcher = pattern.matcher("\u0043"); 1978 if (!matcher.matches()) 1979 failCount++; 1980 1981 
report("Octal"); 1982 } 1983 1984 private static void longPatternTest() throws Exception { 1985 try { 1986 Pattern pattern = Pattern.compile( 1987 "a 32-character-long pattern xxxx"); 1988 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 1989 pattern = Pattern.compile("a thirty four character long regex"); 1990 StringBuffer patternToBe = new StringBuffer(101); 1991 for (int i=0; i<100; i++) 1992 patternToBe.append((char)(97 + i%26)); 1993 pattern = Pattern.compile(patternToBe.toString()); 1994 } catch (PatternSyntaxException e) { 1995 failCount++; 1996 } 1997 1998 // Supplementary character test 1999 try { 2000 Pattern pattern = Pattern.compile( 2001 toSupplementaries("a 32-character-long pattern xxxx")); 2002 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 2003 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 2004 StringBuffer patternToBe = new StringBuffer(101*2); 2005 for (int i=0; i<100; i++) 2006 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 2007 + 97 + i%26)); 2008 pattern = Pattern.compile(patternToBe.toString()); 2009 } catch (PatternSyntaxException e) { 2010 failCount++; 2011 } 2012 report("LongPattern"); 2013 } 2014 2015 private static void group0Test() throws Exception { 2016 Pattern pattern = Pattern.compile("(tes)ting"); 2017 Matcher matcher = pattern.matcher("testing"); 2018 check(matcher, "testing"); 2019 2020 matcher.reset("testing"); 2021 if (matcher.lookingAt()) { 2022 if (!matcher.group(0).equals("testing")) 2023 failCount++; 2024 } else { 2025 failCount++; 2026 } 2027 2028 matcher.reset("testing"); 2029 if (matcher.matches()) { 2030 if (!matcher.group(0).equals("testing")) 2031 failCount++; 2032 } else { 2033 failCount++; 2034 } 2035 2036 pattern = Pattern.compile("(tes)ting"); 2037 matcher = pattern.matcher("testing"); 2038 if (matcher.lookingAt()) { 2039 if (!matcher.group(0).equals("testing")) 2040 failCount++; 2041 } else { 2042 
failCount++; 2043 } 2044 2045 pattern = Pattern.compile("^(tes)ting"); 2046 matcher = pattern.matcher("testing"); 2047 if (matcher.matches()) { 2048 if (!matcher.group(0).equals("testing")) 2049 failCount++; 2050 } else { 2051 failCount++; 2052 } 2053 2054 // Supplementary character test 2055 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2056 matcher = pattern.matcher(toSupplementaries("testing")); 2057 check(matcher, toSupplementaries("testing")); 2058 2059 matcher.reset(toSupplementaries("testing")); 2060 if (matcher.lookingAt()) { 2061 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2062 failCount++; 2063 } else { 2064 failCount++; 2065 } 2066 2067 matcher.reset(toSupplementaries("testing")); 2068 if (matcher.matches()) { 2069 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2070 failCount++; 2071 } else { 2072 failCount++; 2073 } 2074 2075 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2076 matcher = pattern.matcher(toSupplementaries("testing")); 2077 if (matcher.lookingAt()) { 2078 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2079 failCount++; 2080 } else { 2081 failCount++; 2082 } 2083 2084 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2085 matcher = pattern.matcher(toSupplementaries("testing")); 2086 if (matcher.matches()) { 2087 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2088 failCount++; 2089 } else { 2090 failCount++; 2091 } 2092 2093 report("Group0"); 2094 } 2095 2096 private static void findIntTest() throws Exception { 2097 Pattern p = Pattern.compile("blah"); 2098 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2099 boolean result = m.find(2); 2100 if (!result) 2101 failCount++; 2102 2103 p = Pattern.compile("$"); 2104 m = p.matcher("1234567890"); 2105 result = m.find(10); 2106 if (!result) 2107 failCount++; 2108 try { 2109 result = m.find(11); 2110 failCount++; 2111 } catch (IndexOutOfBoundsException e) { 2112 // correct result 2113 } 2114 2115 // Supplementary 
character test 2116 p = Pattern.compile(toSupplementaries("blah")); 2117 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2118 result = m.find(2); 2119 if (!result) 2120 failCount++; 2121 2122 report("FindInt"); 2123 } 2124 2125 private static void emptyPatternTest() throws Exception { 2126 Pattern p = Pattern.compile(""); 2127 Matcher m = p.matcher("foo"); 2128 2129 // Should find empty pattern at beginning of input 2130 boolean result = m.find(); 2131 if (result != true) 2132 failCount++; 2133 if (m.start() != 0) 2134 failCount++; 2135 2136 // Should not match entire input if input is not empty 2137 m.reset(); 2138 result = m.matches(); 2139 if (result == true) 2140 failCount++; 2141 2142 try { 2143 m.start(0); 2144 failCount++; 2145 } catch (IllegalStateException e) { 2146 // Correct result 2147 } 2148 2149 // Should match entire input if input is empty 2150 m.reset(""); 2151 result = m.matches(); 2152 if (result != true) 2153 failCount++; 2154 2155 result = Pattern.matches("", ""); 2156 if (result != true) 2157 failCount++; 2158 2159 result = Pattern.matches("", "foo"); 2160 if (result == true) 2161 failCount++; 2162 report("EmptyPattern"); 2163 } 2164 2165 private static void charClassTest() throws Exception { 2166 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2167 check(pattern, "blahb]blech", true); 2168 2169 pattern = Pattern.compile("[abc[def]]"); 2170 check(pattern, "b", true); 2171 2172 // Supplementary character tests 2173 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2174 check(pattern, toSupplementaries("blahb]blech"), true); 2175 2176 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2177 check(pattern, toSupplementaries("b"), true); 2178 2179 try { 2180 // u00ff when UNICODE_CASE 2181 pattern = Pattern.compile("[ab\u00ffcd]", 2182 Pattern.CASE_INSENSITIVE| 2183 Pattern.UNICODE_CASE); 2184 check(pattern, "ab\u00ffcd", true); 2185 check(pattern, "Ab\u0178Cd", true); 2186 2187 // u00b5 when UNICODE_CASE 2188 
pattern = Pattern.compile("[ab\u00b5cd]", 2189 Pattern.CASE_INSENSITIVE| 2190 Pattern.UNICODE_CASE); 2191 check(pattern, "ab\u00b5cd", true); 2192 check(pattern, "Ab\u039cCd", true); 2193 } catch (Exception e) { failCount++; } 2194 2195 /* Special cases 2196 (1)LatinSmallLetterLongS u+017f 2197 (2)LatinSmallLetterDotlessI u+0131 2198 (3)LatineCapitalLetterIWithDotAbove u+0130 2199 (4)KelvinSign u+212a 2200 (5)AngstromSign u+212b 2201 */ 2202 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2203 pattern = Pattern.compile("[sik\u00c5]+", flags); 2204 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2205 failCount++; 2206 2207 report("CharClass"); 2208 } 2209 2210 private static void caretTest() throws Exception { 2211 Pattern pattern = Pattern.compile("\\w*"); 2212 Matcher matcher = pattern.matcher("a#bc#def##g"); 2213 check(matcher, "a"); 2214 check(matcher, ""); 2215 check(matcher, "bc"); 2216 check(matcher, ""); 2217 check(matcher, "def"); 2218 check(matcher, ""); 2219 check(matcher, ""); 2220 check(matcher, "g"); 2221 check(matcher, ""); 2222 if (matcher.find()) 2223 failCount++; 2224 2225 pattern = Pattern.compile("^\\w*"); 2226 matcher = pattern.matcher("a#bc#def##g"); 2227 check(matcher, "a"); 2228 if (matcher.find()) 2229 failCount++; 2230 2231 pattern = Pattern.compile("\\w"); 2232 matcher = pattern.matcher("abc##x"); 2233 check(matcher, "a"); 2234 check(matcher, "b"); 2235 check(matcher, "c"); 2236 check(matcher, "x"); 2237 if (matcher.find()) 2238 failCount++; 2239 2240 pattern = Pattern.compile("^\\w"); 2241 matcher = pattern.matcher("abc##x"); 2242 check(matcher, "a"); 2243 if (matcher.find()) 2244 failCount++; 2245 2246 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2247 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2248 check(matcher, "abc"); 2249 if (matcher.find()) 2250 failCount++; 2251 2252 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2253 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2254 
check(matcher, "abc"); 2255 check(matcher, "jkl"); 2256 if (matcher.find()) 2257 failCount++; 2258 2259 pattern = Pattern.compile("^", Pattern.MULTILINE); 2260 matcher = pattern.matcher("this is some text"); 2261 String result = matcher.replaceAll("X"); 2262 if (!result.equals("Xthis is some text")) 2263 failCount++; 2264 2265 pattern = Pattern.compile("^"); 2266 matcher = pattern.matcher("this is some text"); 2267 result = matcher.replaceAll("X"); 2268 if (!result.equals("Xthis is some text")) 2269 failCount++; 2270 2271 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2272 matcher = pattern.matcher("this is some text\n"); 2273 result = matcher.replaceAll("X"); 2274 if (!result.equals("Xthis is some text\n")) 2275 failCount++; 2276 2277 report("Caret"); 2278 } 2279 2280 private static void groupCaptureTest() throws Exception { 2281 // Independent group 2282 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2283 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2284 matcher.find(); 2285 try { 2286 String blah = matcher.group(1); 2287 failCount++; 2288 } catch (IndexOutOfBoundsException ioobe) { 2289 // Good result 2290 } 2291 // Pure group 2292 pattern = Pattern.compile("x+(?:y+)z+"); 2293 matcher = pattern.matcher("xxxyyyzzz"); 2294 matcher.find(); 2295 try { 2296 String blah = matcher.group(1); 2297 failCount++; 2298 } catch (IndexOutOfBoundsException ioobe) { 2299 // Good result 2300 } 2301 2302 // Supplementary character tests 2303 // Independent group 2304 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2305 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2306 matcher.find(); 2307 try { 2308 String blah = matcher.group(1); 2309 failCount++; 2310 } catch (IndexOutOfBoundsException ioobe) { 2311 // Good result 2312 } 2313 // Pure group 2314 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2315 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2316 matcher.find(); 2317 try { 2318 String blah = 
matcher.group(1); 2319 failCount++; 2320 } catch (IndexOutOfBoundsException ioobe) { 2321 // Good result 2322 } 2323 2324 report("GroupCapture"); 2325 } 2326 2327 private static void backRefTest() throws Exception { 2328 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2329 check(pattern, "zzzaabcazzz", true); 2330 2331 pattern = Pattern.compile("(a*)bc\\1"); 2332 check(pattern, "zzzaabcaazzz", true); 2333 2334 pattern = Pattern.compile("(abc)(def)\\1"); 2335 check(pattern, "abcdefabc", true); 2336 2337 pattern = Pattern.compile("(abc)(def)\\3"); 2338 check(pattern, "abcdefabc", false); 2339 2340 try { 2341 for (int i = 1; i < 10; i++) { 2342 // Make sure backref 1-9 are always accepted 2343 pattern = Pattern.compile("abcdef\\" + i); 2344 // and fail to match if the target group does not exit 2345 check(pattern, "abcdef", false); 2346 } 2347 } catch(PatternSyntaxException e) { 2348 failCount++; 2349 } 2350 2351 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2352 check(pattern, "abcdefghija", false); 2353 check(pattern, "abcdefghija1", true); 2354 2355 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2356 check(pattern, "abcdefghijkk", true); 2357 2358 pattern = Pattern.compile("(a)bcdefghij\\11"); 2359 check(pattern, "abcdefghija1", true); 2360 2361 // Supplementary character tests 2362 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2363 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2364 2365 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2366 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2367 2368 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2369 check(pattern, toSupplementaries("abcdefabc"), true); 2370 2371 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2372 check(pattern, toSupplementaries("abcdefabc"), false); 2373 2374 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2375 check(pattern, 
toSupplementaries("abcdefghija"), false); 2376 check(pattern, toSupplementaries("abcdefghija1"), true); 2377 2378 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2379 check(pattern, toSupplementaries("abcdefghijkk"), true); 2380 2381 report("BackRef"); 2382 } 2383 2384 /** 2385 * Unicode Technical Report #18, section 2.6 End of Line 2386 * There is no empty line to be matched in the sequence \u000D\u000A 2387 * but there is an empty line in the sequence \u000A\u000D. 2388 */ 2389 private static void anchorTest() throws Exception { 2390 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2391 Matcher m = p.matcher("blah1\r\nblah2"); 2392 m.find(); 2393 m.find(); 2394 if (!m.group().equals("blah2")) 2395 failCount++; 2396 2397 m.reset("blah1\n\rblah2"); 2398 m.find(); 2399 m.find(); 2400 m.find(); 2401 if (!m.group().equals("blah2")) 2402 failCount++; 2403 2404 // Test behavior of $ with \r\n at end of input 2405 p = Pattern.compile(".+$"); 2406 m = p.matcher("blah1\r\n"); 2407 if (!m.find()) 2408 failCount++; 2409 if (!m.group().equals("blah1")) 2410 failCount++; 2411 if (m.find()) 2412 failCount++; 2413 2414 // Test behavior of $ with \r\n at end of input in multiline 2415 p = Pattern.compile(".+$", Pattern.MULTILINE); 2416 m = p.matcher("blah1\r\n"); 2417 if (!m.find()) 2418 failCount++; 2419 if (m.find()) 2420 failCount++; 2421 2422 // Test for $ recognition of \u0085 for bug 4527731 2423 p = Pattern.compile(".+$", Pattern.MULTILINE); 2424 m = p.matcher("blah1\u0085"); 2425 if (!m.find()) 2426 failCount++; 2427 2428 // Supplementary character test 2429 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2430 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2431 m.find(); 2432 m.find(); 2433 if (!m.group().equals(toSupplementaries("blah2"))) 2434 failCount++; 2435 2436 m.reset(toSupplementaries("blah1\n\rblah2")); 2437 m.find(); 2438 m.find(); 2439 m.find(); 2440 if (!m.group().equals(toSupplementaries("blah2"))) 2441 
failCount++; 2442 2443 // Test behavior of $ with \r\n at end of input 2444 p = Pattern.compile(".+$"); 2445 m = p.matcher(toSupplementaries("blah1\r\n")); 2446 if (!m.find()) 2447 failCount++; 2448 if (!m.group().equals(toSupplementaries("blah1"))) 2449 failCount++; 2450 if (m.find()) 2451 failCount++; 2452 2453 // Test behavior of $ with \r\n at end of input in multiline 2454 p = Pattern.compile(".+$", Pattern.MULTILINE); 2455 m = p.matcher(toSupplementaries("blah1\r\n")); 2456 if (!m.find()) 2457 failCount++; 2458 if (m.find()) 2459 failCount++; 2460 2461 // Test for $ recognition of \u0085 for bug 4527731 2462 p = Pattern.compile(".+$", Pattern.MULTILINE); 2463 m = p.matcher(toSupplementaries("blah1\u0085")); 2464 if (!m.find()) 2465 failCount++; 2466 2467 report("Anchors"); 2468 } 2469 2470 /** 2471 * A basic sanity test of Matcher.lookingAt(). 2472 */ 2473 private static void lookingAtTest() throws Exception { 2474 Pattern p = Pattern.compile("(ab)(c*)"); 2475 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2476 2477 if (!m.lookingAt()) 2478 failCount++; 2479 2480 if (!m.group().equals(m.group(0))) 2481 failCount++; 2482 2483 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2484 if (m.lookingAt()) 2485 failCount++; 2486 2487 // Supplementary character test 2488 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2489 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2490 2491 if (!m.lookingAt()) 2492 failCount++; 2493 2494 if (!m.group().equals(m.group(0))) 2495 failCount++; 2496 2497 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2498 if (m.lookingAt()) 2499 failCount++; 2500 2501 report("Looking At"); 2502 } 2503 2504 /** 2505 * A basic sanity test of Matcher.matches(). 
2506 */ 2507 private static void matchesTest() throws Exception { 2508 // matches() 2509 Pattern p = Pattern.compile("ulb(c*)"); 2510 Matcher m = p.matcher("ulbcccccc"); 2511 if (!m.matches()) 2512 failCount++; 2513 2514 // find() but not matches() 2515 m.reset("zzzulbcccccc"); 2516 if (m.matches()) 2517 failCount++; 2518 2519 // lookingAt() but not matches() 2520 m.reset("ulbccccccdef"); 2521 if (m.matches()) 2522 failCount++; 2523 2524 // matches() 2525 p = Pattern.compile("a|ad"); 2526 m = p.matcher("ad"); 2527 if (!m.matches()) 2528 failCount++; 2529 2530 // Supplementary character test 2531 // matches() 2532 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2533 m = p.matcher(toSupplementaries("ulbcccccc")); 2534 if (!m.matches()) 2535 failCount++; 2536 2537 // find() but not matches() 2538 m.reset(toSupplementaries("zzzulbcccccc")); 2539 if (m.matches()) 2540 failCount++; 2541 2542 // lookingAt() but not matches() 2543 m.reset(toSupplementaries("ulbccccccdef")); 2544 if (m.matches()) 2545 failCount++; 2546 2547 // matches() 2548 p = Pattern.compile(toSupplementaries("a|ad")); 2549 m = p.matcher(toSupplementaries("ad")); 2550 if (!m.matches()) 2551 failCount++; 2552 2553 report("Matches"); 2554 } 2555 2556 /** 2557 * A basic sanity test of Pattern.matches(). 
2558 */ 2559 private static void patternMatchesTest() throws Exception { 2560 // matches() 2561 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2562 toSupplementaries("ulbcccccc"))) 2563 failCount++; 2564 2565 // find() but not matches() 2566 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2567 toSupplementaries("zzzulbcccccc"))) 2568 failCount++; 2569 2570 // lookingAt() but not matches() 2571 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2572 toSupplementaries("ulbccccccdef"))) 2573 failCount++; 2574 2575 // Supplementary character test 2576 // matches() 2577 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2578 toSupplementaries("ulbcccccc"))) 2579 failCount++; 2580 2581 // find() but not matches() 2582 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2583 toSupplementaries("zzzulbcccccc"))) 2584 failCount++; 2585 2586 // lookingAt() but not matches() 2587 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2588 toSupplementaries("ulbccccccdef"))) 2589 failCount++; 2590 2591 report("Pattern Matches"); 2592 } 2593 2594 /** 2595 * Canonical equivalence testing. Tests the ability of the engine 2596 * to match sequences that are not explicitly specified in the 2597 * pattern when they are considered equivalent by the Unicode Standard. 
2598 */ 2599 private static void ceTest() throws Exception { 2600 // Decomposed char outside char classes 2601 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2602 Matcher m = p.matcher("test\u00e5"); 2603 if (!m.matches()) 2604 failCount++; 2605 2606 m.reset("testa\u030a"); 2607 if (!m.matches()) 2608 failCount++; 2609 2610 // Composed char outside char classes 2611 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2612 m = p.matcher("test\u00e5"); 2613 if (!m.matches()) 2614 failCount++; 2615 2616 m.reset("testa\u030a"); 2617 if (!m.find()) 2618 failCount++; 2619 2620 // Decomposed char inside a char class 2621 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2622 m = p.matcher("test\u00e5"); 2623 if (!m.find()) 2624 failCount++; 2625 2626 m.reset("testa\u030a"); 2627 if (!m.find()) 2628 failCount++; 2629 2630 // Composed char inside a char class 2631 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2632 m = p.matcher("test\u00e5"); 2633 if (!m.find()) 2634 failCount++; 2635 2636 m.reset("testa\u0300"); 2637 if (!m.find()) 2638 failCount++; 2639 2640 m.reset("testa\u030a"); 2641 if (!m.find()) 2642 failCount++; 2643 2644 // Marks that cannot legally change order and be equivalent 2645 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2646 check(p, "testa\u0308\u0300", true); 2647 check(p, "testa\u0300\u0308", false); 2648 2649 // Marks that can legally change order and be equivalent 2650 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2651 check(p, "testa\u0308\u0323", true); 2652 check(p, "testa\u0323\u0308", true); 2653 2654 // Test all equivalences of the sequence a\u0308\u0323\u0300 2655 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2656 check(p, "testa\u0308\u0323\u0300", true); 2657 check(p, "testa\u0323\u0308\u0300", true); 2658 check(p, "testa\u0308\u0300\u0323", true); 2659 check(p, "test\u00e4\u0323\u0300", true); 2660 check(p, "test\u00e4\u0300\u0323", true); 2661 
2662 /* 2663 * The following canonical equivalence tests don't work. Bug id: 4916384. 2664 * 2665 // Decomposed hangul (jamos) 2666 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ); 2667 m = p.matcher("\u1100\u1161"); 2668 if (!m.matches()) 2669 failCount++; 2670 2671 m.reset("\uac00"); 2672 if (!m.matches()) 2673 failCount++; 2674 2675 // Composed hangul 2676 p = Pattern.compile("\uac00", Pattern.CANON_EQ); 2677 m = p.matcher("\u1100\u1161"); 2678 if (!m.matches()) 2679 failCount++; 2680 2681 m.reset("\uac00"); 2682 if (!m.matches()) 2683 failCount++; 2684 2685 // Decomposed supplementary outside char classes 2686 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ); 2687 m = p.matcher("test\ud834\uddc0"); 2688 if (!m.matches()) 2689 failCount++; 2690 2691 m.reset("test\ud834\uddbc\ud834\udd6f"); 2692 if (!m.matches()) 2693 failCount++; 2694 2695 // Composed supplementary outside char classes 2696 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ); 2697 m.reset("test\ud834\uddbc\ud834\udd6f"); 2698 if (!m.matches()) 2699 failCount++; 2700 2701 m = p.matcher("test\ud834\uddc0"); 2702 if (!m.matches()) 2703 failCount++; 2704 2705 */ 2706 2707 report("Canonical Equivalence"); 2708 } 2709 2710 /** 2711 * A basic sanity test of Matcher.replaceAll(). 
2712 */ 2713 private static void globalSubstitute() throws Exception { 2714 // Global substitution with a literal 2715 Pattern p = Pattern.compile("(ab)(c*)"); 2716 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2717 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2718 failCount++; 2719 2720 m.reset("zzzabccczzzabcczzzabccczzz"); 2721 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2722 failCount++; 2723 2724 // Global substitution with groups 2725 m.reset("zzzabccczzzabcczzzabccczzz"); 2726 String result = m.replaceAll("$1"); 2727 if (!result.equals("zzzabzzzabzzzabzzz")) 2728 failCount++; 2729 2730 // Supplementary character test 2731 // Global substitution with a literal 2732 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2733 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2734 if (!m.replaceAll(toSupplementaries("test")). 2735 equals(toSupplementaries("testzzztestzzztest"))) 2736 failCount++; 2737 2738 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2739 if (!m.replaceAll(toSupplementaries("test")). 2740 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2741 failCount++; 2742 2743 // Global substitution with groups 2744 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2745 result = m.replaceAll("$1"); 2746 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2747 failCount++; 2748 2749 report("Global Substitution"); 2750 } 2751 2752 /** 2753 * Tests the usage of Matcher.appendReplacement() with literal 2754 * and group substitutions. 
2755 */ 2756 private static void stringbufferSubstitute() throws Exception { 2757 // SB substitution with literal 2758 String blah = "zzzblahzzz"; 2759 Pattern p = Pattern.compile("blah"); 2760 Matcher m = p.matcher(blah); 2761 StringBuffer result = new StringBuffer(); 2762 try { 2763 m.appendReplacement(result, "blech"); 2764 failCount++; 2765 } catch (IllegalStateException e) { 2766 } 2767 m.find(); 2768 m.appendReplacement(result, "blech"); 2769 if (!result.toString().equals("zzzblech")) 2770 failCount++; 2771 2772 m.appendTail(result); 2773 if (!result.toString().equals("zzzblechzzz")) 2774 failCount++; 2775 2776 // SB substitution with groups 2777 blah = "zzzabcdzzz"; 2778 p = Pattern.compile("(ab)(cd)*"); 2779 m = p.matcher(blah); 2780 result = new StringBuffer(); 2781 try { 2782 m.appendReplacement(result, "$1"); 2783 failCount++; 2784 } catch (IllegalStateException e) { 2785 } 2786 m.find(); 2787 m.appendReplacement(result, "$1"); 2788 if (!result.toString().equals("zzzab")) 2789 failCount++; 2790 2791 m.appendTail(result); 2792 if (!result.toString().equals("zzzabzzz")) 2793 failCount++; 2794 2795 // SB substitution with 3 groups 2796 blah = "zzzabcdcdefzzz"; 2797 p = Pattern.compile("(ab)(cd)*(ef)"); 2798 m = p.matcher(blah); 2799 result = new StringBuffer(); 2800 try { 2801 m.appendReplacement(result, "$1w$2w$3"); 2802 failCount++; 2803 } catch (IllegalStateException e) { 2804 } 2805 m.find(); 2806 m.appendReplacement(result, "$1w$2w$3"); 2807 if (!result.toString().equals("zzzabwcdwef")) 2808 failCount++; 2809 2810 m.appendTail(result); 2811 if (!result.toString().equals("zzzabwcdwefzzz")) 2812 failCount++; 2813 2814 // SB substitution with groups and three matches 2815 // skipping middle match 2816 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2817 p = Pattern.compile("(ab)(cd*)"); 2818 m = p.matcher(blah); 2819 result = new StringBuffer(); 2820 try { 2821 m.appendReplacement(result, "$1"); 2822 failCount++; 2823 } catch (IllegalStateException e) { 2824 } 2825 
m.find(); 2826 m.appendReplacement(result, "$1"); 2827 if (!result.toString().equals("zzzab")) 2828 failCount++; 2829 2830 m.find(); 2831 m.find(); 2832 m.appendReplacement(result, "$2"); 2833 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2834 failCount++; 2835 2836 m.appendTail(result); 2837 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2838 failCount++; 2839 2840 // Check to make sure escaped $ is ignored 2841 blah = "zzzabcdcdefzzz"; 2842 p = Pattern.compile("(ab)(cd)*(ef)"); 2843 m = p.matcher(blah); 2844 result = new StringBuffer(); 2845 m.find(); 2846 m.appendReplacement(result, "$1w\\$2w$3"); 2847 if (!result.toString().equals("zzzabw$2wef")) 2848 failCount++; 2849 2850 m.appendTail(result); 2851 if (!result.toString().equals("zzzabw$2wefzzz")) 2852 failCount++; 2853 2854 // Check to make sure a reference to nonexistent group causes error 2855 blah = "zzzabcdcdefzzz"; 2856 p = Pattern.compile("(ab)(cd)*(ef)"); 2857 m = p.matcher(blah); 2858 result = new StringBuffer(); 2859 m.find(); 2860 try { 2861 m.appendReplacement(result, "$1w$5w$3"); 2862 failCount++; 2863 } catch (IndexOutOfBoundsException ioobe) { 2864 // Correct result 2865 } 2866 2867 // Check double digit group references 2868 blah = "zzz123456789101112zzz"; 2869 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2870 m = p.matcher(blah); 2871 result = new StringBuffer(); 2872 m.find(); 2873 m.appendReplacement(result, "$1w$11w$3"); 2874 if (!result.toString().equals("zzz1w11w3")) 2875 failCount++; 2876 2877 // Check to make sure it backs off $15 to $1 if only three groups 2878 blah = "zzzabcdcdefzzz"; 2879 p = Pattern.compile("(ab)(cd)*(ef)"); 2880 m = p.matcher(blah); 2881 result = new StringBuffer(); 2882 m.find(); 2883 m.appendReplacement(result, "$1w$15w$3"); 2884 if (!result.toString().equals("zzzabwab5wef")) 2885 failCount++; 2886 2887 2888 // Supplementary character test 2889 // SB substitution with literal 2890 blah = toSupplementaries("zzzblahzzz"); 2891 p = 
Pattern.compile(toSupplementaries("blah")); 2892 m = p.matcher(blah); 2893 result = new StringBuffer(); 2894 try { 2895 m.appendReplacement(result, toSupplementaries("blech")); 2896 failCount++; 2897 } catch (IllegalStateException e) { 2898 } 2899 m.find(); 2900 m.appendReplacement(result, toSupplementaries("blech")); 2901 if (!result.toString().equals(toSupplementaries("zzzblech"))) 2902 failCount++; 2903 2904 m.appendTail(result); 2905 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 2906 failCount++; 2907 2908 // SB substitution with groups 2909 blah = toSupplementaries("zzzabcdzzz"); 2910 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 2911 m = p.matcher(blah); 2912 result = new StringBuffer(); 2913 try { 2914 m.appendReplacement(result, "$1"); 2915 failCount++; 2916 } catch (IllegalStateException e) { 2917 } 2918 m.find(); 2919 m.appendReplacement(result, "$1"); 2920 if (!result.toString().equals(toSupplementaries("zzzab"))) 2921 failCount++; 2922 2923 m.appendTail(result); 2924 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 2925 failCount++; 2926 2927 // SB substitution with 3 groups 2928 blah = toSupplementaries("zzzabcdcdefzzz"); 2929 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2930 m = p.matcher(blah); 2931 result = new StringBuffer(); 2932 try { 2933 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2934 failCount++; 2935 } catch (IllegalStateException e) { 2936 } 2937 m.find(); 2938 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2939 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 2940 failCount++; 2941 2942 m.appendTail(result); 2943 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 2944 failCount++; 2945 2946 // SB substitution with groups and three matches 2947 // skipping middle match 2948 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 2949 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 2950 m = p.matcher(blah); 2951 result = new 
StringBuffer(); 2952 try { 2953 m.appendReplacement(result, "$1"); 2954 failCount++; 2955 } catch (IllegalStateException e) { 2956 } 2957 m.find(); 2958 m.appendReplacement(result, "$1"); 2959 if (!result.toString().equals(toSupplementaries("zzzab"))) 2960 failCount++; 2961 2962 m.find(); 2963 m.find(); 2964 m.appendReplacement(result, "$2"); 2965 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 2966 failCount++; 2967 2968 m.appendTail(result); 2969 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 2970 failCount++; 2971 2972 // Check to make sure escaped $ is ignored 2973 blah = toSupplementaries("zzzabcdcdefzzz"); 2974 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2975 m = p.matcher(blah); 2976 result = new StringBuffer(); 2977 m.find(); 2978 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 2979 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 2980 failCount++; 2981 2982 m.appendTail(result); 2983 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 2984 failCount++; 2985 2986 // Check to make sure a reference to nonexistent group causes error 2987 blah = toSupplementaries("zzzabcdcdefzzz"); 2988 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2989 m = p.matcher(blah); 2990 result = new StringBuffer(); 2991 m.find(); 2992 try { 2993 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 2994 failCount++; 2995 } catch (IndexOutOfBoundsException ioobe) { 2996 // Correct result 2997 } 2998 2999 // Check double digit group references 3000 blah = toSupplementaries("zzz123456789101112zzz"); 3001 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3002 m = p.matcher(blah); 3003 result = new StringBuffer(); 3004 m.find(); 3005 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3006 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3007 failCount++; 3008 3009 // Check to make sure it backs off $15 to $1 if only three groups 
3010 blah = toSupplementaries("zzzabcdcdefzzz"); 3011 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3012 m = p.matcher(blah); 3013 result = new StringBuffer(); 3014 m.find(); 3015 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3016 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3017 failCount++; 3018 3019 // Check nothing has been appended into the output buffer if 3020 // the replacement string triggers IllegalArgumentException. 3021 p = Pattern.compile("(abc)"); 3022 m = p.matcher("abcd"); 3023 result = new StringBuffer(); 3024 m.find(); 3025 try { 3026 m.appendReplacement(result, ("xyz$g")); 3027 failCount++; 3028 } catch (IllegalArgumentException iae) { 3029 if (result.length() != 0) 3030 failCount++; 3031 } 3032 3033 report("SB Substitution"); 3034 } 3035 3036 /** 3037 * Tests the usage of Matcher.appendReplacement() with literal 3038 * and group substitutions. 3039 */ 3040 private static void stringbuilderSubstitute() throws Exception { 3041 // SB substitution with literal 3042 String blah = "zzzblahzzz"; 3043 Pattern p = Pattern.compile("blah"); 3044 Matcher m = p.matcher(blah); 3045 StringBuilder result = new StringBuilder(); 3046 try { 3047 m.appendReplacement(result, "blech"); 3048 failCount++; 3049 } catch (IllegalStateException e) { 3050 } 3051 m.find(); 3052 m.appendReplacement(result, "blech"); 3053 if (!result.toString().equals("zzzblech")) 3054 failCount++; 3055 3056 m.appendTail(result); 3057 if (!result.toString().equals("zzzblechzzz")) 3058 failCount++; 3059 3060 // SB substitution with groups 3061 blah = "zzzabcdzzz"; 3062 p = Pattern.compile("(ab)(cd)*"); 3063 m = p.matcher(blah); 3064 result = new StringBuilder(); 3065 try { 3066 m.appendReplacement(result, "$1"); 3067 failCount++; 3068 } catch (IllegalStateException e) { 3069 } 3070 m.find(); 3071 m.appendReplacement(result, "$1"); 3072 if (!result.toString().equals("zzzab")) 3073 failCount++; 3074 3075 m.appendTail(result); 3076 if 
(!result.toString().equals("zzzabzzz")) 3077 failCount++; 3078 3079 // SB substitution with 3 groups 3080 blah = "zzzabcdcdefzzz"; 3081 p = Pattern.compile("(ab)(cd)*(ef)"); 3082 m = p.matcher(blah); 3083 result = new StringBuilder(); 3084 try { 3085 m.appendReplacement(result, "$1w$2w$3"); 3086 failCount++; 3087 } catch (IllegalStateException e) { 3088 } 3089 m.find(); 3090 m.appendReplacement(result, "$1w$2w$3"); 3091 if (!result.toString().equals("zzzabwcdwef")) 3092 failCount++; 3093 3094 m.appendTail(result); 3095 if (!result.toString().equals("zzzabwcdwefzzz")) 3096 failCount++; 3097 3098 // SB substitution with groups and three matches 3099 // skipping middle match 3100 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3101 p = Pattern.compile("(ab)(cd*)"); 3102 m = p.matcher(blah); 3103 result = new StringBuilder(); 3104 try { 3105 m.appendReplacement(result, "$1"); 3106 failCount++; 3107 } catch (IllegalStateException e) { 3108 } 3109 m.find(); 3110 m.appendReplacement(result, "$1"); 3111 if (!result.toString().equals("zzzab")) 3112 failCount++; 3113 3114 m.find(); 3115 m.find(); 3116 m.appendReplacement(result, "$2"); 3117 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3118 failCount++; 3119 3120 m.appendTail(result); 3121 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3122 failCount++; 3123 3124 // Check to make sure escaped $ is ignored 3125 blah = "zzzabcdcdefzzz"; 3126 p = Pattern.compile("(ab)(cd)*(ef)"); 3127 m = p.matcher(blah); 3128 result = new StringBuilder(); 3129 m.find(); 3130 m.appendReplacement(result, "$1w\\$2w$3"); 3131 if (!result.toString().equals("zzzabw$2wef")) 3132 failCount++; 3133 3134 m.appendTail(result); 3135 if (!result.toString().equals("zzzabw$2wefzzz")) 3136 failCount++; 3137 3138 // Check to make sure a reference to nonexistent group causes error 3139 blah = "zzzabcdcdefzzz"; 3140 p = Pattern.compile("(ab)(cd)*(ef)"); 3141 m = p.matcher(blah); 3142 result = new StringBuilder(); 3143 m.find(); 3144 try { 3145 
m.appendReplacement(result, "$1w$5w$3"); 3146 failCount++; 3147 } catch (IndexOutOfBoundsException ioobe) { 3148 // Correct result 3149 } 3150 3151 // Check double digit group references 3152 blah = "zzz123456789101112zzz"; 3153 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3154 m = p.matcher(blah); 3155 result = new StringBuilder(); 3156 m.find(); 3157 m.appendReplacement(result, "$1w$11w$3"); 3158 if (!result.toString().equals("zzz1w11w3")) 3159 failCount++; 3160 3161 // Check to make sure it backs off $15 to $1 if only three groups 3162 blah = "zzzabcdcdefzzz"; 3163 p = Pattern.compile("(ab)(cd)*(ef)"); 3164 m = p.matcher(blah); 3165 result = new StringBuilder(); 3166 m.find(); 3167 m.appendReplacement(result, "$1w$15w$3"); 3168 if (!result.toString().equals("zzzabwab5wef")) 3169 failCount++; 3170 3171 3172 // Supplementary character test 3173 // SB substitution with literal 3174 blah = toSupplementaries("zzzblahzzz"); 3175 p = Pattern.compile(toSupplementaries("blah")); 3176 m = p.matcher(blah); 3177 result = new StringBuilder(); 3178 try { 3179 m.appendReplacement(result, toSupplementaries("blech")); 3180 failCount++; 3181 } catch (IllegalStateException e) { 3182 } 3183 m.find(); 3184 m.appendReplacement(result, toSupplementaries("blech")); 3185 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3186 failCount++; 3187 m.appendTail(result); 3188 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3189 failCount++; 3190 3191 // SB substitution with groups 3192 blah = toSupplementaries("zzzabcdzzz"); 3193 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3194 m = p.matcher(blah); 3195 result = new StringBuilder(); 3196 try { 3197 m.appendReplacement(result, "$1"); 3198 failCount++; 3199 } catch (IllegalStateException e) { 3200 } 3201 m.find(); 3202 m.appendReplacement(result, "$1"); 3203 if (!result.toString().equals(toSupplementaries("zzzab"))) 3204 failCount++; 3205 3206 m.appendTail(result); 3207 if 
(!result.toString().equals(toSupplementaries("zzzabzzz"))) 3208 failCount++; 3209 3210 // SB substitution with 3 groups 3211 blah = toSupplementaries("zzzabcdcdefzzz"); 3212 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3213 m = p.matcher(blah); 3214 result = new StringBuilder(); 3215 try { 3216 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3217 failCount++; 3218 } catch (IllegalStateException e) { 3219 } 3220 m.find(); 3221 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3222 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3223 failCount++; 3224 3225 m.appendTail(result); 3226 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3227 failCount++; 3228 3229 // SB substitution with groups and three matches 3230 // skipping middle match 3231 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3232 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3233 m = p.matcher(blah); 3234 result = new StringBuilder(); 3235 try { 3236 m.appendReplacement(result, "$1"); 3237 failCount++; 3238 } catch (IllegalStateException e) { 3239 } 3240 m.find(); 3241 m.appendReplacement(result, "$1"); 3242 if (!result.toString().equals(toSupplementaries("zzzab"))) 3243 failCount++; 3244 3245 m.find(); 3246 m.find(); 3247 m.appendReplacement(result, "$2"); 3248 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3249 failCount++; 3250 3251 m.appendTail(result); 3252 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3253 failCount++; 3254 3255 // Check to make sure escaped $ is ignored 3256 blah = toSupplementaries("zzzabcdcdefzzz"); 3257 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3258 m = p.matcher(blah); 3259 result = new StringBuilder(); 3260 m.find(); 3261 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3262 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3263 failCount++; 3264 3265 m.appendTail(result); 3266 if 
(!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3267 failCount++; 3268 3269 // Check to make sure a reference to nonexistent group causes error 3270 blah = toSupplementaries("zzzabcdcdefzzz"); 3271 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3272 m = p.matcher(blah); 3273 result = new StringBuilder(); 3274 m.find(); 3275 try { 3276 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3277 failCount++; 3278 } catch (IndexOutOfBoundsException ioobe) { 3279 // Correct result 3280 } 3281 // Check double digit group references 3282 blah = toSupplementaries("zzz123456789101112zzz"); 3283 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3284 m = p.matcher(blah); 3285 result = new StringBuilder(); 3286 m.find(); 3287 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3288 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3289 failCount++; 3290 3291 // Check to make sure it backs off $15 to $1 if only three groups 3292 blah = toSupplementaries("zzzabcdcdefzzz"); 3293 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3294 m = p.matcher(blah); 3295 result = new StringBuilder(); 3296 m.find(); 3297 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3298 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3299 failCount++; 3300 // Check nothing has been appended into the output buffer if 3301 // the replacement string triggers IllegalArgumentException. 3302 p = Pattern.compile("(abc)"); 3303 m = p.matcher("abcd"); 3304 result = new StringBuilder(); 3305 m.find(); 3306 try { 3307 m.appendReplacement(result, ("xyz$g")); 3308 failCount++; 3309 } catch (IllegalArgumentException iae) { 3310 if (result.length() != 0) 3311 failCount++; 3312 } 3313 report("SB Substitution 2"); 3314 } 3315 3316 /* 3317 * 5 groups of characters are created to make a substitution string. 
3318 * A base string will be created including random lead chars, the 3319 * substitution string, and random trailing chars. 3320 * A pattern containing the 5 groups is searched for and replaced with: 3321 * random group + random string + random group. 3322 * The results are checked for correctness. 3323 */ 3324 private static void substitutionBasher() { 3325 for (int runs = 0; runs<1000; runs++) { 3326 // Create a base string to work in 3327 int leadingChars = generator.nextInt(10); 3328 StringBuffer baseBuffer = new StringBuffer(100); 3329 String leadingString = getRandomAlphaString(leadingChars); 3330 baseBuffer.append(leadingString); 3331 3332 // Create 5 groups of random number of random chars 3333 // Create the string to substitute 3334 // Create the pattern string to search for 3335 StringBuffer bufferToSub = new StringBuffer(25); 3336 StringBuffer bufferToPat = new StringBuffer(50); 3337 String[] groups = new String[5]; 3338 for(int i=0; i<5; i++) { 3339 int aGroupSize = generator.nextInt(5)+1; 3340 groups[i] = getRandomAlphaString(aGroupSize); 3341 bufferToSub.append(groups[i]); 3342 bufferToPat.append('('); 3343 bufferToPat.append(groups[i]); 3344 bufferToPat.append(')'); 3345 } 3346 String stringToSub = bufferToSub.toString(); 3347 String pattern = bufferToPat.toString(); 3348 3349 // Place sub string into working string at random index 3350 baseBuffer.append(stringToSub); 3351 3352 // Append random chars to end 3353 int trailingChars = generator.nextInt(10); 3354 String trailingString = getRandomAlphaString(trailingChars); 3355 baseBuffer.append(trailingString); 3356 String baseString = baseBuffer.toString(); 3357 3358 // Create test pattern and matcher 3359 Pattern p = Pattern.compile(pattern); 3360 Matcher m = p.matcher(baseString); 3361 3362 // Reject candidate if pattern happens to start early 3363 m.find(); 3364 if (m.start() < leadingChars) 3365 continue; 3366 3367 // Reject candidate if more than one match 3368 if (m.find()) 3369 continue; 3370 
3371 // Construct a replacement string with : 3372 // random group + random string + random group 3373 StringBuffer bufferToRep = new StringBuffer(); 3374 int groupIndex1 = generator.nextInt(5); 3375 bufferToRep.append("$" + (groupIndex1 + 1)); 3376 String randomMidString = getRandomAlphaString(5); 3377 bufferToRep.append(randomMidString); 3378 int groupIndex2 = generator.nextInt(5); 3379 bufferToRep.append("$" + (groupIndex2 + 1)); 3380 String replacement = bufferToRep.toString(); 3381 3382 // Do the replacement 3383 String result = m.replaceAll(replacement); 3384 3385 // Construct expected result 3386 StringBuffer bufferToRes = new StringBuffer(); 3387 bufferToRes.append(leadingString); 3388 bufferToRes.append(groups[groupIndex1]); 3389 bufferToRes.append(randomMidString); 3390 bufferToRes.append(groups[groupIndex2]); 3391 bufferToRes.append(trailingString); 3392 String expectedResult = bufferToRes.toString(); 3393 3394 // Check results 3395 if (!result.equals(expectedResult)) 3396 failCount++; 3397 } 3398 3399 report("Substitution Basher"); 3400 } 3401 3402 /* 3403 * 5 groups of characters are created to make a substitution string. 3404 * A base string will be created including random lead chars, the 3405 * substitution string, and random trailing chars. 3406 * A pattern containing the 5 groups is searched for and replaced with: 3407 * random group + random string + random group. 3408 * The results are checked for correctness. 
3409 */ 3410 private static void substitutionBasher2() { 3411 for (int runs = 0; runs<1000; runs++) { 3412 // Create a base string to work in 3413 int leadingChars = generator.nextInt(10); 3414 StringBuilder baseBuffer = new StringBuilder(100); 3415 String leadingString = getRandomAlphaString(leadingChars); 3416 baseBuffer.append(leadingString); 3417 3418 // Create 5 groups of random number of random chars 3419 // Create the string to substitute 3420 // Create the pattern string to search for 3421 StringBuilder bufferToSub = new StringBuilder(25); 3422 StringBuilder bufferToPat = new StringBuilder(50); 3423 String[] groups = new String[5]; 3424 for(int i=0; i<5; i++) { 3425 int aGroupSize = generator.nextInt(5)+1; 3426 groups[i] = getRandomAlphaString(aGroupSize); 3427 bufferToSub.append(groups[i]); 3428 bufferToPat.append('('); 3429 bufferToPat.append(groups[i]); 3430 bufferToPat.append(')'); 3431 } 3432 String stringToSub = bufferToSub.toString(); 3433 String pattern = bufferToPat.toString(); 3434 3435 // Place sub string into working string at random index 3436 baseBuffer.append(stringToSub); 3437 3438 // Append random chars to end 3439 int trailingChars = generator.nextInt(10); 3440 String trailingString = getRandomAlphaString(trailingChars); 3441 baseBuffer.append(trailingString); 3442 String baseString = baseBuffer.toString(); 3443 3444 // Create test pattern and matcher 3445 Pattern p = Pattern.compile(pattern); 3446 Matcher m = p.matcher(baseString); 3447 3448 // Reject candidate if pattern happens to start early 3449 m.find(); 3450 if (m.start() < leadingChars) 3451 continue; 3452 3453 // Reject candidate if more than one match 3454 if (m.find()) 3455 continue; 3456 3457 // Construct a replacement string with : 3458 // random group + random string + random group 3459 StringBuilder bufferToRep = new StringBuilder(); 3460 int groupIndex1 = generator.nextInt(5); 3461 bufferToRep.append("$" + (groupIndex1 + 1)); 3462 String randomMidString = 
getRandomAlphaString(5); 3463 bufferToRep.append(randomMidString); 3464 int groupIndex2 = generator.nextInt(5); 3465 bufferToRep.append("$" + (groupIndex2 + 1)); 3466 String replacement = bufferToRep.toString(); 3467 3468 // Do the replacement 3469 String result = m.replaceAll(replacement); 3470 3471 // Construct expected result 3472 StringBuilder bufferToRes = new StringBuilder(); 3473 bufferToRes.append(leadingString); 3474 bufferToRes.append(groups[groupIndex1]); 3475 bufferToRes.append(randomMidString); 3476 bufferToRes.append(groups[groupIndex2]); 3477 bufferToRes.append(trailingString); 3478 String expectedResult = bufferToRes.toString(); 3479 3480 // Check results 3481 if (!result.equals(expectedResult)) { 3482 failCount++; 3483 } 3484 } 3485 3486 report("Substitution Basher 2"); 3487 } 3488 3489 /** 3490 * Checks the handling of some escape sequences that the Pattern 3491 * class should process instead of the java compiler. These are 3492 * not in the file because the escapes should be be processed 3493 * by the Pattern class when the regex is compiled. 3494 */ 3495 private static void escapes() throws Exception { 3496 Pattern p = Pattern.compile("\\043"); 3497 Matcher m = p.matcher("#"); 3498 if (!m.find()) 3499 failCount++; 3500 3501 p = Pattern.compile("\\x23"); 3502 m = p.matcher("#"); 3503 if (!m.find()) 3504 failCount++; 3505 3506 p = Pattern.compile("\\u0023"); 3507 m = p.matcher("#"); 3508 if (!m.find()) 3509 failCount++; 3510 3511 report("Escape sequences"); 3512 } 3513 3514 /** 3515 * Checks the handling of blank input situations. These 3516 * tests are incompatible with my test file format. 
3517 */ 3518 private static void blankInput() throws Exception { 3519 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3520 Matcher m = p.matcher(""); 3521 if (m.find()) 3522 failCount++; 3523 3524 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3525 m = p.matcher(""); 3526 if (!m.find()) 3527 failCount++; 3528 3529 p = Pattern.compile("abc"); 3530 m = p.matcher(""); 3531 if (m.find()) 3532 failCount++; 3533 3534 p = Pattern.compile("a*"); 3535 m = p.matcher(""); 3536 if (!m.find()) 3537 failCount++; 3538 3539 report("Blank input"); 3540 } 3541 3542 /** 3543 * Tests the Boyer-Moore pattern matching of a character sequence 3544 * on randomly generated patterns. 3545 */ 3546 private static void bm() throws Exception { 3547 doBnM('a'); 3548 report("Boyer Moore (ASCII)"); 3549 3550 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3551 report("Boyer Moore (Supplementary)"); 3552 } 3553 3554 private static void doBnM(int baseCharacter) throws Exception { 3555 int achar=0; 3556 3557 for (int i=0; i<100; i++) { 3558 // Create a short pattern to search for 3559 int patternLength = generator.nextInt(7) + 4; 3560 StringBuffer patternBuffer = new StringBuffer(patternLength); 3561 String pattern; 3562 retry: for (;;) { 3563 for (int x=0; x<patternLength; x++) { 3564 int ch = baseCharacter + generator.nextInt(26); 3565 if (Character.isSupplementaryCodePoint(ch)) { 3566 patternBuffer.append(Character.toChars(ch)); 3567 } else { 3568 patternBuffer.append((char)ch); 3569 } 3570 } 3571 pattern = patternBuffer.toString(); 3572 3573 // Avoid patterns that start and end with the same substring 3574 // See JDK-6854417 3575 for (int x=1; x < pattern.length(); x++) { 3576 if (pattern.startsWith(pattern.substring(x))) 3577 continue retry; 3578 } 3579 break; 3580 } 3581 Pattern p = Pattern.compile(pattern); 3582 3583 // Create a buffer with random ASCII chars that does 3584 // not match the sample 3585 String toSearch = null; 3586 StringBuffer s = null; 3587 Matcher m = 
p.matcher(""); 3588 do { 3589 s = new StringBuffer(100); 3590 for (int x=0; x<100; x++) { 3591 int ch = baseCharacter + generator.nextInt(26); 3592 if (Character.isSupplementaryCodePoint(ch)) { 3593 s.append(Character.toChars(ch)); 3594 } else { 3595 s.append((char)ch); 3596 } 3597 } 3598 toSearch = s.toString(); 3599 m.reset(toSearch); 3600 } while (m.find()); 3601 3602 // Insert the pattern at a random spot 3603 int insertIndex = generator.nextInt(99); 3604 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3605 insertIndex++; 3606 s = s.insert(insertIndex, pattern); 3607 toSearch = s.toString(); 3608 3609 // Make sure that the pattern is found 3610 m.reset(toSearch); 3611 if (!m.find()) 3612 failCount++; 3613 3614 // Make sure that the match text is the pattern 3615 if (!m.group().equals(pattern)) 3616 failCount++; 3617 3618 // Make sure match occured at insertion point 3619 if (m.start() != insertIndex) 3620 failCount++; 3621 } 3622 } 3623 3624 /** 3625 * Tests the matching of slices on randomly generated patterns. 3626 * The Boyer-Moore optimization is not done on these patterns 3627 * because it uses unicode case folding. 
3628 */ 3629 private static void slice() throws Exception { 3630 doSlice(Character.MAX_VALUE); 3631 report("Slice"); 3632 3633 doSlice(Character.MAX_CODE_POINT); 3634 report("Slice (Supplementary)"); 3635 } 3636 3637 private static void doSlice(int maxCharacter) throws Exception { 3638 Random generator = new Random(); 3639 int achar=0; 3640 3641 for (int i=0; i<100; i++) { 3642 // Create a short pattern to search for 3643 int patternLength = generator.nextInt(7) + 4; 3644 StringBuffer patternBuffer = new StringBuffer(patternLength); 3645 for (int x=0; x<patternLength; x++) { 3646 int randomChar = 0; 3647 while (!Character.isLetterOrDigit(randomChar)) 3648 randomChar = generator.nextInt(maxCharacter); 3649 if (Character.isSupplementaryCodePoint(randomChar)) { 3650 patternBuffer.append(Character.toChars(randomChar)); 3651 } else { 3652 patternBuffer.append((char) randomChar); 3653 } 3654 } 3655 String pattern = patternBuffer.toString(); 3656 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3657 3658 // Create a buffer with random chars that does not match the sample 3659 String toSearch = null; 3660 StringBuffer s = null; 3661 Matcher m = p.matcher(""); 3662 do { 3663 s = new StringBuffer(100); 3664 for (int x=0; x<100; x++) { 3665 int randomChar = 0; 3666 while (!Character.isLetterOrDigit(randomChar)) 3667 randomChar = generator.nextInt(maxCharacter); 3668 if (Character.isSupplementaryCodePoint(randomChar)) { 3669 s.append(Character.toChars(randomChar)); 3670 } else { 3671 s.append((char) randomChar); 3672 } 3673 } 3674 toSearch = s.toString(); 3675 m.reset(toSearch); 3676 } while (m.find()); 3677 3678 // Insert the pattern at a random spot 3679 int insertIndex = generator.nextInt(99); 3680 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3681 insertIndex++; 3682 s = s.insert(insertIndex, pattern); 3683 toSearch = s.toString(); 3684 3685 // Make sure that the pattern is found 3686 m.reset(toSearch); 3687 if (!m.find()) 3688 failCount++; 3689 3690 // 
Make sure that the match text is the pattern 3691 if (!m.group().equals(pattern)) 3692 failCount++; 3693 3694 // Make sure match occured at insertion point 3695 if (m.start() != insertIndex) 3696 failCount++; 3697 } 3698 } 3699 3700 private static void explainFailure(String pattern, String data, 3701 String expected, String actual) { 3702 System.err.println("----------------------------------------"); 3703 System.err.println("Pattern = "+pattern); 3704 System.err.println("Data = "+data); 3705 System.err.println("Expected = " + expected); 3706 System.err.println("Actual = " + actual); 3707 } 3708 3709 private static void explainFailure(String pattern, String data, 3710 Throwable t) { 3711 System.err.println("----------------------------------------"); 3712 System.err.println("Pattern = "+pattern); 3713 System.err.println("Data = "+data); 3714 t.printStackTrace(System.err); 3715 } 3716 3717 // Testing examples from a file 3718 3719 /** 3720 * Goes through the file "TestCases.txt" and creates many patterns 3721 * described in the file, matching the patterns against input lines in 3722 * the file, and comparing the results against the correct results 3723 * also found in the file. The file format is described in comments 3724 * at the head of the file. 3725 */ 3726 private static void processFile(String fileName) throws Exception { 3727 File testCases = new File(System.getProperty("test.src", "."), 3728 fileName); 3729 FileInputStream in = new FileInputStream(testCases); 3730 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3731 3732 // Process next test case. 
3733 String aLine; 3734 while((aLine = r.readLine()) != null) { 3735 // Read a line for pattern 3736 String patternString = grabLine(r); 3737 Pattern p = null; 3738 try { 3739 p = compileTestPattern(patternString); 3740 } catch (PatternSyntaxException e) { 3741 String dataString = grabLine(r); 3742 String expectedResult = grabLine(r); 3743 if (expectedResult.startsWith("error")) 3744 continue; 3745 explainFailure(patternString, dataString, e); 3746 failCount++; 3747 continue; 3748 } 3749 3750 // Read a line for input string 3751 String dataString = grabLine(r); 3752 Matcher m = p.matcher(dataString); 3753 StringBuffer result = new StringBuffer(); 3754 3755 // Check for IllegalStateExceptions before a match 3756 failCount += preMatchInvariants(m); 3757 3758 boolean found = m.find(); 3759 3760 if (found) 3761 failCount += postTrueMatchInvariants(m); 3762 else 3763 failCount += postFalseMatchInvariants(m); 3764 3765 if (found) { 3766 result.append("true "); 3767 result.append(m.group(0) + " "); 3768 } else { 3769 result.append("false "); 3770 } 3771 3772 result.append(m.groupCount()); 3773 3774 if (found) { 3775 for (int i=1; i<m.groupCount()+1; i++) 3776 if (m.group(i) != null) 3777 result.append(" " +m.group(i)); 3778 } 3779 3780 // Read a line for the expected result 3781 String expectedResult = grabLine(r); 3782 3783 if (!result.toString().equals(expectedResult)) { 3784 explainFailure(patternString, dataString, expectedResult, result.toString()); 3785 failCount++; 3786 } 3787 } 3788 3789 report(fileName); 3790 } 3791 3792 private static int preMatchInvariants(Matcher m) { 3793 int failCount = 0; 3794 try { 3795 m.start(); 3796 failCount++; 3797 } catch (IllegalStateException ise) {} 3798 try { 3799 m.end(); 3800 failCount++; 3801 } catch (IllegalStateException ise) {} 3802 try { 3803 m.group(); 3804 failCount++; 3805 } catch (IllegalStateException ise) {} 3806 return failCount; 3807 } 3808 3809 private static int postFalseMatchInvariants(Matcher m) { 3810 int 
failCount = 0; 3811 try { 3812 m.group(); 3813 failCount++; 3814 } catch (IllegalStateException ise) {} 3815 try { 3816 m.start(); 3817 failCount++; 3818 } catch (IllegalStateException ise) {} 3819 try { 3820 m.end(); 3821 failCount++; 3822 } catch (IllegalStateException ise) {} 3823 return failCount; 3824 } 3825 3826 private static int postTrueMatchInvariants(Matcher m) { 3827 int failCount = 0; 3828 //assert(m.start() = m.start(0); 3829 if (m.start() != m.start(0)) 3830 failCount++; 3831 //assert(m.end() = m.end(0); 3832 if (m.start() != m.start(0)) 3833 failCount++; 3834 //assert(m.group() = m.group(0); 3835 if (!m.group().equals(m.group(0))) 3836 failCount++; 3837 try { 3838 m.group(50); 3839 failCount++; 3840 } catch (IndexOutOfBoundsException ise) {} 3841 3842 return failCount; 3843 } 3844 3845 private static Pattern compileTestPattern(String patternString) { 3846 if (!patternString.startsWith("'")) { 3847 return Pattern.compile(patternString); 3848 } 3849 3850 int break1 = patternString.lastIndexOf("'"); 3851 String flagString = patternString.substring( 3852 break1+1, patternString.length()); 3853 patternString = patternString.substring(1, break1); 3854 3855 if (flagString.equals("i")) 3856 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3857 3858 if (flagString.equals("m")) 3859 return Pattern.compile(patternString, Pattern.MULTILINE); 3860 3861 return Pattern.compile(patternString); 3862 } 3863 3864 /** 3865 * Reads a line from the input file. Keeps reading lines until a non 3866 * empty non comment line is read. If the line contains a \n then 3867 * these two characters are replaced by a newline char. If a \\uxxxx 3868 * sequence is read then the sequence is replaced by the unicode char. 
3869 */ 3870 private static String grabLine(BufferedReader r) throws Exception { 3871 int index = 0; 3872 String line = r.readLine(); 3873 while (line.startsWith("//") || line.length() < 1) 3874 line = r.readLine(); 3875 while ((index = line.indexOf("\\n")) != -1) { 3876 StringBuffer temp = new StringBuffer(line); 3877 temp.replace(index, index+2, "\n"); 3878 line = temp.toString(); 3879 } 3880 while ((index = line.indexOf("\\u")) != -1) { 3881 StringBuffer temp = new StringBuffer(line); 3882 String value = temp.substring(index+2, index+6); 3883 char aChar = (char)Integer.parseInt(value, 16); 3884 String unicodeChar = "" + aChar; 3885 temp.replace(index, index+6, unicodeChar); 3886 line = temp.toString(); 3887 } 3888 3889 return line; 3890 } 3891 3892 private static void check(Pattern p, String s, String g, String expected) { 3893 Matcher m = p.matcher(s); 3894 m.find(); 3895 if (!m.group(g).equals(expected) || 3896 s.charAt(m.start(g)) != expected.charAt(0) || 3897 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 3898 failCount++; 3899 } 3900 3901 private static void checkReplaceFirst(String p, String s, String r, String expected) 3902 { 3903 if (!expected.equals(Pattern.compile(p) 3904 .matcher(s) 3905 .replaceFirst(r))) 3906 failCount++; 3907 } 3908 3909 private static void checkReplaceAll(String p, String s, String r, String expected) 3910 { 3911 if (!expected.equals(Pattern.compile(p) 3912 .matcher(s) 3913 .replaceAll(r))) 3914 failCount++; 3915 } 3916 3917 private static void checkExpectedFail(String p) { 3918 try { 3919 Pattern.compile(p); 3920 } catch (PatternSyntaxException pse) { 3921 //pse.printStackTrace(); 3922 return; 3923 } 3924 failCount++; 3925 } 3926 3927 private static void checkExpectedIAE(Matcher m, String g) { 3928 m.find(); 3929 try { 3930 m.group(g); 3931 } catch (IllegalArgumentException x) { 3932 //iae.printStackTrace(); 3933 try { 3934 m.start(g); 3935 } catch (IllegalArgumentException xx) { 3936 try { 3937 m.start(g); 
3938 } catch (IllegalArgumentException xxx) { 3939 return; 3940 } 3941 } 3942 } 3943 failCount++; 3944 } 3945 3946 private static void checkExpectedNPE(Matcher m) { 3947 m.find(); 3948 try { 3949 m.group(null); 3950 } catch (NullPointerException x) { 3951 try { 3952 m.start(null); 3953 } catch (NullPointerException xx) { 3954 try { 3955 m.end(null); 3956 } catch (NullPointerException xxx) { 3957 return; 3958 } 3959 } 3960 } 3961 failCount++; 3962 } 3963 3964 private static void namedGroupCaptureTest() throws Exception { 3965 check(Pattern.compile("x+(?<gname>y+)z+"), 3966 "xxxyyyzzz", 3967 "gname", 3968 "yyy"); 3969 3970 check(Pattern.compile("x+(?<gname8>y+)z+"), 3971 "xxxyyyzzz", 3972 "gname8", 3973 "yyy"); 3974 3975 //backref 3976 Pattern pattern = Pattern.compile("(a*)bc\\1"); 3977 check(pattern, "zzzaabcazzz", true); // found "abca" 3978 3979 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 3980 "zzzaabcaazzz", true); 3981 3982 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 3983 "abcdefabc", true); 3984 3985 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 3986 "abcdefghijkk", true); 3987 3988 // Supplementary character tests 3989 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3990 toSupplementaries("zzzaabcazzz"), true); 3991 3992 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3993 toSupplementaries("zzzaabcaazzz"), true); 3994 3995 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 3996 toSupplementaries("abcdefabc"), true); 3997 3998 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 3999 "(?<gname>" + 4000 toSupplementaries("k)") + "\\k<gname>"), 4001 toSupplementaries("abcdefghijkk"), true); 4002 4003 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 4004 "xxxyyyzzzyyy", 4005 "gname", 4006 "yyy"); 4007 4008 //replaceFirst/All 4009 checkReplaceFirst("(?<gn>ab)(c*)", 4010 
"abccczzzabcczzzabccc", 4011 "${gn}", 4012 "abzzzabcczzzabccc"); 4013 4014 checkReplaceAll("(?<gn>ab)(c*)", 4015 "abccczzzabcczzzabccc", 4016 "${gn}", 4017 "abzzzabzzzab"); 4018 4019 4020 checkReplaceFirst("(?<gn>ab)(c*)", 4021 "zzzabccczzzabcczzzabccczzz", 4022 "${gn}", 4023 "zzzabzzzabcczzzabccczzz"); 4024 4025 checkReplaceAll("(?<gn>ab)(c*)", 4026 "zzzabccczzzabcczzzabccczzz", 4027 "${gn}", 4028 "zzzabzzzabzzzabzzz"); 4029 4030 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4031 "zzzabccczzzabcczzzabccczzz", 4032 "${gn2}", 4033 "zzzccczzzabcczzzabccczzz"); 4034 4035 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4036 "zzzabccczzzabcczzzabccczzz", 4037 "${gn2}", 4038 "zzzccczzzcczzzccczzz"); 4039 4040 //toSupplementaries("(ab)(c*)")); 4041 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4042 ")(?<gn2>" + toSupplementaries("c") + "*)", 4043 toSupplementaries("abccczzzabcczzzabccc"), 4044 "${gn1}", 4045 toSupplementaries("abzzzabcczzzabccc")); 4046 4047 4048 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4049 ")(?<gn2>" + toSupplementaries("c") + "*)", 4050 toSupplementaries("abccczzzabcczzzabccc"), 4051 "${gn1}", 4052 toSupplementaries("abzzzabzzzab")); 4053 4054 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4055 ")(?<gn2>" + toSupplementaries("c") + "*)", 4056 toSupplementaries("abccczzzabcczzzabccc"), 4057 "${gn2}", 4058 toSupplementaries("ccczzzabcczzzabccc")); 4059 4060 4061 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4062 ")(?<gn2>" + toSupplementaries("c") + "*)", 4063 toSupplementaries("abccczzzabcczzzabccc"), 4064 "${gn2}", 4065 toSupplementaries("ccczzzcczzzccc")); 4066 4067 checkReplaceFirst("(?<dog>Dog)AndCat", 4068 "zzzDogAndCatzzzDogAndCatzzz", 4069 "${dog}", 4070 "zzzDogzzzDogAndCatzzz"); 4071 4072 4073 checkReplaceAll("(?<dog>Dog)AndCat", 4074 "zzzDogAndCatzzzDogAndCatzzz", 4075 "${dog}", 4076 "zzzDogzzzDogzzz"); 4077 4078 // backref in Matcher & String 4079 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", 
"${gn}").equals("abefij") || 4080 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4081 failCount++; 4082 4083 // negative 4084 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4085 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4086 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4087 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4088 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4089 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4090 "gnameX"); 4091 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4092 report("NamedGroupCapture"); 4093 } 4094 4095 // This is for bug 6969132 4096 private static void nonBmpClassComplementTest() throws Exception { 4097 Pattern p = Pattern.compile("\\P{Lu}"); 4098 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4099 if (m.find() && m.start() == 1) 4100 failCount++; 4101 4102 // from a unicode category 4103 p = Pattern.compile("\\P{Lu}"); 4104 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4105 if (m.find()) 4106 failCount++; 4107 if (!m.hitEnd()) 4108 failCount++; 4109 4110 // block 4111 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4112 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4113 if (m.find() && m.start() == 1) 4114 failCount++; 4115 4116 report("NonBmpClassComplement"); 4117 } 4118 4119 private static void unicodePropertiesTest() throws Exception { 4120 // different forms 4121 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4122 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4123 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4124 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4125 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4126 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4127 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4128 
!Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4129 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4130 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4131 failCount++; 4132 4133 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4134 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4135 Matcher lastSM = common; 4136 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4137 4138 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4139 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4140 Matcher lastBM = latin; 4141 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4142 4143 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4144 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4145 continue; // only pick couple code points, they are the same 4146 } 4147 4148 // Unicode Script 4149 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4150 Matcher m; 4151 String str = new String(Character.toChars(cp)); 4152 if (script == lastScript) { 4153 m = lastSM; 4154 m.reset(str); 4155 } else { 4156 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4157 } 4158 if (!m.matches()) { 4159 failCount++; 4160 } 4161 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common; 4162 other.reset(str); 4163 if (other.matches()) { 4164 failCount++; 4165 } 4166 lastSM = m; 4167 lastScript = script; 4168 4169 // Unicode Block 4170 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4171 if (block == null) { 4172 //System.out.printf("Not a Block: cp=%x%n", cp); 4173 continue; 4174 } 4175 if (block == lastBlock) { 4176 m = lastBM; 4177 m.reset(str); 4178 } else { 4179 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4180 } 4181 if (!m.matches()) { 4182 failCount++; 4183 } 4184 other = (block == Character.UnicodeBlock.BASIC_LATIN)? 
greek : latin; 4185 other.reset(str); 4186 if (other.matches()) { 4187 failCount++; 4188 } 4189 lastBM = m; 4190 lastBlock = block; 4191 } 4192 report("unicodeProperties"); 4193 } 4194 4195 private static void unicodeHexNotationTest() throws Exception { 4196 4197 // negative 4198 checkExpectedFail("\\x{-23}"); 4199 checkExpectedFail("\\x{110000}"); 4200 checkExpectedFail("\\x{}"); 4201 checkExpectedFail("\\x{AB[ef]"); 4202 4203 // codepoint 4204 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4205 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4206 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4207 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4208 4209 // in class 4210 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4211 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4212 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4213 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4214 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4215 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4216 4217 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4218 String s = "A" + new String(Character.toChars(cp)) + "B"; 4219 String hexUTF16 = (cp <= 0xFFFF)? 
String.format("\\u%04x", cp) 4220 : String.format("\\u%04x\\u%04x", 4221 (int) Character.toChars(cp)[0], 4222 (int) Character.toChars(cp)[1]); 4223 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4224 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4225 failCount++; 4226 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4227 failCount++; 4228 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4229 failCount++; 4230 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4231 failCount++; 4232 } 4233 report("unicodeHexNotation"); 4234 } 4235 4236 private static void unicodeClassesTest() throws Exception { 4237 4238 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4239 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4240 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4241 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4242 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4243 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4244 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4245 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4246 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4247 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4248 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4249 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4250 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4251 Matcher bound = Pattern.compile("\\b").matcher(""); 4252 Matcher word = Pattern.compile("\\w++").matcher(""); 4253 // UNICODE_CHARACTER_CLASS 4254 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4255 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4256 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4257 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 
4258 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4259 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4260 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4261 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4262 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4263 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4264 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4265 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4266 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4267 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4268 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4269 // embedded flag (?U) 4270 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4271 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4272 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4273 4274 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4275 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4276 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4277 // properties 4278 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4279 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4280 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4281 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4282 Matcher alphaP = 
Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4283 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4284 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4285 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4286 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4287 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4288 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4289 4290 // javaMethod 4291 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4292 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4293 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4294 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4295 4296 for (int cp = 1; cp < 0x30000; cp++) { 4297 String str = new String(Character.toChars(cp)); 4298 int type = Character.getType(cp); 4299 if (// lower 4300 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4301 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4302 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4303 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4304 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4305 // upper 4306 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4307 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4308 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4309 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4310 // alpha 4311 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4312 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4313 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4314 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4315 // digit 4316 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4317 Character.isDigit(cp) != digitU.reset(str).matches() || 4318 // alnum 4319 
POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4320 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4321 // punct 4322 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4323 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4324 // graph 4325 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4326 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4327 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4328 // blank 4329 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4330 != blank.reset(str).matches() || 4331 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4332 // print 4333 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4334 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4335 // cntrl 4336 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4337 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4338 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4339 // hexdigit 4340 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4341 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4342 // space 4343 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4344 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4345 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4346 // word 4347 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4348 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4349 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4350 // bwordb 4351 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4352 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4353 // properties 4354 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4355 Character.isLetter(cp) != letterP.reset(str).matches()|| 4356 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4357 Character.isIdeographic(cp) != ideogJ.reset(str).matches() || 4358 (Character.UNASSIGNED == type) == 
definedP.reset(str).matches() || 4359 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4360 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches()) 4361 failCount++; 4362 } 4363 4364 // bounds/word align 4365 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4366 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4367 failCount++; 4368 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4369 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4370 failCount++; 4371 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4372 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4373 failCount++; 4374 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4375 failCount++; 4376 report("unicodePredefinedClasses"); 4377 } 4378 4379 private static void unicodeCharacterNameTest() throws Exception { 4380 4381 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { 4382 if (!Character.isValidCodePoint(cp) || 4383 Character.getType(cp) == Character.UNASSIGNED) 4384 continue; 4385 String str = new String(Character.toChars(cp)); 4386 // single 4387 String p = "\\N{" + Character.getName(cp) + "}"; 4388 if (!Pattern.compile(p).matcher(str).matches()) { 4389 failCount++; 4390 } 4391 // class[c] 4392 p = "[\\N{" + Character.getName(cp) + "}]"; 4393 if (!Pattern.compile(p).matcher(str).matches()) { 4394 failCount++; 4395 } 4396 } 4397 4398 // range 4399 for (int i = 0; i < 10; i++) { 4400 int start = generator.nextInt(20); 4401 int end = start + generator.nextInt(200); 4402 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; 4403 String str; 4404 for (int cp = start; cp < end; cp++) { 4405 str = new String(Character.toChars(cp)); 4406 if (!Pattern.compile(p).matcher(str).matches()) { 4407 failCount++; 4408 } 4409 } 4410 str = new String(Character.toChars(end + 10)); 4411 if (Pattern.compile(p).matcher(str).matches()) { 4412 failCount++; 4413 } 4414 } 4415 4416 // slice 4417 for (int i = 0; i < 10; 
i++) { 4418 int n = generator.nextInt(256); 4419 int[] buf = new int[n]; 4420 StringBuffer sb = new StringBuffer(1024); 4421 for (int j = 0; j < n; j++) { 4422 int cp = generator.nextInt(1000); 4423 if (!Character.isValidCodePoint(cp) || 4424 Character.getType(cp) == Character.UNASSIGNED) 4425 cp = 0x4e00; // just use 4e00 4426 sb.append("\\N{" + Character.getName(cp) + "}"); 4427 buf[j] = cp; 4428 } 4429 String p = sb.toString(); 4430 String str = new String(buf, 0, buf.length); 4431 if (!Pattern.compile(p).matcher(str).matches()) { 4432 failCount++; 4433 } 4434 } 4435 report("unicodeCharacterName"); 4436 } 4437 4438 private static void horizontalAndVerticalWSTest() throws Exception { 4439 String hws = new String (new char[] { 4440 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4441 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4442 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4443 0x202f, 0x205f, 0x3000 }); 4444 String vws = new String (new char[] { 4445 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4446 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4447 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4448 failCount++; 4449 if (Pattern.compile("\\H").matcher(hws).find() || 4450 Pattern.compile("[\\H]").matcher(hws).find()) 4451 failCount++; 4452 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4453 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4454 failCount++; 4455 if (Pattern.compile("\\V").matcher(vws).find() || 4456 Pattern.compile("[\\V]").matcher(vws).find()) 4457 failCount++; 4458 String prefix = "abcd"; 4459 String suffix = "efgh"; 4460 String ng = "A"; 4461 for (int i = 0; i < hws.length(); i++) { 4462 String c = String.valueOf(hws.charAt(i)); 4463 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4464 if (!m.find() || !c.equals(m.group())) 4465 failCount++; 4466 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4467 if (!m.find() || !c.equals(m.group())) 4468 failCount++; 4469 4470 m = 
Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4471 if (!m.find() || !ng.equals(m.group())) 4472 failCount++; 4473 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4474 if (!m.find() || !ng.equals(m.group())) 4475 failCount++; 4476 } 4477 for (int i = 0; i < vws.length(); i++) { 4478 String c = String.valueOf(vws.charAt(i)); 4479 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4480 if (!m.find() || !c.equals(m.group())) 4481 failCount++; 4482 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4483 if (!m.find() || !c.equals(m.group())) 4484 failCount++; 4485 4486 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4487 if (!m.find() || !ng.equals(m.group())) 4488 failCount++; 4489 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4490 if (!m.find() || !ng.equals(m.group())) 4491 failCount++; 4492 } 4493 // \v in range is interpreted as 0x0B. 
This is the undocumented behavior 4494 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4495 failCount++; 4496 report("horizontalAndVerticalWSTest"); 4497 } 4498 4499 private static void linebreakTest() throws Exception { 4500 String linebreaks = new String (new char[] { 4501 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4502 String crnl = "\r\n"; 4503 if (!Pattern.compile("\\R+").matcher(linebreaks).matches() || 4504 !Pattern.compile("\\R").matcher(crnl).matches() || 4505 Pattern.compile("\\R\\R").matcher(crnl).matches()) 4506 failCount++; 4507 report("linebreakTest"); 4508 } 4509 4510 // #7189363 4511 private static void branchTest() throws Exception { 4512 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4513 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4514 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4515 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4516 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4517 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4518 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4519 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4520 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4521 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4522 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4523 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4524 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4525 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4526 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4527 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4528 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4529 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4530 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4531 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4532 
!Pattern.compile("(a)?bc|de").matcher("de").matches() || 4533 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4534 failCount++; 4535 report("branchTest"); 4536 } 4537 4538 // This test is for 8007395 4539 private static void groupCurlyNotFoundSuppTest() throws Exception { 4540 String input = "test this as \ud83d\ude0d"; 4541 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4542 "test(.)*(@[a-zA-Z.]+)", 4543 "test([^B])+(@[a-zA-Z.]+)", 4544 "test([^B])*(@[a-zA-Z.]+)", 4545 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4546 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4547 }) { 4548 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4549 .matcher(input); 4550 try { 4551 if (m.find()) { 4552 failCount++; 4553 } 4554 } catch (Exception x) { 4555 failCount++; 4556 } 4557 } 4558 report("GroupCurly NotFoundSupp"); 4559 } 4560 4561 // This test is for 8023647 4562 private static void groupCurlyBackoffTest() throws Exception { 4563 if (!"abc1c".matches("(\\w)+1\\1") || 4564 "abc11".matches("(\\w)+1\\1")) { 4565 failCount++; 4566 } 4567 report("GroupCurly backoff"); 4568 } 4569 4570 // This test is for 8012646 4571 private static void patternAsPredicate() throws Exception { 4572 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4573 4574 if (p.test("")) { 4575 failCount++; 4576 } 4577 if (!p.test("word")) { 4578 failCount++; 4579 } 4580 if (p.test("1234")) { 4581 failCount++; 4582 } 4583 report("Pattern.asPredicate"); 4584 } 4585 4586 // This test is for 8035975 4587 private static void invalidFlags() throws Exception { 4588 for (int flag = 1; flag != 0; flag <<= 1) { 4589 switch (flag) { 4590 case Pattern.CASE_INSENSITIVE: 4591 case Pattern.MULTILINE: 4592 case Pattern.DOTALL: 4593 case Pattern.UNICODE_CASE: 4594 case Pattern.CANON_EQ: 4595 case Pattern.UNIX_LINES: 4596 case Pattern.LITERAL: 4597 case Pattern.UNICODE_CHARACTER_CLASS: 4598 case Pattern.COMMENTS: 4599 // valid flag, continue 4600 break; 4601 default: 4602 try { 4603 
Pattern.compile(".", flag); 4604 failCount++; 4605 } catch (IllegalArgumentException expected) { 4606 } 4607 } 4608 } 4609 report("Invalid compile flags"); 4610 } 4611 4612 private static void grapheme() throws Exception { 4613 Files.lines(Paths.get(System.getProperty("test.src", "."), 4614 "GraphemeBreakTest.txt")) 4615 .filter( ln -> ln.length() != 0 && !ln.startsWith("#") ) 4616 .forEach( ln -> { 4617 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); 4618 // System.out.println(str); 4619 String[] strs = ln.split("\u00f7|\u00d7"); 4620 StringBuilder src = new StringBuilder(); 4621 ArrayList<String> graphemes = new ArrayList<>(); 4622 StringBuilder buf = new StringBuilder(); 4623 int offBk = 0; 4624 for (String str : strs) { 4625 if (str.length() == 0) // first empty str 4626 continue; 4627 int cp = Integer.parseInt(str, 16); 4628 src.appendCodePoint(cp); 4629 buf.appendCodePoint(cp); 4630 offBk += (str.length() + 1); 4631 if (ln.charAt(offBk) == '\u00f7') { // DIV 4632 graphemes.add(buf.toString()); 4633 buf = new StringBuilder(); 4634 } 4635 } 4636 Pattern p = Pattern.compile("\\X"); 4637 Matcher m = p.matcher(src.toString()); 4638 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4639 for (String g : graphemes) { 4640 // System.out.printf(" grapheme:=[%s]%n", g); 4641 // (1) test \\X directly 4642 if (!m.find() || !m.group().equals(g)) { 4643 System.out.println("Failed \\X [" + ln + "] : " + g); 4644 failCount++; 4645 } 4646 // (2) test \\b{g} + \\X via Scanner 4647 boolean hasNext = s.hasNext(p); 4648 // if (!s.hasNext() || !s.next().equals(next)) { 4649 if (!s.hasNext(p) || !s.next(p).equals(g)) { 4650 System.out.println("Failed b{g} [" + ln + "] : " + g); 4651 failCount++; 4652 } 4653 } 4654 }); 4655 // some sanity checks 4656 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() || 4657 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() || 4658 
!Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches()) 4659 failCount++; 4660 // make sure "\b{n}" still works 4661 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches()) 4662 failCount++; 4663 report("Unicode extended grapheme cluster"); 4664 } 4665 }