1 /* 2 * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework (use -Dseed=X to set PRNG seed) 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819 36 * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 37 * @library /lib/testlibrary 38 * @build jdk.testlibrary.* 39 * @run main RegExTest 40 * @key randomness 41 */ 42 43 import java.util.function.Function; 44 import java.util.regex.*; 45 import java.util.Random; 46 import java.util.Scanner; 47 import java.io.*; 48 import java.nio.file.*; 49 import java.util.*; 50 import java.nio.CharBuffer; 51 import java.util.function.Predicate; 52 import jdk.testlibrary.RandomFactory; 53 54 /** 55 * This is a test class created to check the operation of 56 * the Pattern and Matcher classes. 57 */ 58 public class RegExTest { 59 60 private static Random generator = RandomFactory.getRandom(); 61 private static boolean failure = false; 62 private static int failCount = 0; 63 private static String firstFailure = null; 64 65 /** 66 * Main to interpret arguments and run several tests. 67 * 68 */ 69 public static void main(String[] args) throws Exception { 70 // Most of the tests are in a file 71 processFile("TestCases.txt"); 72 //processFile("PerlCases.txt"); 73 processFile("BMPTestCases.txt"); 74 processFile("SupplementaryTestCases.txt"); 75 76 // These test many randomly generated char patterns 77 bm(); 78 slice(); 79 80 // These are hard to put into the file 81 escapes(); 82 blankInput(); 83 84 // Substitition tests on randomly generated sequences 85 globalSubstitute(); 86 stringbufferSubstitute(); 87 stringbuilderSubstitute(); 88 89 substitutionBasher(); 90 substitutionBasher2(); 91 92 // Canonical Equivalence 93 ceTest(); 94 95 // Anchors 96 anchorTest(); 97 98 // boolean match calls 99 matchesTest(); 100 lookingAtTest(); 101 102 // Pattern API 103 patternMatchesTest(); 104 105 // Misc 106 lookbehindTest(); 107 nullArgumentTest(); 108 backRefTest(); 109 groupCaptureTest(); 110 caretTest(); 111 charClassTest(); 112 emptyPatternTest(); 113 findIntTest(); 114 group0Test(); 115 longPatternTest(); 116 octalTest(); 117 ampersandTest(); 118 negationTest(); 119 splitTest(); 120 appendTest(); 121 caseFoldingTest(); 122 commentsTest(); 123 unixLinesTest(); 124 replaceFirstTest(); 125 gTest(); 126 zTest(); 127 serializeTest(); 128 reluctantRepetitionTest(); 129 multilineDollarTest(); 130 dollarAtEndTest(); 131 caretBetweenTerminatorsTest(); 132 // This RFE rejected in Tiger numOccurrencesTest(); 133 javaCharClassTest(); 134 nonCaptureRepetitionTest(); 135 notCapturedGroupCurlyMatchTest(); 136 escapedSegmentTest(); 137 literalPatternTest(); 138 literalReplacementTest(); 139 regionTest(); 140 toStringTest(); 141 negatedCharClassTest(); 142 findFromTest(); 143 boundsTest(); 144 unicodeWordBoundsTest(); 145 caretAtEndTest(); 146 wordSearchTest(); 147 hitEndTest(); 148 toMatchResultTest(); 149 toMatchResultTest2(); 150 surrogatesInClassTest(); 151 removeQEQuotingTest(); 152 namedGroupCaptureTest(); 153 nonBmpClassComplementTest(); 154 unicodePropertiesTest(); 155 unicodeHexNotationTest(); 156 unicodeClassesTest(); 157 unicodeCharacterNameTest(); 158 horizontalAndVerticalWSTest(); 159 linebreakTest(); 160 branchTest(); 161 groupCurlyNotFoundSuppTest(); 162 groupCurlyBackoffTest(); 163 patternAsPredicate(); 164 invalidFlags(); 165 grapheme(); 166 167 if (failure) { 168 throw new 169 RuntimeException("RegExTest failed, 1st failure: " + 170 firstFailure); 171 } else { 172 System.err.println("OKAY: All tests passed."); 173 } 174 } 175 176 // Utility functions 177 178 private static String getRandomAlphaString(int length) { 179 StringBuffer buf = new StringBuffer(length); 180 for (int i=0; i<length; i++) { 181 char randChar = (char)(97 + generator.nextInt(26)); 182 buf.append(randChar); 183 } 184 return buf.toString(); 185 } 186 187 private static void check(Matcher m, String expected) { 188 m.find(); 189 if (!m.group().equals(expected)) 190 failCount++; 191 } 192 193 private static void check(Matcher m, String result, boolean expected) { 194 m.find(); 195 if (m.group().equals(result) != expected) 196 failCount++; 197 } 198 199 private static void check(Pattern p, String s, boolean expected) { 200 if (p.matcher(s).find() != expected) 201 failCount++; 202 } 203 204 private static void check(String p, String s, boolean expected) { 205 Matcher matcher = Pattern.compile(p).matcher(s); 206 if (matcher.find() != expected) 207 failCount++; 208 } 209 210 private static void check(String p, char c, boolean expected) { 211 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 212 Pattern pattern = Pattern.compile(propertyPattern); 213 char[] ca = new char[1]; ca[0] = c; 214 Matcher matcher = pattern.matcher(new String(ca)); 215 if (!matcher.find()) 216 failCount++; 217 } 218 219 private static void check(String p, int codePoint, boolean expected) { 220 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 221 Pattern pattern = Pattern.compile(propertyPattern); 222 char[] ca = Character.toChars(codePoint); 223 Matcher matcher = pattern.matcher(new String(ca)); 224 if (!matcher.find()) 225 failCount++; 226 } 227 228 private static void check(String p, int flag, String input, String s, 229 boolean expected) 230 { 231 Pattern pattern = Pattern.compile(p, flag); 232 Matcher matcher = pattern.matcher(input); 233 if (expected) 234 check(matcher, s, expected); 235 else 236 check(pattern, input, false); 237 } 238 239 private static void report(String testName) { 240 int spacesToAdd = 30 - testName.length(); 241 StringBuffer paddedNameBuffer = new StringBuffer(testName); 242 for (int i=0; i<spacesToAdd; i++) 243 paddedNameBuffer.append(" "); 244 String paddedName = paddedNameBuffer.toString(); 245 System.err.println(paddedName + ": " + 246 (failCount==0 ? "Passed":"Failed("+failCount+")")); 247 if (failCount > 0) { 248 failure = true; 249 250 if (firstFailure == null) { 251 firstFailure = testName; 252 } 253 } 254 255 failCount = 0; 256 } 257 258 /** 259 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 260 * supplementary characters. This method does NOT fully take care 261 * of the regex syntax. 262 */ 263 private static String toSupplementaries(String s) { 264 int length = s.length(); 265 StringBuffer sb = new StringBuffer(length * 2); 266 267 for (int i = 0; i < length; ) { 268 char c = s.charAt(i++); 269 if (c == '\\') { 270 sb.append(c); 271 if (i < length) { 272 c = s.charAt(i++); 273 sb.append(c); 274 if (c == 'u') { 275 // assume no syntax error 276 sb.append(s.charAt(i++)); 277 sb.append(s.charAt(i++)); 278 sb.append(s.charAt(i++)); 279 sb.append(s.charAt(i++)); 280 } 281 } 282 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 283 sb.append('\ud800').append((char)('\udc00'+c)); 284 } else { 285 sb.append(c); 286 } 287 } 288 return sb.toString(); 289 } 290 291 // Regular expression tests 292 293 // This is for bug 6178785 294 // Test if an expected NPE gets thrown when passing in a null argument 295 private static boolean check(Runnable test) { 296 try { 297 test.run(); 298 failCount++; 299 return false; 300 } catch (NullPointerException npe) { 301 return true; 302 } 303 } 304 305 private static void nullArgumentTest() { 306 check(() -> Pattern.compile(null)); 307 check(() -> Pattern.matches(null, null)); 308 check(() -> Pattern.matches("xyz", null)); 309 check(() -> Pattern.quote(null)); 310 check(() -> Pattern.compile("xyz").split(null)); 311 check(() -> Pattern.compile("xyz").matcher(null)); 312 313 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 314 m.matches(); 315 check(() -> m.appendTail((StringBuffer) null)); 316 check(() -> m.appendTail((StringBuilder)null)); 317 check(() -> m.replaceAll((String) null)); 318 check(() -> m.replaceAll((Function<MatchResult, String>)null)); 319 check(() -> m.replaceFirst((String)null)); 320 check(() -> m.replaceFirst((Function<MatchResult, String>) null)); 321 check(() -> m.appendReplacement((StringBuffer)null, null)); 322 check(() -> m.appendReplacement((StringBuilder)null, null)); 323 check(() -> m.reset(null)); 324 check(() -> Matcher.quoteReplacement(null)); 325 //check(() -> m.usePattern(null)); 326 327 report("Null Argument"); 328 } 329 330 // This is for bug6635133 331 // Test if surrogate pair in Unicode escapes can be handled correctly. 332 private static void surrogatesInClassTest() throws Exception { 333 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 334 Matcher matcher = pattern.matcher("\ud834\udd22"); 335 if (!matcher.find()) 336 failCount++; 337 338 report("Surrogate pair in Unicode escape"); 339 } 340 341 // This is for bug6990617 342 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 343 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 344 // char is an octal digit. 345 private static void removeQEQuotingTest() throws Exception { 346 Pattern pattern = 347 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 348 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 349 if (!matcher.find()) 350 failCount++; 351 352 report("Remove Q/E Quoting"); 353 } 354 355 // This is for bug 4988891 356 // Test toMatchResult to see that it is a copy of the Matcher 357 // that is not affected by subsequent operations on the original 358 private static void toMatchResultTest() throws Exception { 359 Pattern pattern = Pattern.compile("squid"); 360 Matcher matcher = pattern.matcher( 361 "agiantsquidofdestinyasmallsquidoffate"); 362 matcher.find(); 363 int matcherStart1 = matcher.start(); 364 MatchResult mr = matcher.toMatchResult(); 365 if (mr == matcher) 366 failCount++; 367 int resultStart1 = mr.start(); 368 if (matcherStart1 != resultStart1) 369 failCount++; 370 matcher.find(); 371 int matcherStart2 = matcher.start(); 372 int resultStart2 = mr.start(); 373 if (matcherStart2 == resultStart2) 374 failCount++; 375 if (resultStart1 != resultStart2) 376 failCount++; 377 MatchResult mr2 = matcher.toMatchResult(); 378 if (mr == mr2) 379 failCount++; 380 if (mr2.start() != matcherStart2) 381 failCount++; 382 report("toMatchResult is a copy"); 383 } 384 385 private static void checkExpectedISE(Runnable test) { 386 try { 387 test.run(); 388 failCount++; 389 } catch (IllegalStateException x) { 390 } catch (IndexOutOfBoundsException xx) { 391 failCount++; 392 } 393 } 394 395 private static void checkExpectedIOOE(Runnable test) { 396 try { 397 test.run(); 398 failCount++; 399 } catch (IndexOutOfBoundsException x) {} 400 } 401 402 // This is for bug 8074678 403 // Test the result of toMatchResult throws ISE if no match is availble 404 private static void toMatchResultTest2() throws Exception { 405 Matcher matcher = Pattern.compile("nomatch").matcher("hello world"); 406 matcher.find(); 407 MatchResult mr = matcher.toMatchResult(); 408 409 checkExpectedISE(() -> mr.start()); 410 checkExpectedISE(() -> mr.start(2)); 411 checkExpectedISE(() -> mr.end()); 412 checkExpectedISE(() -> mr.end(2)); 413 checkExpectedISE(() -> mr.group()); 414 checkExpectedISE(() -> mr.group(2)); 415 416 matcher = Pattern.compile("(match)").matcher("there is a match"); 417 matcher.find(); 418 MatchResult mr2 = matcher.toMatchResult(); 419 checkExpectedIOOE(() -> mr2.start(2)); 420 checkExpectedIOOE(() -> mr2.end(2)); 421 checkExpectedIOOE(() -> mr2.group(2)); 422 423 report("toMatchResult2 appropriate exceptions"); 424 } 425 426 // This is for bug 5013885 427 // Must test a slice to see if it reports hitEnd correctly 428 private static void hitEndTest() throws Exception { 429 // Basic test of Slice node 430 Pattern p = Pattern.compile("^squidattack"); 431 Matcher m = p.matcher("squack"); 432 m.find(); 433 if (m.hitEnd()) 434 failCount++; 435 m.reset("squid"); 436 m.find(); 437 if (!m.hitEnd()) 438 failCount++; 439 440 // Test Slice, SliceA and SliceU nodes 441 for (int i=0; i<3; i++) { 442 int flags = 0; 443 if (i==1) flags = Pattern.CASE_INSENSITIVE; 444 if (i==2) flags = Pattern.UNICODE_CASE; 445 p = Pattern.compile("^abc", flags); 446 m = p.matcher("ad"); 447 m.find(); 448 if (m.hitEnd()) 449 failCount++; 450 m.reset("ab"); 451 m.find(); 452 if (!m.hitEnd()) 453 failCount++; 454 } 455 456 // Test Boyer-Moore node 457 p = Pattern.compile("catattack"); 458 m = p.matcher("attack"); 459 m.find(); 460 if (!m.hitEnd()) 461 failCount++; 462 463 p = Pattern.compile("catattack"); 464 m = p.matcher("attackattackattackcatatta"); 465 m.find(); 466 if (!m.hitEnd()) 467 failCount++; 468 report("hitEnd from a Slice"); 469 } 470 471 // This is for bug 4997476 472 // It is weird code submitted by customer demonstrating a regression 473 private static void wordSearchTest() throws Exception { 474 String testString = new String("word1 word2 word3"); 475 Pattern p = Pattern.compile("\\b"); 476 Matcher m = p.matcher(testString); 477 int position = 0; 478 int start = 0; 479 while (m.find(position)) { 480 start = m.start(); 481 if (start == testString.length()) 482 break; 483 if (m.find(start+1)) { 484 position = m.start(); 485 } else { 486 position = testString.length(); 487 } 488 if (testString.substring(start, position).equals(" ")) 489 continue; 490 if (!testString.substring(start, position-1).startsWith("word")) 491 failCount++; 492 } 493 report("Customer word search"); 494 } 495 496 // This is for bug 4994840 497 private static void caretAtEndTest() throws Exception { 498 // Problem only occurs with multiline patterns 499 // containing a beginning-of-line caret "^" followed 500 // by an expression that also matches the empty string. 501 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 502 Matcher matcher = pattern.matcher("\r"); 503 matcher.find(); 504 matcher.find(); 505 report("Caret at end"); 506 } 507 508 // This test is for 4979006 509 // Check to see if word boundary construct properly handles unicode 510 // non spacing marks 511 private static void unicodeWordBoundsTest() throws Exception { 512 String spaces = " "; 513 String wordChar = "a"; 514 String nsm = "\u030a"; 515 516 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 517 518 Pattern pattern = Pattern.compile("\\b"); 519 Matcher matcher = pattern.matcher(""); 520 // S=other B=word character N=non spacing mark .=word boundary 521 // SS.BB.SS 522 String input = spaces + wordChar + wordChar + spaces; 523 twoFindIndexes(input, matcher, 2, 4); 524 // SS.BBN.SS 525 input = spaces + wordChar +wordChar + nsm + spaces; 526 twoFindIndexes(input, matcher, 2, 5); 527 // SS.BN.SS 528 input = spaces + wordChar + nsm + spaces; 529 twoFindIndexes(input, matcher, 2, 4); 530 // SS.BNN.SS 531 input = spaces + wordChar + nsm + nsm + spaces; 532 twoFindIndexes(input, matcher, 2, 5); 533 // SSN.BB.SS 534 input = spaces + nsm + wordChar + wordChar + spaces; 535 twoFindIndexes(input, matcher, 3, 5); 536 // SS.BNB.SS 537 input = spaces + wordChar + nsm + wordChar + spaces; 538 twoFindIndexes(input, matcher, 2, 5); 539 // SSNNSS 540 input = spaces + nsm + nsm + spaces; 541 matcher.reset(input); 542 if (matcher.find()) 543 failCount++; 544 // SSN.BBN.SS 545 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 546 twoFindIndexes(input, matcher, 3, 6); 547 548 report("Unicode word boundary"); 549 } 550 551 private static void twoFindIndexes(String input, Matcher matcher, int a, 552 int b) throws Exception 553 { 554 matcher.reset(input); 555 matcher.find(); 556 if (matcher.start() != a) 557 failCount++; 558 matcher.find(); 559 if (matcher.start() != b) 560 failCount++; 561 } 562 563 // This test is for 6284152 564 static void check(String regex, String input, String[] expected) { 565 List<String> result = new ArrayList<String>(); 566 Pattern p = Pattern.compile(regex); 567 Matcher m = p.matcher(input); 568 while (m.find()) { 569 result.add(m.group()); 570 } 571 if (!Arrays.asList(expected).equals(result)) 572 failCount++; 573 } 574 575 private static void lookbehindTest() throws Exception { 576 //Positive 577 check("(?<=%.{0,5})foo\\d", 578 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 579 new String[]{"foo1", "foo2", "foo3"}); 580 581 //boundary at end of the lookbehind sub-regex should work consistently 582 //with the boundary just after the lookbehind sub-regex 583 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 584 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 585 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 586 check("(?<!abc \\b)foo", "abc foo", new String[0]); 587 588 //Negative 589 check("(?<!%.{0,5})foo\\d", 590 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 591 new String[] {"foo4", "foo5"}); 592 593 //Positive greedy 594 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 595 596 //Positive reluctant 597 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 598 599 //supplementary 600 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 601 new String[] {"fo\ud800\udc00o"}); 602 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 603 new String[] {"fo\ud800\udc00o"}); 604 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 605 new String[] {"fo\ud800\udc00o"}); 606 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 607 new String[] {"fo\ud800\udc00o"}); 608 report("Lookbehind"); 609 } 610 611 // This test is for 4938995 612 // Check to see if weak region boundaries are transparent to 613 // lookahead and lookbehind constructs 614 private static void boundsTest() throws Exception { 615 String fullMessage = "catdogcat"; 616 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 617 Matcher matcher = pattern.matcher("catdogca"); 618 matcher.useTransparentBounds(true); 619 if (matcher.find()) 620 failCount++; 621 matcher.reset("atdogcat"); 622 if (matcher.find()) 623 failCount++; 624 matcher.reset(fullMessage); 625 if (!matcher.find()) 626 failCount++; 627 matcher.reset(fullMessage); 628 matcher.region(0,9); 629 if (!matcher.find()) 630 failCount++; 631 matcher.reset(fullMessage); 632 matcher.region(0,6); 633 if (!matcher.find()) 634 failCount++; 635 matcher.reset(fullMessage); 636 matcher.region(3,6); 637 if (!matcher.find()) 638 failCount++; 639 matcher.useTransparentBounds(false); 640 if (matcher.find()) 641 failCount++; 642 643 // Negative lookahead/lookbehind 644 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 645 matcher = pattern.matcher("dogcat"); 646 matcher.useTransparentBounds(true); 647 matcher.region(0,3); 648 if (matcher.find()) 649 failCount++; 650 matcher.reset("catdog"); 651 matcher.region(3,6); 652 if (matcher.find()) 653 failCount++; 654 matcher.useTransparentBounds(false); 655 matcher.reset("dogcat"); 656 matcher.region(0,3); 657 if (!matcher.find()) 658 failCount++; 659 matcher.reset("catdog"); 660 matcher.region(3,6); 661 if (!matcher.find()) 662 failCount++; 663 664 report("Region bounds transparency"); 665 } 666 667 // This test is for 4945394 668 private static void findFromTest() throws Exception { 669 String message = "This is 40 $0 message."; 670 Pattern pat = Pattern.compile("\\$0"); 671 Matcher match = pat.matcher(message); 672 if (!match.find()) 673 failCount++; 674 if (match.find()) 675 failCount++; 676 if (match.find()) 677 failCount++; 678 report("Check for alternating find"); 679 } 680 681 // This test is for 4872664 and 4892980 682 private static void negatedCharClassTest() throws Exception { 683 Pattern pattern = Pattern.compile("[^>]"); 684 Matcher matcher = pattern.matcher("\u203A"); 685 if (!matcher.matches()) 686 failCount++; 687 pattern = Pattern.compile("[^fr]"); 688 matcher = pattern.matcher("a"); 689 if (!matcher.find()) 690 failCount++; 691 matcher.reset("\u203A"); 692 if (!matcher.find()) 693 failCount++; 694 String s = "for"; 695 String result[] = s.split("[^fr]"); 696 if (!result[0].equals("f")) 697 failCount++; 698 if (!result[1].equals("r")) 699 failCount++; 700 s = "f\u203Ar"; 701 result = s.split("[^fr]"); 702 if (!result[0].equals("f")) 703 failCount++; 704 if (!result[1].equals("r")) 705 failCount++; 706 707 // Test adding to bits, subtracting a node, then adding to bits again 708 pattern = Pattern.compile("[^f\u203Ar]"); 709 matcher = pattern.matcher("a"); 710 if (!matcher.find()) 711 failCount++; 712 matcher.reset("f"); 713 if (matcher.find()) 714 failCount++; 715 matcher.reset("\u203A"); 716 if (matcher.find()) 717 failCount++; 718 matcher.reset("r"); 719 if (matcher.find()) 720 failCount++; 721 matcher.reset("\u203B"); 722 if (!matcher.find()) 723 failCount++; 724 725 // Test subtracting a node, adding to bits, subtracting again 726 pattern = Pattern.compile("[^\u203Ar\u203B]"); 727 matcher = pattern.matcher("a"); 728 if (!matcher.find()) 729 failCount++; 730 matcher.reset("\u203A"); 731 if (matcher.find()) 732 failCount++; 733 matcher.reset("r"); 734 if (matcher.find()) 735 failCount++; 736 matcher.reset("\u203B"); 737 if (matcher.find()) 738 failCount++; 739 matcher.reset("\u203C"); 740 if (!matcher.find()) 741 failCount++; 742 743 report("Negated Character Class"); 744 } 745 746 // This test is for 4628291 747 private static void toStringTest() throws Exception { 748 Pattern pattern = Pattern.compile("b+"); 749 if (pattern.toString() != "b+") 750 failCount++; 751 Matcher matcher = pattern.matcher("aaabbbccc"); 752 String matcherString = matcher.toString(); // unspecified 753 matcher.find(); 754 matcherString = matcher.toString(); // unspecified 755 matcher.region(0,3); 756 matcherString = matcher.toString(); // unspecified 757 matcher.reset(); 758 matcherString = matcher.toString(); // unspecified 759 report("toString"); 760 } 761 762 // This test is for 4808962 763 private static void literalPatternTest() throws Exception { 764 int flags = Pattern.LITERAL; 765 766 Pattern pattern = Pattern.compile("abc\\t$^", flags); 767 check(pattern, "abc\\t$^", true); 768 769 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 770 check(pattern, "abc\\t$^", true); 771 772 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 773 check(pattern, "\\Qa^$bcabc\\E", true); 774 check(pattern, "a^$bcabc", false); 775 776 pattern = Pattern.compile("\\\\Q\\\\E"); 777 check(pattern, "\\Q\\E", true); 778 779 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 780 check(pattern, "abcefg\\Q\\Ehij", true); 781 782 pattern = Pattern.compile("\\\\\\Q\\\\E"); 783 check(pattern, "\\\\\\\\", true); 784 785 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 786 check(pattern, "\\Qa^$bcabc\\E", true); 787 check(pattern, "a^$bcabc", false); 788 789 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 790 check(pattern, "\\Qabc\\Edef", true); 791 check(pattern, "abcdef", false); 792 793 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 794 check(pattern, "abc\\Edef", true); 795 check(pattern, "abcdef", false); 796 797 pattern = Pattern.compile(Pattern.quote("\\E")); 798 check(pattern, "\\E", true); 799 800 pattern = Pattern.compile("((((abc.+?:)", flags); 801 check(pattern, "((((abc.+?:)", true); 802 803 flags |= Pattern.MULTILINE; 804 805 pattern = Pattern.compile("^cat$", flags); 806 check(pattern, "abc^cat$def", true); 807 check(pattern, "cat", false); 808 809 flags |= Pattern.CASE_INSENSITIVE; 810 811 pattern = Pattern.compile("abcdef", flags); 812 check(pattern, "ABCDEF", true); 813 check(pattern, "AbCdEf", true); 814 815 flags |= Pattern.DOTALL; 816 817 pattern = Pattern.compile("a...b", flags); 818 check(pattern, "A...b", true); 819 check(pattern, "Axxxb", false); 820 821 flags |= Pattern.CANON_EQ; 822 823 Pattern p = Pattern.compile("testa\u030a", flags); 824 check(pattern, "testa\u030a", false); 825 check(pattern, "test\u00e5", false); 826 827 // Supplementary character test 828 flags = Pattern.LITERAL; 829 830 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 831 check(pattern, toSupplementaries("abc\\t$^"), true); 832 833 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 834 check(pattern, toSupplementaries("abc\\t$^"), true); 835 836 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 837 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 838 check(pattern, toSupplementaries("a^$bcabc"), false); 839 840 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 841 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 842 check(pattern, toSupplementaries("a^$bcabc"), false); 843 844 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 845 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 846 check(pattern, toSupplementaries("abcdef"), false); 847 848 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 849 check(pattern, toSupplementaries("abc\\Edef"), true); 850 check(pattern, toSupplementaries("abcdef"), false); 851 852 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 853 check(pattern, toSupplementaries("((((abc.+?:)"), true); 854 855 flags |= Pattern.MULTILINE; 856 857 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 858 check(pattern, toSupplementaries("abc^cat$def"), true); 859 check(pattern, toSupplementaries("cat"), false); 860 861 flags |= Pattern.DOTALL; 862 863 // note: this is case-sensitive. 864 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 865 check(pattern, toSupplementaries("a...b"), true); 866 check(pattern, toSupplementaries("axxxb"), false); 867 868 flags |= Pattern.CANON_EQ; 869 870 String t = toSupplementaries("test"); 871 p = Pattern.compile(t + "a\u030a", flags); 872 check(pattern, t + "a\u030a", false); 873 check(pattern, t + "\u00e5", false); 874 875 report("Literal pattern"); 876 } 877 878 // This test is for 4803179 879 // This test is also for 4808962, replacement parts 880 private static void literalReplacementTest() throws Exception { 881 int flags = Pattern.LITERAL; 882 883 Pattern pattern = Pattern.compile("abc", flags); 884 Matcher matcher = pattern.matcher("zzzabczzz"); 885 String replaceTest = "$0"; 886 String result = matcher.replaceAll(replaceTest); 887 if (!result.equals("zzzabczzz")) 888 failCount++; 889 890 matcher.reset(); 891 String literalReplacement = matcher.quoteReplacement(replaceTest); 892 result = matcher.replaceAll(literalReplacement); 893 if (!result.equals("zzz$0zzz")) 894 failCount++; 895 896 matcher.reset(); 897 replaceTest = "\\t$\\$"; 898 literalReplacement = matcher.quoteReplacement(replaceTest); 899 result = matcher.replaceAll(literalReplacement); 900 if (!result.equals("zzz\\t$\\$zzz")) 901 failCount++; 902 903 // Supplementary character test 904 pattern = Pattern.compile(toSupplementaries("abc"), flags); 905 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 906 replaceTest = "$0"; 907 result = matcher.replaceAll(replaceTest); 908 if (!result.equals(toSupplementaries("zzzabczzz"))) 909 failCount++; 910 911 matcher.reset(); 912 literalReplacement = matcher.quoteReplacement(replaceTest); 913 result = matcher.replaceAll(literalReplacement); 914 if (!result.equals(toSupplementaries("zzz$0zzz"))) 915 failCount++; 916 917 matcher.reset(); 918 replaceTest = "\\t$\\$"; 919 literalReplacement = matcher.quoteReplacement(replaceTest); 920 result = matcher.replaceAll(literalReplacement); 921 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 922 failCount++; 923 924 // IAE should be thrown if backslash or '$' is the last character 925 // in replacement string 926 try { 927 "\uac00".replaceAll("\uac00", "$"); 928 failCount++; 929 } catch (IllegalArgumentException iie) { 930 } catch (Exception e) { 931 failCount++; 932 } 933 try { 934 "\uac00".replaceAll("\uac00", "\\"); 935 failCount++; 936 } catch (IllegalArgumentException iie) { 937 } catch (Exception e) { 938 failCount++; 939 } 940 report("Literal replacement"); 941 } 942 943 // This test is for 4757029 944 private static void regionTest() throws Exception { 945 Pattern pattern = Pattern.compile("abc"); 946 Matcher matcher = pattern.matcher("abcdefabc"); 947 948 matcher.region(0,9); 949 if (!matcher.find()) 950 failCount++; 951 if (!matcher.find()) 952 failCount++; 953 matcher.region(0,3); 954 if (!matcher.find()) 955 failCount++; 956 matcher.region(3,6); 957 if (matcher.find()) 958 failCount++; 959 matcher.region(0,2); 960 if (matcher.find()) 961 failCount++; 962 963 expectRegionFail(matcher, 1, -1); 964 expectRegionFail(matcher, -1, -1); 965 expectRegionFail(matcher, -1, 1); 966 expectRegionFail(matcher, 5, 3); 967 expectRegionFail(matcher, 5, 12); 968 expectRegionFail(matcher, 12, 12); 969 970 pattern = Pattern.compile("^abc$"); 971 matcher = pattern.matcher("zzzabczzz"); 972 matcher.region(0,9); 973 if (matcher.find()) 974 failCount++; 975 matcher.region(3,6); 976 if (!matcher.find()) 977 failCount++; 978 matcher.region(3,6); 979 matcher.useAnchoringBounds(false); 980 if (matcher.find()) 981 failCount++; 982 983 // Supplementary character test 984 pattern = Pattern.compile(toSupplementaries("abc")); 985 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 986 matcher.region(0,9*2); 987 if (!matcher.find()) 988 failCount++; 989 if (!matcher.find()) 990 failCount++; 991 matcher.region(0,3*2); 992 if (!matcher.find()) 993 failCount++; 994 matcher.region(1,3*2); 995 if (matcher.find()) 996 failCount++; 997 matcher.region(3*2,6*2); 998 if (matcher.find()) 999 failCount++; 1000 matcher.region(0,2*2); 1001 if (matcher.find()) 1002 failCount++; 1003 matcher.region(0,2*2+1); 1004 if (matcher.find()) 1005 failCount++; 1006 1007 expectRegionFail(matcher, 1*2, -1); 1008 expectRegionFail(matcher, -1, -1); 1009 expectRegionFail(matcher, -1, 1*2); 1010 expectRegionFail(matcher, 5*2, 3*2); 1011 expectRegionFail(matcher, 5*2, 12*2); 1012 expectRegionFail(matcher, 12*2, 12*2); 1013 1014 pattern = Pattern.compile(toSupplementaries("^abc$")); 1015 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 1016 matcher.region(0,9*2); 1017 if (matcher.find()) 1018 failCount++; 1019 matcher.region(3*2,6*2); 1020 if (!matcher.find()) 1021 failCount++; 1022 matcher.region(3*2+1,6*2); 1023 if (matcher.find()) 1024 failCount++; 1025 matcher.region(3*2,6*2-1); 1026 if (matcher.find()) 1027 failCount++; 1028 matcher.region(3*2,6*2); 1029 matcher.useAnchoringBounds(false); 1030 if (matcher.find()) 1031 failCount++; 1032 report("Regions"); 1033 } 1034 1035 private static void expectRegionFail(Matcher matcher, int index1, 1036 int index2) 1037 { 1038 try { 1039 matcher.region(index1, index2); 1040 failCount++; 1041 } catch (IndexOutOfBoundsException ioobe) { 1042 // Correct result 1043 } catch (IllegalStateException ise) { 1044 // Correct result 1045 } 1046 } 1047 1048 // This test is for 4803197 1049 private static void escapedSegmentTest() throws Exception { 1050 1051 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 1052 check(pattern, "dir1\\dir2", true); 1053 1054 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 1055 check(pattern, "dir1\\dir2\\", true); 1056 1057 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1058 check(pattern, "dir1\\dir2\\", true); 1059 1060 // Supplementary character test 1061 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1062 check(pattern, toSupplementaries("dir1\\dir2"), true); 1063 1064 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1065 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1066 1067 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1068 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1069 1070 report("Escaped segment"); 1071 } 1072 1073 // This test is for 4792284 1074 private static void nonCaptureRepetitionTest() throws Exception { 1075 String input = "abcdefgh;"; 1076 1077 String[] patterns = new String[] { 1078 "(?:\\w{4})+;", 1079 "(?:\\w{8})*;", 1080 "(?:\\w{2}){2,4};", 1081 "(?:\\w{4}){2,};", // only matches the 1082 ".*?(?:\\w{5})+;", // specified minimum 1083 ".*?(?:\\w{9})*;", // number of reps - OK 1084 "(?:\\w{4})+?;", // lazy repetition - OK 1085 "(?:\\w{4})++;", // possessive repetition - OK 1086 "(?:\\w{2,}?)+;", // non-deterministic - OK 1087 "(\\w{4})+;", // capturing group - OK 1088 }; 1089 1090 for (int i = 0; i < patterns.length; i++) { 1091 // Check find() 1092 check(patterns[i], 0, input, input, true); 1093 // Check matches() 1094 Pattern p = Pattern.compile(patterns[i]); 1095 Matcher m = p.matcher(input); 1096 1097 if (m.matches()) { 1098 if (!m.group(0).equals(input)) 1099 failCount++; 1100 } else { 1101 failCount++; 1102 } 1103 } 1104 1105 report("Non capturing repetition"); 1106 } 1107 1108 // This test is for 6358731 1109 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1110 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1111 Matcher matcher = pattern.matcher("abcd"); 1112 if (!matcher.matches() || 1113 matcher.group(1) != null || 1114 !matcher.group(2).equals("abcd")) { 1115 failCount++; 1116 } 1117 report("Not captured GroupCurly"); 1118 } 1119 1120 // This test is for 4706545 1121 private static void javaCharClassTest() throws Exception { 1122 for (int i=0; i<1000; i++) { 1123 char c = (char)generator.nextInt(); 1124 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1125 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1126 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1127 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1128 check("{javaDigit}", c, Character.isDigit(c)); 1129 check("{javaDefined}", c, Character.isDefined(c)); 1130 check("{javaLetter}", c, Character.isLetter(c)); 1131 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1132 check("{javaJavaIdentifierStart}", c, 1133 Character.isJavaIdentifierStart(c)); 1134 check("{javaJavaIdentifierPart}", c, 1135 Character.isJavaIdentifierPart(c)); 1136 check("{javaUnicodeIdentifierStart}", c, 1137 Character.isUnicodeIdentifierStart(c)); 1138 check("{javaUnicodeIdentifierPart}", c, 1139 Character.isUnicodeIdentifierPart(c)); 1140 check("{javaIdentifierIgnorable}", c, 1141 Character.isIdentifierIgnorable(c)); 1142 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1143 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1144 check("{javaISOControl}", c, Character.isISOControl(c)); 1145 check("{javaMirrored}", c, Character.isMirrored(c)); 1146 1147 } 1148 1149 // Supplementary character test 1150 for (int i=0; i<1000; i++) { 1151 int c = generator.nextInt(Character.MAX_CODE_POINT 1152 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1153 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1154 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1155 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1156 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1157 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1158 check("{javaDigit}", c, Character.isDigit(c)); 1159 check("{javaDefined}", c, Character.isDefined(c)); 1160 check("{javaLetter}", c, Character.isLetter(c)); 1161 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1162 check("{javaJavaIdentifierStart}", c, 1163 Character.isJavaIdentifierStart(c)); 1164 check("{javaJavaIdentifierPart}", c, 1165 Character.isJavaIdentifierPart(c)); 1166 check("{javaUnicodeIdentifierStart}", c, 1167 Character.isUnicodeIdentifierStart(c)); 1168 check("{javaUnicodeIdentifierPart}", c, 1169 Character.isUnicodeIdentifierPart(c)); 1170 check("{javaIdentifierIgnorable}", c, 1171 Character.isIdentifierIgnorable(c)); 1172 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1173 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1174 check("{javaISOControl}", c, Character.isISOControl(c)); 1175 check("{javaMirrored}", c, Character.isMirrored(c)); 1176 } 1177 1178 report("Java character classes"); 1179 } 1180 1181 // This test is for 4523620 1182 /* 1183 private static void numOccurrencesTest() throws Exception { 1184 Pattern pattern = Pattern.compile("aaa"); 1185 1186 if (pattern.numOccurrences("aaaaaa", false) != 2) 1187 failCount++; 1188 if (pattern.numOccurrences("aaaaaa", true) != 4) 1189 failCount++; 1190 1191 pattern = Pattern.compile("^"); 1192 if (pattern.numOccurrences("aaaaaa", false) != 1) 1193 failCount++; 1194 if (pattern.numOccurrences("aaaaaa", true) != 1) 1195 failCount++; 1196 1197 report("Number of Occurrences"); 1198 } 1199 */ 1200 1201 // This test is for 4776374 1202 private static void caretBetweenTerminatorsTest() throws Exception { 1203 int flags1 = Pattern.DOTALL; 1204 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1205 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1206 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1207 1208 check("^....", flags1, "test\ntest", "test", true); 1209 check(".....^", flags1, "test\ntest", "test", false); 1210 check(".....^", flags1, "test\n", "test", false); 1211 check("....^", flags1, "test\r\n", "test", false); 1212 1213 check("^....", flags2, "test\ntest", "test", true); 1214 check("....^", flags2, "test\ntest", "test", false); 1215 check(".....^", flags2, "test\n", "test", false); 1216 check("....^", flags2, "test\r\n", "test", false); 1217 1218 check("^....", flags3, "test\ntest", "test", true); 1219 check(".....^", flags3, "test\ntest", "test\n", true); 1220 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1221 check(".....^", flags3, "test\n", "test", false); 1222 check(".....^", flags3, "test\r\n", "test", false); 1223 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1224 1225 check("^....", flags4, "test\ntest", "test", true); 1226 check(".....^", flags3, "test\ntest", "test\n", true); 1227 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1228 check(".....^", flags4, "test\n", "test\n", false); 1229 check(".....^", flags4, "test\r\n", "test\r", false); 1230 1231 // Supplementary character test 1232 String t = toSupplementaries("test"); 1233 check("^....", flags1, t+"\n"+t, t, true); 1234 check(".....^", flags1, t+"\n"+t, t, false); 1235 check(".....^", flags1, t+"\n", t, false); 1236 check("....^", flags1, t+"\r\n", t, false); 1237 1238 check("^....", flags2, t+"\n"+t, t, true); 1239 check("....^", flags2, t+"\n"+t, t, false); 1240 check(".....^", flags2, t+"\n", t, false); 1241 check("....^", flags2, t+"\r\n", t, false); 1242 1243 check("^....", flags3, t+"\n"+t, t, true); 1244 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1245 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1246 check(".....^", flags3, t+"\n", t, false); 1247 check(".....^", flags3, t+"\r\n", t, false); 1248 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1249 1250 check("^....", flags4, t+"\n"+t, t, true); 1251 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1252 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1253 check(".....^", flags4, t+"\n", t+"\n", false); 1254 check(".....^", flags4, t+"\r\n", t+"\r", false); 1255 1256 report("Caret between terminators"); 1257 } 1258 1259 // This test is for 4727935 1260 private static void dollarAtEndTest() throws Exception { 1261 int flags1 = Pattern.DOTALL; 1262 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1263 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1264 1265 check("....$", flags1, "test\n", "test", true); 1266 check("....$", flags1, "test\r\n", "test", true); 1267 check(".....$", flags1, "test\n", "test\n", true); 1268 check(".....$", flags1, "test\u0085", "test\u0085", true); 1269 check("....$", flags1, "test\u0085", "test", true); 1270 1271 check("....$", flags2, "test\n", "test", true); 1272 check(".....$", flags2, "test\n", "test\n", true); 1273 check(".....$", flags2, "test\u0085", "test\u0085", true); 1274 check("....$", flags2, "test\u0085", "est\u0085", true); 1275 1276 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1277 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1278 check("....$blah", flags3, "test\nblah", "!!!!", false); 1279 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1280 1281 // Supplementary character test 1282 String t = toSupplementaries("test"); 1283 String b = toSupplementaries("blah"); 1284 check("....$", flags1, t+"\n", t, true); 1285 check("....$", flags1, t+"\r\n", t, true); 1286 check(".....$", flags1, t+"\n", t+"\n", true); 1287 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1288 check("....$", flags1, t+"\u0085", t, true); 1289 1290 check("....$", flags2, t+"\n", t, true); 1291 check(".....$", flags2, t+"\n", t+"\n", true); 1292 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1293 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1294 1295 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1296 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1297 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1298 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1299 1300 report("Dollar at End"); 1301 } 1302 1303 // This test is for 4711773 1304 private static void multilineDollarTest() throws Exception { 1305 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1306 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1307 matcher.find(); 1308 if (matcher.start(0) != 9) 1309 failCount++; 1310 matcher.find(); 1311 if (matcher.start(0) != 20) 1312 failCount++; 1313 1314 // Supplementary character test 1315 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1316 matcher.find(); 1317 if (matcher.start(0) != 9*2) 1318 failCount++; 1319 matcher.find(); 1320 if (matcher.start(0) != 20*2) 1321 failCount++; 1322 1323 report("Multiline Dollar"); 1324 } 1325 1326 private static void reluctantRepetitionTest() throws Exception { 1327 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1328 check(p, "1 word word word 2", true); 1329 check(p, "1 wor wo w 2", true); 1330 check(p, "1 word word 2", true); 1331 check(p, "1 word 2", true); 1332 check(p, "1 wo w w 2", true); 1333 check(p, "1 wo w 2", true); 1334 check(p, "1 wor w 2", true); 1335 1336 p = Pattern.compile("([a-z])+?c"); 1337 Matcher m = p.matcher("ababcdefdec"); 1338 check(m, "ababc"); 1339 1340 // Supplementary character test 1341 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1342 m = p.matcher(toSupplementaries("ababcdefdec")); 1343 check(m, toSupplementaries("ababc")); 1344 1345 report("Reluctant Repetition"); 1346 } 1347 1348 private static void serializeTest() throws Exception { 1349 String patternStr = "(b)"; 1350 String matchStr = "b"; 1351 Pattern pattern = Pattern.compile(patternStr); 1352 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1353 ObjectOutputStream oos = new ObjectOutputStream(baos); 1354 oos.writeObject(pattern); 1355 oos.close(); 1356 ObjectInputStream ois = new ObjectInputStream( 1357 new ByteArrayInputStream(baos.toByteArray())); 1358 Pattern serializedPattern = (Pattern)ois.readObject(); 1359 ois.close(); 1360 Matcher matcher = serializedPattern.matcher(matchStr); 1361 if (!matcher.matches()) 1362 failCount++; 1363 if (matcher.groupCount() != 1) 1364 failCount++; 1365 1366 report("Serialization"); 1367 } 1368 1369 private static void gTest() { 1370 Pattern pattern = Pattern.compile("\\G\\w"); 1371 Matcher matcher = pattern.matcher("abc#x#x"); 1372 matcher.find(); 1373 matcher.find(); 1374 matcher.find(); 1375 if (matcher.find()) 1376 failCount++; 1377 1378 pattern = Pattern.compile("\\GA*"); 1379 matcher = pattern.matcher("1A2AA3"); 1380 matcher.find(); 1381 if (matcher.find()) 1382 failCount++; 1383 1384 pattern = Pattern.compile("\\GA*"); 1385 matcher = pattern.matcher("1A2AA3"); 1386 if (!matcher.find(1)) 1387 failCount++; 1388 matcher.find(); 1389 if (matcher.find()) 1390 failCount++; 1391 1392 report("\\G"); 1393 } 1394 1395 private static void zTest() { 1396 Pattern pattern = Pattern.compile("foo\\Z"); 1397 // Positives 1398 check(pattern, "foo\u0085", true); 1399 check(pattern, "foo\u2028", true); 1400 check(pattern, "foo\u2029", true); 1401 check(pattern, "foo\n", true); 1402 check(pattern, "foo\r", true); 1403 check(pattern, "foo\r\n", true); 1404 // Negatives 1405 check(pattern, "fooo", false); 1406 check(pattern, "foo\n\r", false); 1407 1408 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1409 // Positives 1410 check(pattern, "foo", true); 1411 check(pattern, "foo\n", true); 1412 // Negatives 1413 check(pattern, "foo\r", false); 1414 check(pattern, "foo\u0085", false); 1415 check(pattern, "foo\u2028", false); 1416 check(pattern, "foo\u2029", false); 1417 1418 report("\\Z"); 1419 } 1420 1421 private static void replaceFirstTest() { 1422 Pattern pattern = Pattern.compile("(ab)(c*)"); 1423 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1424 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1425 failCount++; 1426 1427 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1428 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1429 failCount++; 1430 1431 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1432 String result = matcher.replaceFirst("$1"); 1433 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1434 failCount++; 1435 1436 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1437 result = matcher.replaceFirst("$2"); 1438 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1439 failCount++; 1440 1441 pattern = Pattern.compile("a*"); 1442 matcher = pattern.matcher("aaaaaaaaaa"); 1443 if (!matcher.replaceFirst("test").equals("test")) 1444 failCount++; 1445 1446 pattern = Pattern.compile("a+"); 1447 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1448 if (!matcher.replaceFirst("test").equals("zzztest")) 1449 failCount++; 1450 1451 // Supplementary character test 1452 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1453 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1454 if (!matcher.replaceFirst(toSupplementaries("test")) 1455 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1456 failCount++; 1457 1458 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1459 if (!matcher.replaceFirst(toSupplementaries("test")). 1460 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1461 failCount++; 1462 1463 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1464 result = matcher.replaceFirst("$1"); 1465 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1466 failCount++; 1467 1468 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1469 result = matcher.replaceFirst("$2"); 1470 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1471 failCount++; 1472 1473 pattern = Pattern.compile(toSupplementaries("a*")); 1474 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1475 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1476 failCount++; 1477 1478 pattern = Pattern.compile(toSupplementaries("a+")); 1479 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1480 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1481 failCount++; 1482 1483 report("Replace First"); 1484 } 1485 1486 private static void unixLinesTest() { 1487 Pattern pattern = Pattern.compile(".*"); 1488 Matcher matcher = pattern.matcher("aa\u2028blah"); 1489 matcher.find(); 1490 if (!matcher.group(0).equals("aa")) 1491 failCount++; 1492 1493 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1494 matcher = pattern.matcher("aa\u2028blah"); 1495 matcher.find(); 1496 if (!matcher.group(0).equals("aa\u2028blah")) 1497 failCount++; 1498 1499 pattern = Pattern.compile("[az]$", 1500 Pattern.MULTILINE | Pattern.UNIX_LINES); 1501 matcher = pattern.matcher("aa\u2028zz"); 1502 check(matcher, "a\u2028", false); 1503 1504 // Supplementary character test 1505 pattern = Pattern.compile(".*"); 1506 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1507 matcher.find(); 1508 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1509 failCount++; 1510 1511 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1512 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1513 matcher.find(); 1514 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1515 failCount++; 1516 1517 pattern = Pattern.compile(toSupplementaries("[az]$"), 1518 Pattern.MULTILINE | Pattern.UNIX_LINES); 1519 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1520 check(matcher, toSupplementaries("a\u2028"), false); 1521 1522 report("Unix Lines"); 1523 } 1524 1525 private static void commentsTest() { 1526 int flags = Pattern.COMMENTS; 1527 1528 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1529 Matcher matcher = pattern.matcher("aa#aa"); 1530 if (!matcher.matches()) 1531 failCount++; 1532 1533 pattern = Pattern.compile("aa # blah", flags); 1534 matcher = pattern.matcher("aa"); 1535 if (!matcher.matches()) 1536 failCount++; 1537 1538 pattern = Pattern.compile("aa blah", flags); 1539 matcher = pattern.matcher("aablah"); 1540 if (!matcher.matches()) 1541 failCount++; 1542 1543 pattern = Pattern.compile("aa # blah blech ", flags); 1544 matcher = pattern.matcher("aa"); 1545 if (!matcher.matches()) 1546 failCount++; 1547 1548 pattern = Pattern.compile("aa # blah\n ", flags); 1549 matcher = pattern.matcher("aa"); 1550 if (!matcher.matches()) 1551 failCount++; 1552 1553 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1554 matcher = pattern.matcher("aabc"); 1555 if (!matcher.matches()) 1556 failCount++; 1557 1558 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1559 matcher = pattern.matcher("aabc"); 1560 if (!matcher.matches()) 1561 failCount++; 1562 1563 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1564 matcher = pattern.matcher("aabc#blech"); 1565 if (!matcher.matches()) 1566 failCount++; 1567 1568 // Supplementary character test 1569 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1570 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1571 if (!matcher.matches()) 1572 failCount++; 1573 1574 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1575 matcher = pattern.matcher(toSupplementaries("aa")); 1576 if (!matcher.matches()) 1577 failCount++; 1578 1579 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1580 matcher = pattern.matcher(toSupplementaries("aablah")); 1581 if (!matcher.matches()) 1582 failCount++; 1583 1584 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1585 matcher = pattern.matcher(toSupplementaries("aa")); 1586 if (!matcher.matches()) 1587 failCount++; 1588 1589 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1590 matcher = pattern.matcher(toSupplementaries("aa")); 1591 if (!matcher.matches()) 1592 failCount++; 1593 1594 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1595 matcher = pattern.matcher(toSupplementaries("aabc")); 1596 if (!matcher.matches()) 1597 failCount++; 1598 1599 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1600 matcher = pattern.matcher(toSupplementaries("aabc")); 1601 if (!matcher.matches()) 1602 failCount++; 1603 1604 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1605 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1606 if (!matcher.matches()) 1607 failCount++; 1608 1609 report("Comments"); 1610 } 1611 1612 private static void caseFoldingTest() { // bug 4504687 1613 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1614 Pattern pattern = Pattern.compile("aa", flags); 1615 Matcher matcher = pattern.matcher("ab"); 1616 if (matcher.matches()) 1617 failCount++; 1618 1619 pattern = Pattern.compile("aA", flags); 1620 matcher = pattern.matcher("ab"); 1621 if (matcher.matches()) 1622 failCount++; 1623 1624 pattern = Pattern.compile("aa", flags); 1625 matcher = pattern.matcher("aB"); 1626 if (matcher.matches()) 1627 failCount++; 1628 matcher = pattern.matcher("Ab"); 1629 if (matcher.matches()) 1630 failCount++; 1631 1632 // ASCII "a" 1633 // Latin-1 Supplement "a" + grave 1634 // Cyrillic "a" 1635 String[] patterns = new String[] { 1636 //single 1637 "a", "\u00e0", "\u0430", 1638 //slice 1639 "ab", "\u00e0\u00e1", "\u0430\u0431", 1640 //class single 1641 "[a]", "[\u00e0]", "[\u0430]", 1642 //class range 1643 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1644 //back reference 1645 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1646 }; 1647 1648 String[] texts = new String[] { 1649 "A", "\u00c0", "\u0410", 1650 "AB", "\u00c0\u00c1", "\u0410\u0411", 1651 "A", "\u00c0", "\u0410", 1652 "B", "\u00c2", "\u0411", 1653 "aA", "\u00e0\u00c0", "\u0430\u0410" 1654 }; 1655 1656 boolean[] expected = new boolean[] { 1657 true, false, false, 1658 true, false, false, 1659 true, false, false, 1660 true, false, false, 1661 true, false, false 1662 }; 1663 1664 flags = Pattern.CASE_INSENSITIVE; 1665 for (int i = 0; i < patterns.length; i++) { 1666 pattern = Pattern.compile(patterns[i], flags); 1667 matcher = pattern.matcher(texts[i]); 1668 if (matcher.matches() != expected[i]) { 1669 System.out.println("<1> Failed at " + i); 1670 failCount++; 1671 } 1672 } 1673 1674 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1675 for (int i = 0; i < patterns.length; i++) { 1676 pattern = Pattern.compile(patterns[i], flags); 1677 matcher = pattern.matcher(texts[i]); 1678 if (!matcher.matches()) { 1679 System.out.println("<2> Failed at " + i); 1680 failCount++; 1681 } 1682 } 1683 // flag unicode_case alone should do nothing 1684 flags = Pattern.UNICODE_CASE; 1685 for (int i = 0; i < patterns.length; i++) { 1686 pattern = Pattern.compile(patterns[i], flags); 1687 matcher = pattern.matcher(texts[i]); 1688 if (matcher.matches()) { 1689 System.out.println("<3> Failed at " + i); 1690 failCount++; 1691 } 1692 } 1693 1694 // Special cases: i, I, u+0131 and u+0130 1695 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1696 pattern = Pattern.compile("[h-j]+", flags); 1697 if (!pattern.matcher("\u0131\u0130").matches()) 1698 failCount++; 1699 report("Case Folding"); 1700 } 1701 1702 private static void appendTest() { 1703 Pattern pattern = Pattern.compile("(ab)(cd)"); 1704 Matcher matcher = pattern.matcher("abcd"); 1705 String result = matcher.replaceAll("$2$1"); 1706 if (!result.equals("cdab")) 1707 failCount++; 1708 1709 String s1 = "Swap all: first = 123, second = 456"; 1710 String s2 = "Swap one: first = 123, second = 456"; 1711 String r = "$3$2$1"; 1712 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1713 matcher = pattern.matcher(s1); 1714 1715 result = matcher.replaceAll(r); 1716 if (!result.equals("Swap all: 123 = first, 456 = second")) 1717 failCount++; 1718 1719 matcher = pattern.matcher(s2); 1720 1721 if (matcher.find()) { 1722 StringBuffer sb = new StringBuffer(); 1723 matcher.appendReplacement(sb, r); 1724 matcher.appendTail(sb); 1725 result = sb.toString(); 1726 if (!result.equals("Swap one: 123 = first, second = 456")) 1727 failCount++; 1728 } 1729 1730 // Supplementary character test 1731 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1732 matcher = pattern.matcher(toSupplementaries("abcd")); 1733 result = matcher.replaceAll("$2$1"); 1734 if (!result.equals(toSupplementaries("cdab"))) 1735 failCount++; 1736 1737 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1738 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1739 r = toSupplementaries("$3$2$1"); 1740 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1741 matcher = pattern.matcher(s1); 1742 1743 result = matcher.replaceAll(r); 1744 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1745 failCount++; 1746 1747 matcher = pattern.matcher(s2); 1748 1749 if (matcher.find()) { 1750 StringBuffer sb = new StringBuffer(); 1751 matcher.appendReplacement(sb, r); 1752 matcher.appendTail(sb); 1753 result = sb.toString(); 1754 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1755 failCount++; 1756 } 1757 report("Append"); 1758 } 1759 1760 private static void splitTest() { 1761 Pattern pattern = Pattern.compile(":"); 1762 String[] result = pattern.split("foo:and:boo", 2); 1763 if (!result[0].equals("foo")) 1764 failCount++; 1765 if (!result[1].equals("and:boo")) 1766 failCount++; 1767 // Supplementary character test 1768 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1769 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1770 if (!result[0].equals(toSupplementaries("foo"))) 1771 failCount++; 1772 if (!result[1].equals(toSupplementaries("andXboo"))) 1773 failCount++; 1774 1775 CharBuffer cb = CharBuffer.allocate(100); 1776 cb.put("foo:and:boo"); 1777 cb.flip(); 1778 result = pattern.split(cb); 1779 if (!result[0].equals("foo")) 1780 failCount++; 1781 if (!result[1].equals("and")) 1782 failCount++; 1783 if (!result[2].equals("boo")) 1784 failCount++; 1785 1786 // Supplementary character test 1787 CharBuffer cbs = CharBuffer.allocate(100); 1788 cbs.put(toSupplementaries("fooXandXboo")); 1789 cbs.flip(); 1790 result = patternX.split(cbs); 1791 if (!result[0].equals(toSupplementaries("foo"))) 1792 failCount++; 1793 if (!result[1].equals(toSupplementaries("and"))) 1794 failCount++; 1795 if (!result[2].equals(toSupplementaries("boo"))) 1796 failCount++; 1797 1798 String source = "0123456789"; 1799 for (int limit=-2; limit<3; limit++) { 1800 for (int x=0; x<10; x++) { 1801 result = source.split(Integer.toString(x), limit); 1802 int expectedLength = limit < 1 ? 2 : limit; 1803 1804 if ((limit == 0) && (x == 9)) { 1805 // expected dropping of "" 1806 if (result.length != 1) 1807 failCount++; 1808 if (!result[0].equals("012345678")) { 1809 failCount++; 1810 } 1811 } else { 1812 if (result.length != expectedLength) { 1813 failCount++; 1814 } 1815 if (!result[0].equals(source.substring(0,x))) { 1816 if (limit != 1) { 1817 failCount++; 1818 } else { 1819 if (!result[0].equals(source.substring(0,10))) { 1820 failCount++; 1821 } 1822 } 1823 } 1824 if (expectedLength > 1) { // Check segment 2 1825 if (!result[1].equals(source.substring(x+1,10))) 1826 failCount++; 1827 } 1828 } 1829 } 1830 } 1831 // Check the case for no match found 1832 for (int limit=-2; limit<3; limit++) { 1833 result = source.split("e", limit); 1834 if (result.length != 1) 1835 failCount++; 1836 if (!result[0].equals(source)) 1837 failCount++; 1838 } 1839 // Check the case for limit == 0, source = ""; 1840 // split() now returns 0-length for empty source "" see #6559590 1841 source = ""; 1842 result = source.split("e", 0); 1843 if (result.length != 1) 1844 failCount++; 1845 if (!result[0].equals(source)) 1846 failCount++; 1847 1848 // Check both split() and splitAsStraem(), especially for zero-lenth 1849 // input and zero-lenth match cases 1850 String[][] input = new String[][] { 1851 { " ", "Abc Efg Hij" }, // normal non-zero-match 1852 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1853 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1854 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1855 { "(?=\\p{Lu})", "AbcEfg" }, 1856 { "(?=\\p{Lu})", "Abc" }, 1857 { " ", "" }, // zero-length input 1858 { ".*", "" }, 1859 1860 // some tests from PatternStreamTest.java 1861 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1862 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1863 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1864 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1865 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1866 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1867 { "\u56da", "" }, 1868 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1869 { "o", "boo:and:foo" }, 1870 { "o", "booooo:and:fooooo" }, 1871 { "o", "fooooo:" }, 1872 }; 1873 1874 String[][] expected = new String[][] { 1875 { "Abc", "Efg", "Hij" }, 1876 { "", "Abc", "Efg", "Hij" }, 1877 { "Abc", "", "Efg", "Hij" }, 1878 { "Abc", "Efg", "Hij" }, 1879 { "Abc", "Efg" }, 1880 { "Abc" }, 1881 { "" }, 1882 { "" }, 1883 1884 { "awgqwefg1fefw", "vssv1vvv1" }, 1885 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1886 { "awgqwefg", "fefw4vssv", "vvv" }, 1887 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1888 { "1", "23", "456", "7890" }, 1889 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1890 { "" }, 1891 { "This", "is", "testing", "", "with", "different", "separators" }, 1892 { "b", "", ":and:f" }, 1893 { "b", "", "", "", "", ":and:f" }, 1894 { "f", "", "", "", "", ":" }, 1895 }; 1896 for (int i = 0; i < input.length; i++) { 1897 pattern = Pattern.compile(input[i][0]); 1898 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1899 failCount++; 1900 } 1901 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1902 // array for zero-length input for now 1903 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1904 expected[i])) { 1905 failCount++; 1906 } 1907 } 1908 report("Split"); 1909 } 1910 1911 private static void negationTest() { 1912 Pattern pattern = Pattern.compile("[\\[@^]+"); 1913 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1914 if (!matcher.find()) 1915 failCount++; 1916 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1917 failCount++; 1918 pattern = Pattern.compile("[@\\[^]+"); 1919 matcher = pattern.matcher("@@@@[[[[^^^^"); 1920 if (!matcher.find()) 1921 failCount++; 1922 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1923 failCount++; 1924 pattern = Pattern.compile("[@\\[^@]+"); 1925 matcher = pattern.matcher("@@@@[[[[^^^^"); 1926 if (!matcher.find()) 1927 failCount++; 1928 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1929 failCount++; 1930 1931 pattern = Pattern.compile("\\)"); 1932 matcher = pattern.matcher("xxx)xxx"); 1933 if (!matcher.find()) 1934 failCount++; 1935 1936 report("Negation"); 1937 } 1938 1939 private static void ampersandTest() { 1940 Pattern pattern = Pattern.compile("[&@]+"); 1941 check(pattern, "@@@@&&&&", true); 1942 1943 pattern = Pattern.compile("[@&]+"); 1944 check(pattern, "@@@@&&&&", true); 1945 1946 pattern = Pattern.compile("[@\\&]+"); 1947 check(pattern, "@@@@&&&&", true); 1948 1949 report("Ampersand"); 1950 } 1951 1952 private static void octalTest() throws Exception { 1953 Pattern pattern = Pattern.compile("\\u0007"); 1954 Matcher matcher = pattern.matcher("\u0007"); 1955 if (!matcher.matches()) 1956 failCount++; 1957 pattern = Pattern.compile("\\07"); 1958 matcher = pattern.matcher("\u0007"); 1959 if (!matcher.matches()) 1960 failCount++; 1961 pattern = Pattern.compile("\\007"); 1962 matcher = pattern.matcher("\u0007"); 1963 if (!matcher.matches()) 1964 failCount++; 1965 pattern = Pattern.compile("\\0007"); 1966 matcher = pattern.matcher("\u0007"); 1967 if (!matcher.matches()) 1968 failCount++; 1969 pattern = Pattern.compile("\\040"); 1970 matcher = pattern.matcher("\u0020"); 1971 if (!matcher.matches()) 1972 failCount++; 1973 pattern = Pattern.compile("\\0403"); 1974 matcher = pattern.matcher("\u00203"); 1975 if (!matcher.matches()) 1976 failCount++; 1977 pattern = Pattern.compile("\\0103"); 1978 matcher = pattern.matcher("\u0043"); 1979 if (!matcher.matches()) 1980 failCount++; 1981 1982 report("Octal"); 1983 } 1984 1985 private static void longPatternTest() throws Exception { 1986 try { 1987 Pattern pattern = Pattern.compile( 1988 "a 32-character-long pattern xxxx"); 1989 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 1990 pattern = Pattern.compile("a thirty four character long regex"); 1991 StringBuffer patternToBe = new StringBuffer(101); 1992 for (int i=0; i<100; i++) 1993 patternToBe.append((char)(97 + i%26)); 1994 pattern = Pattern.compile(patternToBe.toString()); 1995 } catch (PatternSyntaxException e) { 1996 failCount++; 1997 } 1998 1999 // Supplementary character test 2000 try { 2001 Pattern pattern = Pattern.compile( 2002 toSupplementaries("a 32-character-long pattern xxxx")); 2003 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 2004 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 2005 StringBuffer patternToBe = new StringBuffer(101*2); 2006 for (int i=0; i<100; i++) 2007 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 2008 + 97 + i%26)); 2009 pattern = Pattern.compile(patternToBe.toString()); 2010 } catch (PatternSyntaxException e) { 2011 failCount++; 2012 } 2013 report("LongPattern"); 2014 } 2015 2016 private static void group0Test() throws Exception { 2017 Pattern pattern = Pattern.compile("(tes)ting"); 2018 Matcher matcher = pattern.matcher("testing"); 2019 check(matcher, "testing"); 2020 2021 matcher.reset("testing"); 2022 if (matcher.lookingAt()) { 2023 if (!matcher.group(0).equals("testing")) 2024 failCount++; 2025 } else { 2026 failCount++; 2027 } 2028 2029 matcher.reset("testing"); 2030 if (matcher.matches()) { 2031 if (!matcher.group(0).equals("testing")) 2032 failCount++; 2033 } else { 2034 failCount++; 2035 } 2036 2037 pattern = Pattern.compile("(tes)ting"); 2038 matcher = pattern.matcher("testing"); 2039 if (matcher.lookingAt()) { 2040 if (!matcher.group(0).equals("testing")) 2041 failCount++; 2042 } else { 2043 failCount++; 2044 } 2045 2046 pattern = Pattern.compile("^(tes)ting"); 2047 matcher = pattern.matcher("testing"); 2048 if (matcher.matches()) { 2049 if (!matcher.group(0).equals("testing")) 2050 failCount++; 2051 } else { 2052 failCount++; 2053 } 2054 2055 // Supplementary character test 2056 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2057 matcher = pattern.matcher(toSupplementaries("testing")); 2058 check(matcher, toSupplementaries("testing")); 2059 2060 matcher.reset(toSupplementaries("testing")); 2061 if (matcher.lookingAt()) { 2062 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2063 failCount++; 2064 } else { 2065 failCount++; 2066 } 2067 2068 matcher.reset(toSupplementaries("testing")); 2069 if (matcher.matches()) { 2070 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2071 failCount++; 2072 } else { 2073 failCount++; 2074 } 2075 2076 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2077 matcher = pattern.matcher(toSupplementaries("testing")); 2078 if (matcher.lookingAt()) { 2079 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2080 failCount++; 2081 } else { 2082 failCount++; 2083 } 2084 2085 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2086 matcher = pattern.matcher(toSupplementaries("testing")); 2087 if (matcher.matches()) { 2088 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2089 failCount++; 2090 } else { 2091 failCount++; 2092 } 2093 2094 report("Group0"); 2095 } 2096 2097 private static void findIntTest() throws Exception { 2098 Pattern p = Pattern.compile("blah"); 2099 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2100 boolean result = m.find(2); 2101 if (!result) 2102 failCount++; 2103 2104 p = Pattern.compile("$"); 2105 m = p.matcher("1234567890"); 2106 result = m.find(10); 2107 if (!result) 2108 failCount++; 2109 try { 2110 result = m.find(11); 2111 failCount++; 2112 } catch (IndexOutOfBoundsException e) { 2113 // correct result 2114 } 2115 2116 // Supplementary character test 2117 p = Pattern.compile(toSupplementaries("blah")); 2118 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2119 result = m.find(2); 2120 if (!result) 2121 failCount++; 2122 2123 report("FindInt"); 2124 } 2125 2126 private static void emptyPatternTest() throws Exception { 2127 Pattern p = Pattern.compile(""); 2128 Matcher m = p.matcher("foo"); 2129 2130 // Should find empty pattern at beginning of input 2131 boolean result = m.find(); 2132 if (result != true) 2133 failCount++; 2134 if (m.start() != 0) 2135 failCount++; 2136 2137 // Should not match entire input if input is not empty 2138 m.reset(); 2139 result = m.matches(); 2140 if (result == true) 2141 failCount++; 2142 2143 try { 2144 m.start(0); 2145 failCount++; 2146 } catch (IllegalStateException e) { 2147 // Correct result 2148 } 2149 2150 // Should match entire input if input is empty 2151 m.reset(""); 2152 result = m.matches(); 2153 if (result != true) 2154 failCount++; 2155 2156 result = Pattern.matches("", ""); 2157 if (result != true) 2158 failCount++; 2159 2160 result = Pattern.matches("", "foo"); 2161 if (result == true) 2162 failCount++; 2163 report("EmptyPattern"); 2164 } 2165 2166 private static void charClassTest() throws Exception { 2167 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2168 check(pattern, "blahb]blech", true); 2169 2170 pattern = Pattern.compile("[abc[def]]"); 2171 check(pattern, "b", true); 2172 2173 // Supplementary character tests 2174 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2175 check(pattern, toSupplementaries("blahb]blech"), true); 2176 2177 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2178 check(pattern, toSupplementaries("b"), true); 2179 2180 try { 2181 // u00ff when UNICODE_CASE 2182 pattern = Pattern.compile("[ab\u00ffcd]", 2183 Pattern.CASE_INSENSITIVE| 2184 Pattern.UNICODE_CASE); 2185 check(pattern, "ab\u00ffcd", true); 2186 check(pattern, "Ab\u0178Cd", true); 2187 2188 // u00b5 when UNICODE_CASE 2189 pattern = Pattern.compile("[ab\u00b5cd]", 2190 Pattern.CASE_INSENSITIVE| 2191 Pattern.UNICODE_CASE); 2192 check(pattern, "ab\u00b5cd", true); 2193 check(pattern, "Ab\u039cCd", true); 2194 } catch (Exception e) { failCount++; } 2195 2196 /* Special cases 2197 (1)LatinSmallLetterLongS u+017f 2198 (2)LatinSmallLetterDotlessI u+0131 2199 (3)LatineCapitalLetterIWithDotAbove u+0130 2200 (4)KelvinSign u+212a 2201 (5)AngstromSign u+212b 2202 */ 2203 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2204 pattern = Pattern.compile("[sik\u00c5]+", flags); 2205 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2206 failCount++; 2207 2208 report("CharClass"); 2209 } 2210 2211 private static void caretTest() throws Exception { 2212 Pattern pattern = Pattern.compile("\\w*"); 2213 Matcher matcher = pattern.matcher("a#bc#def##g"); 2214 check(matcher, "a"); 2215 check(matcher, ""); 2216 check(matcher, "bc"); 2217 check(matcher, ""); 2218 check(matcher, "def"); 2219 check(matcher, ""); 2220 check(matcher, ""); 2221 check(matcher, "g"); 2222 check(matcher, ""); 2223 if (matcher.find()) 2224 failCount++; 2225 2226 pattern = Pattern.compile("^\\w*"); 2227 matcher = pattern.matcher("a#bc#def##g"); 2228 check(matcher, "a"); 2229 if (matcher.find()) 2230 failCount++; 2231 2232 pattern = Pattern.compile("\\w"); 2233 matcher = pattern.matcher("abc##x"); 2234 check(matcher, "a"); 2235 check(matcher, "b"); 2236 check(matcher, "c"); 2237 check(matcher, "x"); 2238 if (matcher.find()) 2239 failCount++; 2240 2241 pattern = Pattern.compile("^\\w"); 2242 matcher = pattern.matcher("abc##x"); 2243 check(matcher, "a"); 2244 if (matcher.find()) 2245 failCount++; 2246 2247 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2248 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2249 check(matcher, "abc"); 2250 if (matcher.find()) 2251 failCount++; 2252 2253 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2254 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2255 check(matcher, "abc"); 2256 check(matcher, "jkl"); 2257 if (matcher.find()) 2258 failCount++; 2259 2260 pattern = Pattern.compile("^", Pattern.MULTILINE); 2261 matcher = pattern.matcher("this is some text"); 2262 String result = matcher.replaceAll("X"); 2263 if (!result.equals("Xthis is some text")) 2264 failCount++; 2265 2266 pattern = Pattern.compile("^"); 2267 matcher = pattern.matcher("this is some text"); 2268 result = matcher.replaceAll("X"); 2269 if (!result.equals("Xthis is some text")) 2270 failCount++; 2271 2272 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2273 matcher = pattern.matcher("this is some text\n"); 2274 result = matcher.replaceAll("X"); 2275 if (!result.equals("Xthis is some text\n")) 2276 failCount++; 2277 2278 report("Caret"); 2279 } 2280 2281 private static void groupCaptureTest() throws Exception { 2282 // Independent group 2283 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2284 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2285 matcher.find(); 2286 try { 2287 String blah = matcher.group(1); 2288 failCount++; 2289 } catch (IndexOutOfBoundsException ioobe) { 2290 // Good result 2291 } 2292 // Pure group 2293 pattern = Pattern.compile("x+(?:y+)z+"); 2294 matcher = pattern.matcher("xxxyyyzzz"); 2295 matcher.find(); 2296 try { 2297 String blah = matcher.group(1); 2298 failCount++; 2299 } catch (IndexOutOfBoundsException ioobe) { 2300 // Good result 2301 } 2302 2303 // Supplementary character tests 2304 // Independent group 2305 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2306 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2307 matcher.find(); 2308 try { 2309 String blah = matcher.group(1); 2310 failCount++; 2311 } catch (IndexOutOfBoundsException ioobe) { 2312 // Good result 2313 } 2314 // Pure group 2315 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2316 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2317 matcher.find(); 2318 try { 2319 String blah = matcher.group(1); 2320 failCount++; 2321 } catch (IndexOutOfBoundsException ioobe) { 2322 // Good result 2323 } 2324 2325 report("GroupCapture"); 2326 } 2327 2328 private static void backRefTest() throws Exception { 2329 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2330 check(pattern, "zzzaabcazzz", true); 2331 2332 pattern = Pattern.compile("(a*)bc\\1"); 2333 check(pattern, "zzzaabcaazzz", true); 2334 2335 pattern = Pattern.compile("(abc)(def)\\1"); 2336 check(pattern, "abcdefabc", true); 2337 2338 pattern = Pattern.compile("(abc)(def)\\3"); 2339 check(pattern, "abcdefabc", false); 2340 2341 try { 2342 for (int i = 1; i < 10; i++) { 2343 // Make sure backref 1-9 are always accepted 2344 pattern = Pattern.compile("abcdef\\" + i); 2345 // and fail to match if the target group does not exit 2346 check(pattern, "abcdef", false); 2347 } 2348 } catch(PatternSyntaxException e) { 2349 failCount++; 2350 } 2351 2352 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2353 check(pattern, "abcdefghija", false); 2354 check(pattern, "abcdefghija1", true); 2355 2356 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2357 check(pattern, "abcdefghijkk", true); 2358 2359 pattern = Pattern.compile("(a)bcdefghij\\11"); 2360 check(pattern, "abcdefghija1", true); 2361 2362 // Supplementary character tests 2363 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2364 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2365 2366 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2367 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2368 2369 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2370 check(pattern, toSupplementaries("abcdefabc"), true); 2371 2372 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2373 check(pattern, toSupplementaries("abcdefabc"), false); 2374 2375 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2376 check(pattern, toSupplementaries("abcdefghija"), false); 2377 check(pattern, toSupplementaries("abcdefghija1"), true); 2378 2379 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2380 check(pattern, toSupplementaries("abcdefghijkk"), true); 2381 2382 report("BackRef"); 2383 } 2384 2385 /** 2386 * Unicode Technical Report #18, section 2.6 End of Line 2387 * There is no empty line to be matched in the sequence \u000D\u000A 2388 * but there is an empty line in the sequence \u000A\u000D. 2389 */ 2390 private static void anchorTest() throws Exception { 2391 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2392 Matcher m = p.matcher("blah1\r\nblah2"); 2393 m.find(); 2394 m.find(); 2395 if (!m.group().equals("blah2")) 2396 failCount++; 2397 2398 m.reset("blah1\n\rblah2"); 2399 m.find(); 2400 m.find(); 2401 m.find(); 2402 if (!m.group().equals("blah2")) 2403 failCount++; 2404 2405 // Test behavior of $ with \r\n at end of input 2406 p = Pattern.compile(".+$"); 2407 m = p.matcher("blah1\r\n"); 2408 if (!m.find()) 2409 failCount++; 2410 if (!m.group().equals("blah1")) 2411 failCount++; 2412 if (m.find()) 2413 failCount++; 2414 2415 // Test behavior of $ with \r\n at end of input in multiline 2416 p = Pattern.compile(".+$", Pattern.MULTILINE); 2417 m = p.matcher("blah1\r\n"); 2418 if (!m.find()) 2419 failCount++; 2420 if (m.find()) 2421 failCount++; 2422 2423 // Test for $ recognition of \u0085 for bug 4527731 2424 p = Pattern.compile(".+$", Pattern.MULTILINE); 2425 m = p.matcher("blah1\u0085"); 2426 if (!m.find()) 2427 failCount++; 2428 2429 // Supplementary character test 2430 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2431 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2432 m.find(); 2433 m.find(); 2434 if (!m.group().equals(toSupplementaries("blah2"))) 2435 failCount++; 2436 2437 m.reset(toSupplementaries("blah1\n\rblah2")); 2438 m.find(); 2439 m.find(); 2440 m.find(); 2441 if (!m.group().equals(toSupplementaries("blah2"))) 2442 failCount++; 2443 2444 // Test behavior of $ with \r\n at end of input 2445 p = Pattern.compile(".+$"); 2446 m = p.matcher(toSupplementaries("blah1\r\n")); 2447 if (!m.find()) 2448 failCount++; 2449 if (!m.group().equals(toSupplementaries("blah1"))) 2450 failCount++; 2451 if (m.find()) 2452 failCount++; 2453 2454 // Test behavior of $ with \r\n at end of input in multiline 2455 p = Pattern.compile(".+$", Pattern.MULTILINE); 2456 m = p.matcher(toSupplementaries("blah1\r\n")); 2457 if (!m.find()) 2458 failCount++; 2459 if (m.find()) 2460 failCount++; 2461 2462 // Test for $ recognition of \u0085 for bug 4527731 2463 p = Pattern.compile(".+$", Pattern.MULTILINE); 2464 m = p.matcher(toSupplementaries("blah1\u0085")); 2465 if (!m.find()) 2466 failCount++; 2467 2468 report("Anchors"); 2469 } 2470 2471 /** 2472 * A basic sanity test of Matcher.lookingAt(). 2473 */ 2474 private static void lookingAtTest() throws Exception { 2475 Pattern p = Pattern.compile("(ab)(c*)"); 2476 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2477 2478 if (!m.lookingAt()) 2479 failCount++; 2480 2481 if (!m.group().equals(m.group(0))) 2482 failCount++; 2483 2484 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2485 if (m.lookingAt()) 2486 failCount++; 2487 2488 // Supplementary character test 2489 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2490 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2491 2492 if (!m.lookingAt()) 2493 failCount++; 2494 2495 if (!m.group().equals(m.group(0))) 2496 failCount++; 2497 2498 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2499 if (m.lookingAt()) 2500 failCount++; 2501 2502 report("Looking At"); 2503 } 2504 2505 /** 2506 * A basic sanity test of Matcher.matches(). 2507 */ 2508 private static void matchesTest() throws Exception { 2509 // matches() 2510 Pattern p = Pattern.compile("ulb(c*)"); 2511 Matcher m = p.matcher("ulbcccccc"); 2512 if (!m.matches()) 2513 failCount++; 2514 2515 // find() but not matches() 2516 m.reset("zzzulbcccccc"); 2517 if (m.matches()) 2518 failCount++; 2519 2520 // lookingAt() but not matches() 2521 m.reset("ulbccccccdef"); 2522 if (m.matches()) 2523 failCount++; 2524 2525 // matches() 2526 p = Pattern.compile("a|ad"); 2527 m = p.matcher("ad"); 2528 if (!m.matches()) 2529 failCount++; 2530 2531 // Supplementary character test 2532 // matches() 2533 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2534 m = p.matcher(toSupplementaries("ulbcccccc")); 2535 if (!m.matches()) 2536 failCount++; 2537 2538 // find() but not matches() 2539 m.reset(toSupplementaries("zzzulbcccccc")); 2540 if (m.matches()) 2541 failCount++; 2542 2543 // lookingAt() but not matches() 2544 m.reset(toSupplementaries("ulbccccccdef")); 2545 if (m.matches()) 2546 failCount++; 2547 2548 // matches() 2549 p = Pattern.compile(toSupplementaries("a|ad")); 2550 m = p.matcher(toSupplementaries("ad")); 2551 if (!m.matches()) 2552 failCount++; 2553 2554 report("Matches"); 2555 } 2556 2557 /** 2558 * A basic sanity test of Pattern.matches(). 2559 */ 2560 private static void patternMatchesTest() throws Exception { 2561 // matches() 2562 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2563 toSupplementaries("ulbcccccc"))) 2564 failCount++; 2565 2566 // find() but not matches() 2567 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2568 toSupplementaries("zzzulbcccccc"))) 2569 failCount++; 2570 2571 // lookingAt() but not matches() 2572 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2573 toSupplementaries("ulbccccccdef"))) 2574 failCount++; 2575 2576 // Supplementary character test 2577 // matches() 2578 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2579 toSupplementaries("ulbcccccc"))) 2580 failCount++; 2581 2582 // find() but not matches() 2583 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2584 toSupplementaries("zzzulbcccccc"))) 2585 failCount++; 2586 2587 // lookingAt() but not matches() 2588 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2589 toSupplementaries("ulbccccccdef"))) 2590 failCount++; 2591 2592 report("Pattern Matches"); 2593 } 2594 2595 /** 2596 * Canonical equivalence testing. Tests the ability of the engine 2597 * to match sequences that are not explicitly specified in the 2598 * pattern when they are considered equivalent by the Unicode Standard. 2599 */ 2600 private static void ceTest() throws Exception { 2601 // Decomposed char outside char classes 2602 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2603 Matcher m = p.matcher("test\u00e5"); 2604 if (!m.matches()) 2605 failCount++; 2606 2607 m.reset("testa\u030a"); 2608 if (!m.matches()) 2609 failCount++; 2610 2611 // Composed char outside char classes 2612 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2613 m = p.matcher("test\u00e5"); 2614 if (!m.matches()) 2615 failCount++; 2616 2617 m.reset("testa\u030a"); 2618 if (!m.find()) 2619 failCount++; 2620 2621 // Decomposed char inside a char class 2622 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2623 m = p.matcher("test\u00e5"); 2624 if (!m.find()) 2625 failCount++; 2626 2627 m.reset("testa\u030a"); 2628 if (!m.find()) 2629 failCount++; 2630 2631 // Composed char inside a char class 2632 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2633 m = p.matcher("test\u00e5"); 2634 if (!m.find()) 2635 failCount++; 2636 2637 m.reset("testa\u0300"); 2638 if (!m.find()) 2639 failCount++; 2640 2641 m.reset("testa\u030a"); 2642 if (!m.find()) 2643 failCount++; 2644 2645 // Marks that cannot legally change order and be equivalent 2646 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2647 check(p, "testa\u0308\u0300", true); 2648 check(p, "testa\u0300\u0308", false); 2649 2650 // Marks that can legally change order and be equivalent 2651 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2652 check(p, "testa\u0308\u0323", true); 2653 check(p, "testa\u0323\u0308", true); 2654 2655 // Test all equivalences of the sequence a\u0308\u0323\u0300 2656 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2657 check(p, "testa\u0308\u0323\u0300", true); 2658 check(p, "testa\u0323\u0308\u0300", true); 2659 check(p, "testa\u0308\u0300\u0323", true); 2660 check(p, "test\u00e4\u0323\u0300", true); 2661 check(p, "test\u00e4\u0300\u0323", true); 2662 2663 Object[][] data = new Object[][] { 2664 2665 // JDK-4867170 2666 { "[\u1f80-\u1f82]", "ab\u1f80cd", "f", true }, 2667 { "[\u1f80-\u1f82]", "ab\u1f81cd", "f", true }, 2668 { "[\u1f80-\u1f82]", "ab\u1f82cd", "f", true }, 2669 { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true }, 2670 { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true }, 2671 { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd", "f", true }, 2672 { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd", "f", true }, 2673 2674 { "\\p{IsGreek}", "ab\u1f80cd", "f", true }, 2675 { "\\p{IsGreek}", "ab\u1f81cd", "f", true }, 2676 { "\\p{IsGreek}", "ab\u1f82cd", "f", true }, 2677 { "\\p{IsGreek}", "ab\u03b1\u0314\u0345cd", "f", true }, 2678 { "\\p{IsGreek}", "ab\u1f01\u0345cd", "f", true }, 2679 2680 // backtracking, force to match "\u1f80", instead of \u1f82" 2681 { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true }, 2682 2683 { "[\\p{IsGreek}]", "\u03b1\u0314\u0345", "m", true }, 2684 { "\\p{IsGreek}", "\u03b1\u0314\u0345", "m", true }, 2685 2686 { "[^\u1f80-\u1f82]","\u1f81", "m", false }, 2687 { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345", "m", false }, 2688 { "[^\u1f01\u0345]", "\u1f81", "f", false }, 2689 2690 { "[^\u1f81]+", "\u1f80\u1f82", "f", true }, 2691 { "[\u1f80]", "ab\u1f80cd", "f", true }, 2692 { "\u1f80", "ab\u1f80cd", "f", true }, 2693 { "\u1f00\u0345\u0300", "\u1f82", "m", true }, 2694 { "\u1f80", "-\u1f00\u0345\u0300-", "f", true }, 2695 { "\u1f82", "\u1f00\u0345\u0300", "m", true }, 2696 { "\u1f82", "\u1f80\u0300", "m", true }, 2697 2698 // JDK-7080302 # compile failed 2699 { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true}, 2700 2701 // JDK-6728861, same cause as above one 2702 // Pattern pt = Pattern.compile("één", Pattern.CANON_EQ); 2703 { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true}, 2704 2705 // JDK-6995635 2706 // Pattern patternThatIsGonnaBug=Pattern.compile("(ë)",Pattern.CANON_EQ); 2707 { "(\u00e9)", "e\u0301", "m", true }, 2708 2709 // JDK-6736245 2710 // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc 2711 { "\u2ADC", "\u2ADC", "m", true}, // NFC 2712 { "\u2ADC", "\u2ADD\u0338", "m", true}, // NFD 2713 2714 // 4916384. 2715 // Decomposed hangul (jamos) works inside clazz 2716 { "[\u1100\u1161]", "\u1100\u1161", "m", true}, 2717 { "[\u1100\u1161]", "\uac00", "m", true}, 2718 2719 { "[\uac00]", "\u1100\u1161", "m", true}, 2720 { "[\uac00]", "\uac00", "m", true}, 2721 2722 // Decomposed hangul (jamos) 2723 { "\u1100\u1161", "\u1100\u1161", "m", true}, 2724 { "\u1100\u1161", "\uac00", "m", true}, 2725 2726 // Composed hangul 2727 { "\uac00", "\u1100\u1161", "m", true }, 2728 { "\uac00", "\uac00", "m", true }, 2729 2730 /* Need a NFDSlice to nfd the source to solve this issue 2731 u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2732 u+1d1bc -> nfd: <u+1d1ba><u+1d165> -> nfc: <u+1d1ba><u+1d165> 2733 <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f> 2734 2735 // Decomposed supplementary outside char classes 2736 // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2737 // Composed supplementary outside char classes 2738 // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2739 */ 2740 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2741 { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true }, 2742 2743 { "test\ud834\uddc0", "test\ud834\uddc0", "m", true }, 2744 { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true }, 2745 }; 2746 2747 int failCount = 0; 2748 for (Object[] d : data) { 2749 String pn = (String)d[0]; 2750 String tt = (String)d[1]; 2751 boolean isFind = "f".equals(((String)d[2])); 2752 boolean expected = (boolean)d[3]; 2753 boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find() 2754 : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches(); 2755 if (ret != expected) { 2756 failCount++; 2757 continue; 2758 } 2759 } 2760 report("Canonical Equivalence"); 2761 } 2762 2763 /** 2764 * A basic sanity test of Matcher.replaceAll(). 2765 */ 2766 private static void globalSubstitute() throws Exception { 2767 // Global substitution with a literal 2768 Pattern p = Pattern.compile("(ab)(c*)"); 2769 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2770 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2771 failCount++; 2772 2773 m.reset("zzzabccczzzabcczzzabccczzz"); 2774 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2775 failCount++; 2776 2777 // Global substitution with groups 2778 m.reset("zzzabccczzzabcczzzabccczzz"); 2779 String result = m.replaceAll("$1"); 2780 if (!result.equals("zzzabzzzabzzzabzzz")) 2781 failCount++; 2782 2783 // Supplementary character test 2784 // Global substitution with a literal 2785 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2786 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2787 if (!m.replaceAll(toSupplementaries("test")). 2788 equals(toSupplementaries("testzzztestzzztest"))) 2789 failCount++; 2790 2791 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2792 if (!m.replaceAll(toSupplementaries("test")). 2793 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2794 failCount++; 2795 2796 // Global substitution with groups 2797 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2798 result = m.replaceAll("$1"); 2799 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2800 failCount++; 2801 2802 report("Global Substitution"); 2803 } 2804 2805 /** 2806 * Tests the usage of Matcher.appendReplacement() with literal 2807 * and group substitutions. 2808 */ 2809 private static void stringbufferSubstitute() throws Exception { 2810 // SB substitution with literal 2811 String blah = "zzzblahzzz"; 2812 Pattern p = Pattern.compile("blah"); 2813 Matcher m = p.matcher(blah); 2814 StringBuffer result = new StringBuffer(); 2815 try { 2816 m.appendReplacement(result, "blech"); 2817 failCount++; 2818 } catch (IllegalStateException e) { 2819 } 2820 m.find(); 2821 m.appendReplacement(result, "blech"); 2822 if (!result.toString().equals("zzzblech")) 2823 failCount++; 2824 2825 m.appendTail(result); 2826 if (!result.toString().equals("zzzblechzzz")) 2827 failCount++; 2828 2829 // SB substitution with groups 2830 blah = "zzzabcdzzz"; 2831 p = Pattern.compile("(ab)(cd)*"); 2832 m = p.matcher(blah); 2833 result = new StringBuffer(); 2834 try { 2835 m.appendReplacement(result, "$1"); 2836 failCount++; 2837 } catch (IllegalStateException e) { 2838 } 2839 m.find(); 2840 m.appendReplacement(result, "$1"); 2841 if (!result.toString().equals("zzzab")) 2842 failCount++; 2843 2844 m.appendTail(result); 2845 if (!result.toString().equals("zzzabzzz")) 2846 failCount++; 2847 2848 // SB substitution with 3 groups 2849 blah = "zzzabcdcdefzzz"; 2850 p = Pattern.compile("(ab)(cd)*(ef)"); 2851 m = p.matcher(blah); 2852 result = new StringBuffer(); 2853 try { 2854 m.appendReplacement(result, "$1w$2w$3"); 2855 failCount++; 2856 } catch (IllegalStateException e) { 2857 } 2858 m.find(); 2859 m.appendReplacement(result, "$1w$2w$3"); 2860 if (!result.toString().equals("zzzabwcdwef")) 2861 failCount++; 2862 2863 m.appendTail(result); 2864 if (!result.toString().equals("zzzabwcdwefzzz")) 2865 failCount++; 2866 2867 // SB substitution with groups and three matches 2868 // skipping middle match 2869 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2870 p = Pattern.compile("(ab)(cd*)"); 2871 m = p.matcher(blah); 2872 result = new StringBuffer(); 2873 try { 2874 m.appendReplacement(result, "$1"); 2875 failCount++; 2876 } catch (IllegalStateException e) { 2877 } 2878 m.find(); 2879 m.appendReplacement(result, "$1"); 2880 if (!result.toString().equals("zzzab")) 2881 failCount++; 2882 2883 m.find(); 2884 m.find(); 2885 m.appendReplacement(result, "$2"); 2886 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2887 failCount++; 2888 2889 m.appendTail(result); 2890 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2891 failCount++; 2892 2893 // Check to make sure escaped $ is ignored 2894 blah = "zzzabcdcdefzzz"; 2895 p = Pattern.compile("(ab)(cd)*(ef)"); 2896 m = p.matcher(blah); 2897 result = new StringBuffer(); 2898 m.find(); 2899 m.appendReplacement(result, "$1w\\$2w$3"); 2900 if (!result.toString().equals("zzzabw$2wef")) 2901 failCount++; 2902 2903 m.appendTail(result); 2904 if (!result.toString().equals("zzzabw$2wefzzz")) 2905 failCount++; 2906 2907 // Check to make sure a reference to nonexistent group causes error 2908 blah = "zzzabcdcdefzzz"; 2909 p = Pattern.compile("(ab)(cd)*(ef)"); 2910 m = p.matcher(blah); 2911 result = new StringBuffer(); 2912 m.find(); 2913 try { 2914 m.appendReplacement(result, "$1w$5w$3"); 2915 failCount++; 2916 } catch (IndexOutOfBoundsException ioobe) { 2917 // Correct result 2918 } 2919 2920 // Check double digit group references 2921 blah = "zzz123456789101112zzz"; 2922 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2923 m = p.matcher(blah); 2924 result = new StringBuffer(); 2925 m.find(); 2926 m.appendReplacement(result, "$1w$11w$3"); 2927 if (!result.toString().equals("zzz1w11w3")) 2928 failCount++; 2929 2930 // Check to make sure it backs off $15 to $1 if only three groups 2931 blah = "zzzabcdcdefzzz"; 2932 p = Pattern.compile("(ab)(cd)*(ef)"); 2933 m = p.matcher(blah); 2934 result = new StringBuffer(); 2935 m.find(); 2936 m.appendReplacement(result, "$1w$15w$3"); 2937 if (!result.toString().equals("zzzabwab5wef")) 2938 failCount++; 2939 2940 2941 // Supplementary character test 2942 // SB substitution with literal 2943 blah = toSupplementaries("zzzblahzzz"); 2944 p = Pattern.compile(toSupplementaries("blah")); 2945 m = p.matcher(blah); 2946 result = new StringBuffer(); 2947 try { 2948 m.appendReplacement(result, toSupplementaries("blech")); 2949 failCount++; 2950 } catch (IllegalStateException e) { 2951 } 2952 m.find(); 2953 m.appendReplacement(result, toSupplementaries("blech")); 2954 if (!result.toString().equals(toSupplementaries("zzzblech"))) 2955 failCount++; 2956 2957 m.appendTail(result); 2958 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 2959 failCount++; 2960 2961 // SB substitution with groups 2962 blah = toSupplementaries("zzzabcdzzz"); 2963 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 2964 m = p.matcher(blah); 2965 result = new StringBuffer(); 2966 try { 2967 m.appendReplacement(result, "$1"); 2968 failCount++; 2969 } catch (IllegalStateException e) { 2970 } 2971 m.find(); 2972 m.appendReplacement(result, "$1"); 2973 if (!result.toString().equals(toSupplementaries("zzzab"))) 2974 failCount++; 2975 2976 m.appendTail(result); 2977 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 2978 failCount++; 2979 2980 // SB substitution with 3 groups 2981 blah = toSupplementaries("zzzabcdcdefzzz"); 2982 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2983 m = p.matcher(blah); 2984 result = new StringBuffer(); 2985 try { 2986 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2987 failCount++; 2988 } catch (IllegalStateException e) { 2989 } 2990 m.find(); 2991 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2992 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 2993 failCount++; 2994 2995 m.appendTail(result); 2996 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 2997 failCount++; 2998 2999 // SB substitution with groups and three matches 3000 // skipping middle match 3001 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3002 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3003 m = p.matcher(blah); 3004 result = new StringBuffer(); 3005 try { 3006 m.appendReplacement(result, "$1"); 3007 failCount++; 3008 } catch (IllegalStateException e) { 3009 } 3010 m.find(); 3011 m.appendReplacement(result, "$1"); 3012 if (!result.toString().equals(toSupplementaries("zzzab"))) 3013 failCount++; 3014 3015 m.find(); 3016 m.find(); 3017 m.appendReplacement(result, "$2"); 3018 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3019 failCount++; 3020 3021 m.appendTail(result); 3022 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3023 failCount++; 3024 3025 // Check to make sure escaped $ is ignored 3026 blah = toSupplementaries("zzzabcdcdefzzz"); 3027 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3028 m = p.matcher(blah); 3029 result = new StringBuffer(); 3030 m.find(); 3031 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3032 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3033 failCount++; 3034 3035 m.appendTail(result); 3036 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3037 failCount++; 3038 3039 // Check to make sure a reference to nonexistent group causes error 3040 blah = toSupplementaries("zzzabcdcdefzzz"); 3041 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3042 m = p.matcher(blah); 3043 result = new StringBuffer(); 3044 m.find(); 3045 try { 3046 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3047 failCount++; 3048 } catch (IndexOutOfBoundsException ioobe) { 3049 // Correct result 3050 } 3051 3052 // Check double digit group references 3053 blah = toSupplementaries("zzz123456789101112zzz"); 3054 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3055 m = p.matcher(blah); 3056 result = new StringBuffer(); 3057 m.find(); 3058 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3059 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3060 failCount++; 3061 3062 // Check to make sure it backs off $15 to $1 if only three groups 3063 blah = toSupplementaries("zzzabcdcdefzzz"); 3064 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3065 m = p.matcher(blah); 3066 result = new StringBuffer(); 3067 m.find(); 3068 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3069 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3070 failCount++; 3071 3072 // Check nothing has been appended into the output buffer if 3073 // the replacement string triggers IllegalArgumentException. 3074 p = Pattern.compile("(abc)"); 3075 m = p.matcher("abcd"); 3076 result = new StringBuffer(); 3077 m.find(); 3078 try { 3079 m.appendReplacement(result, ("xyz$g")); 3080 failCount++; 3081 } catch (IllegalArgumentException iae) { 3082 if (result.length() != 0) 3083 failCount++; 3084 } 3085 3086 report("SB Substitution"); 3087 } 3088 3089 /** 3090 * Tests the usage of Matcher.appendReplacement() with literal 3091 * and group substitutions. 3092 */ 3093 private static void stringbuilderSubstitute() throws Exception { 3094 // SB substitution with literal 3095 String blah = "zzzblahzzz"; 3096 Pattern p = Pattern.compile("blah"); 3097 Matcher m = p.matcher(blah); 3098 StringBuilder result = new StringBuilder(); 3099 try { 3100 m.appendReplacement(result, "blech"); 3101 failCount++; 3102 } catch (IllegalStateException e) { 3103 } 3104 m.find(); 3105 m.appendReplacement(result, "blech"); 3106 if (!result.toString().equals("zzzblech")) 3107 failCount++; 3108 3109 m.appendTail(result); 3110 if (!result.toString().equals("zzzblechzzz")) 3111 failCount++; 3112 3113 // SB substitution with groups 3114 blah = "zzzabcdzzz"; 3115 p = Pattern.compile("(ab)(cd)*"); 3116 m = p.matcher(blah); 3117 result = new StringBuilder(); 3118 try { 3119 m.appendReplacement(result, "$1"); 3120 failCount++; 3121 } catch (IllegalStateException e) { 3122 } 3123 m.find(); 3124 m.appendReplacement(result, "$1"); 3125 if (!result.toString().equals("zzzab")) 3126 failCount++; 3127 3128 m.appendTail(result); 3129 if (!result.toString().equals("zzzabzzz")) 3130 failCount++; 3131 3132 // SB substitution with 3 groups 3133 blah = "zzzabcdcdefzzz"; 3134 p = Pattern.compile("(ab)(cd)*(ef)"); 3135 m = p.matcher(blah); 3136 result = new StringBuilder(); 3137 try { 3138 m.appendReplacement(result, "$1w$2w$3"); 3139 failCount++; 3140 } catch (IllegalStateException e) { 3141 } 3142 m.find(); 3143 m.appendReplacement(result, "$1w$2w$3"); 3144 if (!result.toString().equals("zzzabwcdwef")) 3145 failCount++; 3146 3147 m.appendTail(result); 3148 if (!result.toString().equals("zzzabwcdwefzzz")) 3149 failCount++; 3150 3151 // SB substitution with groups and three matches 3152 // skipping middle match 3153 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3154 p = Pattern.compile("(ab)(cd*)"); 3155 m = p.matcher(blah); 3156 result = new StringBuilder(); 3157 try { 3158 m.appendReplacement(result, "$1"); 3159 failCount++; 3160 } catch (IllegalStateException e) { 3161 } 3162 m.find(); 3163 m.appendReplacement(result, "$1"); 3164 if (!result.toString().equals("zzzab")) 3165 failCount++; 3166 3167 m.find(); 3168 m.find(); 3169 m.appendReplacement(result, "$2"); 3170 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3171 failCount++; 3172 3173 m.appendTail(result); 3174 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3175 failCount++; 3176 3177 // Check to make sure escaped $ is ignored 3178 blah = "zzzabcdcdefzzz"; 3179 p = Pattern.compile("(ab)(cd)*(ef)"); 3180 m = p.matcher(blah); 3181 result = new StringBuilder(); 3182 m.find(); 3183 m.appendReplacement(result, "$1w\\$2w$3"); 3184 if (!result.toString().equals("zzzabw$2wef")) 3185 failCount++; 3186 3187 m.appendTail(result); 3188 if (!result.toString().equals("zzzabw$2wefzzz")) 3189 failCount++; 3190 3191 // Check to make sure a reference to nonexistent group causes error 3192 blah = "zzzabcdcdefzzz"; 3193 p = Pattern.compile("(ab)(cd)*(ef)"); 3194 m = p.matcher(blah); 3195 result = new StringBuilder(); 3196 m.find(); 3197 try { 3198 m.appendReplacement(result, "$1w$5w$3"); 3199 failCount++; 3200 } catch (IndexOutOfBoundsException ioobe) { 3201 // Correct result 3202 } 3203 3204 // Check double digit group references 3205 blah = "zzz123456789101112zzz"; 3206 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3207 m = p.matcher(blah); 3208 result = new StringBuilder(); 3209 m.find(); 3210 m.appendReplacement(result, "$1w$11w$3"); 3211 if (!result.toString().equals("zzz1w11w3")) 3212 failCount++; 3213 3214 // Check to make sure it backs off $15 to $1 if only three groups 3215 blah = "zzzabcdcdefzzz"; 3216 p = Pattern.compile("(ab)(cd)*(ef)"); 3217 m = p.matcher(blah); 3218 result = new StringBuilder(); 3219 m.find(); 3220 m.appendReplacement(result, "$1w$15w$3"); 3221 if (!result.toString().equals("zzzabwab5wef")) 3222 failCount++; 3223 3224 3225 // Supplementary character test 3226 // SB substitution with literal 3227 blah = toSupplementaries("zzzblahzzz"); 3228 p = Pattern.compile(toSupplementaries("blah")); 3229 m = p.matcher(blah); 3230 result = new StringBuilder(); 3231 try { 3232 m.appendReplacement(result, toSupplementaries("blech")); 3233 failCount++; 3234 } catch (IllegalStateException e) { 3235 } 3236 m.find(); 3237 m.appendReplacement(result, toSupplementaries("blech")); 3238 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3239 failCount++; 3240 m.appendTail(result); 3241 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3242 failCount++; 3243 3244 // SB substitution with groups 3245 blah = toSupplementaries("zzzabcdzzz"); 3246 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3247 m = p.matcher(blah); 3248 result = new StringBuilder(); 3249 try { 3250 m.appendReplacement(result, "$1"); 3251 failCount++; 3252 } catch (IllegalStateException e) { 3253 } 3254 m.find(); 3255 m.appendReplacement(result, "$1"); 3256 if (!result.toString().equals(toSupplementaries("zzzab"))) 3257 failCount++; 3258 3259 m.appendTail(result); 3260 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 3261 failCount++; 3262 3263 // SB substitution with 3 groups 3264 blah = toSupplementaries("zzzabcdcdefzzz"); 3265 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3266 m = p.matcher(blah); 3267 result = new StringBuilder(); 3268 try { 3269 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3270 failCount++; 3271 } catch (IllegalStateException e) { 3272 } 3273 m.find(); 3274 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3275 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3276 failCount++; 3277 3278 m.appendTail(result); 3279 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3280 failCount++; 3281 3282 // SB substitution with groups and three matches 3283 // skipping middle match 3284 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3285 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3286 m = p.matcher(blah); 3287 result = new StringBuilder(); 3288 try { 3289 m.appendReplacement(result, "$1"); 3290 failCount++; 3291 } catch (IllegalStateException e) { 3292 } 3293 m.find(); 3294 m.appendReplacement(result, "$1"); 3295 if (!result.toString().equals(toSupplementaries("zzzab"))) 3296 failCount++; 3297 3298 m.find(); 3299 m.find(); 3300 m.appendReplacement(result, "$2"); 3301 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3302 failCount++; 3303 3304 m.appendTail(result); 3305 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3306 failCount++; 3307 3308 // Check to make sure escaped $ is ignored 3309 blah = toSupplementaries("zzzabcdcdefzzz"); 3310 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3311 m = p.matcher(blah); 3312 result = new StringBuilder(); 3313 m.find(); 3314 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3315 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3316 failCount++; 3317 3318 m.appendTail(result); 3319 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3320 failCount++; 3321 3322 // Check to make sure a reference to nonexistent group causes error 3323 blah = toSupplementaries("zzzabcdcdefzzz"); 3324 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3325 m = p.matcher(blah); 3326 result = new StringBuilder(); 3327 m.find(); 3328 try { 3329 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3330 failCount++; 3331 } catch (IndexOutOfBoundsException ioobe) { 3332 // Correct result 3333 } 3334 // Check double digit group references 3335 blah = toSupplementaries("zzz123456789101112zzz"); 3336 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3337 m = p.matcher(blah); 3338 result = new StringBuilder(); 3339 m.find(); 3340 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3341 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3342 failCount++; 3343 3344 // Check to make sure it backs off $15 to $1 if only three groups 3345 blah = toSupplementaries("zzzabcdcdefzzz"); 3346 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3347 m = p.matcher(blah); 3348 result = new StringBuilder(); 3349 m.find(); 3350 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3351 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3352 failCount++; 3353 // Check nothing has been appended into the output buffer if 3354 // the replacement string triggers IllegalArgumentException. 3355 p = Pattern.compile("(abc)"); 3356 m = p.matcher("abcd"); 3357 result = new StringBuilder(); 3358 m.find(); 3359 try { 3360 m.appendReplacement(result, ("xyz$g")); 3361 failCount++; 3362 } catch (IllegalArgumentException iae) { 3363 if (result.length() != 0) 3364 failCount++; 3365 } 3366 report("SB Substitution 2"); 3367 } 3368 3369 /* 3370 * 5 groups of characters are created to make a substitution string. 3371 * A base string will be created including random lead chars, the 3372 * substitution string, and random trailing chars. 3373 * A pattern containing the 5 groups is searched for and replaced with: 3374 * random group + random string + random group. 3375 * The results are checked for correctness. 3376 */ 3377 private static void substitutionBasher() { 3378 for (int runs = 0; runs<1000; runs++) { 3379 // Create a base string to work in 3380 int leadingChars = generator.nextInt(10); 3381 StringBuffer baseBuffer = new StringBuffer(100); 3382 String leadingString = getRandomAlphaString(leadingChars); 3383 baseBuffer.append(leadingString); 3384 3385 // Create 5 groups of random number of random chars 3386 // Create the string to substitute 3387 // Create the pattern string to search for 3388 StringBuffer bufferToSub = new StringBuffer(25); 3389 StringBuffer bufferToPat = new StringBuffer(50); 3390 String[] groups = new String[5]; 3391 for(int i=0; i<5; i++) { 3392 int aGroupSize = generator.nextInt(5)+1; 3393 groups[i] = getRandomAlphaString(aGroupSize); 3394 bufferToSub.append(groups[i]); 3395 bufferToPat.append('('); 3396 bufferToPat.append(groups[i]); 3397 bufferToPat.append(')'); 3398 } 3399 String stringToSub = bufferToSub.toString(); 3400 String pattern = bufferToPat.toString(); 3401 3402 // Place sub string into working string at random index 3403 baseBuffer.append(stringToSub); 3404 3405 // Append random chars to end 3406 int trailingChars = generator.nextInt(10); 3407 String trailingString = getRandomAlphaString(trailingChars); 3408 baseBuffer.append(trailingString); 3409 String baseString = baseBuffer.toString(); 3410 3411 // Create test pattern and matcher 3412 Pattern p = Pattern.compile(pattern); 3413 Matcher m = p.matcher(baseString); 3414 3415 // Reject candidate if pattern happens to start early 3416 m.find(); 3417 if (m.start() < leadingChars) 3418 continue; 3419 3420 // Reject candidate if more than one match 3421 if (m.find()) 3422 continue; 3423 3424 // Construct a replacement string with : 3425 // random group + random string + random group 3426 StringBuffer bufferToRep = new StringBuffer(); 3427 int groupIndex1 = generator.nextInt(5); 3428 bufferToRep.append("$" + (groupIndex1 + 1)); 3429 String randomMidString = getRandomAlphaString(5); 3430 bufferToRep.append(randomMidString); 3431 int groupIndex2 = generator.nextInt(5); 3432 bufferToRep.append("$" + (groupIndex2 + 1)); 3433 String replacement = bufferToRep.toString(); 3434 3435 // Do the replacement 3436 String result = m.replaceAll(replacement); 3437 3438 // Construct expected result 3439 StringBuffer bufferToRes = new StringBuffer(); 3440 bufferToRes.append(leadingString); 3441 bufferToRes.append(groups[groupIndex1]); 3442 bufferToRes.append(randomMidString); 3443 bufferToRes.append(groups[groupIndex2]); 3444 bufferToRes.append(trailingString); 3445 String expectedResult = bufferToRes.toString(); 3446 3447 // Check results 3448 if (!result.equals(expectedResult)) 3449 failCount++; 3450 } 3451 3452 report("Substitution Basher"); 3453 } 3454 3455 /* 3456 * 5 groups of characters are created to make a substitution string. 3457 * A base string will be created including random lead chars, the 3458 * substitution string, and random trailing chars. 3459 * A pattern containing the 5 groups is searched for and replaced with: 3460 * random group + random string + random group. 3461 * The results are checked for correctness. 3462 */ 3463 private static void substitutionBasher2() { 3464 for (int runs = 0; runs<1000; runs++) { 3465 // Create a base string to work in 3466 int leadingChars = generator.nextInt(10); 3467 StringBuilder baseBuffer = new StringBuilder(100); 3468 String leadingString = getRandomAlphaString(leadingChars); 3469 baseBuffer.append(leadingString); 3470 3471 // Create 5 groups of random number of random chars 3472 // Create the string to substitute 3473 // Create the pattern string to search for 3474 StringBuilder bufferToSub = new StringBuilder(25); 3475 StringBuilder bufferToPat = new StringBuilder(50); 3476 String[] groups = new String[5]; 3477 for(int i=0; i<5; i++) { 3478 int aGroupSize = generator.nextInt(5)+1; 3479 groups[i] = getRandomAlphaString(aGroupSize); 3480 bufferToSub.append(groups[i]); 3481 bufferToPat.append('('); 3482 bufferToPat.append(groups[i]); 3483 bufferToPat.append(')'); 3484 } 3485 String stringToSub = bufferToSub.toString(); 3486 String pattern = bufferToPat.toString(); 3487 3488 // Place sub string into working string at random index 3489 baseBuffer.append(stringToSub); 3490 3491 // Append random chars to end 3492 int trailingChars = generator.nextInt(10); 3493 String trailingString = getRandomAlphaString(trailingChars); 3494 baseBuffer.append(trailingString); 3495 String baseString = baseBuffer.toString(); 3496 3497 // Create test pattern and matcher 3498 Pattern p = Pattern.compile(pattern); 3499 Matcher m = p.matcher(baseString); 3500 3501 // Reject candidate if pattern happens to start early 3502 m.find(); 3503 if (m.start() < leadingChars) 3504 continue; 3505 3506 // Reject candidate if more than one match 3507 if (m.find()) 3508 continue; 3509 3510 // Construct a replacement string with : 3511 // random group + random string + random group 3512 StringBuilder bufferToRep = new StringBuilder(); 3513 int groupIndex1 = generator.nextInt(5); 3514 bufferToRep.append("$" + (groupIndex1 + 1)); 3515 String randomMidString = getRandomAlphaString(5); 3516 bufferToRep.append(randomMidString); 3517 int groupIndex2 = generator.nextInt(5); 3518 bufferToRep.append("$" + (groupIndex2 + 1)); 3519 String replacement = bufferToRep.toString(); 3520 3521 // Do the replacement 3522 String result = m.replaceAll(replacement); 3523 3524 // Construct expected result 3525 StringBuilder bufferToRes = new StringBuilder(); 3526 bufferToRes.append(leadingString); 3527 bufferToRes.append(groups[groupIndex1]); 3528 bufferToRes.append(randomMidString); 3529 bufferToRes.append(groups[groupIndex2]); 3530 bufferToRes.append(trailingString); 3531 String expectedResult = bufferToRes.toString(); 3532 3533 // Check results 3534 if (!result.equals(expectedResult)) { 3535 failCount++; 3536 } 3537 } 3538 3539 report("Substitution Basher 2"); 3540 } 3541 3542 /** 3543 * Checks the handling of some escape sequences that the Pattern 3544 * class should process instead of the java compiler. These are 3545 * not in the file because the escapes should be be processed 3546 * by the Pattern class when the regex is compiled. 3547 */ 3548 private static void escapes() throws Exception { 3549 Pattern p = Pattern.compile("\\043"); 3550 Matcher m = p.matcher("#"); 3551 if (!m.find()) 3552 failCount++; 3553 3554 p = Pattern.compile("\\x23"); 3555 m = p.matcher("#"); 3556 if (!m.find()) 3557 failCount++; 3558 3559 p = Pattern.compile("\\u0023"); 3560 m = p.matcher("#"); 3561 if (!m.find()) 3562 failCount++; 3563 3564 report("Escape sequences"); 3565 } 3566 3567 /** 3568 * Checks the handling of blank input situations. These 3569 * tests are incompatible with my test file format. 3570 */ 3571 private static void blankInput() throws Exception { 3572 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3573 Matcher m = p.matcher(""); 3574 if (m.find()) 3575 failCount++; 3576 3577 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3578 m = p.matcher(""); 3579 if (!m.find()) 3580 failCount++; 3581 3582 p = Pattern.compile("abc"); 3583 m = p.matcher(""); 3584 if (m.find()) 3585 failCount++; 3586 3587 p = Pattern.compile("a*"); 3588 m = p.matcher(""); 3589 if (!m.find()) 3590 failCount++; 3591 3592 report("Blank input"); 3593 } 3594 3595 /** 3596 * Tests the Boyer-Moore pattern matching of a character sequence 3597 * on randomly generated patterns. 3598 */ 3599 private static void bm() throws Exception { 3600 doBnM('a'); 3601 report("Boyer Moore (ASCII)"); 3602 3603 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3604 report("Boyer Moore (Supplementary)"); 3605 } 3606 3607 private static void doBnM(int baseCharacter) throws Exception { 3608 int achar=0; 3609 3610 for (int i=0; i<100; i++) { 3611 // Create a short pattern to search for 3612 int patternLength = generator.nextInt(7) + 4; 3613 StringBuffer patternBuffer = new StringBuffer(patternLength); 3614 String pattern; 3615 retry: for (;;) { 3616 for (int x=0; x<patternLength; x++) { 3617 int ch = baseCharacter + generator.nextInt(26); 3618 if (Character.isSupplementaryCodePoint(ch)) { 3619 patternBuffer.append(Character.toChars(ch)); 3620 } else { 3621 patternBuffer.append((char)ch); 3622 } 3623 } 3624 pattern = patternBuffer.toString(); 3625 3626 // Avoid patterns that start and end with the same substring 3627 // See JDK-6854417 3628 for (int x=1; x < pattern.length(); x++) { 3629 if (pattern.startsWith(pattern.substring(x))) 3630 continue retry; 3631 } 3632 break; 3633 } 3634 Pattern p = Pattern.compile(pattern); 3635 3636 // Create a buffer with random ASCII chars that does 3637 // not match the sample 3638 String toSearch = null; 3639 StringBuffer s = null; 3640 Matcher m = p.matcher(""); 3641 do { 3642 s = new StringBuffer(100); 3643 for (int x=0; x<100; x++) { 3644 int ch = baseCharacter + generator.nextInt(26); 3645 if (Character.isSupplementaryCodePoint(ch)) { 3646 s.append(Character.toChars(ch)); 3647 } else { 3648 s.append((char)ch); 3649 } 3650 } 3651 toSearch = s.toString(); 3652 m.reset(toSearch); 3653 } while (m.find()); 3654 3655 // Insert the pattern at a random spot 3656 int insertIndex = generator.nextInt(99); 3657 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3658 insertIndex++; 3659 s = s.insert(insertIndex, pattern); 3660 toSearch = s.toString(); 3661 3662 // Make sure that the pattern is found 3663 m.reset(toSearch); 3664 if (!m.find()) 3665 failCount++; 3666 3667 // Make sure that the match text is the pattern 3668 if (!m.group().equals(pattern)) 3669 failCount++; 3670 3671 // Make sure match occured at insertion point 3672 if (m.start() != insertIndex) 3673 failCount++; 3674 } 3675 } 3676 3677 /** 3678 * Tests the matching of slices on randomly generated patterns. 3679 * The Boyer-Moore optimization is not done on these patterns 3680 * because it uses unicode case folding. 3681 */ 3682 private static void slice() throws Exception { 3683 doSlice(Character.MAX_VALUE); 3684 report("Slice"); 3685 3686 doSlice(Character.MAX_CODE_POINT); 3687 report("Slice (Supplementary)"); 3688 } 3689 3690 private static void doSlice(int maxCharacter) throws Exception { 3691 Random generator = new Random(); 3692 int achar=0; 3693 3694 for (int i=0; i<100; i++) { 3695 // Create a short pattern to search for 3696 int patternLength = generator.nextInt(7) + 4; 3697 StringBuffer patternBuffer = new StringBuffer(patternLength); 3698 for (int x=0; x<patternLength; x++) { 3699 int randomChar = 0; 3700 while (!Character.isLetterOrDigit(randomChar)) 3701 randomChar = generator.nextInt(maxCharacter); 3702 if (Character.isSupplementaryCodePoint(randomChar)) { 3703 patternBuffer.append(Character.toChars(randomChar)); 3704 } else { 3705 patternBuffer.append((char) randomChar); 3706 } 3707 } 3708 String pattern = patternBuffer.toString(); 3709 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3710 3711 // Create a buffer with random chars that does not match the sample 3712 String toSearch = null; 3713 StringBuffer s = null; 3714 Matcher m = p.matcher(""); 3715 do { 3716 s = new StringBuffer(100); 3717 for (int x=0; x<100; x++) { 3718 int randomChar = 0; 3719 while (!Character.isLetterOrDigit(randomChar)) 3720 randomChar = generator.nextInt(maxCharacter); 3721 if (Character.isSupplementaryCodePoint(randomChar)) { 3722 s.append(Character.toChars(randomChar)); 3723 } else { 3724 s.append((char) randomChar); 3725 } 3726 } 3727 toSearch = s.toString(); 3728 m.reset(toSearch); 3729 } while (m.find()); 3730 3731 // Insert the pattern at a random spot 3732 int insertIndex = generator.nextInt(99); 3733 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3734 insertIndex++; 3735 s = s.insert(insertIndex, pattern); 3736 toSearch = s.toString(); 3737 3738 // Make sure that the pattern is found 3739 m.reset(toSearch); 3740 if (!m.find()) 3741 failCount++; 3742 3743 // Make sure that the match text is the pattern 3744 if (!m.group().equals(pattern)) 3745 failCount++; 3746 3747 // Make sure match occured at insertion point 3748 if (m.start() != insertIndex) 3749 failCount++; 3750 } 3751 } 3752 3753 private static void explainFailure(String pattern, String data, 3754 String expected, String actual) { 3755 System.err.println("----------------------------------------"); 3756 System.err.println("Pattern = "+pattern); 3757 System.err.println("Data = "+data); 3758 System.err.println("Expected = " + expected); 3759 System.err.println("Actual = " + actual); 3760 } 3761 3762 private static void explainFailure(String pattern, String data, 3763 Throwable t) { 3764 System.err.println("----------------------------------------"); 3765 System.err.println("Pattern = "+pattern); 3766 System.err.println("Data = "+data); 3767 t.printStackTrace(System.err); 3768 } 3769 3770 // Testing examples from a file 3771 3772 /** 3773 * Goes through the file "TestCases.txt" and creates many patterns 3774 * described in the file, matching the patterns against input lines in 3775 * the file, and comparing the results against the correct results 3776 * also found in the file. The file format is described in comments 3777 * at the head of the file. 3778 */ 3779 private static void processFile(String fileName) throws Exception { 3780 File testCases = new File(System.getProperty("test.src", "."), 3781 fileName); 3782 FileInputStream in = new FileInputStream(testCases); 3783 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3784 3785 // Process next test case. 3786 String aLine; 3787 while((aLine = r.readLine()) != null) { 3788 // Read a line for pattern 3789 String patternString = grabLine(r); 3790 Pattern p = null; 3791 try { 3792 p = compileTestPattern(patternString); 3793 } catch (PatternSyntaxException e) { 3794 String dataString = grabLine(r); 3795 String expectedResult = grabLine(r); 3796 if (expectedResult.startsWith("error")) 3797 continue; 3798 explainFailure(patternString, dataString, e); 3799 failCount++; 3800 continue; 3801 } 3802 3803 // Read a line for input string 3804 String dataString = grabLine(r); 3805 Matcher m = p.matcher(dataString); 3806 StringBuffer result = new StringBuffer(); 3807 3808 // Check for IllegalStateExceptions before a match 3809 failCount += preMatchInvariants(m); 3810 3811 boolean found = m.find(); 3812 3813 if (found) 3814 failCount += postTrueMatchInvariants(m); 3815 else 3816 failCount += postFalseMatchInvariants(m); 3817 3818 if (found) { 3819 result.append("true "); 3820 result.append(m.group(0) + " "); 3821 } else { 3822 result.append("false "); 3823 } 3824 3825 result.append(m.groupCount()); 3826 3827 if (found) { 3828 for (int i=1; i<m.groupCount()+1; i++) 3829 if (m.group(i) != null) 3830 result.append(" " +m.group(i)); 3831 } 3832 3833 // Read a line for the expected result 3834 String expectedResult = grabLine(r); 3835 3836 if (!result.toString().equals(expectedResult)) { 3837 explainFailure(patternString, dataString, expectedResult, result.toString()); 3838 failCount++; 3839 } 3840 } 3841 3842 report(fileName); 3843 } 3844 3845 private static int preMatchInvariants(Matcher m) { 3846 int failCount = 0; 3847 try { 3848 m.start(); 3849 failCount++; 3850 } catch (IllegalStateException ise) {} 3851 try { 3852 m.end(); 3853 failCount++; 3854 } catch (IllegalStateException ise) {} 3855 try { 3856 m.group(); 3857 failCount++; 3858 } catch (IllegalStateException ise) {} 3859 return failCount; 3860 } 3861 3862 private static int postFalseMatchInvariants(Matcher m) { 3863 int failCount = 0; 3864 try { 3865 m.group(); 3866 failCount++; 3867 } catch (IllegalStateException ise) {} 3868 try { 3869 m.start(); 3870 failCount++; 3871 } catch (IllegalStateException ise) {} 3872 try { 3873 m.end(); 3874 failCount++; 3875 } catch (IllegalStateException ise) {} 3876 return failCount; 3877 } 3878 3879 private static int postTrueMatchInvariants(Matcher m) { 3880 int failCount = 0; 3881 //assert(m.start() = m.start(0); 3882 if (m.start() != m.start(0)) 3883 failCount++; 3884 //assert(m.end() = m.end(0); 3885 if (m.start() != m.start(0)) 3886 failCount++; 3887 //assert(m.group() = m.group(0); 3888 if (!m.group().equals(m.group(0))) 3889 failCount++; 3890 try { 3891 m.group(50); 3892 failCount++; 3893 } catch (IndexOutOfBoundsException ise) {} 3894 3895 return failCount; 3896 } 3897 3898 private static Pattern compileTestPattern(String patternString) { 3899 if (!patternString.startsWith("'")) { 3900 return Pattern.compile(patternString); 3901 } 3902 int break1 = patternString.lastIndexOf("'"); 3903 String flagString = patternString.substring( 3904 break1+1, patternString.length()); 3905 patternString = patternString.substring(1, break1); 3906 3907 if (flagString.equals("i")) 3908 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3909 3910 if (flagString.equals("m")) 3911 return Pattern.compile(patternString, Pattern.MULTILINE); 3912 3913 return Pattern.compile(patternString); 3914 } 3915 3916 /** 3917 * Reads a line from the input file. Keeps reading lines until a non 3918 * empty non comment line is read. If the line contains a \n then 3919 * these two characters are replaced by a newline char. If a \\uxxxx 3920 * sequence is read then the sequence is replaced by the unicode char. 3921 */ 3922 private static String grabLine(BufferedReader r) throws Exception { 3923 int index = 0; 3924 String line = r.readLine(); 3925 while (line.startsWith("//") || line.length() < 1) 3926 line = r.readLine(); 3927 while ((index = line.indexOf("\\n")) != -1) { 3928 StringBuffer temp = new StringBuffer(line); 3929 temp.replace(index, index+2, "\n"); 3930 line = temp.toString(); 3931 } 3932 while ((index = line.indexOf("\\u")) != -1) { 3933 StringBuffer temp = new StringBuffer(line); 3934 String value = temp.substring(index+2, index+6); 3935 char aChar = (char)Integer.parseInt(value, 16); 3936 String unicodeChar = "" + aChar; 3937 temp.replace(index, index+6, unicodeChar); 3938 line = temp.toString(); 3939 } 3940 3941 return line; 3942 } 3943 3944 private static void check(Pattern p, String s, String g, String expected) { 3945 Matcher m = p.matcher(s); 3946 m.find(); 3947 if (!m.group(g).equals(expected) || 3948 s.charAt(m.start(g)) != expected.charAt(0) || 3949 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 3950 failCount++; 3951 } 3952 3953 private static void checkReplaceFirst(String p, String s, String r, String expected) 3954 { 3955 if (!expected.equals(Pattern.compile(p) 3956 .matcher(s) 3957 .replaceFirst(r))) 3958 failCount++; 3959 } 3960 3961 private static void checkReplaceAll(String p, String s, String r, String expected) 3962 { 3963 if (!expected.equals(Pattern.compile(p) 3964 .matcher(s) 3965 .replaceAll(r))) 3966 failCount++; 3967 } 3968 3969 private static void checkExpectedFail(String p) { 3970 try { 3971 Pattern.compile(p); 3972 } catch (PatternSyntaxException pse) { 3973 //pse.printStackTrace(); 3974 return; 3975 } 3976 failCount++; 3977 } 3978 3979 private static void checkExpectedIAE(Matcher m, String g) { 3980 m.find(); 3981 try { 3982 m.group(g); 3983 } catch (IllegalArgumentException x) { 3984 //iae.printStackTrace(); 3985 try { 3986 m.start(g); 3987 } catch (IllegalArgumentException xx) { 3988 try { 3989 m.start(g); 3990 } catch (IllegalArgumentException xxx) { 3991 return; 3992 } 3993 } 3994 } 3995 failCount++; 3996 } 3997 3998 private static void checkExpectedNPE(Matcher m) { 3999 m.find(); 4000 try { 4001 m.group(null); 4002 } catch (NullPointerException x) { 4003 try { 4004 m.start(null); 4005 } catch (NullPointerException xx) { 4006 try { 4007 m.end(null); 4008 } catch (NullPointerException xxx) { 4009 return; 4010 } 4011 } 4012 } 4013 failCount++; 4014 } 4015 4016 private static void namedGroupCaptureTest() throws Exception { 4017 check(Pattern.compile("x+(?<gname>y+)z+"), 4018 "xxxyyyzzz", 4019 "gname", 4020 "yyy"); 4021 4022 check(Pattern.compile("x+(?<gname8>y+)z+"), 4023 "xxxyyyzzz", 4024 "gname8", 4025 "yyy"); 4026 4027 //backref 4028 Pattern pattern = Pattern.compile("(a*)bc\\1"); 4029 check(pattern, "zzzaabcazzz", true); // found "abca" 4030 4031 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 4032 "zzzaabcaazzz", true); 4033 4034 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 4035 "abcdefabc", true); 4036 4037 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 4038 "abcdefghijkk", true); 4039 4040 // Supplementary character tests 4041 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4042 toSupplementaries("zzzaabcazzz"), true); 4043 4044 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 4045 toSupplementaries("zzzaabcaazzz"), true); 4046 4047 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 4048 toSupplementaries("abcdefabc"), true); 4049 4050 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 4051 "(?<gname>" + 4052 toSupplementaries("k)") + "\\k<gname>"), 4053 toSupplementaries("abcdefghijkk"), true); 4054 4055 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 4056 "xxxyyyzzzyyy", 4057 "gname", 4058 "yyy"); 4059 4060 //replaceFirst/All 4061 checkReplaceFirst("(?<gn>ab)(c*)", 4062 "abccczzzabcczzzabccc", 4063 "${gn}", 4064 "abzzzabcczzzabccc"); 4065 4066 checkReplaceAll("(?<gn>ab)(c*)", 4067 "abccczzzabcczzzabccc", 4068 "${gn}", 4069 "abzzzabzzzab"); 4070 4071 4072 checkReplaceFirst("(?<gn>ab)(c*)", 4073 "zzzabccczzzabcczzzabccczzz", 4074 "${gn}", 4075 "zzzabzzzabcczzzabccczzz"); 4076 4077 checkReplaceAll("(?<gn>ab)(c*)", 4078 "zzzabccczzzabcczzzabccczzz", 4079 "${gn}", 4080 "zzzabzzzabzzzabzzz"); 4081 4082 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 4083 "zzzabccczzzabcczzzabccczzz", 4084 "${gn2}", 4085 "zzzccczzzabcczzzabccczzz"); 4086 4087 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 4088 "zzzabccczzzabcczzzabccczzz", 4089 "${gn2}", 4090 "zzzccczzzcczzzccczzz"); 4091 4092 //toSupplementaries("(ab)(c*)")); 4093 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4094 ")(?<gn2>" + toSupplementaries("c") + "*)", 4095 toSupplementaries("abccczzzabcczzzabccc"), 4096 "${gn1}", 4097 toSupplementaries("abzzzabcczzzabccc")); 4098 4099 4100 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4101 ")(?<gn2>" + toSupplementaries("c") + "*)", 4102 toSupplementaries("abccczzzabcczzzabccc"), 4103 "${gn1}", 4104 toSupplementaries("abzzzabzzzab")); 4105 4106 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 4107 ")(?<gn2>" + toSupplementaries("c") + "*)", 4108 toSupplementaries("abccczzzabcczzzabccc"), 4109 "${gn2}", 4110 toSupplementaries("ccczzzabcczzzabccc")); 4111 4112 4113 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 4114 ")(?<gn2>" + toSupplementaries("c") + "*)", 4115 toSupplementaries("abccczzzabcczzzabccc"), 4116 "${gn2}", 4117 toSupplementaries("ccczzzcczzzccc")); 4118 4119 checkReplaceFirst("(?<dog>Dog)AndCat", 4120 "zzzDogAndCatzzzDogAndCatzzz", 4121 "${dog}", 4122 "zzzDogzzzDogAndCatzzz"); 4123 4124 4125 checkReplaceAll("(?<dog>Dog)AndCat", 4126 "zzzDogAndCatzzzDogAndCatzzz", 4127 "${dog}", 4128 "zzzDogzzzDogzzz"); 4129 4130 // backref in Matcher & String 4131 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") || 4132 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4133 failCount++; 4134 4135 // negative 4136 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4137 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4138 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4139 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4140 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4141 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4142 "gnameX"); 4143 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4144 report("NamedGroupCapture"); 4145 } 4146 4147 // This is for bug 6919132 4148 private static void nonBmpClassComplementTest() throws Exception { 4149 Pattern p = Pattern.compile("\\P{Lu}"); 4150 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4151 4152 if (m.find() && m.start() == 1) 4153 failCount++; 4154 4155 // from a unicode category 4156 p = Pattern.compile("\\P{Lu}"); 4157 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4158 if (m.find()) 4159 failCount++; 4160 if (!m.hitEnd()) 4161 failCount++; 4162 4163 // block 4164 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4165 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4166 if (m.find() && m.start() == 1) 4167 failCount++; 4168 4169 p = Pattern.compile("\\P{sc=GRANTHA}"); 4170 m = p.matcher(new String(new int[] {0x11350}, 0, 1)); 4171 if (m.find() && m.start() == 1) 4172 failCount++; 4173 4174 report("NonBmpClassComplement"); 4175 } 4176 4177 private static void unicodePropertiesTest() throws Exception { 4178 // different forms 4179 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4180 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4181 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4182 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4183 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4184 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4185 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4186 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4187 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4188 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4189 failCount++; 4190 4191 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4192 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4193 Matcher lastSM = common; 4194 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4195 4196 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4197 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4198 Matcher lastBM = latin; 4199 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4200 4201 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4202 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4203 continue; // only pick couple code points, they are the same 4204 } 4205 4206 // Unicode Script 4207 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4208 Matcher m; 4209 String str = new String(Character.toChars(cp)); 4210 if (script == lastScript) { 4211 m = lastSM; 4212 m.reset(str); 4213 } else { 4214 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4215 } 4216 if (!m.matches()) { 4217 failCount++; 4218 } 4219 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common; 4220 other.reset(str); 4221 if (other.matches()) { 4222 failCount++; 4223 } 4224 lastSM = m; 4225 lastScript = script; 4226 4227 // Unicode Block 4228 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4229 if (block == null) { 4230 //System.out.printf("Not a Block: cp=%x%n", cp); 4231 continue; 4232 } 4233 if (block == lastBlock) { 4234 m = lastBM; 4235 m.reset(str); 4236 } else { 4237 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4238 } 4239 if (!m.matches()) { 4240 failCount++; 4241 } 4242 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4243 other.reset(str); 4244 if (other.matches()) { 4245 failCount++; 4246 } 4247 lastBM = m; 4248 lastBlock = block; 4249 } 4250 report("unicodeProperties"); 4251 } 4252 4253 private static void unicodeHexNotationTest() throws Exception { 4254 4255 // negative 4256 checkExpectedFail("\\x{-23}"); 4257 checkExpectedFail("\\x{110000}"); 4258 checkExpectedFail("\\x{}"); 4259 checkExpectedFail("\\x{AB[ef]"); 4260 4261 // codepoint 4262 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4263 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4264 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4265 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4266 4267 // in class 4268 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4269 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4270 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4271 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4272 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4273 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4274 4275 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4276 String s = "A" + new String(Character.toChars(cp)) + "B"; 4277 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp) 4278 : String.format("\\u%04x\\u%04x", 4279 (int) Character.toChars(cp)[0], 4280 (int) Character.toChars(cp)[1]); 4281 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4282 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4283 failCount++; 4284 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4285 failCount++; 4286 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4287 failCount++; 4288 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4289 failCount++; 4290 } 4291 report("unicodeHexNotation"); 4292 } 4293 4294 private static void unicodeClassesTest() throws Exception { 4295 4296 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4297 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4298 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4299 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4300 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4301 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4302 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4303 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4304 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4305 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4306 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4307 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4308 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4309 Matcher bound = Pattern.compile("\\b").matcher(""); 4310 Matcher word = Pattern.compile("\\w++").matcher(""); 4311 // UNICODE_CHARACTER_CLASS 4312 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4313 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4314 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4315 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4316 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4317 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4318 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4319 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4320 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4321 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4322 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4323 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4324 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4325 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4326 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4327 // embedded flag (?U) 4328 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4329 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4330 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4331 4332 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4333 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4334 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4335 // properties 4336 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4337 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4338 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4339 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4340 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4341 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4342 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4343 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4344 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4345 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4346 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4347 4348 // javaMethod 4349 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4350 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4351 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4352 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4353 4354 for (int cp = 1; cp < 0x30000; cp++) { 4355 String str = new String(Character.toChars(cp)); 4356 int type = Character.getType(cp); 4357 if (// lower 4358 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4359 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4360 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4361 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4362 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4363 // upper 4364 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4365 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4366 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4367 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4368 // alpha 4369 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4370 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4371 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4372 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4373 // digit 4374 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4375 Character.isDigit(cp) != digitU.reset(str).matches() || 4376 // alnum 4377 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4378 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4379 // punct 4380 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4381 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4382 // graph 4383 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4384 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4385 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4386 // blank 4387 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4388 != blank.reset(str).matches() || 4389 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4390 // print 4391 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4392 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4393 // cntrl 4394 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4395 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4396 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4397 // hexdigit 4398 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4399 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4400 // space 4401 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4402 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4403 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4404 // word 4405 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4406 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4407 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4408 // bwordb 4409 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4410 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4411 // properties 4412 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4413 Character.isLetter(cp) != letterP.reset(str).matches()|| 4414 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4415 Character.isIdeographic(cp) != ideogJ.reset(str).matches() || 4416 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 4417 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4418 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches()) 4419 failCount++; 4420 } 4421 4422 // bounds/word align 4423 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4424 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4425 failCount++; 4426 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4427 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4428 failCount++; 4429 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4430 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4431 failCount++; 4432 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4433 failCount++; 4434 report("unicodePredefinedClasses"); 4435 } 4436 4437 private static void unicodeCharacterNameTest() throws Exception { 4438 4439 for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) { 4440 if (!Character.isValidCodePoint(cp) || 4441 Character.getType(cp) == Character.UNASSIGNED) 4442 continue; 4443 String str = new String(Character.toChars(cp)); 4444 // single 4445 String p = "\\N{" + Character.getName(cp) + "}"; 4446 if (!Pattern.compile(p).matcher(str).matches()) { 4447 failCount++; 4448 } 4449 // class[c] 4450 p = "[\\N{" + Character.getName(cp) + "}]"; 4451 if (!Pattern.compile(p).matcher(str).matches()) { 4452 failCount++; 4453 } 4454 } 4455 4456 // range 4457 for (int i = 0; i < 10; i++) { 4458 int start = generator.nextInt(20); 4459 int end = start + generator.nextInt(200); 4460 String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]"; 4461 String str; 4462 for (int cp = start; cp < end; cp++) { 4463 str = new String(Character.toChars(cp)); 4464 if (!Pattern.compile(p).matcher(str).matches()) { 4465 failCount++; 4466 } 4467 } 4468 str = new String(Character.toChars(end + 10)); 4469 if (Pattern.compile(p).matcher(str).matches()) { 4470 failCount++; 4471 } 4472 } 4473 4474 // slice 4475 for (int i = 0; i < 10; i++) { 4476 int n = generator.nextInt(256); 4477 int[] buf = new int[n]; 4478 StringBuffer sb = new StringBuffer(1024); 4479 for (int j = 0; j < n; j++) { 4480 int cp = generator.nextInt(1000); 4481 if (!Character.isValidCodePoint(cp) || 4482 Character.getType(cp) == Character.UNASSIGNED) 4483 cp = 0x4e00; // just use 4e00 4484 sb.append("\\N{" + Character.getName(cp) + "}"); 4485 buf[j] = cp; 4486 } 4487 String p = sb.toString(); 4488 String str = new String(buf, 0, buf.length); 4489 if (!Pattern.compile(p).matcher(str).matches()) { 4490 failCount++; 4491 } 4492 } 4493 report("unicodeCharacterName"); 4494 } 4495 4496 private static void horizontalAndVerticalWSTest() throws Exception { 4497 String hws = new String (new char[] { 4498 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4499 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4500 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4501 0x202f, 0x205f, 0x3000 }); 4502 String vws = new String (new char[] { 4503 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4504 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4505 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4506 failCount++; 4507 if (Pattern.compile("\\H").matcher(hws).find() || 4508 Pattern.compile("[\\H]").matcher(hws).find()) 4509 failCount++; 4510 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4511 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4512 failCount++; 4513 if (Pattern.compile("\\V").matcher(vws).find() || 4514 Pattern.compile("[\\V]").matcher(vws).find()) 4515 failCount++; 4516 String prefix = "abcd"; 4517 String suffix = "efgh"; 4518 String ng = "A"; 4519 for (int i = 0; i < hws.length(); i++) { 4520 String c = String.valueOf(hws.charAt(i)); 4521 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4522 if (!m.find() || !c.equals(m.group())) 4523 failCount++; 4524 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4525 if (!m.find() || !c.equals(m.group())) 4526 failCount++; 4527 4528 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4529 if (!m.find() || !ng.equals(m.group())) 4530 failCount++; 4531 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4532 if (!m.find() || !ng.equals(m.group())) 4533 failCount++; 4534 } 4535 for (int i = 0; i < vws.length(); i++) { 4536 String c = String.valueOf(vws.charAt(i)); 4537 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4538 if (!m.find() || !c.equals(m.group())) 4539 failCount++; 4540 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4541 if (!m.find() || !c.equals(m.group())) 4542 failCount++; 4543 4544 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4545 if (!m.find() || !ng.equals(m.group())) 4546 failCount++; 4547 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4548 if (!m.find() || !ng.equals(m.group())) 4549 failCount++; 4550 } 4551 // \v in range is interpreted as 0x0B. This is the undocumented behavior 4552 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4553 failCount++; 4554 report("horizontalAndVerticalWSTest"); 4555 } 4556 4557 private static void linebreakTest() throws Exception { 4558 String linebreaks = new String (new char[] { 4559 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4560 String crnl = "\r\n"; 4561 if (!Pattern.compile("\\R+").matcher(linebreaks).matches() || 4562 !Pattern.compile("\\R").matcher(crnl).matches() || 4563 Pattern.compile("\\R\\R").matcher(crnl).matches()) 4564 failCount++; 4565 report("linebreakTest"); 4566 } 4567 4568 // #7189363 4569 private static void branchTest() throws Exception { 4570 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4571 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4572 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4573 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4574 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4575 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4576 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4577 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4578 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4579 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4580 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4581 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4582 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4583 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4584 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4585 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4586 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4587 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4588 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4589 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4590 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 4591 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4592 failCount++; 4593 report("branchTest"); 4594 } 4595 4596 // This test is for 8007395 4597 private static void groupCurlyNotFoundSuppTest() throws Exception { 4598 String input = "test this as \ud83d\ude0d"; 4599 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4600 "test(.)*(@[a-zA-Z.]+)", 4601 "test([^B])+(@[a-zA-Z.]+)", 4602 "test([^B])*(@[a-zA-Z.]+)", 4603 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4604 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4605 }) { 4606 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4607 .matcher(input); 4608 try { 4609 if (m.find()) { 4610 failCount++; 4611 } 4612 } catch (Exception x) { 4613 failCount++; 4614 } 4615 } 4616 report("GroupCurly NotFoundSupp"); 4617 } 4618 4619 // This test is for 8023647 4620 private static void groupCurlyBackoffTest() throws Exception { 4621 if (!"abc1c".matches("(\\w)+1\\1") || 4622 "abc11".matches("(\\w)+1\\1")) { 4623 failCount++; 4624 } 4625 report("GroupCurly backoff"); 4626 } 4627 4628 // This test is for 8012646 4629 private static void patternAsPredicate() throws Exception { 4630 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4631 4632 if (p.test("")) { 4633 failCount++; 4634 } 4635 if (!p.test("word")) { 4636 failCount++; 4637 } 4638 if (p.test("1234")) { 4639 failCount++; 4640 } 4641 report("Pattern.asPredicate"); 4642 } 4643 4644 // This test is for 8035975 4645 private static void invalidFlags() throws Exception { 4646 for (int flag = 1; flag != 0; flag <<= 1) { 4647 switch (flag) { 4648 case Pattern.CASE_INSENSITIVE: 4649 case Pattern.MULTILINE: 4650 case Pattern.DOTALL: 4651 case Pattern.UNICODE_CASE: 4652 case Pattern.CANON_EQ: 4653 case Pattern.UNIX_LINES: 4654 case Pattern.LITERAL: 4655 case Pattern.UNICODE_CHARACTER_CLASS: 4656 case Pattern.COMMENTS: 4657 // valid flag, continue 4658 break; 4659 default: 4660 try { 4661 Pattern.compile(".", flag); 4662 failCount++; 4663 } catch (IllegalArgumentException expected) { 4664 } 4665 } 4666 } 4667 report("Invalid compile flags"); 4668 } 4669 4670 private static void grapheme() throws Exception { 4671 Files.lines(Paths.get(System.getProperty("test.src", "."), 4672 "GraphemeBreakTest.txt")) 4673 .filter( ln -> ln.length() != 0 && !ln.startsWith("#") ) 4674 .forEach( ln -> { 4675 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", ""); 4676 // System.out.println(str); 4677 String[] strs = ln.split("\u00f7|\u00d7"); 4678 StringBuilder src = new StringBuilder(); 4679 ArrayList<String> graphemes = new ArrayList<>(); 4680 StringBuilder buf = new StringBuilder(); 4681 int offBk = 0; 4682 for (String str : strs) { 4683 if (str.length() == 0) // first empty str 4684 continue; 4685 int cp = Integer.parseInt(str, 16); 4686 src.appendCodePoint(cp); 4687 buf.appendCodePoint(cp); 4688 offBk += (str.length() + 1); 4689 if (ln.charAt(offBk) == '\u00f7') { // DIV 4690 graphemes.add(buf.toString()); 4691 buf = new StringBuilder(); 4692 } 4693 } 4694 Pattern p = Pattern.compile("\\X"); 4695 Matcher m = p.matcher(src.toString()); 4696 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}"); 4697 for (String g : graphemes) { 4698 // System.out.printf(" grapheme:=[%s]%n", g); 4699 // (1) test \\X directly 4700 if (!m.find() || !m.group().equals(g)) { 4701 System.out.println("Failed \\X [" + ln + "] : " + g); 4702 failCount++; 4703 } 4704 // (2) test \\b{g} + \\X via Scanner 4705 boolean hasNext = s.hasNext(p); 4706 // if (!s.hasNext() || !s.next().equals(next)) { 4707 if (!s.hasNext(p) || !s.next(p).equals(g)) { 4708 System.out.println("Failed b{g} [" + ln + "] : " + g); 4709 failCount++; 4710 } 4711 } 4712 }); 4713 // some sanity checks 4714 if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() || 4715 !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() || 4716 !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches()) 4717 failCount++; 4718 // make sure "\b{n}" still works 4719 if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches()) 4720 failCount++; 4721 report("Unicode extended grapheme cluster"); 4722 } 4723 }