/*
 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 8035076 8039124 36 */ 37 38 import java.util.regex.*; 39 import java.util.Random; 40 import java.io.*; 41 import java.util.*; 42 import java.nio.CharBuffer; 43 import java.util.function.Predicate; 44 45 /** 46 * This is a test class created to check the operation of 47 * the Pattern and Matcher classes. 48 */ 49 public class RegExTest { 50 51 private static Random generator = new Random(); 52 private static boolean failure = false; 53 private static int failCount = 0; 54 private static String firstFailure = null; 55 56 /** 57 * Main to interpret arguments and run several tests. 
58 * 59 */ 60 public static void main(String[] args) throws Exception { 61 // Most of the tests are in a file 62 processFile("TestCases.txt"); 63 //processFile("PerlCases.txt"); 64 processFile("BMPTestCases.txt"); 65 processFile("SupplementaryTestCases.txt"); 66 67 // These test many randomly generated char patterns 68 bm(); 69 slice(); 70 71 // These are hard to put into the file 72 escapes(); 73 blankInput(); 74 75 // Substitition tests on randomly generated sequences 76 globalSubstitute(); 77 stringbufferSubstitute(); 78 stringbuilderSubstitute(); 79 80 substitutionBasher(); 81 substitutionBasher2(); 82 83 // Canonical Equivalence 84 ceTest(); 85 86 // Anchors 87 anchorTest(); 88 89 // boolean match calls 90 matchesTest(); 91 lookingAtTest(); 92 93 // Pattern API 94 patternMatchesTest(); 95 96 // Misc 97 lookbehindTest(); 98 nullArgumentTest(); 99 backRefTest(); 100 groupCaptureTest(); 101 caretTest(); 102 charClassTest(); 103 emptyPatternTest(); 104 findIntTest(); 105 group0Test(); 106 longPatternTest(); 107 octalTest(); 108 ampersandTest(); 109 negationTest(); 110 splitTest(); 111 appendTest(); 112 caseFoldingTest(); 113 commentsTest(); 114 unixLinesTest(); 115 replaceFirstTest(); 116 gTest(); 117 zTest(); 118 serializeTest(); 119 reluctantRepetitionTest(); 120 multilineDollarTest(); 121 dollarAtEndTest(); 122 caretBetweenTerminatorsTest(); 123 // This RFE rejected in Tiger numOccurrencesTest(); 124 javaCharClassTest(); 125 nonCaptureRepetitionTest(); 126 notCapturedGroupCurlyMatchTest(); 127 escapedSegmentTest(); 128 literalPatternTest(); 129 literalReplacementTest(); 130 regionTest(); 131 toStringTest(); 132 negatedCharClassTest(); 133 findFromTest(); 134 boundsTest(); 135 unicodeWordBoundsTest(); 136 caretAtEndTest(); 137 wordSearchTest(); 138 hitEndTest(); 139 toMatchResultTest(); 140 surrogatesInClassTest(); 141 removeQEQuotingTest(); 142 namedGroupCaptureTest(); 143 nonBmpClassComplementTest(); 144 unicodePropertiesTest(); 145 unicodeHexNotationTest(); 
146 unicodeClassesTest(); 147 horizontalAndVerticalWSTest(); 148 linebreakTest(); 149 branchTest(); 150 groupCurlyNotFoundSuppTest(); 151 groupCurlyBackoffTest(); 152 patternAsPredicate(); 153 154 if (failure) { 155 throw new 156 RuntimeException("RegExTest failed, 1st failure: " + 157 firstFailure); 158 } else { 159 System.err.println("OKAY: All tests passed."); 160 } 161 } 162 163 // Utility functions 164 165 private static String getRandomAlphaString(int length) { 166 StringBuffer buf = new StringBuffer(length); 167 for (int i=0; i<length; i++) { 168 char randChar = (char)(97 + generator.nextInt(26)); 169 buf.append(randChar); 170 } 171 return buf.toString(); 172 } 173 174 private static void check(Matcher m, String expected) { 175 m.find(); 176 if (!m.group().equals(expected)) 177 failCount++; 178 } 179 180 private static void check(Matcher m, String result, boolean expected) { 181 m.find(); 182 if (m.group().equals(result) != expected) 183 failCount++; 184 } 185 186 private static void check(Pattern p, String s, boolean expected) { 187 if (p.matcher(s).find() != expected) 188 failCount++; 189 } 190 191 private static void check(String p, String s, boolean expected) { 192 Matcher matcher = Pattern.compile(p).matcher(s); 193 if (matcher.find() != expected) 194 failCount++; 195 } 196 197 private static void check(String p, char c, boolean expected) { 198 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 199 Pattern pattern = Pattern.compile(propertyPattern); 200 char[] ca = new char[1]; ca[0] = c; 201 Matcher matcher = pattern.matcher(new String(ca)); 202 if (!matcher.find()) 203 failCount++; 204 } 205 206 private static void check(String p, int codePoint, boolean expected) { 207 String propertyPattern = expected ? 
"\\p" + p : "\\P" + p; 208 Pattern pattern = Pattern.compile(propertyPattern); 209 char[] ca = Character.toChars(codePoint); 210 Matcher matcher = pattern.matcher(new String(ca)); 211 if (!matcher.find()) 212 failCount++; 213 } 214 215 private static void check(String p, int flag, String input, String s, 216 boolean expected) 217 { 218 Pattern pattern = Pattern.compile(p, flag); 219 Matcher matcher = pattern.matcher(input); 220 if (expected) 221 check(matcher, s, expected); 222 else 223 check(pattern, input, false); 224 } 225 226 private static void report(String testName) { 227 int spacesToAdd = 30 - testName.length(); 228 StringBuffer paddedNameBuffer = new StringBuffer(testName); 229 for (int i=0; i<spacesToAdd; i++) 230 paddedNameBuffer.append(" "); 231 String paddedName = paddedNameBuffer.toString(); 232 System.err.println(paddedName + ": " + 233 (failCount==0 ? "Passed":"Failed("+failCount+")")); 234 if (failCount > 0) { 235 failure = true; 236 237 if (firstFailure == null) { 238 firstFailure = testName; 239 } 240 } 241 242 failCount = 0; 243 } 244 245 /** 246 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 247 * supplementary characters. This method does NOT fully take care 248 * of the regex syntax. 
249 */ 250 private static String toSupplementaries(String s) { 251 int length = s.length(); 252 StringBuffer sb = new StringBuffer(length * 2); 253 254 for (int i = 0; i < length; ) { 255 char c = s.charAt(i++); 256 if (c == '\\') { 257 sb.append(c); 258 if (i < length) { 259 c = s.charAt(i++); 260 sb.append(c); 261 if (c == 'u') { 262 // assume no syntax error 263 sb.append(s.charAt(i++)); 264 sb.append(s.charAt(i++)); 265 sb.append(s.charAt(i++)); 266 sb.append(s.charAt(i++)); 267 } 268 } 269 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 270 sb.append('\ud800').append((char)('\udc00'+c)); 271 } else { 272 sb.append(c); 273 } 274 } 275 return sb.toString(); 276 } 277 278 // Regular expression tests 279 280 // This is for bug 6178785 281 // Test if an expected NPE gets thrown when passing in a null argument 282 private static boolean check(Runnable test) { 283 try { 284 test.run(); 285 failCount++; 286 return false; 287 } catch (NullPointerException npe) { 288 return true; 289 } 290 } 291 292 private static void nullArgumentTest() { 293 check(new Runnable() { public void run() { Pattern.compile(null); }}); 294 check(new Runnable() { public void run() { Pattern.matches(null, null); }}); 295 check(new Runnable() { public void run() { Pattern.matches("xyz", null);}}); 296 check(new Runnable() { public void run() { Pattern.quote(null);}}); 297 check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}}); 298 check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}}); 299 300 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 301 m.matches(); 302 check(new Runnable() { public void run() { m.appendTail((StringBuffer)null);}}); 303 check(new Runnable() { public void run() { m.appendTail((StringBuilder)null);}}); 304 check(new Runnable() { public void run() { m.replaceAll(null);}}); 305 check(new Runnable() { public void run() { m.replaceFirst(null);}}); 306 check(new Runnable() { public void run() { 
m.appendReplacement((StringBuffer)null, null);}}); 307 check(new Runnable() { public void run() { m.appendReplacement((StringBuilder)null, null);}}); 308 check(new Runnable() { public void run() { m.reset(null);}}); 309 check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}}); 310 //check(new Runnable() { public void run() { m.usePattern(null);}}); 311 312 report("Null Argument"); 313 } 314 315 // This is for bug6635133 316 // Test if surrogate pair in Unicode escapes can be handled correctly. 317 private static void surrogatesInClassTest() throws Exception { 318 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 319 Matcher matcher = pattern.matcher("\ud834\udd22"); 320 if (!matcher.find()) 321 failCount++; 322 323 report("Surrogate pair in Unicode escape"); 324 } 325 326 // This is for bug6990617 327 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 328 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 329 // char is an octal digit. 
330 private static void removeQEQuotingTest() throws Exception { 331 Pattern pattern = 332 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 333 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 334 if (!matcher.find()) 335 failCount++; 336 337 report("Remove Q/E Quoting"); 338 } 339 340 // This is for bug 4988891 341 // Test toMatchResult to see that it is a copy of the Matcher 342 // that is not affected by subsequent operations on the original 343 private static void toMatchResultTest() throws Exception { 344 Pattern pattern = Pattern.compile("squid"); 345 Matcher matcher = pattern.matcher( 346 "agiantsquidofdestinyasmallsquidoffate"); 347 matcher.find(); 348 int matcherStart1 = matcher.start(); 349 MatchResult mr = matcher.toMatchResult(); 350 if (mr == matcher) 351 failCount++; 352 int resultStart1 = mr.start(); 353 if (matcherStart1 != resultStart1) 354 failCount++; 355 matcher.find(); 356 int matcherStart2 = matcher.start(); 357 int resultStart2 = mr.start(); 358 if (matcherStart2 == resultStart2) 359 failCount++; 360 if (resultStart1 != resultStart2) 361 failCount++; 362 MatchResult mr2 = matcher.toMatchResult(); 363 if (mr == mr2) 364 failCount++; 365 if (mr2.start() != matcherStart2) 366 failCount++; 367 report("toMatchResult is a copy"); 368 } 369 370 // This is for bug 5013885 371 // Must test a slice to see if it reports hitEnd correctly 372 private static void hitEndTest() throws Exception { 373 // Basic test of Slice node 374 Pattern p = Pattern.compile("^squidattack"); 375 Matcher m = p.matcher("squack"); 376 m.find(); 377 if (m.hitEnd()) 378 failCount++; 379 m.reset("squid"); 380 m.find(); 381 if (!m.hitEnd()) 382 failCount++; 383 384 // Test Slice, SliceA and SliceU nodes 385 for (int i=0; i<3; i++) { 386 int flags = 0; 387 if (i==1) flags = Pattern.CASE_INSENSITIVE; 388 if (i==2) flags = Pattern.UNICODE_CASE; 389 p = Pattern.compile("^abc", flags); 390 m = p.matcher("ad"); 391 m.find(); 392 if (m.hitEnd()) 393 failCount++; 
394 m.reset("ab"); 395 m.find(); 396 if (!m.hitEnd()) 397 failCount++; 398 } 399 400 // Test Boyer-Moore node 401 p = Pattern.compile("catattack"); 402 m = p.matcher("attack"); 403 m.find(); 404 if (!m.hitEnd()) 405 failCount++; 406 407 p = Pattern.compile("catattack"); 408 m = p.matcher("attackattackattackcatatta"); 409 m.find(); 410 if (!m.hitEnd()) 411 failCount++; 412 report("hitEnd from a Slice"); 413 } 414 415 // This is for bug 4997476 416 // It is weird code submitted by customer demonstrating a regression 417 private static void wordSearchTest() throws Exception { 418 String testString = new String("word1 word2 word3"); 419 Pattern p = Pattern.compile("\\b"); 420 Matcher m = p.matcher(testString); 421 int position = 0; 422 int start = 0; 423 while (m.find(position)) { 424 start = m.start(); 425 if (start == testString.length()) 426 break; 427 if (m.find(start+1)) { 428 position = m.start(); 429 } else { 430 position = testString.length(); 431 } 432 if (testString.substring(start, position).equals(" ")) 433 continue; 434 if (!testString.substring(start, position-1).startsWith("word")) 435 failCount++; 436 } 437 report("Customer word search"); 438 } 439 440 // This is for bug 4994840 441 private static void caretAtEndTest() throws Exception { 442 // Problem only occurs with multiline patterns 443 // containing a beginning-of-line caret "^" followed 444 // by an expression that also matches the empty string. 
445 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 446 Matcher matcher = pattern.matcher("\r"); 447 matcher.find(); 448 matcher.find(); 449 report("Caret at end"); 450 } 451 452 // This test is for 4979006 453 // Check to see if word boundary construct properly handles unicode 454 // non spacing marks 455 private static void unicodeWordBoundsTest() throws Exception { 456 String spaces = " "; 457 String wordChar = "a"; 458 String nsm = "\u030a"; 459 460 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 461 462 Pattern pattern = Pattern.compile("\\b"); 463 Matcher matcher = pattern.matcher(""); 464 // S=other B=word character N=non spacing mark .=word boundary 465 // SS.BB.SS 466 String input = spaces + wordChar + wordChar + spaces; 467 twoFindIndexes(input, matcher, 2, 4); 468 // SS.BBN.SS 469 input = spaces + wordChar +wordChar + nsm + spaces; 470 twoFindIndexes(input, matcher, 2, 5); 471 // SS.BN.SS 472 input = spaces + wordChar + nsm + spaces; 473 twoFindIndexes(input, matcher, 2, 4); 474 // SS.BNN.SS 475 input = spaces + wordChar + nsm + nsm + spaces; 476 twoFindIndexes(input, matcher, 2, 5); 477 // SSN.BB.SS 478 input = spaces + nsm + wordChar + wordChar + spaces; 479 twoFindIndexes(input, matcher, 3, 5); 480 // SS.BNB.SS 481 input = spaces + wordChar + nsm + wordChar + spaces; 482 twoFindIndexes(input, matcher, 2, 5); 483 // SSNNSS 484 input = spaces + nsm + nsm + spaces; 485 matcher.reset(input); 486 if (matcher.find()) 487 failCount++; 488 // SSN.BBN.SS 489 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 490 twoFindIndexes(input, matcher, 3, 6); 491 492 report("Unicode word boundary"); 493 } 494 495 private static void twoFindIndexes(String input, Matcher matcher, int a, 496 int b) throws Exception 497 { 498 matcher.reset(input); 499 matcher.find(); 500 if (matcher.start() != a) 501 failCount++; 502 matcher.find(); 503 if (matcher.start() != b) 504 failCount++; 505 } 506 507 // This test is for 6284152 508 static 
void check(String regex, String input, String[] expected) { 509 List<String> result = new ArrayList<String>(); 510 Pattern p = Pattern.compile(regex); 511 Matcher m = p.matcher(input); 512 while (m.find()) { 513 result.add(m.group()); 514 } 515 if (!Arrays.asList(expected).equals(result)) 516 failCount++; 517 } 518 519 private static void lookbehindTest() throws Exception { 520 //Positive 521 check("(?<=%.{0,5})foo\\d", 522 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 523 new String[]{"foo1", "foo2", "foo3"}); 524 525 //boundary at end of the lookbehind sub-regex should work consistently 526 //with the boundary just after the lookbehind sub-regex 527 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 528 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 529 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 530 check("(?<!abc \\b)foo", "abc foo", new String[0]); 531 532 //Negative 533 check("(?<!%.{0,5})foo\\d", 534 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 535 new String[] {"foo4", "foo5"}); 536 537 //Positive greedy 538 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 539 540 //Positive reluctant 541 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 542 543 //supplementary 544 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 545 new String[] {"fo\ud800\udc00o"}); 546 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 547 new String[] {"fo\ud800\udc00o"}); 548 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 549 new String[] {"fo\ud800\udc00o"}); 550 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 551 new String[] {"fo\ud800\udc00o"}); 552 report("Lookbehind"); 553 } 554 555 // This test is for 4938995 556 // Check to see if weak region boundaries are transparent to 557 // lookahead and lookbehind constructs 558 private static void boundsTest() throws Exception { 559 String fullMessage = "catdogcat"; 560 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 561 
Matcher matcher = pattern.matcher("catdogca"); 562 matcher.useTransparentBounds(true); 563 if (matcher.find()) 564 failCount++; 565 matcher.reset("atdogcat"); 566 if (matcher.find()) 567 failCount++; 568 matcher.reset(fullMessage); 569 if (!matcher.find()) 570 failCount++; 571 matcher.reset(fullMessage); 572 matcher.region(0,9); 573 if (!matcher.find()) 574 failCount++; 575 matcher.reset(fullMessage); 576 matcher.region(0,6); 577 if (!matcher.find()) 578 failCount++; 579 matcher.reset(fullMessage); 580 matcher.region(3,6); 581 if (!matcher.find()) 582 failCount++; 583 matcher.useTransparentBounds(false); 584 if (matcher.find()) 585 failCount++; 586 587 // Negative lookahead/lookbehind 588 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 589 matcher = pattern.matcher("dogcat"); 590 matcher.useTransparentBounds(true); 591 matcher.region(0,3); 592 if (matcher.find()) 593 failCount++; 594 matcher.reset("catdog"); 595 matcher.region(3,6); 596 if (matcher.find()) 597 failCount++; 598 matcher.useTransparentBounds(false); 599 matcher.reset("dogcat"); 600 matcher.region(0,3); 601 if (!matcher.find()) 602 failCount++; 603 matcher.reset("catdog"); 604 matcher.region(3,6); 605 if (!matcher.find()) 606 failCount++; 607 608 report("Region bounds transparency"); 609 } 610 611 // This test is for 4945394 612 private static void findFromTest() throws Exception { 613 String message = "This is 40 $0 message."; 614 Pattern pat = Pattern.compile("\\$0"); 615 Matcher match = pat.matcher(message); 616 if (!match.find()) 617 failCount++; 618 if (match.find()) 619 failCount++; 620 if (match.find()) 621 failCount++; 622 report("Check for alternating find"); 623 } 624 625 // This test is for 4872664 and 4892980 626 private static void negatedCharClassTest() throws Exception { 627 Pattern pattern = Pattern.compile("[^>]"); 628 Matcher matcher = pattern.matcher("\u203A"); 629 if (!matcher.matches()) 630 failCount++; 631 pattern = Pattern.compile("[^fr]"); 632 matcher = pattern.matcher("a"); 
633 if (!matcher.find()) 634 failCount++; 635 matcher.reset("\u203A"); 636 if (!matcher.find()) 637 failCount++; 638 String s = "for"; 639 String result[] = s.split("[^fr]"); 640 if (!result[0].equals("f")) 641 failCount++; 642 if (!result[1].equals("r")) 643 failCount++; 644 s = "f\u203Ar"; 645 result = s.split("[^fr]"); 646 if (!result[0].equals("f")) 647 failCount++; 648 if (!result[1].equals("r")) 649 failCount++; 650 651 // Test adding to bits, subtracting a node, then adding to bits again 652 pattern = Pattern.compile("[^f\u203Ar]"); 653 matcher = pattern.matcher("a"); 654 if (!matcher.find()) 655 failCount++; 656 matcher.reset("f"); 657 if (matcher.find()) 658 failCount++; 659 matcher.reset("\u203A"); 660 if (matcher.find()) 661 failCount++; 662 matcher.reset("r"); 663 if (matcher.find()) 664 failCount++; 665 matcher.reset("\u203B"); 666 if (!matcher.find()) 667 failCount++; 668 669 // Test subtracting a node, adding to bits, subtracting again 670 pattern = Pattern.compile("[^\u203Ar\u203B]"); 671 matcher = pattern.matcher("a"); 672 if (!matcher.find()) 673 failCount++; 674 matcher.reset("\u203A"); 675 if (matcher.find()) 676 failCount++; 677 matcher.reset("r"); 678 if (matcher.find()) 679 failCount++; 680 matcher.reset("\u203B"); 681 if (matcher.find()) 682 failCount++; 683 matcher.reset("\u203C"); 684 if (!matcher.find()) 685 failCount++; 686 687 report("Negated Character Class"); 688 } 689 690 // This test is for 4628291 691 private static void toStringTest() throws Exception { 692 Pattern pattern = Pattern.compile("b+"); 693 if (pattern.toString() != "b+") 694 failCount++; 695 Matcher matcher = pattern.matcher("aaabbbccc"); 696 String matcherString = matcher.toString(); // unspecified 697 matcher.find(); 698 matcherString = matcher.toString(); // unspecified 699 matcher.region(0,3); 700 matcherString = matcher.toString(); // unspecified 701 matcher.reset(); 702 matcherString = matcher.toString(); // unspecified 703 report("toString"); 704 } 705 706 // 
This test is for 4808962 707 private static void literalPatternTest() throws Exception { 708 int flags = Pattern.LITERAL; 709 710 Pattern pattern = Pattern.compile("abc\\t$^", flags); 711 check(pattern, "abc\\t$^", true); 712 713 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 714 check(pattern, "abc\\t$^", true); 715 716 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 717 check(pattern, "\\Qa^$bcabc\\E", true); 718 check(pattern, "a^$bcabc", false); 719 720 pattern = Pattern.compile("\\\\Q\\\\E"); 721 check(pattern, "\\Q\\E", true); 722 723 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 724 check(pattern, "abcefg\\Q\\Ehij", true); 725 726 pattern = Pattern.compile("\\\\\\Q\\\\E"); 727 check(pattern, "\\\\\\\\", true); 728 729 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 730 check(pattern, "\\Qa^$bcabc\\E", true); 731 check(pattern, "a^$bcabc", false); 732 733 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 734 check(pattern, "\\Qabc\\Edef", true); 735 check(pattern, "abcdef", false); 736 737 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 738 check(pattern, "abc\\Edef", true); 739 check(pattern, "abcdef", false); 740 741 pattern = Pattern.compile(Pattern.quote("\\E")); 742 check(pattern, "\\E", true); 743 744 pattern = Pattern.compile("((((abc.+?:)", flags); 745 check(pattern, "((((abc.+?:)", true); 746 747 flags |= Pattern.MULTILINE; 748 749 pattern = Pattern.compile("^cat$", flags); 750 check(pattern, "abc^cat$def", true); 751 check(pattern, "cat", false); 752 753 flags |= Pattern.CASE_INSENSITIVE; 754 755 pattern = Pattern.compile("abcdef", flags); 756 check(pattern, "ABCDEF", true); 757 check(pattern, "AbCdEf", true); 758 759 flags |= Pattern.DOTALL; 760 761 pattern = Pattern.compile("a...b", flags); 762 check(pattern, "A...b", true); 763 check(pattern, "Axxxb", false); 764 765 flags |= Pattern.CANON_EQ; 766 767 Pattern p = Pattern.compile("testa\u030a", flags); 768 check(pattern, "testa\u030a", false); 769 
check(pattern, "test\u00e5", false); 770 771 // Supplementary character test 772 flags = Pattern.LITERAL; 773 774 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 775 check(pattern, toSupplementaries("abc\\t$^"), true); 776 777 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 778 check(pattern, toSupplementaries("abc\\t$^"), true); 779 780 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 781 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 782 check(pattern, toSupplementaries("a^$bcabc"), false); 783 784 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 785 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 786 check(pattern, toSupplementaries("a^$bcabc"), false); 787 788 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 789 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 790 check(pattern, toSupplementaries("abcdef"), false); 791 792 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 793 check(pattern, toSupplementaries("abc\\Edef"), true); 794 check(pattern, toSupplementaries("abcdef"), false); 795 796 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 797 check(pattern, toSupplementaries("((((abc.+?:)"), true); 798 799 flags |= Pattern.MULTILINE; 800 801 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 802 check(pattern, toSupplementaries("abc^cat$def"), true); 803 check(pattern, toSupplementaries("cat"), false); 804 805 flags |= Pattern.DOTALL; 806 807 // note: this is case-sensitive. 
808 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 809 check(pattern, toSupplementaries("a...b"), true); 810 check(pattern, toSupplementaries("axxxb"), false); 811 812 flags |= Pattern.CANON_EQ; 813 814 String t = toSupplementaries("test"); 815 p = Pattern.compile(t + "a\u030a", flags); 816 check(pattern, t + "a\u030a", false); 817 check(pattern, t + "\u00e5", false); 818 819 report("Literal pattern"); 820 } 821 822 // This test is for 4803179 823 // This test is also for 4808962, replacement parts 824 private static void literalReplacementTest() throws Exception { 825 int flags = Pattern.LITERAL; 826 827 Pattern pattern = Pattern.compile("abc", flags); 828 Matcher matcher = pattern.matcher("zzzabczzz"); 829 String replaceTest = "$0"; 830 String result = matcher.replaceAll(replaceTest); 831 if (!result.equals("zzzabczzz")) 832 failCount++; 833 834 matcher.reset(); 835 String literalReplacement = matcher.quoteReplacement(replaceTest); 836 result = matcher.replaceAll(literalReplacement); 837 if (!result.equals("zzz$0zzz")) 838 failCount++; 839 840 matcher.reset(); 841 replaceTest = "\\t$\\$"; 842 literalReplacement = matcher.quoteReplacement(replaceTest); 843 result = matcher.replaceAll(literalReplacement); 844 if (!result.equals("zzz\\t$\\$zzz")) 845 failCount++; 846 847 // Supplementary character test 848 pattern = Pattern.compile(toSupplementaries("abc"), flags); 849 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 850 replaceTest = "$0"; 851 result = matcher.replaceAll(replaceTest); 852 if (!result.equals(toSupplementaries("zzzabczzz"))) 853 failCount++; 854 855 matcher.reset(); 856 literalReplacement = matcher.quoteReplacement(replaceTest); 857 result = matcher.replaceAll(literalReplacement); 858 if (!result.equals(toSupplementaries("zzz$0zzz"))) 859 failCount++; 860 861 matcher.reset(); 862 replaceTest = "\\t$\\$"; 863 literalReplacement = matcher.quoteReplacement(replaceTest); 864 result = matcher.replaceAll(literalReplacement); 865 if 
(!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 866 failCount++; 867 868 // IAE should be thrown if backslash or '$' is the last character 869 // in replacement string 870 try { 871 "\uac00".replaceAll("\uac00", "$"); 872 failCount++; 873 } catch (IllegalArgumentException iie) { 874 } catch (Exception e) { 875 failCount++; 876 } 877 try { 878 "\uac00".replaceAll("\uac00", "\\"); 879 failCount++; 880 } catch (IllegalArgumentException iie) { 881 } catch (Exception e) { 882 failCount++; 883 } 884 report("Literal replacement"); 885 } 886 887 // This test is for 4757029 888 private static void regionTest() throws Exception { 889 Pattern pattern = Pattern.compile("abc"); 890 Matcher matcher = pattern.matcher("abcdefabc"); 891 892 matcher.region(0,9); 893 if (!matcher.find()) 894 failCount++; 895 if (!matcher.find()) 896 failCount++; 897 matcher.region(0,3); 898 if (!matcher.find()) 899 failCount++; 900 matcher.region(3,6); 901 if (matcher.find()) 902 failCount++; 903 matcher.region(0,2); 904 if (matcher.find()) 905 failCount++; 906 907 expectRegionFail(matcher, 1, -1); 908 expectRegionFail(matcher, -1, -1); 909 expectRegionFail(matcher, -1, 1); 910 expectRegionFail(matcher, 5, 3); 911 expectRegionFail(matcher, 5, 12); 912 expectRegionFail(matcher, 12, 12); 913 914 pattern = Pattern.compile("^abc$"); 915 matcher = pattern.matcher("zzzabczzz"); 916 matcher.region(0,9); 917 if (matcher.find()) 918 failCount++; 919 matcher.region(3,6); 920 if (!matcher.find()) 921 failCount++; 922 matcher.region(3,6); 923 matcher.useAnchoringBounds(false); 924 if (matcher.find()) 925 failCount++; 926 927 // Supplementary character test 928 pattern = Pattern.compile(toSupplementaries("abc")); 929 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 930 matcher.region(0,9*2); 931 if (!matcher.find()) 932 failCount++; 933 if (!matcher.find()) 934 failCount++; 935 matcher.region(0,3*2); 936 if (!matcher.find()) 937 failCount++; 938 matcher.region(1,3*2); 939 if (matcher.find()) 940 
failCount++; 941 matcher.region(3*2,6*2); 942 if (matcher.find()) 943 failCount++; 944 matcher.region(0,2*2); 945 if (matcher.find()) 946 failCount++; 947 matcher.region(0,2*2+1); 948 if (matcher.find()) 949 failCount++; 950 951 expectRegionFail(matcher, 1*2, -1); 952 expectRegionFail(matcher, -1, -1); 953 expectRegionFail(matcher, -1, 1*2); 954 expectRegionFail(matcher, 5*2, 3*2); 955 expectRegionFail(matcher, 5*2, 12*2); 956 expectRegionFail(matcher, 12*2, 12*2); 957 958 pattern = Pattern.compile(toSupplementaries("^abc$")); 959 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 960 matcher.region(0,9*2); 961 if (matcher.find()) 962 failCount++; 963 matcher.region(3*2,6*2); 964 if (!matcher.find()) 965 failCount++; 966 matcher.region(3*2+1,6*2); 967 if (matcher.find()) 968 failCount++; 969 matcher.region(3*2,6*2-1); 970 if (matcher.find()) 971 failCount++; 972 matcher.region(3*2,6*2); 973 matcher.useAnchoringBounds(false); 974 if (matcher.find()) 975 failCount++; 976 report("Regions"); 977 } 978 979 private static void expectRegionFail(Matcher matcher, int index1, 980 int index2) 981 { 982 try { 983 matcher.region(index1, index2); 984 failCount++; 985 } catch (IndexOutOfBoundsException ioobe) { 986 // Correct result 987 } catch (IllegalStateException ise) { 988 // Correct result 989 } 990 } 991 992 // This test is for 4803197 993 private static void escapedSegmentTest() throws Exception { 994 995 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 996 check(pattern, "dir1\\dir2", true); 997 998 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 999 check(pattern, "dir1\\dir2\\", true); 1000 1001 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 1002 check(pattern, "dir1\\dir2\\", true); 1003 1004 // Supplementary character test 1005 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1006 check(pattern, toSupplementaries("dir1\\dir2"), true); 1007 1008 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1009 
check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1010 1011 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1012 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1013 1014 report("Escaped segment"); 1015 } 1016 1017 // This test is for 4792284 1018 private static void nonCaptureRepetitionTest() throws Exception { 1019 String input = "abcdefgh;"; 1020 1021 String[] patterns = new String[] { 1022 "(?:\\w{4})+;", 1023 "(?:\\w{8})*;", 1024 "(?:\\w{2}){2,4};", 1025 "(?:\\w{4}){2,};", // only matches the 1026 ".*?(?:\\w{5})+;", // specified minimum 1027 ".*?(?:\\w{9})*;", // number of reps - OK 1028 "(?:\\w{4})+?;", // lazy repetition - OK 1029 "(?:\\w{4})++;", // possessive repetition - OK 1030 "(?:\\w{2,}?)+;", // non-deterministic - OK 1031 "(\\w{4})+;", // capturing group - OK 1032 }; 1033 1034 for (int i = 0; i < patterns.length; i++) { 1035 // Check find() 1036 check(patterns[i], 0, input, input, true); 1037 // Check matches() 1038 Pattern p = Pattern.compile(patterns[i]); 1039 Matcher m = p.matcher(input); 1040 1041 if (m.matches()) { 1042 if (!m.group(0).equals(input)) 1043 failCount++; 1044 } else { 1045 failCount++; 1046 } 1047 } 1048 1049 report("Non capturing repetition"); 1050 } 1051 1052 // This test is for 6358731 1053 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1054 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1055 Matcher matcher = pattern.matcher("abcd"); 1056 if (!matcher.matches() || 1057 matcher.group(1) != null || 1058 !matcher.group(2).equals("abcd")) { 1059 failCount++; 1060 } 1061 report("Not captured GroupCurly"); 1062 } 1063 1064 // This test is for 4706545 1065 private static void javaCharClassTest() throws Exception { 1066 for (int i=0; i<1000; i++) { 1067 char c = (char)generator.nextInt(); 1068 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1069 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1070 check("{javaUpperCase}+", c, 
Character.isUpperCase(c)); 1071 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1072 check("{javaDigit}", c, Character.isDigit(c)); 1073 check("{javaDefined}", c, Character.isDefined(c)); 1074 check("{javaLetter}", c, Character.isLetter(c)); 1075 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1076 check("{javaJavaIdentifierStart}", c, 1077 Character.isJavaIdentifierStart(c)); 1078 check("{javaJavaIdentifierPart}", c, 1079 Character.isJavaIdentifierPart(c)); 1080 check("{javaUnicodeIdentifierStart}", c, 1081 Character.isUnicodeIdentifierStart(c)); 1082 check("{javaUnicodeIdentifierPart}", c, 1083 Character.isUnicodeIdentifierPart(c)); 1084 check("{javaIdentifierIgnorable}", c, 1085 Character.isIdentifierIgnorable(c)); 1086 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1087 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1088 check("{javaISOControl}", c, Character.isISOControl(c)); 1089 check("{javaMirrored}", c, Character.isMirrored(c)); 1090 1091 } 1092 1093 // Supplementary character test 1094 for (int i=0; i<1000; i++) { 1095 int c = generator.nextInt(Character.MAX_CODE_POINT 1096 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1097 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1098 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1099 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1100 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1101 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1102 check("{javaDigit}", c, Character.isDigit(c)); 1103 check("{javaDefined}", c, Character.isDefined(c)); 1104 check("{javaLetter}", c, Character.isLetter(c)); 1105 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1106 check("{javaJavaIdentifierStart}", c, 1107 Character.isJavaIdentifierStart(c)); 1108 check("{javaJavaIdentifierPart}", c, 1109 Character.isJavaIdentifierPart(c)); 1110 check("{javaUnicodeIdentifierStart}", c, 1111 Character.isUnicodeIdentifierStart(c)); 1112 
check("{javaUnicodeIdentifierPart}", c, 1113 Character.isUnicodeIdentifierPart(c)); 1114 check("{javaIdentifierIgnorable}", c, 1115 Character.isIdentifierIgnorable(c)); 1116 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1117 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1118 check("{javaISOControl}", c, Character.isISOControl(c)); 1119 check("{javaMirrored}", c, Character.isMirrored(c)); 1120 } 1121 1122 report("Java character classes"); 1123 } 1124 1125 // This test is for 4523620 1126 /* 1127 private static void numOccurrencesTest() throws Exception { 1128 Pattern pattern = Pattern.compile("aaa"); 1129 1130 if (pattern.numOccurrences("aaaaaa", false) != 2) 1131 failCount++; 1132 if (pattern.numOccurrences("aaaaaa", true) != 4) 1133 failCount++; 1134 1135 pattern = Pattern.compile("^"); 1136 if (pattern.numOccurrences("aaaaaa", false) != 1) 1137 failCount++; 1138 if (pattern.numOccurrences("aaaaaa", true) != 1) 1139 failCount++; 1140 1141 report("Number of Occurrences"); 1142 } 1143 */ 1144 1145 // This test is for 4776374 1146 private static void caretBetweenTerminatorsTest() throws Exception { 1147 int flags1 = Pattern.DOTALL; 1148 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1149 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1150 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1151 1152 check("^....", flags1, "test\ntest", "test", true); 1153 check(".....^", flags1, "test\ntest", "test", false); 1154 check(".....^", flags1, "test\n", "test", false); 1155 check("....^", flags1, "test\r\n", "test", false); 1156 1157 check("^....", flags2, "test\ntest", "test", true); 1158 check("....^", flags2, "test\ntest", "test", false); 1159 check(".....^", flags2, "test\n", "test", false); 1160 check("....^", flags2, "test\r\n", "test", false); 1161 1162 check("^....", flags3, "test\ntest", "test", true); 1163 check(".....^", flags3, "test\ntest", "test\n", true); 1164 check(".....^", flags3, "test\u0085test", "test\u0085", 
false); 1165 check(".....^", flags3, "test\n", "test", false); 1166 check(".....^", flags3, "test\r\n", "test", false); 1167 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1168 1169 check("^....", flags4, "test\ntest", "test", true); 1170 check(".....^", flags3, "test\ntest", "test\n", true); 1171 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1172 check(".....^", flags4, "test\n", "test\n", false); 1173 check(".....^", flags4, "test\r\n", "test\r", false); 1174 1175 // Supplementary character test 1176 String t = toSupplementaries("test"); 1177 check("^....", flags1, t+"\n"+t, t, true); 1178 check(".....^", flags1, t+"\n"+t, t, false); 1179 check(".....^", flags1, t+"\n", t, false); 1180 check("....^", flags1, t+"\r\n", t, false); 1181 1182 check("^....", flags2, t+"\n"+t, t, true); 1183 check("....^", flags2, t+"\n"+t, t, false); 1184 check(".....^", flags2, t+"\n", t, false); 1185 check("....^", flags2, t+"\r\n", t, false); 1186 1187 check("^....", flags3, t+"\n"+t, t, true); 1188 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1189 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1190 check(".....^", flags3, t+"\n", t, false); 1191 check(".....^", flags3, t+"\r\n", t, false); 1192 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1193 1194 check("^....", flags4, t+"\n"+t, t, true); 1195 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1196 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1197 check(".....^", flags4, t+"\n", t+"\n", false); 1198 check(".....^", flags4, t+"\r\n", t+"\r", false); 1199 1200 report("Caret between terminators"); 1201 } 1202 1203 // This test is for 4727935 1204 private static void dollarAtEndTest() throws Exception { 1205 int flags1 = Pattern.DOTALL; 1206 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1207 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1208 1209 check("....$", flags1, "test\n", "test", true); 1210 check("....$", flags1, "test\r\n", "test", true); 1211 
check(".....$", flags1, "test\n", "test\n", true); 1212 check(".....$", flags1, "test\u0085", "test\u0085", true); 1213 check("....$", flags1, "test\u0085", "test", true); 1214 1215 check("....$", flags2, "test\n", "test", true); 1216 check(".....$", flags2, "test\n", "test\n", true); 1217 check(".....$", flags2, "test\u0085", "test\u0085", true); 1218 check("....$", flags2, "test\u0085", "est\u0085", true); 1219 1220 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1221 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1222 check("....$blah", flags3, "test\nblah", "!!!!", false); 1223 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1224 1225 // Supplementary character test 1226 String t = toSupplementaries("test"); 1227 String b = toSupplementaries("blah"); 1228 check("....$", flags1, t+"\n", t, true); 1229 check("....$", flags1, t+"\r\n", t, true); 1230 check(".....$", flags1, t+"\n", t+"\n", true); 1231 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1232 check("....$", flags1, t+"\u0085", t, true); 1233 1234 check("....$", flags2, t+"\n", t, true); 1235 check(".....$", flags2, t+"\n", t+"\n", true); 1236 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1237 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1238 1239 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1240 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1241 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1242 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1243 1244 report("Dollar at End"); 1245 } 1246 1247 // This test is for 4711773 1248 private static void multilineDollarTest() throws Exception { 1249 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1250 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1251 matcher.find(); 1252 if (matcher.start(0) != 9) 1253 failCount++; 1254 matcher.find(); 1255 if (matcher.start(0) != 20) 1256 failCount++; 1257 1258 // Supplementary 
character test 1259 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1260 matcher.find(); 1261 if (matcher.start(0) != 9*2) 1262 failCount++; 1263 matcher.find(); 1264 if (matcher.start(0) != 20*2) 1265 failCount++; 1266 1267 report("Multiline Dollar"); 1268 } 1269 1270 private static void reluctantRepetitionTest() throws Exception { 1271 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1272 check(p, "1 word word word 2", true); 1273 check(p, "1 wor wo w 2", true); 1274 check(p, "1 word word 2", true); 1275 check(p, "1 word 2", true); 1276 check(p, "1 wo w w 2", true); 1277 check(p, "1 wo w 2", true); 1278 check(p, "1 wor w 2", true); 1279 1280 p = Pattern.compile("([a-z])+?c"); 1281 Matcher m = p.matcher("ababcdefdec"); 1282 check(m, "ababc"); 1283 1284 // Supplementary character test 1285 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1286 m = p.matcher(toSupplementaries("ababcdefdec")); 1287 check(m, toSupplementaries("ababc")); 1288 1289 report("Reluctant Repetition"); 1290 } 1291 1292 private static void serializeTest() throws Exception { 1293 String patternStr = "(b)"; 1294 String matchStr = "b"; 1295 Pattern pattern = Pattern.compile(patternStr); 1296 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1297 ObjectOutputStream oos = new ObjectOutputStream(baos); 1298 oos.writeObject(pattern); 1299 oos.close(); 1300 ObjectInputStream ois = new ObjectInputStream( 1301 new ByteArrayInputStream(baos.toByteArray())); 1302 Pattern serializedPattern = (Pattern)ois.readObject(); 1303 ois.close(); 1304 Matcher matcher = serializedPattern.matcher(matchStr); 1305 if (!matcher.matches()) 1306 failCount++; 1307 if (matcher.groupCount() != 1) 1308 failCount++; 1309 1310 report("Serialization"); 1311 } 1312 1313 private static void gTest() { 1314 Pattern pattern = Pattern.compile("\\G\\w"); 1315 Matcher matcher = pattern.matcher("abc#x#x"); 1316 matcher.find(); 1317 matcher.find(); 1318 matcher.find(); 1319 if 
(matcher.find()) 1320 failCount++; 1321 1322 pattern = Pattern.compile("\\GA*"); 1323 matcher = pattern.matcher("1A2AA3"); 1324 matcher.find(); 1325 if (matcher.find()) 1326 failCount++; 1327 1328 pattern = Pattern.compile("\\GA*"); 1329 matcher = pattern.matcher("1A2AA3"); 1330 if (!matcher.find(1)) 1331 failCount++; 1332 matcher.find(); 1333 if (matcher.find()) 1334 failCount++; 1335 1336 report("\\G"); 1337 } 1338 1339 private static void zTest() { 1340 Pattern pattern = Pattern.compile("foo\\Z"); 1341 // Positives 1342 check(pattern, "foo\u0085", true); 1343 check(pattern, "foo\u2028", true); 1344 check(pattern, "foo\u2029", true); 1345 check(pattern, "foo\n", true); 1346 check(pattern, "foo\r", true); 1347 check(pattern, "foo\r\n", true); 1348 // Negatives 1349 check(pattern, "fooo", false); 1350 check(pattern, "foo\n\r", false); 1351 1352 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1353 // Positives 1354 check(pattern, "foo", true); 1355 check(pattern, "foo\n", true); 1356 // Negatives 1357 check(pattern, "foo\r", false); 1358 check(pattern, "foo\u0085", false); 1359 check(pattern, "foo\u2028", false); 1360 check(pattern, "foo\u2029", false); 1361 1362 report("\\Z"); 1363 } 1364 1365 private static void replaceFirstTest() { 1366 Pattern pattern = Pattern.compile("(ab)(c*)"); 1367 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1368 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1369 failCount++; 1370 1371 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1372 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1373 failCount++; 1374 1375 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1376 String result = matcher.replaceFirst("$1"); 1377 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1378 failCount++; 1379 1380 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1381 result = matcher.replaceFirst("$2"); 1382 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1383 failCount++; 1384 1385 pattern = 
Pattern.compile("a*"); 1386 matcher = pattern.matcher("aaaaaaaaaa"); 1387 if (!matcher.replaceFirst("test").equals("test")) 1388 failCount++; 1389 1390 pattern = Pattern.compile("a+"); 1391 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1392 if (!matcher.replaceFirst("test").equals("zzztest")) 1393 failCount++; 1394 1395 // Supplementary character test 1396 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1397 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1398 if (!matcher.replaceFirst(toSupplementaries("test")) 1399 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1400 failCount++; 1401 1402 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1403 if (!matcher.replaceFirst(toSupplementaries("test")). 1404 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1405 failCount++; 1406 1407 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1408 result = matcher.replaceFirst("$1"); 1409 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1410 failCount++; 1411 1412 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1413 result = matcher.replaceFirst("$2"); 1414 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1415 failCount++; 1416 1417 pattern = Pattern.compile(toSupplementaries("a*")); 1418 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1419 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1420 failCount++; 1421 1422 pattern = Pattern.compile(toSupplementaries("a+")); 1423 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1424 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1425 failCount++; 1426 1427 report("Replace First"); 1428 } 1429 1430 private static void unixLinesTest() { 1431 Pattern pattern = Pattern.compile(".*"); 1432 Matcher matcher = pattern.matcher("aa\u2028blah"); 1433 matcher.find(); 1434 if (!matcher.group(0).equals("aa")) 1435 failCount++; 
1436 1437 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1438 matcher = pattern.matcher("aa\u2028blah"); 1439 matcher.find(); 1440 if (!matcher.group(0).equals("aa\u2028blah")) 1441 failCount++; 1442 1443 pattern = Pattern.compile("[az]$", 1444 Pattern.MULTILINE | Pattern.UNIX_LINES); 1445 matcher = pattern.matcher("aa\u2028zz"); 1446 check(matcher, "a\u2028", false); 1447 1448 // Supplementary character test 1449 pattern = Pattern.compile(".*"); 1450 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1451 matcher.find(); 1452 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1453 failCount++; 1454 1455 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1456 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1457 matcher.find(); 1458 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1459 failCount++; 1460 1461 pattern = Pattern.compile(toSupplementaries("[az]$"), 1462 Pattern.MULTILINE | Pattern.UNIX_LINES); 1463 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1464 check(matcher, toSupplementaries("a\u2028"), false); 1465 1466 report("Unix Lines"); 1467 } 1468 1469 private static void commentsTest() { 1470 int flags = Pattern.COMMENTS; 1471 1472 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1473 Matcher matcher = pattern.matcher("aa#aa"); 1474 if (!matcher.matches()) 1475 failCount++; 1476 1477 pattern = Pattern.compile("aa # blah", flags); 1478 matcher = pattern.matcher("aa"); 1479 if (!matcher.matches()) 1480 failCount++; 1481 1482 pattern = Pattern.compile("aa blah", flags); 1483 matcher = pattern.matcher("aablah"); 1484 if (!matcher.matches()) 1485 failCount++; 1486 1487 pattern = Pattern.compile("aa # blah blech ", flags); 1488 matcher = pattern.matcher("aa"); 1489 if (!matcher.matches()) 1490 failCount++; 1491 1492 pattern = Pattern.compile("aa # blah\n ", flags); 1493 matcher = pattern.matcher("aa"); 1494 if (!matcher.matches()) 1495 failCount++; 1496 1497 pattern = Pattern.compile("aa # 
blah\nbc # blech", flags); 1498 matcher = pattern.matcher("aabc"); 1499 if (!matcher.matches()) 1500 failCount++; 1501 1502 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1503 matcher = pattern.matcher("aabc"); 1504 if (!matcher.matches()) 1505 failCount++; 1506 1507 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1508 matcher = pattern.matcher("aabc#blech"); 1509 if (!matcher.matches()) 1510 failCount++; 1511 1512 // Supplementary character test 1513 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1514 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1515 if (!matcher.matches()) 1516 failCount++; 1517 1518 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1519 matcher = pattern.matcher(toSupplementaries("aa")); 1520 if (!matcher.matches()) 1521 failCount++; 1522 1523 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1524 matcher = pattern.matcher(toSupplementaries("aablah")); 1525 if (!matcher.matches()) 1526 failCount++; 1527 1528 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1529 matcher = pattern.matcher(toSupplementaries("aa")); 1530 if (!matcher.matches()) 1531 failCount++; 1532 1533 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1534 matcher = pattern.matcher(toSupplementaries("aa")); 1535 if (!matcher.matches()) 1536 failCount++; 1537 1538 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1539 matcher = pattern.matcher(toSupplementaries("aabc")); 1540 if (!matcher.matches()) 1541 failCount++; 1542 1543 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1544 matcher = pattern.matcher(toSupplementaries("aabc")); 1545 if (!matcher.matches()) 1546 failCount++; 1547 1548 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1549 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1550 if (!matcher.matches()) 1551 failCount++; 1552 1553 
report("Comments"); 1554 } 1555 1556 private static void caseFoldingTest() { // bug 4504687 1557 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1558 Pattern pattern = Pattern.compile("aa", flags); 1559 Matcher matcher = pattern.matcher("ab"); 1560 if (matcher.matches()) 1561 failCount++; 1562 1563 pattern = Pattern.compile("aA", flags); 1564 matcher = pattern.matcher("ab"); 1565 if (matcher.matches()) 1566 failCount++; 1567 1568 pattern = Pattern.compile("aa", flags); 1569 matcher = pattern.matcher("aB"); 1570 if (matcher.matches()) 1571 failCount++; 1572 matcher = pattern.matcher("Ab"); 1573 if (matcher.matches()) 1574 failCount++; 1575 1576 // ASCII "a" 1577 // Latin-1 Supplement "a" + grave 1578 // Cyrillic "a" 1579 String[] patterns = new String[] { 1580 //single 1581 "a", "\u00e0", "\u0430", 1582 //slice 1583 "ab", "\u00e0\u00e1", "\u0430\u0431", 1584 //class single 1585 "[a]", "[\u00e0]", "[\u0430]", 1586 //class range 1587 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1588 //back reference 1589 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1590 }; 1591 1592 String[] texts = new String[] { 1593 "A", "\u00c0", "\u0410", 1594 "AB", "\u00c0\u00c1", "\u0410\u0411", 1595 "A", "\u00c0", "\u0410", 1596 "B", "\u00c2", "\u0411", 1597 "aA", "\u00e0\u00c0", "\u0430\u0410" 1598 }; 1599 1600 boolean[] expected = new boolean[] { 1601 true, false, false, 1602 true, false, false, 1603 true, false, false, 1604 true, false, false, 1605 true, false, false 1606 }; 1607 1608 flags = Pattern.CASE_INSENSITIVE; 1609 for (int i = 0; i < patterns.length; i++) { 1610 pattern = Pattern.compile(patterns[i], flags); 1611 matcher = pattern.matcher(texts[i]); 1612 if (matcher.matches() != expected[i]) { 1613 System.out.println("<1> Failed at " + i); 1614 failCount++; 1615 } 1616 } 1617 1618 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1619 for (int i = 0; i < patterns.length; i++) { 1620 pattern = Pattern.compile(patterns[i], flags); 1621 matcher = 
pattern.matcher(texts[i]); 1622 if (!matcher.matches()) { 1623 System.out.println("<2> Failed at " + i); 1624 failCount++; 1625 } 1626 } 1627 // flag unicode_case alone should do nothing 1628 flags = Pattern.UNICODE_CASE; 1629 for (int i = 0; i < patterns.length; i++) { 1630 pattern = Pattern.compile(patterns[i], flags); 1631 matcher = pattern.matcher(texts[i]); 1632 if (matcher.matches()) { 1633 System.out.println("<3> Failed at " + i); 1634 failCount++; 1635 } 1636 } 1637 1638 // Special cases: i, I, u+0131 and u+0130 1639 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1640 pattern = Pattern.compile("[h-j]+", flags); 1641 if (!pattern.matcher("\u0131\u0130").matches()) 1642 failCount++; 1643 report("Case Folding"); 1644 } 1645 1646 private static void appendTest() { 1647 Pattern pattern = Pattern.compile("(ab)(cd)"); 1648 Matcher matcher = pattern.matcher("abcd"); 1649 String result = matcher.replaceAll("$2$1"); 1650 if (!result.equals("cdab")) 1651 failCount++; 1652 1653 String s1 = "Swap all: first = 123, second = 456"; 1654 String s2 = "Swap one: first = 123, second = 456"; 1655 String r = "$3$2$1"; 1656 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1657 matcher = pattern.matcher(s1); 1658 1659 result = matcher.replaceAll(r); 1660 if (!result.equals("Swap all: 123 = first, 456 = second")) 1661 failCount++; 1662 1663 matcher = pattern.matcher(s2); 1664 1665 if (matcher.find()) { 1666 StringBuffer sb = new StringBuffer(); 1667 matcher.appendReplacement(sb, r); 1668 matcher.appendTail(sb); 1669 result = sb.toString(); 1670 if (!result.equals("Swap one: 123 = first, second = 456")) 1671 failCount++; 1672 } 1673 1674 // Supplementary character test 1675 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1676 matcher = pattern.matcher(toSupplementaries("abcd")); 1677 result = matcher.replaceAll("$2$1"); 1678 if (!result.equals(toSupplementaries("cdab"))) 1679 failCount++; 1680 1681 s1 = toSupplementaries("Swap all: first = 123, second = 
456"); 1682 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1683 r = toSupplementaries("$3$2$1"); 1684 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1685 matcher = pattern.matcher(s1); 1686 1687 result = matcher.replaceAll(r); 1688 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1689 failCount++; 1690 1691 matcher = pattern.matcher(s2); 1692 1693 if (matcher.find()) { 1694 StringBuffer sb = new StringBuffer(); 1695 matcher.appendReplacement(sb, r); 1696 matcher.appendTail(sb); 1697 result = sb.toString(); 1698 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1699 failCount++; 1700 } 1701 report("Append"); 1702 } 1703 1704 private static void splitTest() { 1705 Pattern pattern = Pattern.compile(":"); 1706 String[] result = pattern.split("foo:and:boo", 2); 1707 if (!result[0].equals("foo")) 1708 failCount++; 1709 if (!result[1].equals("and:boo")) 1710 failCount++; 1711 // Supplementary character test 1712 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1713 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1714 if (!result[0].equals(toSupplementaries("foo"))) 1715 failCount++; 1716 if (!result[1].equals(toSupplementaries("andXboo"))) 1717 failCount++; 1718 1719 CharBuffer cb = CharBuffer.allocate(100); 1720 cb.put("foo:and:boo"); 1721 cb.flip(); 1722 result = pattern.split(cb); 1723 if (!result[0].equals("foo")) 1724 failCount++; 1725 if (!result[1].equals("and")) 1726 failCount++; 1727 if (!result[2].equals("boo")) 1728 failCount++; 1729 1730 // Supplementary character test 1731 CharBuffer cbs = CharBuffer.allocate(100); 1732 cbs.put(toSupplementaries("fooXandXboo")); 1733 cbs.flip(); 1734 result = patternX.split(cbs); 1735 if (!result[0].equals(toSupplementaries("foo"))) 1736 failCount++; 1737 if (!result[1].equals(toSupplementaries("and"))) 1738 failCount++; 1739 if (!result[2].equals(toSupplementaries("boo"))) 1740 failCount++; 1741 1742 
String source = "0123456789"; 1743 for (int limit=-2; limit<3; limit++) { 1744 for (int x=0; x<10; x++) { 1745 result = source.split(Integer.toString(x), limit); 1746 int expectedLength = limit < 1 ? 2 : limit; 1747 1748 if ((limit == 0) && (x == 9)) { 1749 // expected dropping of "" 1750 if (result.length != 1) 1751 failCount++; 1752 if (!result[0].equals("012345678")) { 1753 failCount++; 1754 } 1755 } else { 1756 if (result.length != expectedLength) { 1757 failCount++; 1758 } 1759 if (!result[0].equals(source.substring(0,x))) { 1760 if (limit != 1) { 1761 failCount++; 1762 } else { 1763 if (!result[0].equals(source.substring(0,10))) { 1764 failCount++; 1765 } 1766 } 1767 } 1768 if (expectedLength > 1) { // Check segment 2 1769 if (!result[1].equals(source.substring(x+1,10))) 1770 failCount++; 1771 } 1772 } 1773 } 1774 } 1775 // Check the case for no match found 1776 for (int limit=-2; limit<3; limit++) { 1777 result = source.split("e", limit); 1778 if (result.length != 1) 1779 failCount++; 1780 if (!result[0].equals(source)) 1781 failCount++; 1782 } 1783 // Check the case for limit == 0, source = ""; 1784 // split() now returns 0-length for empty source "" see #6559590 1785 source = ""; 1786 result = source.split("e", 0); 1787 if (result.length != 1) 1788 failCount++; 1789 if (!result[0].equals(source)) 1790 failCount++; 1791 1792 // Check both split() and splitAsStraem(), especially for zero-lenth 1793 // input and zero-lenth match cases 1794 String[][] input = new String[][] { 1795 { " ", "Abc Efg Hij" }, // normal non-zero-match 1796 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1797 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1798 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1799 { "(?=\\p{Lu})", "AbcEfg" }, 1800 { "(?=\\p{Lu})", "Abc" }, 1801 { " ", "" }, // zero-length input 1802 { ".*", "" }, 1803 1804 // some tests from PatternStreamTest.java 1805 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1806 { 
"\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1807 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1808 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1809 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1810 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1811 { "\u56da", "" }, 1812 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1813 { "o", "boo:and:foo" }, 1814 { "o", "booooo:and:fooooo" }, 1815 { "o", "fooooo:" }, 1816 }; 1817 1818 String[][] expected = new String[][] { 1819 { "Abc", "Efg", "Hij" }, 1820 { "", "Abc", "Efg", "Hij" }, 1821 { "Abc", "", "Efg", "Hij" }, 1822 { "Abc", "Efg", "Hij" }, 1823 { "Abc", "Efg" }, 1824 { "Abc" }, 1825 { "" }, 1826 { "" }, 1827 1828 { "awgqwefg1fefw", "vssv1vvv1" }, 1829 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1830 { "awgqwefg", "fefw4vssv", "vvv" }, 1831 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1832 { "1", "23", "456", "7890" }, 1833 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1834 { "" }, 1835 { "This", "is", "testing", "", "with", "different", "separators" }, 1836 { "b", "", ":and:f" }, 1837 { "b", "", "", "", "", ":and:f" }, 1838 { "f", "", "", "", "", ":" }, 1839 }; 1840 for (int i = 0; i < input.length; i++) { 1841 pattern = Pattern.compile(input[i][0]); 1842 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1843 failCount++; 1844 } 1845 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1846 // array for zero-length input for now 1847 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1848 expected[i])) { 1849 failCount++; 1850 } 1851 } 1852 report("Split"); 1853 } 1854 1855 private static void negationTest() { 1856 Pattern pattern = Pattern.compile("[\\[@^]+"); 1857 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1858 if (!matcher.find()) 1859 failCount++; 1860 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1861 failCount++; 
1862 pattern = Pattern.compile("[@\\[^]+"); 1863 matcher = pattern.matcher("@@@@[[[[^^^^"); 1864 if (!matcher.find()) 1865 failCount++; 1866 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1867 failCount++; 1868 pattern = Pattern.compile("[@\\[^@]+"); 1869 matcher = pattern.matcher("@@@@[[[[^^^^"); 1870 if (!matcher.find()) 1871 failCount++; 1872 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1873 failCount++; 1874 1875 pattern = Pattern.compile("\\)"); 1876 matcher = pattern.matcher("xxx)xxx"); 1877 if (!matcher.find()) 1878 failCount++; 1879 1880 report("Negation"); 1881 } 1882 1883 private static void ampersandTest() { 1884 Pattern pattern = Pattern.compile("[&@]+"); 1885 check(pattern, "@@@@&&&&", true); 1886 1887 pattern = Pattern.compile("[@&]+"); 1888 check(pattern, "@@@@&&&&", true); 1889 1890 pattern = Pattern.compile("[@\\&]+"); 1891 check(pattern, "@@@@&&&&", true); 1892 1893 report("Ampersand"); 1894 } 1895 1896 private static void octalTest() throws Exception { 1897 Pattern pattern = Pattern.compile("\\u0007"); 1898 Matcher matcher = pattern.matcher("\u0007"); 1899 if (!matcher.matches()) 1900 failCount++; 1901 pattern = Pattern.compile("\\07"); 1902 matcher = pattern.matcher("\u0007"); 1903 if (!matcher.matches()) 1904 failCount++; 1905 pattern = Pattern.compile("\\007"); 1906 matcher = pattern.matcher("\u0007"); 1907 if (!matcher.matches()) 1908 failCount++; 1909 pattern = Pattern.compile("\\0007"); 1910 matcher = pattern.matcher("\u0007"); 1911 if (!matcher.matches()) 1912 failCount++; 1913 pattern = Pattern.compile("\\040"); 1914 matcher = pattern.matcher("\u0020"); 1915 if (!matcher.matches()) 1916 failCount++; 1917 pattern = Pattern.compile("\\0403"); 1918 matcher = pattern.matcher("\u00203"); 1919 if (!matcher.matches()) 1920 failCount++; 1921 pattern = Pattern.compile("\\0103"); 1922 matcher = pattern.matcher("\u0043"); 1923 if (!matcher.matches()) 1924 failCount++; 1925 1926 report("Octal"); 1927 } 1928 1929 private static void longPatternTest() 
throws Exception { 1930 try { 1931 Pattern pattern = Pattern.compile( 1932 "a 32-character-long pattern xxxx"); 1933 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 1934 pattern = Pattern.compile("a thirty four character long regex"); 1935 StringBuffer patternToBe = new StringBuffer(101); 1936 for (int i=0; i<100; i++) 1937 patternToBe.append((char)(97 + i%26)); 1938 pattern = Pattern.compile(patternToBe.toString()); 1939 } catch (PatternSyntaxException e) { 1940 failCount++; 1941 } 1942 1943 // Supplementary character test 1944 try { 1945 Pattern pattern = Pattern.compile( 1946 toSupplementaries("a 32-character-long pattern xxxx")); 1947 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 1948 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 1949 StringBuffer patternToBe = new StringBuffer(101*2); 1950 for (int i=0; i<100; i++) 1951 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 1952 + 97 + i%26)); 1953 pattern = Pattern.compile(patternToBe.toString()); 1954 } catch (PatternSyntaxException e) { 1955 failCount++; 1956 } 1957 report("LongPattern"); 1958 } 1959 1960 private static void group0Test() throws Exception { 1961 Pattern pattern = Pattern.compile("(tes)ting"); 1962 Matcher matcher = pattern.matcher("testing"); 1963 check(matcher, "testing"); 1964 1965 matcher.reset("testing"); 1966 if (matcher.lookingAt()) { 1967 if (!matcher.group(0).equals("testing")) 1968 failCount++; 1969 } else { 1970 failCount++; 1971 } 1972 1973 matcher.reset("testing"); 1974 if (matcher.matches()) { 1975 if (!matcher.group(0).equals("testing")) 1976 failCount++; 1977 } else { 1978 failCount++; 1979 } 1980 1981 pattern = Pattern.compile("(tes)ting"); 1982 matcher = pattern.matcher("testing"); 1983 if (matcher.lookingAt()) { 1984 if (!matcher.group(0).equals("testing")) 1985 failCount++; 1986 } else { 1987 failCount++; 1988 } 1989 1990 pattern = Pattern.compile("^(tes)ting"); 
1991 matcher = pattern.matcher("testing"); 1992 if (matcher.matches()) { 1993 if (!matcher.group(0).equals("testing")) 1994 failCount++; 1995 } else { 1996 failCount++; 1997 } 1998 1999 // Supplementary character test 2000 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2001 matcher = pattern.matcher(toSupplementaries("testing")); 2002 check(matcher, toSupplementaries("testing")); 2003 2004 matcher.reset(toSupplementaries("testing")); 2005 if (matcher.lookingAt()) { 2006 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2007 failCount++; 2008 } else { 2009 failCount++; 2010 } 2011 2012 matcher.reset(toSupplementaries("testing")); 2013 if (matcher.matches()) { 2014 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2015 failCount++; 2016 } else { 2017 failCount++; 2018 } 2019 2020 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2021 matcher = pattern.matcher(toSupplementaries("testing")); 2022 if (matcher.lookingAt()) { 2023 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2024 failCount++; 2025 } else { 2026 failCount++; 2027 } 2028 2029 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2030 matcher = pattern.matcher(toSupplementaries("testing")); 2031 if (matcher.matches()) { 2032 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2033 failCount++; 2034 } else { 2035 failCount++; 2036 } 2037 2038 report("Group0"); 2039 } 2040 2041 private static void findIntTest() throws Exception { 2042 Pattern p = Pattern.compile("blah"); 2043 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2044 boolean result = m.find(2); 2045 if (!result) 2046 failCount++; 2047 2048 p = Pattern.compile("$"); 2049 m = p.matcher("1234567890"); 2050 result = m.find(10); 2051 if (!result) 2052 failCount++; 2053 try { 2054 result = m.find(11); 2055 failCount++; 2056 } catch (IndexOutOfBoundsException e) { 2057 // correct result 2058 } 2059 2060 // Supplementary character test 2061 p = Pattern.compile(toSupplementaries("blah")); 2062 
m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2063 result = m.find(2); 2064 if (!result) 2065 failCount++; 2066 2067 report("FindInt"); 2068 } 2069 2070 private static void emptyPatternTest() throws Exception { 2071 Pattern p = Pattern.compile(""); 2072 Matcher m = p.matcher("foo"); 2073 2074 // Should find empty pattern at beginning of input 2075 boolean result = m.find(); 2076 if (result != true) 2077 failCount++; 2078 if (m.start() != 0) 2079 failCount++; 2080 2081 // Should not match entire input if input is not empty 2082 m.reset(); 2083 result = m.matches(); 2084 if (result == true) 2085 failCount++; 2086 2087 try { 2088 m.start(0); 2089 failCount++; 2090 } catch (IllegalStateException e) { 2091 // Correct result 2092 } 2093 2094 // Should match entire input if input is empty 2095 m.reset(""); 2096 result = m.matches(); 2097 if (result != true) 2098 failCount++; 2099 2100 result = Pattern.matches("", ""); 2101 if (result != true) 2102 failCount++; 2103 2104 result = Pattern.matches("", "foo"); 2105 if (result == true) 2106 failCount++; 2107 report("EmptyPattern"); 2108 } 2109 2110 private static void charClassTest() throws Exception { 2111 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2112 check(pattern, "blahb]blech", true); 2113 2114 pattern = Pattern.compile("[abc[def]]"); 2115 check(pattern, "b", true); 2116 2117 // Supplementary character tests 2118 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2119 check(pattern, toSupplementaries("blahb]blech"), true); 2120 2121 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2122 check(pattern, toSupplementaries("b"), true); 2123 2124 try { 2125 // u00ff when UNICODE_CASE 2126 pattern = Pattern.compile("[ab\u00ffcd]", 2127 Pattern.CASE_INSENSITIVE| 2128 Pattern.UNICODE_CASE); 2129 check(pattern, "ab\u00ffcd", true); 2130 check(pattern, "Ab\u0178Cd", true); 2131 2132 // u00b5 when UNICODE_CASE 2133 pattern = Pattern.compile("[ab\u00b5cd]", 2134 Pattern.CASE_INSENSITIVE| 
2135 Pattern.UNICODE_CASE); 2136 check(pattern, "ab\u00b5cd", true); 2137 check(pattern, "Ab\u039cCd", true); 2138 } catch (Exception e) { failCount++; } 2139 2140 /* Special cases 2141 (1)LatinSmallLetterLongS u+017f 2142 (2)LatinSmallLetterDotlessI u+0131 2143 (3)LatineCapitalLetterIWithDotAbove u+0130 2144 (4)KelvinSign u+212a 2145 (5)AngstromSign u+212b 2146 */ 2147 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2148 pattern = Pattern.compile("[sik\u00c5]+", flags); 2149 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2150 failCount++; 2151 2152 report("CharClass"); 2153 } 2154 2155 private static void caretTest() throws Exception { 2156 Pattern pattern = Pattern.compile("\\w*"); 2157 Matcher matcher = pattern.matcher("a#bc#def##g"); 2158 check(matcher, "a"); 2159 check(matcher, ""); 2160 check(matcher, "bc"); 2161 check(matcher, ""); 2162 check(matcher, "def"); 2163 check(matcher, ""); 2164 check(matcher, ""); 2165 check(matcher, "g"); 2166 check(matcher, ""); 2167 if (matcher.find()) 2168 failCount++; 2169 2170 pattern = Pattern.compile("^\\w*"); 2171 matcher = pattern.matcher("a#bc#def##g"); 2172 check(matcher, "a"); 2173 if (matcher.find()) 2174 failCount++; 2175 2176 pattern = Pattern.compile("\\w"); 2177 matcher = pattern.matcher("abc##x"); 2178 check(matcher, "a"); 2179 check(matcher, "b"); 2180 check(matcher, "c"); 2181 check(matcher, "x"); 2182 if (matcher.find()) 2183 failCount++; 2184 2185 pattern = Pattern.compile("^\\w"); 2186 matcher = pattern.matcher("abc##x"); 2187 check(matcher, "a"); 2188 if (matcher.find()) 2189 failCount++; 2190 2191 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2192 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2193 check(matcher, "abc"); 2194 if (matcher.find()) 2195 failCount++; 2196 2197 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2198 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2199 check(matcher, "abc"); 2200 check(matcher, "jkl"); 2201 if (matcher.find()) 
2202 failCount++; 2203 2204 pattern = Pattern.compile("^", Pattern.MULTILINE); 2205 matcher = pattern.matcher("this is some text"); 2206 String result = matcher.replaceAll("X"); 2207 if (!result.equals("Xthis is some text")) 2208 failCount++; 2209 2210 pattern = Pattern.compile("^"); 2211 matcher = pattern.matcher("this is some text"); 2212 result = matcher.replaceAll("X"); 2213 if (!result.equals("Xthis is some text")) 2214 failCount++; 2215 2216 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2217 matcher = pattern.matcher("this is some text\n"); 2218 result = matcher.replaceAll("X"); 2219 if (!result.equals("Xthis is some text\n")) 2220 failCount++; 2221 2222 report("Caret"); 2223 } 2224 2225 private static void groupCaptureTest() throws Exception { 2226 // Independent group 2227 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2228 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2229 matcher.find(); 2230 try { 2231 String blah = matcher.group(1); 2232 failCount++; 2233 } catch (IndexOutOfBoundsException ioobe) { 2234 // Good result 2235 } 2236 // Pure group 2237 pattern = Pattern.compile("x+(?:y+)z+"); 2238 matcher = pattern.matcher("xxxyyyzzz"); 2239 matcher.find(); 2240 try { 2241 String blah = matcher.group(1); 2242 failCount++; 2243 } catch (IndexOutOfBoundsException ioobe) { 2244 // Good result 2245 } 2246 2247 // Supplementary character tests 2248 // Independent group 2249 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2250 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2251 matcher.find(); 2252 try { 2253 String blah = matcher.group(1); 2254 failCount++; 2255 } catch (IndexOutOfBoundsException ioobe) { 2256 // Good result 2257 } 2258 // Pure group 2259 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2260 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2261 matcher.find(); 2262 try { 2263 String blah = matcher.group(1); 2264 failCount++; 2265 } catch (IndexOutOfBoundsException ioobe) 
{ 2266 // Good result 2267 } 2268 2269 report("GroupCapture"); 2270 } 2271 2272 private static void backRefTest() throws Exception { 2273 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2274 check(pattern, "zzzaabcazzz", true); 2275 2276 pattern = Pattern.compile("(a*)bc\\1"); 2277 check(pattern, "zzzaabcaazzz", true); 2278 2279 pattern = Pattern.compile("(abc)(def)\\1"); 2280 check(pattern, "abcdefabc", true); 2281 2282 pattern = Pattern.compile("(abc)(def)\\3"); 2283 check(pattern, "abcdefabc", false); 2284 2285 try { 2286 for (int i = 1; i < 10; i++) { 2287 // Make sure backref 1-9 are always accepted 2288 pattern = Pattern.compile("abcdef\\" + i); 2289 // and fail to match if the target group does not exit 2290 check(pattern, "abcdef", false); 2291 } 2292 } catch(PatternSyntaxException e) { 2293 failCount++; 2294 } 2295 2296 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2297 check(pattern, "abcdefghija", false); 2298 check(pattern, "abcdefghija1", true); 2299 2300 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2301 check(pattern, "abcdefghijkk", true); 2302 2303 pattern = Pattern.compile("(a)bcdefghij\\11"); 2304 check(pattern, "abcdefghija1", true); 2305 2306 // Supplementary character tests 2307 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2308 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2309 2310 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2311 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2312 2313 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2314 check(pattern, toSupplementaries("abcdefabc"), true); 2315 2316 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2317 check(pattern, toSupplementaries("abcdefabc"), false); 2318 2319 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2320 check(pattern, toSupplementaries("abcdefghija"), false); 2321 check(pattern, toSupplementaries("abcdefghija1"), true); 2322 
2323 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2324 check(pattern, toSupplementaries("abcdefghijkk"), true); 2325 2326 report("BackRef"); 2327 } 2328 2329 /** 2330 * Unicode Technical Report #18, section 2.6 End of Line 2331 * There is no empty line to be matched in the sequence \u000D\u000A 2332 * but there is an empty line in the sequence \u000A\u000D. 2333 */ 2334 private static void anchorTest() throws Exception { 2335 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2336 Matcher m = p.matcher("blah1\r\nblah2"); 2337 m.find(); 2338 m.find(); 2339 if (!m.group().equals("blah2")) 2340 failCount++; 2341 2342 m.reset("blah1\n\rblah2"); 2343 m.find(); 2344 m.find(); 2345 m.find(); 2346 if (!m.group().equals("blah2")) 2347 failCount++; 2348 2349 // Test behavior of $ with \r\n at end of input 2350 p = Pattern.compile(".+$"); 2351 m = p.matcher("blah1\r\n"); 2352 if (!m.find()) 2353 failCount++; 2354 if (!m.group().equals("blah1")) 2355 failCount++; 2356 if (m.find()) 2357 failCount++; 2358 2359 // Test behavior of $ with \r\n at end of input in multiline 2360 p = Pattern.compile(".+$", Pattern.MULTILINE); 2361 m = p.matcher("blah1\r\n"); 2362 if (!m.find()) 2363 failCount++; 2364 if (m.find()) 2365 failCount++; 2366 2367 // Test for $ recognition of \u0085 for bug 4527731 2368 p = Pattern.compile(".+$", Pattern.MULTILINE); 2369 m = p.matcher("blah1\u0085"); 2370 if (!m.find()) 2371 failCount++; 2372 2373 // Supplementary character test 2374 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2375 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2376 m.find(); 2377 m.find(); 2378 if (!m.group().equals(toSupplementaries("blah2"))) 2379 failCount++; 2380 2381 m.reset(toSupplementaries("blah1\n\rblah2")); 2382 m.find(); 2383 m.find(); 2384 m.find(); 2385 if (!m.group().equals(toSupplementaries("blah2"))) 2386 failCount++; 2387 2388 // Test behavior of $ with \r\n at end of input 2389 p = Pattern.compile(".+$"); 2390 m = 
p.matcher(toSupplementaries("blah1\r\n")); 2391 if (!m.find()) 2392 failCount++; 2393 if (!m.group().equals(toSupplementaries("blah1"))) 2394 failCount++; 2395 if (m.find()) 2396 failCount++; 2397 2398 // Test behavior of $ with \r\n at end of input in multiline 2399 p = Pattern.compile(".+$", Pattern.MULTILINE); 2400 m = p.matcher(toSupplementaries("blah1\r\n")); 2401 if (!m.find()) 2402 failCount++; 2403 if (m.find()) 2404 failCount++; 2405 2406 // Test for $ recognition of \u0085 for bug 4527731 2407 p = Pattern.compile(".+$", Pattern.MULTILINE); 2408 m = p.matcher(toSupplementaries("blah1\u0085")); 2409 if (!m.find()) 2410 failCount++; 2411 2412 report("Anchors"); 2413 } 2414 2415 /** 2416 * A basic sanity test of Matcher.lookingAt(). 2417 */ 2418 private static void lookingAtTest() throws Exception { 2419 Pattern p = Pattern.compile("(ab)(c*)"); 2420 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2421 2422 if (!m.lookingAt()) 2423 failCount++; 2424 2425 if (!m.group().equals(m.group(0))) 2426 failCount++; 2427 2428 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2429 if (m.lookingAt()) 2430 failCount++; 2431 2432 // Supplementary character test 2433 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2434 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2435 2436 if (!m.lookingAt()) 2437 failCount++; 2438 2439 if (!m.group().equals(m.group(0))) 2440 failCount++; 2441 2442 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2443 if (m.lookingAt()) 2444 failCount++; 2445 2446 report("Looking At"); 2447 } 2448 2449 /** 2450 * A basic sanity test of Matcher.matches(). 
2451 */ 2452 private static void matchesTest() throws Exception { 2453 // matches() 2454 Pattern p = Pattern.compile("ulb(c*)"); 2455 Matcher m = p.matcher("ulbcccccc"); 2456 if (!m.matches()) 2457 failCount++; 2458 2459 // find() but not matches() 2460 m.reset("zzzulbcccccc"); 2461 if (m.matches()) 2462 failCount++; 2463 2464 // lookingAt() but not matches() 2465 m.reset("ulbccccccdef"); 2466 if (m.matches()) 2467 failCount++; 2468 2469 // matches() 2470 p = Pattern.compile("a|ad"); 2471 m = p.matcher("ad"); 2472 if (!m.matches()) 2473 failCount++; 2474 2475 // Supplementary character test 2476 // matches() 2477 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2478 m = p.matcher(toSupplementaries("ulbcccccc")); 2479 if (!m.matches()) 2480 failCount++; 2481 2482 // find() but not matches() 2483 m.reset(toSupplementaries("zzzulbcccccc")); 2484 if (m.matches()) 2485 failCount++; 2486 2487 // lookingAt() but not matches() 2488 m.reset(toSupplementaries("ulbccccccdef")); 2489 if (m.matches()) 2490 failCount++; 2491 2492 // matches() 2493 p = Pattern.compile(toSupplementaries("a|ad")); 2494 m = p.matcher(toSupplementaries("ad")); 2495 if (!m.matches()) 2496 failCount++; 2497 2498 report("Matches"); 2499 } 2500 2501 /** 2502 * A basic sanity test of Pattern.matches(). 
2503 */ 2504 private static void patternMatchesTest() throws Exception { 2505 // matches() 2506 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2507 toSupplementaries("ulbcccccc"))) 2508 failCount++; 2509 2510 // find() but not matches() 2511 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2512 toSupplementaries("zzzulbcccccc"))) 2513 failCount++; 2514 2515 // lookingAt() but not matches() 2516 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2517 toSupplementaries("ulbccccccdef"))) 2518 failCount++; 2519 2520 // Supplementary character test 2521 // matches() 2522 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2523 toSupplementaries("ulbcccccc"))) 2524 failCount++; 2525 2526 // find() but not matches() 2527 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2528 toSupplementaries("zzzulbcccccc"))) 2529 failCount++; 2530 2531 // lookingAt() but not matches() 2532 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2533 toSupplementaries("ulbccccccdef"))) 2534 failCount++; 2535 2536 report("Pattern Matches"); 2537 } 2538 2539 /** 2540 * Canonical equivalence testing. Tests the ability of the engine 2541 * to match sequences that are not explicitly specified in the 2542 * pattern when they are considered equivalent by the Unicode Standard. 
2543 */ 2544 private static void ceTest() throws Exception { 2545 // Decomposed char outside char classes 2546 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2547 Matcher m = p.matcher("test\u00e5"); 2548 if (!m.matches()) 2549 failCount++; 2550 2551 m.reset("testa\u030a"); 2552 if (!m.matches()) 2553 failCount++; 2554 2555 // Composed char outside char classes 2556 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2557 m = p.matcher("test\u00e5"); 2558 if (!m.matches()) 2559 failCount++; 2560 2561 m.reset("testa\u030a"); 2562 if (!m.find()) 2563 failCount++; 2564 2565 // Decomposed char inside a char class 2566 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2567 m = p.matcher("test\u00e5"); 2568 if (!m.find()) 2569 failCount++; 2570 2571 m.reset("testa\u030a"); 2572 if (!m.find()) 2573 failCount++; 2574 2575 // Composed char inside a char class 2576 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2577 m = p.matcher("test\u00e5"); 2578 if (!m.find()) 2579 failCount++; 2580 2581 m.reset("testa\u0300"); 2582 if (!m.find()) 2583 failCount++; 2584 2585 m.reset("testa\u030a"); 2586 if (!m.find()) 2587 failCount++; 2588 2589 // Marks that cannot legally change order and be equivalent 2590 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2591 check(p, "testa\u0308\u0300", true); 2592 check(p, "testa\u0300\u0308", false); 2593 2594 // Marks that can legally change order and be equivalent 2595 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2596 check(p, "testa\u0308\u0323", true); 2597 check(p, "testa\u0323\u0308", true); 2598 2599 // Test all equivalences of the sequence a\u0308\u0323\u0300 2600 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2601 check(p, "testa\u0308\u0323\u0300", true); 2602 check(p, "testa\u0323\u0308\u0300", true); 2603 check(p, "testa\u0308\u0300\u0323", true); 2604 check(p, "test\u00e4\u0323\u0300", true); 2605 check(p, "test\u00e4\u0300\u0323", true); 2606 
2607 /* 2608 * The following canonical equivalence tests don't work. Bug id: 4916384. 2609 * 2610 // Decomposed hangul (jamos) 2611 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ); 2612 m = p.matcher("\u1100\u1161"); 2613 if (!m.matches()) 2614 failCount++; 2615 2616 m.reset("\uac00"); 2617 if (!m.matches()) 2618 failCount++; 2619 2620 // Composed hangul 2621 p = Pattern.compile("\uac00", Pattern.CANON_EQ); 2622 m = p.matcher("\u1100\u1161"); 2623 if (!m.matches()) 2624 failCount++; 2625 2626 m.reset("\uac00"); 2627 if (!m.matches()) 2628 failCount++; 2629 2630 // Decomposed supplementary outside char classes 2631 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ); 2632 m = p.matcher("test\ud834\uddc0"); 2633 if (!m.matches()) 2634 failCount++; 2635 2636 m.reset("test\ud834\uddbc\ud834\udd6f"); 2637 if (!m.matches()) 2638 failCount++; 2639 2640 // Composed supplementary outside char classes 2641 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ); 2642 m.reset("test\ud834\uddbc\ud834\udd6f"); 2643 if (!m.matches()) 2644 failCount++; 2645 2646 m = p.matcher("test\ud834\uddc0"); 2647 if (!m.matches()) 2648 failCount++; 2649 2650 */ 2651 2652 report("Canonical Equivalence"); 2653 } 2654 2655 /** 2656 * A basic sanity test of Matcher.replaceAll(). 
2657 */ 2658 private static void globalSubstitute() throws Exception { 2659 // Global substitution with a literal 2660 Pattern p = Pattern.compile("(ab)(c*)"); 2661 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2662 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2663 failCount++; 2664 2665 m.reset("zzzabccczzzabcczzzabccczzz"); 2666 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2667 failCount++; 2668 2669 // Global substitution with groups 2670 m.reset("zzzabccczzzabcczzzabccczzz"); 2671 String result = m.replaceAll("$1"); 2672 if (!result.equals("zzzabzzzabzzzabzzz")) 2673 failCount++; 2674 2675 // Supplementary character test 2676 // Global substitution with a literal 2677 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2678 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2679 if (!m.replaceAll(toSupplementaries("test")). 2680 equals(toSupplementaries("testzzztestzzztest"))) 2681 failCount++; 2682 2683 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2684 if (!m.replaceAll(toSupplementaries("test")). 2685 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2686 failCount++; 2687 2688 // Global substitution with groups 2689 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2690 result = m.replaceAll("$1"); 2691 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2692 failCount++; 2693 2694 report("Global Substitution"); 2695 } 2696 2697 /** 2698 * Tests the usage of Matcher.appendReplacement() with literal 2699 * and group substitutions. 
2700 */ 2701 private static void stringbufferSubstitute() throws Exception { 2702 // SB substitution with literal 2703 String blah = "zzzblahzzz"; 2704 Pattern p = Pattern.compile("blah"); 2705 Matcher m = p.matcher(blah); 2706 StringBuffer result = new StringBuffer(); 2707 try { 2708 m.appendReplacement(result, "blech"); 2709 failCount++; 2710 } catch (IllegalStateException e) { 2711 } 2712 m.find(); 2713 m.appendReplacement(result, "blech"); 2714 if (!result.toString().equals("zzzblech")) 2715 failCount++; 2716 2717 m.appendTail(result); 2718 if (!result.toString().equals("zzzblechzzz")) 2719 failCount++; 2720 2721 // SB substitution with groups 2722 blah = "zzzabcdzzz"; 2723 p = Pattern.compile("(ab)(cd)*"); 2724 m = p.matcher(blah); 2725 result = new StringBuffer(); 2726 try { 2727 m.appendReplacement(result, "$1"); 2728 failCount++; 2729 } catch (IllegalStateException e) { 2730 } 2731 m.find(); 2732 m.appendReplacement(result, "$1"); 2733 if (!result.toString().equals("zzzab")) 2734 failCount++; 2735 2736 m.appendTail(result); 2737 if (!result.toString().equals("zzzabzzz")) 2738 failCount++; 2739 2740 // SB substitution with 3 groups 2741 blah = "zzzabcdcdefzzz"; 2742 p = Pattern.compile("(ab)(cd)*(ef)"); 2743 m = p.matcher(blah); 2744 result = new StringBuffer(); 2745 try { 2746 m.appendReplacement(result, "$1w$2w$3"); 2747 failCount++; 2748 } catch (IllegalStateException e) { 2749 } 2750 m.find(); 2751 m.appendReplacement(result, "$1w$2w$3"); 2752 if (!result.toString().equals("zzzabwcdwef")) 2753 failCount++; 2754 2755 m.appendTail(result); 2756 if (!result.toString().equals("zzzabwcdwefzzz")) 2757 failCount++; 2758 2759 // SB substitution with groups and three matches 2760 // skipping middle match 2761 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2762 p = Pattern.compile("(ab)(cd*)"); 2763 m = p.matcher(blah); 2764 result = new StringBuffer(); 2765 try { 2766 m.appendReplacement(result, "$1"); 2767 failCount++; 2768 } catch (IllegalStateException e) { 2769 } 2770 
m.find(); 2771 m.appendReplacement(result, "$1"); 2772 if (!result.toString().equals("zzzab")) 2773 failCount++; 2774 2775 m.find(); 2776 m.find(); 2777 m.appendReplacement(result, "$2"); 2778 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2779 failCount++; 2780 2781 m.appendTail(result); 2782 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2783 failCount++; 2784 2785 // Check to make sure escaped $ is ignored 2786 blah = "zzzabcdcdefzzz"; 2787 p = Pattern.compile("(ab)(cd)*(ef)"); 2788 m = p.matcher(blah); 2789 result = new StringBuffer(); 2790 m.find(); 2791 m.appendReplacement(result, "$1w\\$2w$3"); 2792 if (!result.toString().equals("zzzabw$2wef")) 2793 failCount++; 2794 2795 m.appendTail(result); 2796 if (!result.toString().equals("zzzabw$2wefzzz")) 2797 failCount++; 2798 2799 // Check to make sure a reference to nonexistent group causes error 2800 blah = "zzzabcdcdefzzz"; 2801 p = Pattern.compile("(ab)(cd)*(ef)"); 2802 m = p.matcher(blah); 2803 result = new StringBuffer(); 2804 m.find(); 2805 try { 2806 m.appendReplacement(result, "$1w$5w$3"); 2807 failCount++; 2808 } catch (IndexOutOfBoundsException ioobe) { 2809 // Correct result 2810 } 2811 2812 // Check double digit group references 2813 blah = "zzz123456789101112zzz"; 2814 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2815 m = p.matcher(blah); 2816 result = new StringBuffer(); 2817 m.find(); 2818 m.appendReplacement(result, "$1w$11w$3"); 2819 if (!result.toString().equals("zzz1w11w3")) 2820 failCount++; 2821 2822 // Check to make sure it backs off $15 to $1 if only three groups 2823 blah = "zzzabcdcdefzzz"; 2824 p = Pattern.compile("(ab)(cd)*(ef)"); 2825 m = p.matcher(blah); 2826 result = new StringBuffer(); 2827 m.find(); 2828 m.appendReplacement(result, "$1w$15w$3"); 2829 if (!result.toString().equals("zzzabwab5wef")) 2830 failCount++; 2831 2832 2833 // Supplementary character test 2834 // SB substitution with literal 2835 blah = toSupplementaries("zzzblahzzz"); 2836 p = 
Pattern.compile(toSupplementaries("blah")); 2837 m = p.matcher(blah); 2838 result = new StringBuffer(); 2839 try { 2840 m.appendReplacement(result, toSupplementaries("blech")); 2841 failCount++; 2842 } catch (IllegalStateException e) { 2843 } 2844 m.find(); 2845 m.appendReplacement(result, toSupplementaries("blech")); 2846 if (!result.toString().equals(toSupplementaries("zzzblech"))) 2847 failCount++; 2848 2849 m.appendTail(result); 2850 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 2851 failCount++; 2852 2853 // SB substitution with groups 2854 blah = toSupplementaries("zzzabcdzzz"); 2855 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 2856 m = p.matcher(blah); 2857 result = new StringBuffer(); 2858 try { 2859 m.appendReplacement(result, "$1"); 2860 failCount++; 2861 } catch (IllegalStateException e) { 2862 } 2863 m.find(); 2864 m.appendReplacement(result, "$1"); 2865 if (!result.toString().equals(toSupplementaries("zzzab"))) 2866 failCount++; 2867 2868 m.appendTail(result); 2869 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 2870 failCount++; 2871 2872 // SB substitution with 3 groups 2873 blah = toSupplementaries("zzzabcdcdefzzz"); 2874 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2875 m = p.matcher(blah); 2876 result = new StringBuffer(); 2877 try { 2878 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2879 failCount++; 2880 } catch (IllegalStateException e) { 2881 } 2882 m.find(); 2883 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2884 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 2885 failCount++; 2886 2887 m.appendTail(result); 2888 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 2889 failCount++; 2890 2891 // SB substitution with groups and three matches 2892 // skipping middle match 2893 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 2894 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 2895 m = p.matcher(blah); 2896 result = new 
StringBuffer(); 2897 try { 2898 m.appendReplacement(result, "$1"); 2899 failCount++; 2900 } catch (IllegalStateException e) { 2901 } 2902 m.find(); 2903 m.appendReplacement(result, "$1"); 2904 if (!result.toString().equals(toSupplementaries("zzzab"))) 2905 failCount++; 2906 2907 m.find(); 2908 m.find(); 2909 m.appendReplacement(result, "$2"); 2910 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 2911 failCount++; 2912 2913 m.appendTail(result); 2914 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 2915 failCount++; 2916 2917 // Check to make sure escaped $ is ignored 2918 blah = toSupplementaries("zzzabcdcdefzzz"); 2919 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2920 m = p.matcher(blah); 2921 result = new StringBuffer(); 2922 m.find(); 2923 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 2924 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 2925 failCount++; 2926 2927 m.appendTail(result); 2928 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 2929 failCount++; 2930 2931 // Check to make sure a reference to nonexistent group causes error 2932 blah = toSupplementaries("zzzabcdcdefzzz"); 2933 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2934 m = p.matcher(blah); 2935 result = new StringBuffer(); 2936 m.find(); 2937 try { 2938 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 2939 failCount++; 2940 } catch (IndexOutOfBoundsException ioobe) { 2941 // Correct result 2942 } 2943 2944 // Check double digit group references 2945 blah = toSupplementaries("zzz123456789101112zzz"); 2946 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2947 m = p.matcher(blah); 2948 result = new StringBuffer(); 2949 m.find(); 2950 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 2951 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 2952 failCount++; 2953 2954 // Check to make sure it backs off $15 to $1 if only three groups 
2955 blah = toSupplementaries("zzzabcdcdefzzz"); 2956 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2957 m = p.matcher(blah); 2958 result = new StringBuffer(); 2959 m.find(); 2960 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 2961 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 2962 failCount++; 2963 2964 // Check nothing has been appended into the output buffer if 2965 // the replacement string triggers IllegalArgumentException. 2966 p = Pattern.compile("(abc)"); 2967 m = p.matcher("abcd"); 2968 result = new StringBuffer(); 2969 m.find(); 2970 try { 2971 m.appendReplacement(result, ("xyz$g")); 2972 failCount++; 2973 } catch (IllegalArgumentException iae) { 2974 if (result.length() != 0) 2975 failCount++; 2976 } 2977 2978 report("SB Substitution"); 2979 } 2980 2981 /** 2982 * Tests the usage of Matcher.appendReplacement() with literal 2983 * and group substitutions. 2984 */ 2985 private static void stringbuilderSubstitute() throws Exception { 2986 // SB substitution with literal 2987 String blah = "zzzblahzzz"; 2988 Pattern p = Pattern.compile("blah"); 2989 Matcher m = p.matcher(blah); 2990 StringBuilder result = new StringBuilder(); 2991 try { 2992 m.appendReplacement(result, "blech"); 2993 failCount++; 2994 } catch (IllegalStateException e) { 2995 } 2996 m.find(); 2997 m.appendReplacement(result, "blech"); 2998 if (!result.toString().equals("zzzblech")) 2999 failCount++; 3000 3001 m.appendTail(result); 3002 if (!result.toString().equals("zzzblechzzz")) 3003 failCount++; 3004 3005 // SB substitution with groups 3006 blah = "zzzabcdzzz"; 3007 p = Pattern.compile("(ab)(cd)*"); 3008 m = p.matcher(blah); 3009 result = new StringBuilder(); 3010 try { 3011 m.appendReplacement(result, "$1"); 3012 failCount++; 3013 } catch (IllegalStateException e) { 3014 } 3015 m.find(); 3016 m.appendReplacement(result, "$1"); 3017 if (!result.toString().equals("zzzab")) 3018 failCount++; 3019 3020 m.appendTail(result); 3021 if 
(!result.toString().equals("zzzabzzz")) 3022 failCount++; 3023 3024 // SB substitution with 3 groups 3025 blah = "zzzabcdcdefzzz"; 3026 p = Pattern.compile("(ab)(cd)*(ef)"); 3027 m = p.matcher(blah); 3028 result = new StringBuilder(); 3029 try { 3030 m.appendReplacement(result, "$1w$2w$3"); 3031 failCount++; 3032 } catch (IllegalStateException e) { 3033 } 3034 m.find(); 3035 m.appendReplacement(result, "$1w$2w$3"); 3036 if (!result.toString().equals("zzzabwcdwef")) 3037 failCount++; 3038 3039 m.appendTail(result); 3040 if (!result.toString().equals("zzzabwcdwefzzz")) 3041 failCount++; 3042 3043 // SB substitution with groups and three matches 3044 // skipping middle match 3045 blah = "zzzabcdzzzabcddzzzabcdzzz"; 3046 p = Pattern.compile("(ab)(cd*)"); 3047 m = p.matcher(blah); 3048 result = new StringBuilder(); 3049 try { 3050 m.appendReplacement(result, "$1"); 3051 failCount++; 3052 } catch (IllegalStateException e) { 3053 } 3054 m.find(); 3055 m.appendReplacement(result, "$1"); 3056 if (!result.toString().equals("zzzab")) 3057 failCount++; 3058 3059 m.find(); 3060 m.find(); 3061 m.appendReplacement(result, "$2"); 3062 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 3063 failCount++; 3064 3065 m.appendTail(result); 3066 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 3067 failCount++; 3068 3069 // Check to make sure escaped $ is ignored 3070 blah = "zzzabcdcdefzzz"; 3071 p = Pattern.compile("(ab)(cd)*(ef)"); 3072 m = p.matcher(blah); 3073 result = new StringBuilder(); 3074 m.find(); 3075 m.appendReplacement(result, "$1w\\$2w$3"); 3076 if (!result.toString().equals("zzzabw$2wef")) 3077 failCount++; 3078 3079 m.appendTail(result); 3080 if (!result.toString().equals("zzzabw$2wefzzz")) 3081 failCount++; 3082 3083 // Check to make sure a reference to nonexistent group causes error 3084 blah = "zzzabcdcdefzzz"; 3085 p = Pattern.compile("(ab)(cd)*(ef)"); 3086 m = p.matcher(blah); 3087 result = new StringBuilder(); 3088 m.find(); 3089 try { 3090 
m.appendReplacement(result, "$1w$5w$3"); 3091 failCount++; 3092 } catch (IndexOutOfBoundsException ioobe) { 3093 // Correct result 3094 } 3095 3096 // Check double digit group references 3097 blah = "zzz123456789101112zzz"; 3098 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3099 m = p.matcher(blah); 3100 result = new StringBuilder(); 3101 m.find(); 3102 m.appendReplacement(result, "$1w$11w$3"); 3103 if (!result.toString().equals("zzz1w11w3")) 3104 failCount++; 3105 3106 // Check to make sure it backs off $15 to $1 if only three groups 3107 blah = "zzzabcdcdefzzz"; 3108 p = Pattern.compile("(ab)(cd)*(ef)"); 3109 m = p.matcher(blah); 3110 result = new StringBuilder(); 3111 m.find(); 3112 m.appendReplacement(result, "$1w$15w$3"); 3113 if (!result.toString().equals("zzzabwab5wef")) 3114 failCount++; 3115 3116 3117 // Supplementary character test 3118 // SB substitution with literal 3119 blah = toSupplementaries("zzzblahzzz"); 3120 p = Pattern.compile(toSupplementaries("blah")); 3121 m = p.matcher(blah); 3122 result = new StringBuilder(); 3123 try { 3124 m.appendReplacement(result, toSupplementaries("blech")); 3125 failCount++; 3126 } catch (IllegalStateException e) { 3127 } 3128 m.find(); 3129 m.appendReplacement(result, toSupplementaries("blech")); 3130 if (!result.toString().equals(toSupplementaries("zzzblech"))) 3131 failCount++; 3132 m.appendTail(result); 3133 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 3134 failCount++; 3135 3136 // SB substitution with groups 3137 blah = toSupplementaries("zzzabcdzzz"); 3138 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 3139 m = p.matcher(blah); 3140 result = new StringBuilder(); 3141 try { 3142 m.appendReplacement(result, "$1"); 3143 failCount++; 3144 } catch (IllegalStateException e) { 3145 } 3146 m.find(); 3147 m.appendReplacement(result, "$1"); 3148 if (!result.toString().equals(toSupplementaries("zzzab"))) 3149 failCount++; 3150 3151 m.appendTail(result); 3152 if 
(!result.toString().equals(toSupplementaries("zzzabzzz"))) 3153 failCount++; 3154 3155 // SB substitution with 3 groups 3156 blah = toSupplementaries("zzzabcdcdefzzz"); 3157 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3158 m = p.matcher(blah); 3159 result = new StringBuilder(); 3160 try { 3161 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3162 failCount++; 3163 } catch (IllegalStateException e) { 3164 } 3165 m.find(); 3166 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 3167 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 3168 failCount++; 3169 3170 m.appendTail(result); 3171 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 3172 failCount++; 3173 3174 // SB substitution with groups and three matches 3175 // skipping middle match 3176 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 3177 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 3178 m = p.matcher(blah); 3179 result = new StringBuilder(); 3180 try { 3181 m.appendReplacement(result, "$1"); 3182 failCount++; 3183 } catch (IllegalStateException e) { 3184 } 3185 m.find(); 3186 m.appendReplacement(result, "$1"); 3187 if (!result.toString().equals(toSupplementaries("zzzab"))) 3188 failCount++; 3189 3190 m.find(); 3191 m.find(); 3192 m.appendReplacement(result, "$2"); 3193 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 3194 failCount++; 3195 3196 m.appendTail(result); 3197 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 3198 failCount++; 3199 3200 // Check to make sure escaped $ is ignored 3201 blah = toSupplementaries("zzzabcdcdefzzz"); 3202 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3203 m = p.matcher(blah); 3204 result = new StringBuilder(); 3205 m.find(); 3206 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 3207 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 3208 failCount++; 3209 3210 m.appendTail(result); 3211 if 
(!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 3212 failCount++; 3213 3214 // Check to make sure a reference to nonexistent group causes error 3215 blah = toSupplementaries("zzzabcdcdefzzz"); 3216 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3217 m = p.matcher(blah); 3218 result = new StringBuilder(); 3219 m.find(); 3220 try { 3221 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 3222 failCount++; 3223 } catch (IndexOutOfBoundsException ioobe) { 3224 // Correct result 3225 } 3226 // Check double digit group references 3227 blah = toSupplementaries("zzz123456789101112zzz"); 3228 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 3229 m = p.matcher(blah); 3230 result = new StringBuilder(); 3231 m.find(); 3232 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 3233 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 3234 failCount++; 3235 3236 // Check to make sure it backs off $15 to $1 if only three groups 3237 blah = toSupplementaries("zzzabcdcdefzzz"); 3238 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 3239 m = p.matcher(blah); 3240 result = new StringBuilder(); 3241 m.find(); 3242 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 3243 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 3244 failCount++; 3245 // Check nothing has been appended into the output buffer if 3246 // the replacement string triggers IllegalArgumentException. 3247 p = Pattern.compile("(abc)"); 3248 m = p.matcher("abcd"); 3249 result = new StringBuilder(); 3250 m.find(); 3251 try { 3252 m.appendReplacement(result, ("xyz$g")); 3253 failCount++; 3254 } catch (IllegalArgumentException iae) { 3255 if (result.length() != 0) 3256 failCount++; 3257 } 3258 report("SB Substitution 2"); 3259 } 3260 3261 /* 3262 * 5 groups of characters are created to make a substitution string. 
3263 * A base string will be created including random lead chars, the 3264 * substitution string, and random trailing chars. 3265 * A pattern containing the 5 groups is searched for and replaced with: 3266 * random group + random string + random group. 3267 * The results are checked for correctness. 3268 */ 3269 private static void substitutionBasher() { 3270 for (int runs = 0; runs<1000; runs++) { 3271 // Create a base string to work in 3272 int leadingChars = generator.nextInt(10); 3273 StringBuffer baseBuffer = new StringBuffer(100); 3274 String leadingString = getRandomAlphaString(leadingChars); 3275 baseBuffer.append(leadingString); 3276 3277 // Create 5 groups of random number of random chars 3278 // Create the string to substitute 3279 // Create the pattern string to search for 3280 StringBuffer bufferToSub = new StringBuffer(25); 3281 StringBuffer bufferToPat = new StringBuffer(50); 3282 String[] groups = new String[5]; 3283 for(int i=0; i<5; i++) { 3284 int aGroupSize = generator.nextInt(5)+1; 3285 groups[i] = getRandomAlphaString(aGroupSize); 3286 bufferToSub.append(groups[i]); 3287 bufferToPat.append('('); 3288 bufferToPat.append(groups[i]); 3289 bufferToPat.append(')'); 3290 } 3291 String stringToSub = bufferToSub.toString(); 3292 String pattern = bufferToPat.toString(); 3293 3294 // Place sub string into working string at random index 3295 baseBuffer.append(stringToSub); 3296 3297 // Append random chars to end 3298 int trailingChars = generator.nextInt(10); 3299 String trailingString = getRandomAlphaString(trailingChars); 3300 baseBuffer.append(trailingString); 3301 String baseString = baseBuffer.toString(); 3302 3303 // Create test pattern and matcher 3304 Pattern p = Pattern.compile(pattern); 3305 Matcher m = p.matcher(baseString); 3306 3307 // Reject candidate if pattern happens to start early 3308 m.find(); 3309 if (m.start() < leadingChars) 3310 continue; 3311 3312 // Reject candidate if more than one match 3313 if (m.find()) 3314 continue; 3315 
3316 // Construct a replacement string with : 3317 // random group + random string + random group 3318 StringBuffer bufferToRep = new StringBuffer(); 3319 int groupIndex1 = generator.nextInt(5); 3320 bufferToRep.append("$" + (groupIndex1 + 1)); 3321 String randomMidString = getRandomAlphaString(5); 3322 bufferToRep.append(randomMidString); 3323 int groupIndex2 = generator.nextInt(5); 3324 bufferToRep.append("$" + (groupIndex2 + 1)); 3325 String replacement = bufferToRep.toString(); 3326 3327 // Do the replacement 3328 String result = m.replaceAll(replacement); 3329 3330 // Construct expected result 3331 StringBuffer bufferToRes = new StringBuffer(); 3332 bufferToRes.append(leadingString); 3333 bufferToRes.append(groups[groupIndex1]); 3334 bufferToRes.append(randomMidString); 3335 bufferToRes.append(groups[groupIndex2]); 3336 bufferToRes.append(trailingString); 3337 String expectedResult = bufferToRes.toString(); 3338 3339 // Check results 3340 if (!result.equals(expectedResult)) 3341 failCount++; 3342 } 3343 3344 report("Substitution Basher"); 3345 } 3346 3347 /* 3348 * 5 groups of characters are created to make a substitution string. 3349 * A base string will be created including random lead chars, the 3350 * substitution string, and random trailing chars. 3351 * A pattern containing the 5 groups is searched for and replaced with: 3352 * random group + random string + random group. 3353 * The results are checked for correctness. 
3354 */ 3355 private static void substitutionBasher2() { 3356 for (int runs = 0; runs<1000; runs++) { 3357 // Create a base string to work in 3358 int leadingChars = generator.nextInt(10); 3359 StringBuilder baseBuffer = new StringBuilder(100); 3360 String leadingString = getRandomAlphaString(leadingChars); 3361 baseBuffer.append(leadingString); 3362 3363 // Create 5 groups of random number of random chars 3364 // Create the string to substitute 3365 // Create the pattern string to search for 3366 StringBuilder bufferToSub = new StringBuilder(25); 3367 StringBuilder bufferToPat = new StringBuilder(50); 3368 String[] groups = new String[5]; 3369 for(int i=0; i<5; i++) { 3370 int aGroupSize = generator.nextInt(5)+1; 3371 groups[i] = getRandomAlphaString(aGroupSize); 3372 bufferToSub.append(groups[i]); 3373 bufferToPat.append('('); 3374 bufferToPat.append(groups[i]); 3375 bufferToPat.append(')'); 3376 } 3377 String stringToSub = bufferToSub.toString(); 3378 String pattern = bufferToPat.toString(); 3379 3380 // Place sub string into working string at random index 3381 baseBuffer.append(stringToSub); 3382 3383 // Append random chars to end 3384 int trailingChars = generator.nextInt(10); 3385 String trailingString = getRandomAlphaString(trailingChars); 3386 baseBuffer.append(trailingString); 3387 String baseString = baseBuffer.toString(); 3388 3389 // Create test pattern and matcher 3390 Pattern p = Pattern.compile(pattern); 3391 Matcher m = p.matcher(baseString); 3392 3393 // Reject candidate if pattern happens to start early 3394 m.find(); 3395 if (m.start() < leadingChars) 3396 continue; 3397 3398 // Reject candidate if more than one match 3399 if (m.find()) 3400 continue; 3401 3402 // Construct a replacement string with : 3403 // random group + random string + random group 3404 StringBuilder bufferToRep = new StringBuilder(); 3405 int groupIndex1 = generator.nextInt(5); 3406 bufferToRep.append("$" + (groupIndex1 + 1)); 3407 String randomMidString = 
getRandomAlphaString(5); 3408 bufferToRep.append(randomMidString); 3409 int groupIndex2 = generator.nextInt(5); 3410 bufferToRep.append("$" + (groupIndex2 + 1)); 3411 String replacement = bufferToRep.toString(); 3412 3413 // Do the replacement 3414 String result = m.replaceAll(replacement); 3415 3416 // Construct expected result 3417 StringBuilder bufferToRes = new StringBuilder(); 3418 bufferToRes.append(leadingString); 3419 bufferToRes.append(groups[groupIndex1]); 3420 bufferToRes.append(randomMidString); 3421 bufferToRes.append(groups[groupIndex2]); 3422 bufferToRes.append(trailingString); 3423 String expectedResult = bufferToRes.toString(); 3424 3425 // Check results 3426 if (!result.equals(expectedResult)) { 3427 failCount++; 3428 } 3429 } 3430 3431 report("Substitution Basher 2"); 3432 } 3433 3434 /** 3435 * Checks the handling of some escape sequences that the Pattern 3436 * class should process instead of the java compiler. These are 3437 * not in the file because the escapes should be be processed 3438 * by the Pattern class when the regex is compiled. 3439 */ 3440 private static void escapes() throws Exception { 3441 Pattern p = Pattern.compile("\\043"); 3442 Matcher m = p.matcher("#"); 3443 if (!m.find()) 3444 failCount++; 3445 3446 p = Pattern.compile("\\x23"); 3447 m = p.matcher("#"); 3448 if (!m.find()) 3449 failCount++; 3450 3451 p = Pattern.compile("\\u0023"); 3452 m = p.matcher("#"); 3453 if (!m.find()) 3454 failCount++; 3455 3456 report("Escape sequences"); 3457 } 3458 3459 /** 3460 * Checks the handling of blank input situations. These 3461 * tests are incompatible with my test file format. 
3462 */ 3463 private static void blankInput() throws Exception { 3464 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3465 Matcher m = p.matcher(""); 3466 if (m.find()) 3467 failCount++; 3468 3469 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3470 m = p.matcher(""); 3471 if (!m.find()) 3472 failCount++; 3473 3474 p = Pattern.compile("abc"); 3475 m = p.matcher(""); 3476 if (m.find()) 3477 failCount++; 3478 3479 p = Pattern.compile("a*"); 3480 m = p.matcher(""); 3481 if (!m.find()) 3482 failCount++; 3483 3484 report("Blank input"); 3485 } 3486 3487 /** 3488 * Tests the Boyer-Moore pattern matching of a character sequence 3489 * on randomly generated patterns. 3490 */ 3491 private static void bm() throws Exception { 3492 doBnM('a'); 3493 report("Boyer Moore (ASCII)"); 3494 3495 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3496 report("Boyer Moore (Supplementary)"); 3497 } 3498 3499 private static void doBnM(int baseCharacter) throws Exception { 3500 int achar=0; 3501 3502 for (int i=0; i<100; i++) { 3503 // Create a short pattern to search for 3504 int patternLength = generator.nextInt(7) + 4; 3505 StringBuffer patternBuffer = new StringBuffer(patternLength); 3506 for (int x=0; x<patternLength; x++) { 3507 int ch = baseCharacter + generator.nextInt(26); 3508 if (Character.isSupplementaryCodePoint(ch)) { 3509 patternBuffer.append(Character.toChars(ch)); 3510 } else { 3511 patternBuffer.append((char)ch); 3512 } 3513 } 3514 String pattern = patternBuffer.toString(); 3515 Pattern p = Pattern.compile(pattern); 3516 3517 // Create a buffer with random ASCII chars that does 3518 // not match the sample 3519 String toSearch = null; 3520 StringBuffer s = null; 3521 Matcher m = p.matcher(""); 3522 do { 3523 s = new StringBuffer(100); 3524 for (int x=0; x<100; x++) { 3525 int ch = baseCharacter + generator.nextInt(26); 3526 if (Character.isSupplementaryCodePoint(ch)) { 3527 s.append(Character.toChars(ch)); 3528 } else { 3529 s.append((char)ch); 3530 } 
3531 } 3532 toSearch = s.toString(); 3533 m.reset(toSearch); 3534 } while (m.find()); 3535 3536 // Insert the pattern at a random spot 3537 int insertIndex = generator.nextInt(99); 3538 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3539 insertIndex++; 3540 s = s.insert(insertIndex, pattern); 3541 toSearch = s.toString(); 3542 3543 // Make sure that the pattern is found 3544 m.reset(toSearch); 3545 if (!m.find()) 3546 failCount++; 3547 3548 // Make sure that the match text is the pattern 3549 if (!m.group().equals(pattern)) 3550 failCount++; 3551 3552 // Make sure match occured at insertion point 3553 if (m.start() != insertIndex) 3554 failCount++; 3555 } 3556 } 3557 3558 /** 3559 * Tests the matching of slices on randomly generated patterns. 3560 * The Boyer-Moore optimization is not done on these patterns 3561 * because it uses unicode case folding. 3562 */ 3563 private static void slice() throws Exception { 3564 doSlice(Character.MAX_VALUE); 3565 report("Slice"); 3566 3567 doSlice(Character.MAX_CODE_POINT); 3568 report("Slice (Supplementary)"); 3569 } 3570 3571 private static void doSlice(int maxCharacter) throws Exception { 3572 Random generator = new Random(); 3573 int achar=0; 3574 3575 for (int i=0; i<100; i++) { 3576 // Create a short pattern to search for 3577 int patternLength = generator.nextInt(7) + 4; 3578 StringBuffer patternBuffer = new StringBuffer(patternLength); 3579 for (int x=0; x<patternLength; x++) { 3580 int randomChar = 0; 3581 while (!Character.isLetterOrDigit(randomChar)) 3582 randomChar = generator.nextInt(maxCharacter); 3583 if (Character.isSupplementaryCodePoint(randomChar)) { 3584 patternBuffer.append(Character.toChars(randomChar)); 3585 } else { 3586 patternBuffer.append((char) randomChar); 3587 } 3588 } 3589 String pattern = patternBuffer.toString(); 3590 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3591 3592 // Create a buffer with random chars that does not match the sample 3593 String toSearch = null; 3594 
StringBuffer s = null; 3595 Matcher m = p.matcher(""); 3596 do { 3597 s = new StringBuffer(100); 3598 for (int x=0; x<100; x++) { 3599 int randomChar = 0; 3600 while (!Character.isLetterOrDigit(randomChar)) 3601 randomChar = generator.nextInt(maxCharacter); 3602 if (Character.isSupplementaryCodePoint(randomChar)) { 3603 s.append(Character.toChars(randomChar)); 3604 } else { 3605 s.append((char) randomChar); 3606 } 3607 } 3608 toSearch = s.toString(); 3609 m.reset(toSearch); 3610 } while (m.find()); 3611 3612 // Insert the pattern at a random spot 3613 int insertIndex = generator.nextInt(99); 3614 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3615 insertIndex++; 3616 s = s.insert(insertIndex, pattern); 3617 toSearch = s.toString(); 3618 3619 // Make sure that the pattern is found 3620 m.reset(toSearch); 3621 if (!m.find()) 3622 failCount++; 3623 3624 // Make sure that the match text is the pattern 3625 if (!m.group().equals(pattern)) 3626 failCount++; 3627 3628 // Make sure match occured at insertion point 3629 if (m.start() != insertIndex) 3630 failCount++; 3631 } 3632 } 3633 3634 private static void explainFailure(String pattern, String data, 3635 String expected, String actual) { 3636 System.err.println("----------------------------------------"); 3637 System.err.println("Pattern = "+pattern); 3638 System.err.println("Data = "+data); 3639 System.err.println("Expected = " + expected); 3640 System.err.println("Actual = " + actual); 3641 } 3642 3643 private static void explainFailure(String pattern, String data, 3644 Throwable t) { 3645 System.err.println("----------------------------------------"); 3646 System.err.println("Pattern = "+pattern); 3647 System.err.println("Data = "+data); 3648 t.printStackTrace(System.err); 3649 } 3650 3651 // Testing examples from a file 3652 3653 /** 3654 * Goes through the file "TestCases.txt" and creates many patterns 3655 * described in the file, matching the patterns against input lines in 3656 * the file, and comparing 
the results against the correct results 3657 * also found in the file. The file format is described in comments 3658 * at the head of the file. 3659 */ 3660 private static void processFile(String fileName) throws Exception { 3661 File testCases = new File(System.getProperty("test.src", "."), 3662 fileName); 3663 FileInputStream in = new FileInputStream(testCases); 3664 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3665 3666 // Process next test case. 3667 String aLine; 3668 while((aLine = r.readLine()) != null) { 3669 // Read a line for pattern 3670 String patternString = grabLine(r); 3671 Pattern p = null; 3672 try { 3673 p = compileTestPattern(patternString); 3674 } catch (PatternSyntaxException e) { 3675 String dataString = grabLine(r); 3676 String expectedResult = grabLine(r); 3677 if (expectedResult.startsWith("error")) 3678 continue; 3679 explainFailure(patternString, dataString, e); 3680 failCount++; 3681 continue; 3682 } 3683 3684 // Read a line for input string 3685 String dataString = grabLine(r); 3686 Matcher m = p.matcher(dataString); 3687 StringBuffer result = new StringBuffer(); 3688 3689 // Check for IllegalStateExceptions before a match 3690 failCount += preMatchInvariants(m); 3691 3692 boolean found = m.find(); 3693 3694 if (found) 3695 failCount += postTrueMatchInvariants(m); 3696 else 3697 failCount += postFalseMatchInvariants(m); 3698 3699 if (found) { 3700 result.append("true "); 3701 result.append(m.group(0) + " "); 3702 } else { 3703 result.append("false "); 3704 } 3705 3706 result.append(m.groupCount()); 3707 3708 if (found) { 3709 for (int i=1; i<m.groupCount()+1; i++) 3710 if (m.group(i) != null) 3711 result.append(" " +m.group(i)); 3712 } 3713 3714 // Read a line for the expected result 3715 String expectedResult = grabLine(r); 3716 3717 if (!result.toString().equals(expectedResult)) { 3718 explainFailure(patternString, dataString, expectedResult, result.toString()); 3719 failCount++; 3720 } 3721 } 3722 3723 
report(fileName); 3724 } 3725 3726 private static int preMatchInvariants(Matcher m) { 3727 int failCount = 0; 3728 try { 3729 m.start(); 3730 failCount++; 3731 } catch (IllegalStateException ise) {} 3732 try { 3733 m.end(); 3734 failCount++; 3735 } catch (IllegalStateException ise) {} 3736 try { 3737 m.group(); 3738 failCount++; 3739 } catch (IllegalStateException ise) {} 3740 return failCount; 3741 } 3742 3743 private static int postFalseMatchInvariants(Matcher m) { 3744 int failCount = 0; 3745 try { 3746 m.group(); 3747 failCount++; 3748 } catch (IllegalStateException ise) {} 3749 try { 3750 m.start(); 3751 failCount++; 3752 } catch (IllegalStateException ise) {} 3753 try { 3754 m.end(); 3755 failCount++; 3756 } catch (IllegalStateException ise) {} 3757 return failCount; 3758 } 3759 3760 private static int postTrueMatchInvariants(Matcher m) { 3761 int failCount = 0; 3762 //assert(m.start() = m.start(0); 3763 if (m.start() != m.start(0)) 3764 failCount++; 3765 //assert(m.end() = m.end(0); 3766 if (m.start() != m.start(0)) 3767 failCount++; 3768 //assert(m.group() = m.group(0); 3769 if (!m.group().equals(m.group(0))) 3770 failCount++; 3771 try { 3772 m.group(50); 3773 failCount++; 3774 } catch (IndexOutOfBoundsException ise) {} 3775 3776 return failCount; 3777 } 3778 3779 private static Pattern compileTestPattern(String patternString) { 3780 if (!patternString.startsWith("'")) { 3781 return Pattern.compile(patternString); 3782 } 3783 3784 int break1 = patternString.lastIndexOf("'"); 3785 String flagString = patternString.substring( 3786 break1+1, patternString.length()); 3787 patternString = patternString.substring(1, break1); 3788 3789 if (flagString.equals("i")) 3790 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3791 3792 if (flagString.equals("m")) 3793 return Pattern.compile(patternString, Pattern.MULTILINE); 3794 3795 return Pattern.compile(patternString); 3796 } 3797 3798 /** 3799 * Reads a line from the input file. 
Keeps reading lines until a non 3800 * empty non comment line is read. If the line contains a \n then 3801 * these two characters are replaced by a newline char. If a \\uxxxx 3802 * sequence is read then the sequence is replaced by the unicode char. 3803 */ 3804 private static String grabLine(BufferedReader r) throws Exception { 3805 int index = 0; 3806 String line = r.readLine(); 3807 while (line.startsWith("//") || line.length() < 1) 3808 line = r.readLine(); 3809 while ((index = line.indexOf("\\n")) != -1) { 3810 StringBuffer temp = new StringBuffer(line); 3811 temp.replace(index, index+2, "\n"); 3812 line = temp.toString(); 3813 } 3814 while ((index = line.indexOf("\\u")) != -1) { 3815 StringBuffer temp = new StringBuffer(line); 3816 String value = temp.substring(index+2, index+6); 3817 char aChar = (char)Integer.parseInt(value, 16); 3818 String unicodeChar = "" + aChar; 3819 temp.replace(index, index+6, unicodeChar); 3820 line = temp.toString(); 3821 } 3822 3823 return line; 3824 } 3825 3826 private static void check(Pattern p, String s, String g, String expected) { 3827 Matcher m = p.matcher(s); 3828 m.find(); 3829 if (!m.group(g).equals(expected) || 3830 s.charAt(m.start(g)) != expected.charAt(0) || 3831 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 3832 failCount++; 3833 } 3834 3835 private static void checkReplaceFirst(String p, String s, String r, String expected) 3836 { 3837 if (!expected.equals(Pattern.compile(p) 3838 .matcher(s) 3839 .replaceFirst(r))) 3840 failCount++; 3841 } 3842 3843 private static void checkReplaceAll(String p, String s, String r, String expected) 3844 { 3845 if (!expected.equals(Pattern.compile(p) 3846 .matcher(s) 3847 .replaceAll(r))) 3848 failCount++; 3849 } 3850 3851 private static void checkExpectedFail(String p) { 3852 try { 3853 Pattern.compile(p); 3854 } catch (PatternSyntaxException pse) { 3855 //pse.printStackTrace(); 3856 return; 3857 } 3858 failCount++; 3859 } 3860 3861 private static void 
checkExpectedIAE(Matcher m, String g) { 3862 m.find(); 3863 try { 3864 m.group(g); 3865 } catch (IllegalArgumentException x) { 3866 //iae.printStackTrace(); 3867 try { 3868 m.start(g); 3869 } catch (IllegalArgumentException xx) { 3870 try { 3871 m.start(g); 3872 } catch (IllegalArgumentException xxx) { 3873 return; 3874 } 3875 } 3876 } 3877 failCount++; 3878 } 3879 3880 private static void checkExpectedNPE(Matcher m) { 3881 m.find(); 3882 try { 3883 m.group(null); 3884 } catch (NullPointerException x) { 3885 try { 3886 m.start(null); 3887 } catch (NullPointerException xx) { 3888 try { 3889 m.end(null); 3890 } catch (NullPointerException xxx) { 3891 return; 3892 } 3893 } 3894 } 3895 failCount++; 3896 } 3897 3898 private static void namedGroupCaptureTest() throws Exception { 3899 check(Pattern.compile("x+(?<gname>y+)z+"), 3900 "xxxyyyzzz", 3901 "gname", 3902 "yyy"); 3903 3904 check(Pattern.compile("x+(?<gname8>y+)z+"), 3905 "xxxyyyzzz", 3906 "gname8", 3907 "yyy"); 3908 3909 //backref 3910 Pattern pattern = Pattern.compile("(a*)bc\\1"); 3911 check(pattern, "zzzaabcazzz", true); // found "abca" 3912 3913 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 3914 "zzzaabcaazzz", true); 3915 3916 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 3917 "abcdefabc", true); 3918 3919 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 3920 "abcdefghijkk", true); 3921 3922 // Supplementary character tests 3923 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3924 toSupplementaries("zzzaabcazzz"), true); 3925 3926 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3927 toSupplementaries("zzzaabcaazzz"), true); 3928 3929 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 3930 toSupplementaries("abcdefabc"), true); 3931 3932 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 3933 "(?<gname>" + 3934 toSupplementaries("k)") + 
"\\k<gname>"), 3935 toSupplementaries("abcdefghijkk"), true); 3936 3937 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 3938 "xxxyyyzzzyyy", 3939 "gname", 3940 "yyy"); 3941 3942 //replaceFirst/All 3943 checkReplaceFirst("(?<gn>ab)(c*)", 3944 "abccczzzabcczzzabccc", 3945 "${gn}", 3946 "abzzzabcczzzabccc"); 3947 3948 checkReplaceAll("(?<gn>ab)(c*)", 3949 "abccczzzabcczzzabccc", 3950 "${gn}", 3951 "abzzzabzzzab"); 3952 3953 3954 checkReplaceFirst("(?<gn>ab)(c*)", 3955 "zzzabccczzzabcczzzabccczzz", 3956 "${gn}", 3957 "zzzabzzzabcczzzabccczzz"); 3958 3959 checkReplaceAll("(?<gn>ab)(c*)", 3960 "zzzabccczzzabcczzzabccczzz", 3961 "${gn}", 3962 "zzzabzzzabzzzabzzz"); 3963 3964 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 3965 "zzzabccczzzabcczzzabccczzz", 3966 "${gn2}", 3967 "zzzccczzzabcczzzabccczzz"); 3968 3969 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 3970 "zzzabccczzzabcczzzabccczzz", 3971 "${gn2}", 3972 "zzzccczzzcczzzccczzz"); 3973 3974 //toSupplementaries("(ab)(c*)")); 3975 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 3976 ")(?<gn2>" + toSupplementaries("c") + "*)", 3977 toSupplementaries("abccczzzabcczzzabccc"), 3978 "${gn1}", 3979 toSupplementaries("abzzzabcczzzabccc")); 3980 3981 3982 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 3983 ")(?<gn2>" + toSupplementaries("c") + "*)", 3984 toSupplementaries("abccczzzabcczzzabccc"), 3985 "${gn1}", 3986 toSupplementaries("abzzzabzzzab")); 3987 3988 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 3989 ")(?<gn2>" + toSupplementaries("c") + "*)", 3990 toSupplementaries("abccczzzabcczzzabccc"), 3991 "${gn2}", 3992 toSupplementaries("ccczzzabcczzzabccc")); 3993 3994 3995 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 3996 ")(?<gn2>" + toSupplementaries("c") + "*)", 3997 toSupplementaries("abccczzzabcczzzabccc"), 3998 "${gn2}", 3999 toSupplementaries("ccczzzcczzzccc")); 4000 4001 checkReplaceFirst("(?<dog>Dog)AndCat", 4002 "zzzDogAndCatzzzDogAndCatzzz", 4003 "${dog}", 4004 
"zzzDogzzzDogAndCatzzz"); 4005 4006 4007 checkReplaceAll("(?<dog>Dog)AndCat", 4008 "zzzDogAndCatzzzDogAndCatzzz", 4009 "${dog}", 4010 "zzzDogzzzDogzzz"); 4011 4012 // backref in Matcher & String 4013 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") || 4014 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 4015 failCount++; 4016 4017 // negative 4018 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 4019 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 4020 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 4021 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 4022 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 4023 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 4024 "gnameX"); 4025 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 4026 report("NamedGroupCapture"); 4027 } 4028 4029 // This is for bug 6969132 4030 private static void nonBmpClassComplementTest() throws Exception { 4031 Pattern p = Pattern.compile("\\P{Lu}"); 4032 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4033 if (m.find() && m.start() == 1) 4034 failCount++; 4035 4036 // from a unicode category 4037 p = Pattern.compile("\\P{Lu}"); 4038 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4039 if (m.find()) 4040 failCount++; 4041 if (!m.hitEnd()) 4042 failCount++; 4043 4044 // block 4045 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 4046 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 4047 if (m.find() && m.start() == 1) 4048 failCount++; 4049 4050 report("NonBmpClassComplement"); 4051 } 4052 4053 private static void unicodePropertiesTest() throws Exception { 4054 // different forms 4055 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 4056 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 4057 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 4058 
!Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 4059 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 4060 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 4061 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 4062 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 4063 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 4064 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 4065 failCount++; 4066 4067 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 4068 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 4069 Matcher lastSM = common; 4070 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 4071 4072 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 4073 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 4074 Matcher lastBM = latin; 4075 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 4076 4077 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 4078 if (cp >= 0x30000 && (cp & 0x70) == 0){ 4079 continue; // only pick couple code points, they are the same 4080 } 4081 4082 // Unicode Script 4083 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 4084 Matcher m; 4085 String str = new String(Character.toChars(cp)); 4086 if (script == lastScript) { 4087 m = lastSM; 4088 m.reset(str); 4089 } else { 4090 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 4091 } 4092 if (!m.matches()) { 4093 failCount++; 4094 } 4095 Matcher other = (script == Character.UnicodeScript.COMMON)? 
unknown : common; 4096 other.reset(str); 4097 if (other.matches()) { 4098 failCount++; 4099 } 4100 lastSM = m; 4101 lastScript = script; 4102 4103 // Unicode Block 4104 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 4105 if (block == null) { 4106 //System.out.printf("Not a Block: cp=%x%n", cp); 4107 continue; 4108 } 4109 if (block == lastBlock) { 4110 m = lastBM; 4111 m.reset(str); 4112 } else { 4113 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 4114 } 4115 if (!m.matches()) { 4116 failCount++; 4117 } 4118 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 4119 other.reset(str); 4120 if (other.matches()) { 4121 failCount++; 4122 } 4123 lastBM = m; 4124 lastBlock = block; 4125 } 4126 report("unicodeProperties"); 4127 } 4128 4129 private static void unicodeHexNotationTest() throws Exception { 4130 4131 // negative 4132 checkExpectedFail("\\x{-23}"); 4133 checkExpectedFail("\\x{110000}"); 4134 checkExpectedFail("\\x{}"); 4135 checkExpectedFail("\\x{AB[ef]"); 4136 4137 // codepoint 4138 check("^\\x{1033c}$", "\uD800\uDF3C", true); 4139 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4140 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 4141 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 4142 4143 // in class 4144 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 4145 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 4146 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 4147 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 4148 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 4149 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 4150 4151 for (int cp = 0; cp <= 0x10FFFF; cp++) { 4152 String s = "A" + new String(Character.toChars(cp)) + "B"; 4153 String hexUTF16 = (cp <= 0xFFFF)? 
String.format("\\u%04x", cp) 4154 : String.format("\\u%04x\\u%04x", 4155 (int) Character.toChars(cp)[0], 4156 (int) Character.toChars(cp)[1]); 4157 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 4158 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 4159 failCount++; 4160 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 4161 failCount++; 4162 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 4163 failCount++; 4164 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 4165 failCount++; 4166 } 4167 report("unicodeHexNotation"); 4168 } 4169 4170 private static void unicodeClassesTest() throws Exception { 4171 4172 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 4173 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 4174 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 4175 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 4176 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 4177 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 4178 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 4179 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 4180 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 4181 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 4182 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 4183 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 4184 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 4185 Matcher bound = Pattern.compile("\\b").matcher(""); 4186 Matcher word = Pattern.compile("\\w++").matcher(""); 4187 // UNICODE_CHARACTER_CLASS 4188 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4189 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4190 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4191 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 
4192 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4193 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4194 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4195 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4196 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4197 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4198 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4199 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4200 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4201 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4202 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4203 // embedded flag (?U) 4204 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4205 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4206 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4207 4208 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 4209 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4210 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 4211 // properties 4212 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 4213 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 4214 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 4215 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 4216 Matcher alphaP = 
Pattern.compile("\\p{IsAlphabetic}").matcher(""); 4217 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 4218 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 4219 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 4220 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 4221 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 4222 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 4223 4224 // javaMethod 4225 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 4226 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 4227 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 4228 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 4229 4230 for (int cp = 1; cp < 0x30000; cp++) { 4231 String str = new String(Character.toChars(cp)); 4232 int type = Character.getType(cp); 4233 if (// lower 4234 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 4235 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 4236 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 4237 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 4238 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 4239 // upper 4240 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 4241 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 4242 Character.isUpperCase(cp) != upperP.reset(str).matches() || 4243 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 4244 // alpha 4245 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 4246 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 4247 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 4248 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 4249 // digit 4250 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 4251 Character.isDigit(cp) != digitU.reset(str).matches() || 4252 // alnum 4253 
POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 4254 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 4255 // punct 4256 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 4257 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 4258 // graph 4259 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 4260 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 4261 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 4262 // blank 4263 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 4264 != blank.reset(str).matches() || 4265 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 4266 // print 4267 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 4268 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 4269 // cntrl 4270 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 4271 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 4272 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 4273 // hexdigit 4274 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 4275 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 4276 // space 4277 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 4278 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 4279 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 4280 // word 4281 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 4282 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 4283 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 4284 // bwordb 4285 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 4286 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 4287 // properties 4288 Character.isTitleCase(cp) != titleP.reset(str).matches() || 4289 Character.isLetter(cp) != letterP.reset(str).matches()|| 4290 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 4291 Character.isIdeographic(cp) != ideogJ.reset(str).matches() || 4292 (Character.UNASSIGNED == type) == 
definedP.reset(str).matches() || 4293 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 4294 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches()) 4295 failCount++; 4296 } 4297 4298 // bounds/word align 4299 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 4300 if (!bwbU.reset("\u0180sherman\u0400").matches()) 4301 failCount++; 4302 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 4303 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 4304 failCount++; 4305 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 4306 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 4307 failCount++; 4308 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 4309 failCount++; 4310 report("unicodePredefinedClasses"); 4311 } 4312 4313 private static void horizontalAndVerticalWSTest() throws Exception { 4314 String hws = new String (new char[] { 4315 0x09, 0x20, 0xa0, 0x1680, 0x180e, 4316 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 4317 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 4318 0x202f, 0x205f, 0x3000 }); 4319 String vws = new String (new char[] { 4320 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 4321 if (!Pattern.compile("\\h+").matcher(hws).matches() || 4322 !Pattern.compile("[\\h]+").matcher(hws).matches()) 4323 failCount++; 4324 if (Pattern.compile("\\H").matcher(hws).find() || 4325 Pattern.compile("[\\H]").matcher(hws).find()) 4326 failCount++; 4327 if (!Pattern.compile("\\v+").matcher(vws).matches() || 4328 !Pattern.compile("[\\v]+").matcher(vws).matches()) 4329 failCount++; 4330 if (Pattern.compile("\\V").matcher(vws).find() || 4331 Pattern.compile("[\\V]").matcher(vws).find()) 4332 failCount++; 4333 String prefix = "abcd"; 4334 String suffix = "efgh"; 4335 String ng = "A"; 4336 for (int i = 0; i < hws.length(); i++) { 4337 String c = String.valueOf(hws.charAt(i)); 4338 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 4339 if (!m.find() || !c.equals(m.group())) 4340 failCount++; 4341 m = 
Pattern.compile("[\\h]").matcher(prefix + c + suffix); 4342 if (!m.find() || !c.equals(m.group())) 4343 failCount++; 4344 4345 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4346 if (!m.find() || !ng.equals(m.group())) 4347 failCount++; 4348 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 4349 if (!m.find() || !ng.equals(m.group())) 4350 failCount++; 4351 } 4352 for (int i = 0; i < vws.length(); i++) { 4353 String c = String.valueOf(vws.charAt(i)); 4354 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 4355 if (!m.find() || !c.equals(m.group())) 4356 failCount++; 4357 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 4358 if (!m.find() || !c.equals(m.group())) 4359 failCount++; 4360 4361 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4362 if (!m.find() || !ng.equals(m.group())) 4363 failCount++; 4364 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4365 if (!m.find() || !ng.equals(m.group())) 4366 failCount++; 4367 } 4368 // \v in range is interpreted as 0x0B. 
This is the undocumented behavior 4369 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4370 failCount++; 4371 report("horizontalAndVerticalWSTest"); 4372 } 4373 4374 private static void linebreakTest() throws Exception { 4375 String linebreaks = new String (new char[] { 4376 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4377 String crnl = "\r\n"; 4378 if (!Pattern.compile("\\R+").matcher(linebreaks).matches() || 4379 !Pattern.compile("\\R").matcher(crnl).matches() || 4380 Pattern.compile("\\R\\R").matcher(crnl).matches()) 4381 failCount++; 4382 report("linebreakTest"); 4383 } 4384 4385 // #7189363 4386 private static void branchTest() throws Exception { 4387 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4388 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4389 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4390 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4391 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4392 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4393 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4394 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4395 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4396 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4397 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4398 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4399 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4400 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4401 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4402 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4403 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4404 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4405 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4406 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4407 
!Pattern.compile("(a)?bc|de").matcher("de").matches() || 4408 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4409 failCount++; 4410 report("branchTest"); 4411 } 4412 4413 // This test is for 8007395 4414 private static void groupCurlyNotFoundSuppTest() throws Exception { 4415 String input = "test this as \ud83d\ude0d"; 4416 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4417 "test(.)*(@[a-zA-Z.]+)", 4418 "test([^B])+(@[a-zA-Z.]+)", 4419 "test([^B])*(@[a-zA-Z.]+)", 4420 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4421 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4422 }) { 4423 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4424 .matcher(input); 4425 try { 4426 if (m.find()) { 4427 failCount++; 4428 } 4429 } catch (Exception x) { 4430 failCount++; 4431 } 4432 } 4433 report("GroupCurly NotFoundSupp"); 4434 } 4435 4436 // This test is for 8023647 4437 private static void groupCurlyBackoffTest() throws Exception { 4438 if (!"abc1c".matches("(\\w)+1\\1") || 4439 "abc11".matches("(\\w)+1\\1")) { 4440 failCount++; 4441 } 4442 report("GroupCurly backoff"); 4443 } 4444 4445 // This test is for 8012646 4446 private static void patternAsPredicate() throws Exception { 4447 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4448 4449 if (p.test("")) { 4450 failCount++; 4451 } 4452 if (!p.test("word")) { 4453 failCount++; 4454 } 4455 if (p.test("1234")) { 4456 failCount++; 4457 } 4458 report("Pattern.asPredicate"); 4459 } 4460 }