1 /* 2 * Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Sun designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Sun in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 22 * CA 95054 USA or visit www.sun.com if you need additional information or 23 * have any questions. 24 */ 25 26 /** 27 * @test 28 * @summary tests RegExp framework 29 * @author Mike McCloskey 30 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 31 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 32 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 33 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 34 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 35 * 6350801 6676425 6878475 6919132 6931676 36 */ 37 38 import java.util.regex.*; 39 import java.util.Random; 40 import java.io.*; 41 import java.util.*; 42 import java.nio.CharBuffer; 43 44 /** 45 * This is a test class created to check the operation of 46 * the Pattern and Matcher classes. 47 */ 48 public class RegExTest { 49 50 private static Random generator = new Random(); 51 private static boolean failure = false; 52 private static int failCount = 0; 53 54 /** 55 * Main to interpret arguments and run several tests. 56 * 57 */ 58 public static void main(String[] args) throws Exception { 59 // Most of the tests are in a file 60 processFile("TestCases.txt"); 61 //processFile("PerlCases.txt"); 62 processFile("BMPTestCases.txt"); 63 processFile("SupplementaryTestCases.txt"); 64 65 // These test many randomly generated char patterns 66 bm(); 67 slice(); 68 69 // These are hard to put into the file 70 escapes(); 71 blankInput(); 72 73 // Substitition tests on randomly generated sequences 74 globalSubstitute(); 75 stringbufferSubstitute(); 76 substitutionBasher(); 77 78 // Canonical Equivalence 79 ceTest(); 80 81 // Anchors 82 anchorTest(); 83 84 // boolean match calls 85 matchesTest(); 86 lookingAtTest(); 87 88 // Pattern API 89 patternMatchesTest(); 90 91 // Misc 92 lookbehindTest(); 93 nullArgumentTest(); 94 backRefTest(); 95 groupCaptureTest(); 96 caretTest(); 97 charClassTest(); 98 emptyPatternTest(); 99 findIntTest(); 100 group0Test(); 101 longPatternTest(); 102 octalTest(); 103 ampersandTest(); 104 negationTest(); 105 splitTest(); 106 appendTest(); 107 caseFoldingTest(); 108 commentsTest(); 109 unixLinesTest(); 110 replaceFirstTest(); 111 gTest(); 112 zTest(); 113 serializeTest(); 114 reluctantRepetitionTest(); 115 multilineDollarTest(); 116 dollarAtEndTest(); 117 caretBetweenTerminatorsTest(); 118 // This RFE rejected in Tiger numOccurrencesTest(); 119 javaCharClassTest(); 120 nonCaptureRepetitionTest(); 121 notCapturedGroupCurlyMatchTest(); 122 escapedSegmentTest(); 123 literalPatternTest(); 124 literalReplacementTest(); 125 regionTest(); 126 toStringTest(); 127 negatedCharClassTest(); 128 findFromTest(); 129 boundsTest(); 130 unicodeWordBoundsTest(); 131 caretAtEndTest(); 132 wordSearchTest(); 133 hitEndTest(); 134 toMatchResultTest(); 135 surrogatesInClassTest(); 136 namedGroupCaptureTest(); 137 nonBmpClassComplementTest(); 138 139 if (failure) 140 throw new RuntimeException("Failure in the RE handling."); 141 else 142 System.err.println("OKAY: All tests passed."); 143 } 144 145 // Utility functions 146 147 private static String getRandomAlphaString(int length) { 148 StringBuffer buf = new StringBuffer(length); 149 for (int i=0; i<length; i++) { 150 char randChar = (char)(97 + generator.nextInt(26)); 151 buf.append(randChar); 152 } 153 return buf.toString(); 154 } 155 156 private static void check(Matcher m, String expected) { 157 m.find(); 158 if (!m.group().equals(expected)) 159 failCount++; 160 } 161 162 private static void check(Matcher m, String result, boolean expected) { 163 m.find(); 164 if (m.group().equals(result)) 165 failCount += (expected) ? 0 : 1; 166 else 167 failCount += (expected) ? 1 : 0; 168 } 169 170 private static void check(Pattern p, String s, boolean expected) { 171 Matcher matcher = p.matcher(s); 172 if (matcher.find()) 173 failCount += (expected) ? 0 : 1; 174 else 175 failCount += (expected) ? 1 : 0; 176 } 177 178 private static void check(String p, char c, boolean expected) { 179 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 180 Pattern pattern = Pattern.compile(propertyPattern); 181 char[] ca = new char[1]; ca[0] = c; 182 Matcher matcher = pattern.matcher(new String(ca)); 183 if (!matcher.find()) 184 failCount++; 185 } 186 187 private static void check(String p, int codePoint, boolean expected) { 188 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 189 Pattern pattern = Pattern.compile(propertyPattern); 190 char[] ca = Character.toChars(codePoint); 191 Matcher matcher = pattern.matcher(new String(ca)); 192 if (!matcher.find()) 193 failCount++; 194 } 195 196 private static void check(String p, int flag, String input, String s, 197 boolean expected) 198 { 199 Pattern pattern = Pattern.compile(p, flag); 200 Matcher matcher = pattern.matcher(input); 201 if (expected) 202 check(matcher, s, expected); 203 else 204 check(pattern, input, false); 205 } 206 207 private static void report(String testName) { 208 int spacesToAdd = 30 - testName.length(); 209 StringBuffer paddedNameBuffer = new StringBuffer(testName); 210 for (int i=0; i<spacesToAdd; i++) 211 paddedNameBuffer.append(" "); 212 String paddedName = paddedNameBuffer.toString(); 213 System.err.println(paddedName + ": " + 214 (failCount==0 ? "Passed":"Failed("+failCount+")")); 215 if (failCount > 0) 216 failure = true; 217 failCount = 0; 218 } 219 220 /** 221 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 222 * supplementary characters. This method does NOT fully take care 223 * of the regex syntax. 224 */ 225 private static String toSupplementaries(String s) { 226 int length = s.length(); 227 StringBuffer sb = new StringBuffer(length * 2); 228 229 for (int i = 0; i < length; ) { 230 char c = s.charAt(i++); 231 if (c == '\\') { 232 sb.append(c); 233 if (i < length) { 234 c = s.charAt(i++); 235 sb.append(c); 236 if (c == 'u') { 237 // assume no syntax error 238 sb.append(s.charAt(i++)); 239 sb.append(s.charAt(i++)); 240 sb.append(s.charAt(i++)); 241 sb.append(s.charAt(i++)); 242 } 243 } 244 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 245 sb.append('\ud800').append((char)('\udc00'+c)); 246 } else { 247 sb.append(c); 248 } 249 } 250 return sb.toString(); 251 } 252 253 // Regular expression tests 254 255 // This is for bug 6178785 256 // Test if an expected NPE gets thrown when passing in a null argument 257 private static boolean check(Runnable test) { 258 try { 259 test.run(); 260 failCount++; 261 return false; 262 } catch (NullPointerException npe) { 263 return true; 264 } 265 } 266 267 private static void nullArgumentTest() { 268 check(new Runnable() { public void run() { Pattern.compile(null); }}); 269 check(new Runnable() { public void run() { Pattern.matches(null, null); }}); 270 check(new Runnable() { public void run() { Pattern.matches("xyz", null);}}); 271 check(new Runnable() { public void run() { Pattern.quote(null);}}); 272 check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}}); 273 check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}}); 274 275 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 276 m.matches(); 277 check(new Runnable() { public void run() { m.appendTail(null);}}); 278 check(new Runnable() { public void run() { m.replaceAll(null);}}); 279 check(new Runnable() { public void run() { m.replaceFirst(null);}}); 280 check(new Runnable() { public void run() { m.appendReplacement(null, null);}}); 281 check(new Runnable() { public void run() { m.reset(null);}}); 282 check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}}); 283 //check(new Runnable() { public void run() { m.usePattern(null);}}); 284 285 report("Null Argument"); 286 } 287 288 // This is for bug6635133 289 // Test if surrogate pair in Unicode escapes can be handled correctly. 290 private static void surrogatesInClassTest() throws Exception { 291 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 292 Matcher matcher = pattern.matcher("\ud834\udd22"); 293 if (!matcher.find()) 294 failCount++; 295 } 296 297 // This is for bug 4988891 298 // Test toMatchResult to see that it is a copy of the Matcher 299 // that is not affected by subsequent operations on the original 300 private static void toMatchResultTest() throws Exception { 301 Pattern pattern = Pattern.compile("squid"); 302 Matcher matcher = pattern.matcher( 303 "agiantsquidofdestinyasmallsquidoffate"); 304 matcher.find(); 305 int matcherStart1 = matcher.start(); 306 MatchResult mr = matcher.toMatchResult(); 307 if (mr == matcher) 308 failCount++; 309 int resultStart1 = mr.start(); 310 if (matcherStart1 != resultStart1) 311 failCount++; 312 matcher.find(); 313 int matcherStart2 = matcher.start(); 314 int resultStart2 = mr.start(); 315 if (matcherStart2 == resultStart2) 316 failCount++; 317 if (resultStart1 != resultStart2) 318 failCount++; 319 MatchResult mr2 = matcher.toMatchResult(); 320 if (mr == mr2) 321 failCount++; 322 if (mr2.start() != matcherStart2) 323 failCount++; 324 report("toMatchResult is a copy"); 325 } 326 327 // This is for bug 5013885 328 // Must test a slice to see if it reports hitEnd correctly 329 private static void hitEndTest() throws Exception { 330 // Basic test of Slice node 331 Pattern p = Pattern.compile("^squidattack"); 332 Matcher m = p.matcher("squack"); 333 m.find(); 334 if (m.hitEnd()) 335 failCount++; 336 m.reset("squid"); 337 m.find(); 338 if (!m.hitEnd()) 339 failCount++; 340 341 // Test Slice, SliceA and SliceU nodes 342 for (int i=0; i<3; i++) { 343 int flags = 0; 344 if (i==1) flags = Pattern.CASE_INSENSITIVE; 345 if (i==2) flags = Pattern.UNICODE_CASE; 346 p = Pattern.compile("^abc", flags); 347 m = p.matcher("ad"); 348 m.find(); 349 if (m.hitEnd()) 350 failCount++; 351 m.reset("ab"); 352 m.find(); 353 if (!m.hitEnd()) 354 failCount++; 355 } 356 357 // Test Boyer-Moore node 358 p = Pattern.compile("catattack"); 359 m = p.matcher("attack"); 360 m.find(); 361 if (!m.hitEnd()) 362 failCount++; 363 364 p = Pattern.compile("catattack"); 365 m = p.matcher("attackattackattackcatatta"); 366 m.find(); 367 if (!m.hitEnd()) 368 failCount++; 369 report("hitEnd from a Slice"); 370 } 371 372 // This is for bug 4997476 373 // It is weird code submitted by customer demonstrating a regression 374 private static void wordSearchTest() throws Exception { 375 String testString = new String("word1 word2 word3"); 376 Pattern p = Pattern.compile("\\b"); 377 Matcher m = p.matcher(testString); 378 int position = 0; 379 int start = 0; 380 while (m.find(position)) { 381 start = m.start(); 382 if (start == testString.length()) 383 break; 384 if (m.find(start+1)) { 385 position = m.start(); 386 } else { 387 position = testString.length(); 388 } 389 if (testString.substring(start, position).equals(" ")) 390 continue; 391 if (!testString.substring(start, position-1).startsWith("word")) 392 failCount++; 393 } 394 report("Customer word search"); 395 } 396 397 // This is for bug 4994840 398 private static void caretAtEndTest() throws Exception { 399 // Problem only occurs with multiline patterns 400 // containing a beginning-of-line caret "^" followed 401 // by an expression that also matches the empty string. 402 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 403 Matcher matcher = pattern.matcher("\r"); 404 matcher.find(); 405 matcher.find(); 406 report("Caret at end"); 407 } 408 409 // This test is for 4979006 410 // Check to see if word boundary construct properly handles unicode 411 // non spacing marks 412 private static void unicodeWordBoundsTest() throws Exception { 413 String spaces = " "; 414 String wordChar = "a"; 415 String nsm = "\u030a"; 416 417 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 418 419 Pattern pattern = Pattern.compile("\\b"); 420 Matcher matcher = pattern.matcher(""); 421 // S=other B=word character N=non spacing mark .=word boundary 422 // SS.BB.SS 423 String input = spaces + wordChar + wordChar + spaces; 424 twoFindIndexes(input, matcher, 2, 4); 425 // SS.BBN.SS 426 input = spaces + wordChar +wordChar + nsm + spaces; 427 twoFindIndexes(input, matcher, 2, 5); 428 // SS.BN.SS 429 input = spaces + wordChar + nsm + spaces; 430 twoFindIndexes(input, matcher, 2, 4); 431 // SS.BNN.SS 432 input = spaces + wordChar + nsm + nsm + spaces; 433 twoFindIndexes(input, matcher, 2, 5); 434 // SSN.BB.SS 435 input = spaces + nsm + wordChar + wordChar + spaces; 436 twoFindIndexes(input, matcher, 3, 5); 437 // SS.BNB.SS 438 input = spaces + wordChar + nsm + wordChar + spaces; 439 twoFindIndexes(input, matcher, 2, 5); 440 // SSNNSS 441 input = spaces + nsm + nsm + spaces; 442 matcher.reset(input); 443 if (matcher.find()) 444 failCount++; 445 // SSN.BBN.SS 446 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 447 twoFindIndexes(input, matcher, 3, 6); 448 449 report("Unicode word boundary"); 450 } 451 452 private static void twoFindIndexes(String input, Matcher matcher, int a, 453 int b) throws Exception 454 { 455 matcher.reset(input); 456 matcher.find(); 457 if (matcher.start() != a) 458 failCount++; 459 matcher.find(); 460 if (matcher.start() != b) 461 failCount++; 462 } 463 464 // This test is for 6284152 465 static void check(String regex, String input, String[] expected) { 466 List<String> result = new ArrayList<String>(); 467 Pattern p = Pattern.compile(regex); 468 Matcher m = p.matcher(input); 469 while (m.find()) { 470 result.add(m.group()); 471 } 472 if (!Arrays.asList(expected).equals(result)) 473 failCount++; 474 } 475 476 private static void lookbehindTest() throws Exception { 477 //Positive 478 check("(?<=%.{0,5})foo\\d", 479 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 480 new String[]{"foo1", "foo2", "foo3"}); 481 482 //boundary at end of the lookbehind sub-regex should work consistently 483 //with the boundary just after the lookbehind sub-regex 484 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 485 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 486 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 487 check("(?<!abc \\b)foo", "abc foo", new String[0]); 488 489 //Negative 490 check("(?<!%.{0,5})foo\\d", 491 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 492 new String[] {"foo4", "foo5"}); 493 494 //Positive greedy 495 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 496 497 //Positive reluctant 498 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 499 500 //supplementary 501 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 502 new String[] {"fo\ud800\udc00o"}); 503 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 504 new String[] {"fo\ud800\udc00o"}); 505 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 506 new String[] {"fo\ud800\udc00o"}); 507 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 508 new String[] {"fo\ud800\udc00o"}); 509 report("Lookbehind"); 510 } 511 512 // This test is for 4938995 513 // Check to see if weak region boundaries are transparent to 514 // lookahead and lookbehind constructs 515 private static void boundsTest() throws Exception { 516 String fullMessage = "catdogcat"; 517 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 518 Matcher matcher = pattern.matcher("catdogca"); 519 matcher.useTransparentBounds(true); 520 if (matcher.find()) 521 failCount++; 522 matcher.reset("atdogcat"); 523 if (matcher.find()) 524 failCount++; 525 matcher.reset(fullMessage); 526 if (!matcher.find()) 527 failCount++; 528 matcher.reset(fullMessage); 529 matcher.region(0,9); 530 if (!matcher.find()) 531 failCount++; 532 matcher.reset(fullMessage); 533 matcher.region(0,6); 534 if (!matcher.find()) 535 failCount++; 536 matcher.reset(fullMessage); 537 matcher.region(3,6); 538 if (!matcher.find()) 539 failCount++; 540 matcher.useTransparentBounds(false); 541 if (matcher.find()) 542 failCount++; 543 544 // Negative lookahead/lookbehind 545 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 546 matcher = pattern.matcher("dogcat"); 547 matcher.useTransparentBounds(true); 548 matcher.region(0,3); 549 if (matcher.find()) 550 failCount++; 551 matcher.reset("catdog"); 552 matcher.region(3,6); 553 if (matcher.find()) 554 failCount++; 555 matcher.useTransparentBounds(false); 556 matcher.reset("dogcat"); 557 matcher.region(0,3); 558 if (!matcher.find()) 559 failCount++; 560 matcher.reset("catdog"); 561 matcher.region(3,6); 562 if (!matcher.find()) 563 failCount++; 564 565 report("Region bounds transparency"); 566 } 567 568 // This test is for 4945394 569 private static void findFromTest() throws Exception { 570 String message = "This is 40 $0 message."; 571 Pattern pat = Pattern.compile("\\$0"); 572 Matcher match = pat.matcher(message); 573 if (!match.find()) 574 failCount++; 575 if (match.find()) 576 failCount++; 577 if (match.find()) 578 failCount++; 579 report("Check for alternating find"); 580 } 581 582 // This test is for 4872664 and 4892980 583 private static void negatedCharClassTest() throws Exception { 584 Pattern pattern = Pattern.compile("[^>]"); 585 Matcher matcher = pattern.matcher("\u203A"); 586 if (!matcher.matches()) 587 failCount++; 588 pattern = Pattern.compile("[^fr]"); 589 matcher = pattern.matcher("a"); 590 if (!matcher.find()) 591 failCount++; 592 matcher.reset("\u203A"); 593 if (!matcher.find()) 594 failCount++; 595 String s = "for"; 596 String result[] = s.split("[^fr]"); 597 if (!result[0].equals("f")) 598 failCount++; 599 if (!result[1].equals("r")) 600 failCount++; 601 s = "f\u203Ar"; 602 result = s.split("[^fr]"); 603 if (!result[0].equals("f")) 604 failCount++; 605 if (!result[1].equals("r")) 606 failCount++; 607 608 // Test adding to bits, subtracting a node, then adding to bits again 609 pattern = Pattern.compile("[^f\u203Ar]"); 610 matcher = pattern.matcher("a"); 611 if (!matcher.find()) 612 failCount++; 613 matcher.reset("f"); 614 if (matcher.find()) 615 failCount++; 616 matcher.reset("\u203A"); 617 if (matcher.find()) 618 failCount++; 619 matcher.reset("r"); 620 if (matcher.find()) 621 failCount++; 622 matcher.reset("\u203B"); 623 if (!matcher.find()) 624 failCount++; 625 626 // Test subtracting a node, adding to bits, subtracting again 627 pattern = Pattern.compile("[^\u203Ar\u203B]"); 628 matcher = pattern.matcher("a"); 629 if (!matcher.find()) 630 failCount++; 631 matcher.reset("\u203A"); 632 if (matcher.find()) 633 failCount++; 634 matcher.reset("r"); 635 if (matcher.find()) 636 failCount++; 637 matcher.reset("\u203B"); 638 if (matcher.find()) 639 failCount++; 640 matcher.reset("\u203C"); 641 if (!matcher.find()) 642 failCount++; 643 644 report("Negated Character Class"); 645 } 646 647 // This test is for 4628291 648 private static void toStringTest() throws Exception { 649 Pattern pattern = Pattern.compile("b+"); 650 if (pattern.toString() != "b+") 651 failCount++; 652 Matcher matcher = pattern.matcher("aaabbbccc"); 653 String matcherString = matcher.toString(); // unspecified 654 matcher.find(); 655 matcherString = matcher.toString(); // unspecified 656 matcher.region(0,3); 657 matcherString = matcher.toString(); // unspecified 658 matcher.reset(); 659 matcherString = matcher.toString(); // unspecified 660 report("toString"); 661 } 662 663 // This test is for 4808962 664 private static void literalPatternTest() throws Exception { 665 int flags = Pattern.LITERAL; 666 667 Pattern pattern = Pattern.compile("abc\\t$^", flags); 668 check(pattern, "abc\\t$^", true); 669 670 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 671 check(pattern, "abc\\t$^", true); 672 673 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 674 check(pattern, "\\Qa^$bcabc\\E", true); 675 check(pattern, "a^$bcabc", false); 676 677 pattern = Pattern.compile("\\\\Q\\\\E"); 678 check(pattern, "\\Q\\E", true); 679 680 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 681 check(pattern, "abcefg\\Q\\Ehij", true); 682 683 pattern = Pattern.compile("\\\\\\Q\\\\E"); 684 check(pattern, "\\\\\\\\", true); 685 686 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 687 check(pattern, "\\Qa^$bcabc\\E", true); 688 check(pattern, "a^$bcabc", false); 689 690 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 691 check(pattern, "\\Qabc\\Edef", true); 692 check(pattern, "abcdef", false); 693 694 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 695 check(pattern, "abc\\Edef", true); 696 check(pattern, "abcdef", false); 697 698 pattern = Pattern.compile(Pattern.quote("\\E")); 699 check(pattern, "\\E", true); 700 701 pattern = Pattern.compile("((((abc.+?:)", flags); 702 check(pattern, "((((abc.+?:)", true); 703 704 flags |= Pattern.MULTILINE; 705 706 pattern = Pattern.compile("^cat$", flags); 707 check(pattern, "abc^cat$def", true); 708 check(pattern, "cat", false); 709 710 flags |= Pattern.CASE_INSENSITIVE; 711 712 pattern = Pattern.compile("abcdef", flags); 713 check(pattern, "ABCDEF", true); 714 check(pattern, "AbCdEf", true); 715 716 flags |= Pattern.DOTALL; 717 718 pattern = Pattern.compile("a...b", flags); 719 check(pattern, "A...b", true); 720 check(pattern, "Axxxb", false); 721 722 flags |= Pattern.CANON_EQ; 723 724 Pattern p = Pattern.compile("testa\u030a", flags); 725 check(pattern, "testa\u030a", false); 726 check(pattern, "test\u00e5", false); 727 728 // Supplementary character test 729 flags = Pattern.LITERAL; 730 731 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 732 check(pattern, toSupplementaries("abc\\t$^"), true); 733 734 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 735 check(pattern, toSupplementaries("abc\\t$^"), true); 736 737 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 738 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 739 check(pattern, toSupplementaries("a^$bcabc"), false); 740 741 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 742 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 743 check(pattern, toSupplementaries("a^$bcabc"), false); 744 745 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 746 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 747 check(pattern, toSupplementaries("abcdef"), false); 748 749 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 750 check(pattern, toSupplementaries("abc\\Edef"), true); 751 check(pattern, toSupplementaries("abcdef"), false); 752 753 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 754 check(pattern, toSupplementaries("((((abc.+?:)"), true); 755 756 flags |= Pattern.MULTILINE; 757 758 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 759 check(pattern, toSupplementaries("abc^cat$def"), true); 760 check(pattern, toSupplementaries("cat"), false); 761 762 flags |= Pattern.DOTALL; 763 764 // note: this is case-sensitive. 765 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 766 check(pattern, toSupplementaries("a...b"), true); 767 check(pattern, toSupplementaries("axxxb"), false); 768 769 flags |= Pattern.CANON_EQ; 770 771 String t = toSupplementaries("test"); 772 p = Pattern.compile(t + "a\u030a", flags); 773 check(pattern, t + "a\u030a", false); 774 check(pattern, t + "\u00e5", false); 775 776 report("Literal pattern"); 777 } 778 779 // This test is for 4803179 780 // This test is also for 4808962, replacement parts 781 private static void literalReplacementTest() throws Exception { 782 int flags = Pattern.LITERAL; 783 784 Pattern pattern = Pattern.compile("abc", flags); 785 Matcher matcher = pattern.matcher("zzzabczzz"); 786 String replaceTest = "$0"; 787 String result = matcher.replaceAll(replaceTest); 788 if (!result.equals("zzzabczzz")) 789 failCount++; 790 791 matcher.reset(); 792 String literalReplacement = matcher.quoteReplacement(replaceTest); 793 result = matcher.replaceAll(literalReplacement); 794 if (!result.equals("zzz$0zzz")) 795 failCount++; 796 797 matcher.reset(); 798 replaceTest = "\\t$\\$"; 799 literalReplacement = matcher.quoteReplacement(replaceTest); 800 result = matcher.replaceAll(literalReplacement); 801 if (!result.equals("zzz\\t$\\$zzz")) 802 failCount++; 803 804 // Supplementary character test 805 pattern = Pattern.compile(toSupplementaries("abc"), flags); 806 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 807 replaceTest = "$0"; 808 result = matcher.replaceAll(replaceTest); 809 if (!result.equals(toSupplementaries("zzzabczzz"))) 810 failCount++; 811 812 matcher.reset(); 813 literalReplacement = matcher.quoteReplacement(replaceTest); 814 result = matcher.replaceAll(literalReplacement); 815 if (!result.equals(toSupplementaries("zzz$0zzz"))) 816 failCount++; 817 818 matcher.reset(); 819 replaceTest = "\\t$\\$"; 820 literalReplacement = matcher.quoteReplacement(replaceTest); 821 result = matcher.replaceAll(literalReplacement); 822 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 823 failCount++; 824 825 report("Literal replacement"); 826 } 827 828 // This test is for 4757029 829 private static void regionTest() throws Exception { 830 Pattern pattern = Pattern.compile("abc"); 831 Matcher matcher = pattern.matcher("abcdefabc"); 832 833 matcher.region(0,9); 834 if (!matcher.find()) 835 failCount++; 836 if (!matcher.find()) 837 failCount++; 838 matcher.region(0,3); 839 if (!matcher.find()) 840 failCount++; 841 matcher.region(3,6); 842 if (matcher.find()) 843 failCount++; 844 matcher.region(0,2); 845 if (matcher.find()) 846 failCount++; 847 848 expectRegionFail(matcher, 1, -1); 849 expectRegionFail(matcher, -1, -1); 850 expectRegionFail(matcher, -1, 1); 851 expectRegionFail(matcher, 5, 3); 852 expectRegionFail(matcher, 5, 12); 853 expectRegionFail(matcher, 12, 12); 854 855 pattern = Pattern.compile("^abc$"); 856 matcher = pattern.matcher("zzzabczzz"); 857 matcher.region(0,9); 858 if (matcher.find()) 859 failCount++; 860 matcher.region(3,6); 861 if (!matcher.find()) 862 failCount++; 863 matcher.region(3,6); 864 matcher.useAnchoringBounds(false); 865 if (matcher.find()) 866 failCount++; 867 868 // Supplementary character test 869 pattern = Pattern.compile(toSupplementaries("abc")); 870 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 871 matcher.region(0,9*2); 872 if (!matcher.find()) 873 failCount++; 874 if (!matcher.find()) 875 failCount++; 876 matcher.region(0,3*2); 877 if (!matcher.find()) 878 failCount++; 879 matcher.region(1,3*2); 880 if (matcher.find()) 881 failCount++; 882 matcher.region(3*2,6*2); 883 if (matcher.find()) 884 failCount++; 885 matcher.region(0,2*2); 886 if (matcher.find()) 887 failCount++; 888 matcher.region(0,2*2+1); 889 if (matcher.find()) 890 failCount++; 891 892 expectRegionFail(matcher, 1*2, -1); 893 expectRegionFail(matcher, -1, -1); 894 expectRegionFail(matcher, -1, 1*2); 895 expectRegionFail(matcher, 5*2, 3*2); 896 expectRegionFail(matcher, 5*2, 12*2); 897 expectRegionFail(matcher, 12*2, 12*2); 898 899 pattern = Pattern.compile(toSupplementaries("^abc$")); 900 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 901 matcher.region(0,9*2); 902 if (matcher.find()) 903 failCount++; 904 matcher.region(3*2,6*2); 905 if (!matcher.find()) 906 failCount++; 907 matcher.region(3*2+1,6*2); 908 if (matcher.find()) 909 failCount++; 910 matcher.region(3*2,6*2-1); 911 if (matcher.find()) 912 failCount++; 913 matcher.region(3*2,6*2); 914 matcher.useAnchoringBounds(false); 915 if (matcher.find()) 916 failCount++; 917 report("Regions"); 918 } 919 920 private static void expectRegionFail(Matcher matcher, int index1, 921 int index2) 922 { 923 try { 924 matcher.region(index1, index2); 925 failCount++; 926 } catch (IndexOutOfBoundsException ioobe) { 927 // Correct result 928 } catch (IllegalStateException ise) { 929 // Correct result 930 } 931 } 932 933 // This test is for 4803197 934 private static void escapedSegmentTest() throws Exception { 935 936 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 937 check(pattern, "dir1\\dir2", true); 938 939 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 940 check(pattern, "dir1\\dir2\\", true); 941 942 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 943 check(pattern, "dir1\\dir2\\", true); 944 945 // Supplementary character test 946 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 947 check(pattern, toSupplementaries("dir1\\dir2"), true); 948 949 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 950 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 951 952 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 953 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 954 955 report("Escaped segment"); 956 } 957 958 // This test is for 4792284 959 private static void nonCaptureRepetitionTest() throws Exception { 960 String input = "abcdefgh;"; 961 962 String[] patterns = new String[] { 963 "(?:\\w{4})+;", 964 "(?:\\w{8})*;", 965 "(?:\\w{2}){2,4};", 966 "(?:\\w{4}){2,};", // only matches the 967 ".*?(?:\\w{5})+;", // specified minimum 968 ".*?(?:\\w{9})*;", // number of reps - OK 969 "(?:\\w{4})+?;", // lazy repetition - OK 970 "(?:\\w{4})++;", // possessive repetition - OK 971 "(?:\\w{2,}?)+;", // non-deterministic - OK 972 "(\\w{4})+;", // capturing group - OK 973 }; 974 975 for (int i = 0; i < patterns.length; i++) { 976 // Check find() 977 check(patterns[i], 0, input, input, true); 978 // Check matches() 979 Pattern p = Pattern.compile(patterns[i]); 980 Matcher m = p.matcher(input); 981 982 if (m.matches()) { 983 if (!m.group(0).equals(input)) 984 failCount++; 985 } else { 986 failCount++; 987 } 988 } 989 990 report("Non capturing repetition"); 991 } 992 993 // This test is for 6358731 994 private static void notCapturedGroupCurlyMatchTest() throws Exception { 995 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 996 Matcher matcher = pattern.matcher("abcd"); 997 if (!matcher.matches() || 998 matcher.group(1) != null || 999 !matcher.group(2).equals("abcd")) { 1000 failCount++; 1001 } 1002 report("Not captured GroupCurly"); 1003 } 1004 1005 // This test is for 4706545 1006 private static void javaCharClassTest() throws Exception { 1007 for (int i=0; i<1000; i++) { 1008 char c = (char)generator.nextInt(); 1009 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1010 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1011 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1012 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1013 check("{javaDigit}", c, Character.isDigit(c)); 1014 check("{javaDefined}", c, Character.isDefined(c)); 1015 check("{javaLetter}", c, Character.isLetter(c)); 1016 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1017 check("{javaJavaIdentifierStart}", c, 1018 Character.isJavaIdentifierStart(c)); 1019 check("{javaJavaIdentifierPart}", c, 1020 Character.isJavaIdentifierPart(c)); 1021 check("{javaUnicodeIdentifierStart}", c, 1022 Character.isUnicodeIdentifierStart(c)); 1023 check("{javaUnicodeIdentifierPart}", c, 1024 Character.isUnicodeIdentifierPart(c)); 1025 check("{javaIdentifierIgnorable}", c, 1026 Character.isIdentifierIgnorable(c)); 1027 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1028 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1029 check("{javaISOControl}", c, Character.isISOControl(c)); 1030 check("{javaMirrored}", c, Character.isMirrored(c)); 1031 1032 } 1033 1034 // Supplementary character test 1035 for (int i=0; i<1000; i++) { 1036 int c = generator.nextInt(Character.MAX_CODE_POINT 1037 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1038 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1039 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1040 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1041 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1042 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1043 check("{javaDigit}", c, Character.isDigit(c)); 1044 check("{javaDefined}", c, Character.isDefined(c)); 1045 check("{javaLetter}", c, Character.isLetter(c)); 1046 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1047 check("{javaJavaIdentifierStart}", c, 1048 Character.isJavaIdentifierStart(c)); 1049 check("{javaJavaIdentifierPart}", c, 1050 Character.isJavaIdentifierPart(c)); 1051 check("{javaUnicodeIdentifierStart}", c, 1052 Character.isUnicodeIdentifierStart(c)); 1053 check("{javaUnicodeIdentifierPart}", c, 1054 Character.isUnicodeIdentifierPart(c)); 1055 check("{javaIdentifierIgnorable}", c, 1056 Character.isIdentifierIgnorable(c)); 1057 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1058 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1059 check("{javaISOControl}", c, Character.isISOControl(c)); 1060 check("{javaMirrored}", c, Character.isMirrored(c)); 1061 } 1062 1063 report("Java character classes"); 1064 } 1065 1066 // This test is for 4523620 1067 /* 1068 private static void numOccurrencesTest() throws Exception { 1069 Pattern pattern = Pattern.compile("aaa"); 1070 1071 if (pattern.numOccurrences("aaaaaa", false) != 2) 1072 failCount++; 1073 if (pattern.numOccurrences("aaaaaa", true) != 4) 1074 failCount++; 1075 1076 pattern = Pattern.compile("^"); 1077 if (pattern.numOccurrences("aaaaaa", false) != 1) 1078 failCount++; 1079 if (pattern.numOccurrences("aaaaaa", true) != 1) 1080 failCount++; 1081 1082 report("Number of Occurrences"); 1083 } 1084 */ 1085 1086 // This test is for 4776374 1087 private static void caretBetweenTerminatorsTest() throws Exception { 1088 int flags1 = Pattern.DOTALL; 1089 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1090 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1091 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1092 1093 check("^....", flags1, "test\ntest", "test", true); 1094 check(".....^", flags1, "test\ntest", "test", false); 1095 check(".....^", flags1, "test\n", "test", false); 1096 check("....^", flags1, "test\r\n", "test", false); 1097 1098 check("^....", flags2, "test\ntest", "test", true); 1099 check("....^", flags2, "test\ntest", "test", false); 1100 check(".....^", flags2, "test\n", "test", false); 1101 check("....^", flags2, "test\r\n", "test", false); 1102 1103 check("^....", flags3, "test\ntest", "test", true); 1104 check(".....^", flags3, "test\ntest", "test\n", true); 1105 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1106 check(".....^", flags3, "test\n", "test", false); 1107 check(".....^", flags3, "test\r\n", "test", false); 1108 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1109 1110 check("^....", flags4, "test\ntest", "test", true); 1111 check(".....^", flags3, "test\ntest", "test\n", true); 1112 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1113 check(".....^", flags4, "test\n", "test\n", false); 1114 check(".....^", flags4, "test\r\n", "test\r", false); 1115 1116 // Supplementary character test 1117 String t = toSupplementaries("test"); 1118 check("^....", flags1, t+"\n"+t, t, true); 1119 check(".....^", flags1, t+"\n"+t, t, false); 1120 check(".....^", flags1, t+"\n", t, false); 1121 check("....^", flags1, t+"\r\n", t, false); 1122 1123 check("^....", flags2, t+"\n"+t, t, true); 1124 check("....^", flags2, t+"\n"+t, t, false); 1125 check(".....^", flags2, t+"\n", t, false); 1126 check("....^", flags2, t+"\r\n", t, false); 1127 1128 check("^....", flags3, t+"\n"+t, t, true); 1129 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1130 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1131 check(".....^", flags3, t+"\n", t, false); 1132 check(".....^", flags3, t+"\r\n", t, false); 1133 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1134 1135 check("^....", flags4, t+"\n"+t, t, true); 1136 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1137 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1138 check(".....^", flags4, t+"\n", t+"\n", false); 1139 check(".....^", flags4, t+"\r\n", t+"\r", false); 1140 1141 report("Caret between terminators"); 1142 } 1143 1144 // This test is for 4727935 1145 private static void dollarAtEndTest() throws Exception { 1146 int flags1 = Pattern.DOTALL; 1147 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1148 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1149 1150 check("....$", flags1, "test\n", "test", true); 1151 check("....$", flags1, "test\r\n", "test", true); 1152 check(".....$", flags1, "test\n", "test\n", true); 1153 check(".....$", flags1, "test\u0085", "test\u0085", true); 1154 check("....$", flags1, "test\u0085", "test", true); 1155 1156 check("....$", flags2, "test\n", "test", true); 1157 check(".....$", flags2, "test\n", "test\n", true); 1158 check(".....$", flags2, "test\u0085", "test\u0085", true); 1159 check("....$", flags2, "test\u0085", "est\u0085", true); 1160 1161 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1162 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1163 check("....$blah", flags3, "test\nblah", "!!!!", false); 1164 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1165 1166 // Supplementary character test 1167 String t = toSupplementaries("test"); 1168 String b = toSupplementaries("blah"); 1169 check("....$", flags1, t+"\n", t, true); 1170 check("....$", flags1, t+"\r\n", t, true); 1171 check(".....$", flags1, t+"\n", t+"\n", true); 1172 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1173 check("....$", flags1, t+"\u0085", t, true); 1174 1175 check("....$", flags2, t+"\n", t, true); 1176 check(".....$", flags2, t+"\n", t+"\n", true); 1177 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1178 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1179 1180 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1181 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1182 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1183 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1184 1185 report("Dollar at End"); 1186 } 1187 1188 // This test is for 4711773 1189 private static void multilineDollarTest() throws Exception { 1190 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1191 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1192 matcher.find(); 1193 if (matcher.start(0) != 9) 1194 failCount++; 1195 matcher.find(); 1196 if (matcher.start(0) != 20) 1197 failCount++; 1198 1199 // Supplementary character test 1200 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1201 matcher.find(); 1202 if (matcher.start(0) != 9*2) 1203 failCount++; 1204 matcher.find(); 1205 if (matcher.start(0) != 20*2) 1206 failCount++; 1207 1208 report("Multiline Dollar"); 1209 } 1210 1211 private static void reluctantRepetitionTest() throws Exception { 1212 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1213 check(p, "1 word word word 2", true); 1214 check(p, "1 wor wo w 2", true); 1215 check(p, "1 word word 2", true); 1216 check(p, "1 word 2", true); 1217 check(p, "1 wo w w 2", true); 1218 check(p, "1 wo w 2", true); 1219 check(p, "1 wor w 2", true); 1220 1221 p = Pattern.compile("([a-z])+?c"); 1222 Matcher m = p.matcher("ababcdefdec"); 1223 check(m, "ababc"); 1224 1225 // Supplementary character test 1226 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1227 m = p.matcher(toSupplementaries("ababcdefdec")); 1228 check(m, toSupplementaries("ababc")); 1229 1230 report("Reluctant Repetition"); 1231 } 1232 1233 private static void serializeTest() throws Exception { 1234 String patternStr = "(b)"; 1235 String matchStr = "b"; 1236 Pattern pattern = Pattern.compile(patternStr); 1237 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1238 ObjectOutputStream oos = new ObjectOutputStream(baos); 1239 oos.writeObject(pattern); 1240 oos.close(); 1241 ObjectInputStream ois = new ObjectInputStream( 1242 new ByteArrayInputStream(baos.toByteArray())); 1243 Pattern serializedPattern = (Pattern)ois.readObject(); 1244 ois.close(); 1245 Matcher matcher = serializedPattern.matcher(matchStr); 1246 if (!matcher.matches()) 1247 failCount++; 1248 if (matcher.groupCount() != 1) 1249 failCount++; 1250 1251 report("Serialization"); 1252 } 1253 1254 private static void gTest() { 1255 Pattern pattern = Pattern.compile("\\G\\w"); 1256 Matcher matcher = pattern.matcher("abc#x#x"); 1257 matcher.find(); 1258 matcher.find(); 1259 matcher.find(); 1260 if (matcher.find()) 1261 failCount++; 1262 1263 pattern = Pattern.compile("\\GA*"); 1264 matcher = pattern.matcher("1A2AA3"); 1265 matcher.find(); 1266 if (matcher.find()) 1267 failCount++; 1268 1269 pattern = Pattern.compile("\\GA*"); 1270 matcher = pattern.matcher("1A2AA3"); 1271 if (!matcher.find(1)) 1272 failCount++; 1273 matcher.find(); 1274 if (matcher.find()) 1275 failCount++; 1276 1277 report("\\G"); 1278 } 1279 1280 private static void zTest() { 1281 Pattern pattern = Pattern.compile("foo\\Z"); 1282 // Positives 1283 check(pattern, "foo\u0085", true); 1284 check(pattern, "foo\u2028", true); 1285 check(pattern, "foo\u2029", true); 1286 check(pattern, "foo\n", true); 1287 check(pattern, "foo\r", true); 1288 check(pattern, "foo\r\n", true); 1289 // Negatives 1290 check(pattern, "fooo", false); 1291 check(pattern, "foo\n\r", false); 1292 1293 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1294 // Positives 1295 check(pattern, "foo", true); 1296 check(pattern, "foo\n", true); 1297 // Negatives 1298 check(pattern, "foo\r", false); 1299 check(pattern, "foo\u0085", false); 1300 check(pattern, "foo\u2028", false); 1301 check(pattern, "foo\u2029", false); 1302 1303 report("\\Z"); 1304 } 1305 1306 private static void replaceFirstTest() { 1307 Pattern pattern = Pattern.compile("(ab)(c*)"); 1308 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1309 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1310 failCount++; 1311 1312 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1313 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1314 failCount++; 1315 1316 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1317 String result = matcher.replaceFirst("$1"); 1318 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1319 failCount++; 1320 1321 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1322 result = matcher.replaceFirst("$2"); 1323 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1324 failCount++; 1325 1326 pattern = Pattern.compile("a*"); 1327 matcher = pattern.matcher("aaaaaaaaaa"); 1328 if (!matcher.replaceFirst("test").equals("test")) 1329 failCount++; 1330 1331 pattern = Pattern.compile("a+"); 1332 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1333 if (!matcher.replaceFirst("test").equals("zzztest")) 1334 failCount++; 1335 1336 // Supplementary character test 1337 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1338 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1339 if (!matcher.replaceFirst(toSupplementaries("test")) 1340 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1341 failCount++; 1342 1343 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1344 if (!matcher.replaceFirst(toSupplementaries("test")). 1345 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1346 failCount++; 1347 1348 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1349 result = matcher.replaceFirst("$1"); 1350 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1351 failCount++; 1352 1353 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1354 result = matcher.replaceFirst("$2"); 1355 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1356 failCount++; 1357 1358 pattern = Pattern.compile(toSupplementaries("a*")); 1359 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1360 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1361 failCount++; 1362 1363 pattern = Pattern.compile(toSupplementaries("a+")); 1364 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1365 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1366 failCount++; 1367 1368 report("Replace First"); 1369 } 1370 1371 private static void unixLinesTest() { 1372 Pattern pattern = Pattern.compile(".*"); 1373 Matcher matcher = pattern.matcher("aa\u2028blah"); 1374 matcher.find(); 1375 if (!matcher.group(0).equals("aa")) 1376 failCount++; 1377 1378 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1379 matcher = pattern.matcher("aa\u2028blah"); 1380 matcher.find(); 1381 if (!matcher.group(0).equals("aa\u2028blah")) 1382 failCount++; 1383 1384 pattern = Pattern.compile("[az]$", 1385 Pattern.MULTILINE | Pattern.UNIX_LINES); 1386 matcher = pattern.matcher("aa\u2028zz"); 1387 check(matcher, "a\u2028", false); 1388 1389 // Supplementary character test 1390 pattern = Pattern.compile(".*"); 1391 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1392 matcher.find(); 1393 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1394 failCount++; 1395 1396 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1397 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1398 matcher.find(); 1399 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1400 failCount++; 1401 1402 pattern = Pattern.compile(toSupplementaries("[az]$"), 1403 Pattern.MULTILINE | Pattern.UNIX_LINES); 1404 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1405 check(matcher, toSupplementaries("a\u2028"), false); 1406 1407 report("Unix Lines"); 1408 } 1409 1410 private static void commentsTest() { 1411 int flags = Pattern.COMMENTS; 1412 1413 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1414 Matcher matcher = pattern.matcher("aa#aa"); 1415 if (!matcher.matches()) 1416 failCount++; 1417 1418 pattern = Pattern.compile("aa # blah", flags); 1419 matcher = pattern.matcher("aa"); 1420 if (!matcher.matches()) 1421 failCount++; 1422 1423 pattern = Pattern.compile("aa blah", flags); 1424 matcher = pattern.matcher("aablah"); 1425 if (!matcher.matches()) 1426 failCount++; 1427 1428 pattern = Pattern.compile("aa # blah blech ", flags); 1429 matcher = pattern.matcher("aa"); 1430 if (!matcher.matches()) 1431 failCount++; 1432 1433 pattern = Pattern.compile("aa # blah\n ", flags); 1434 matcher = pattern.matcher("aa"); 1435 if (!matcher.matches()) 1436 failCount++; 1437 1438 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1439 matcher = pattern.matcher("aabc"); 1440 if (!matcher.matches()) 1441 failCount++; 1442 1443 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1444 matcher = pattern.matcher("aabc"); 1445 if (!matcher.matches()) 1446 failCount++; 1447 1448 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1449 matcher = pattern.matcher("aabc#blech"); 1450 if (!matcher.matches()) 1451 failCount++; 1452 1453 // Supplementary character test 1454 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1455 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1456 if (!matcher.matches()) 1457 failCount++; 1458 1459 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1460 matcher = pattern.matcher(toSupplementaries("aa")); 1461 if (!matcher.matches()) 1462 failCount++; 1463 1464 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1465 matcher = pattern.matcher(toSupplementaries("aablah")); 1466 if (!matcher.matches()) 1467 failCount++; 1468 1469 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1470 matcher = pattern.matcher(toSupplementaries("aa")); 1471 if (!matcher.matches()) 1472 failCount++; 1473 1474 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1475 matcher = pattern.matcher(toSupplementaries("aa")); 1476 if (!matcher.matches()) 1477 failCount++; 1478 1479 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1480 matcher = pattern.matcher(toSupplementaries("aabc")); 1481 if (!matcher.matches()) 1482 failCount++; 1483 1484 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1485 matcher = pattern.matcher(toSupplementaries("aabc")); 1486 if (!matcher.matches()) 1487 failCount++; 1488 1489 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1490 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1491 if (!matcher.matches()) 1492 failCount++; 1493 1494 report("Comments"); 1495 } 1496 1497 private static void caseFoldingTest() { // bug 4504687 1498 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1499 Pattern pattern = Pattern.compile("aa", flags); 1500 Matcher matcher = pattern.matcher("ab"); 1501 if (matcher.matches()) 1502 failCount++; 1503 1504 pattern = Pattern.compile("aA", flags); 1505 matcher = pattern.matcher("ab"); 1506 if (matcher.matches()) 1507 failCount++; 1508 1509 pattern = Pattern.compile("aa", flags); 1510 matcher = pattern.matcher("aB"); 1511 if (matcher.matches()) 1512 failCount++; 1513 matcher = pattern.matcher("Ab"); 1514 if (matcher.matches()) 1515 failCount++; 1516 1517 // ASCII "a" 1518 // Latin-1 Supplement "a" + grave 1519 // Cyrillic "a" 1520 String[] patterns = new String[] { 1521 //single 1522 "a", "\u00e0", "\u0430", 1523 //slice 1524 "ab", "\u00e0\u00e1", "\u0430\u0431", 1525 //class single 1526 "[a]", "[\u00e0]", "[\u0430]", 1527 //class range 1528 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1529 //back reference 1530 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1531 }; 1532 1533 String[] texts = new String[] { 1534 "A", "\u00c0", "\u0410", 1535 "AB", "\u00c0\u00c1", "\u0410\u0411", 1536 "A", "\u00c0", "\u0410", 1537 "B", "\u00c2", "\u0411", 1538 "aA", "\u00e0\u00c0", "\u0430\u0410" 1539 }; 1540 1541 boolean[] expected = new boolean[] { 1542 true, false, false, 1543 true, false, false, 1544 true, false, false, 1545 true, false, false, 1546 true, false, false 1547 }; 1548 1549 flags = Pattern.CASE_INSENSITIVE; 1550 for (int i = 0; i < patterns.length; i++) { 1551 pattern = Pattern.compile(patterns[i], flags); 1552 matcher = pattern.matcher(texts[i]); 1553 if (matcher.matches() != expected[i]) { 1554 System.out.println("<1> Failed at " + i); 1555 failCount++; 1556 } 1557 } 1558 1559 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1560 for (int i = 0; i < patterns.length; i++) { 1561 pattern = Pattern.compile(patterns[i], flags); 1562 matcher = pattern.matcher(texts[i]); 1563 if (!matcher.matches()) { 1564 System.out.println("<2> Failed at " + i); 1565 failCount++; 1566 } 1567 } 1568 // flag unicode_case alone should do nothing 1569 flags = Pattern.UNICODE_CASE; 1570 for (int i = 0; i < patterns.length; i++) { 1571 pattern = Pattern.compile(patterns[i], flags); 1572 matcher = pattern.matcher(texts[i]); 1573 if (matcher.matches()) { 1574 System.out.println("<3> Failed at " + i); 1575 failCount++; 1576 } 1577 } 1578 1579 // Special cases: i, I, u+0131 and u+0130 1580 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1581 pattern = Pattern.compile("[h-j]+", flags); 1582 if (!pattern.matcher("\u0131\u0130").matches()) 1583 failCount++; 1584 report("Case Folding"); 1585 } 1586 1587 private static void appendTest() { 1588 Pattern pattern = Pattern.compile("(ab)(cd)"); 1589 Matcher matcher = pattern.matcher("abcd"); 1590 String result = matcher.replaceAll("$2$1"); 1591 if (!result.equals("cdab")) 1592 failCount++; 1593 1594 String s1 = "Swap all: first = 123, second = 456"; 1595 String s2 = "Swap one: first = 123, second = 456"; 1596 String r = "$3$2$1"; 1597 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1598 matcher = pattern.matcher(s1); 1599 1600 result = matcher.replaceAll(r); 1601 if (!result.equals("Swap all: 123 = first, 456 = second")) 1602 failCount++; 1603 1604 matcher = pattern.matcher(s2); 1605 1606 if (matcher.find()) { 1607 StringBuffer sb = new StringBuffer(); 1608 matcher.appendReplacement(sb, r); 1609 matcher.appendTail(sb); 1610 result = sb.toString(); 1611 if (!result.equals("Swap one: 123 = first, second = 456")) 1612 failCount++; 1613 } 1614 1615 // Supplementary character test 1616 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1617 matcher = pattern.matcher(toSupplementaries("abcd")); 1618 result = matcher.replaceAll("$2$1"); 1619 if (!result.equals(toSupplementaries("cdab"))) 1620 failCount++; 1621 1622 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1623 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1624 r = toSupplementaries("$3$2$1"); 1625 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1626 matcher = pattern.matcher(s1); 1627 1628 result = matcher.replaceAll(r); 1629 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1630 failCount++; 1631 1632 matcher = pattern.matcher(s2); 1633 1634 if (matcher.find()) { 1635 StringBuffer sb = new StringBuffer(); 1636 matcher.appendReplacement(sb, r); 1637 matcher.appendTail(sb); 1638 result = sb.toString(); 1639 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1640 failCount++; 1641 } 1642 report("Append"); 1643 } 1644 1645 private static void splitTest() { 1646 Pattern pattern = Pattern.compile(":"); 1647 String[] result = pattern.split("foo:and:boo", 2); 1648 if (!result[0].equals("foo")) 1649 failCount++; 1650 if (!result[1].equals("and:boo")) 1651 failCount++; 1652 // Supplementary character test 1653 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1654 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1655 if (!result[0].equals(toSupplementaries("foo"))) 1656 failCount++; 1657 if (!result[1].equals(toSupplementaries("andXboo"))) 1658 failCount++; 1659 1660 CharBuffer cb = CharBuffer.allocate(100); 1661 cb.put("foo:and:boo"); 1662 cb.flip(); 1663 result = pattern.split(cb); 1664 if (!result[0].equals("foo")) 1665 failCount++; 1666 if (!result[1].equals("and")) 1667 failCount++; 1668 if (!result[2].equals("boo")) 1669 failCount++; 1670 1671 // Supplementary character test 1672 CharBuffer cbs = CharBuffer.allocate(100); 1673 cbs.put(toSupplementaries("fooXandXboo")); 1674 cbs.flip(); 1675 result = patternX.split(cbs); 1676 if (!result[0].equals(toSupplementaries("foo"))) 1677 failCount++; 1678 if (!result[1].equals(toSupplementaries("and"))) 1679 failCount++; 1680 if (!result[2].equals(toSupplementaries("boo"))) 1681 failCount++; 1682 1683 String source = "0123456789"; 1684 for (int limit=-2; limit<3; limit++) { 1685 for (int x=0; x<10; x++) { 1686 result = source.split(Integer.toString(x), limit); 1687 int expectedLength = limit < 1 ? 2 : limit; 1688 1689 if ((limit == 0) && (x == 9)) { 1690 // expected dropping of "" 1691 if (result.length != 1) 1692 failCount++; 1693 if (!result[0].equals("012345678")) { 1694 failCount++; 1695 } 1696 } else { 1697 if (result.length != expectedLength) { 1698 failCount++; 1699 } 1700 if (!result[0].equals(source.substring(0,x))) { 1701 if (limit != 1) { 1702 failCount++; 1703 } else { 1704 if (!result[0].equals(source.substring(0,10))) { 1705 failCount++; 1706 } 1707 } 1708 } 1709 if (expectedLength > 1) { // Check segment 2 1710 if (!result[1].equals(source.substring(x+1,10))) 1711 failCount++; 1712 } 1713 } 1714 } 1715 } 1716 // Check the case for no match found 1717 for (int limit=-2; limit<3; limit++) { 1718 result = source.split("e", limit); 1719 if (result.length != 1) 1720 failCount++; 1721 if (!result[0].equals(source)) 1722 failCount++; 1723 } 1724 // Check the case for limit == 0, source = ""; 1725 source = ""; 1726 result = source.split("e", 0); 1727 if (result.length != 1) 1728 failCount++; 1729 if (!result[0].equals(source)) 1730 failCount++; 1731 1732 report("Split"); 1733 } 1734 1735 private static void negationTest() { 1736 Pattern pattern = Pattern.compile("[\\[@^]+"); 1737 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1738 if (!matcher.find()) 1739 failCount++; 1740 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1741 failCount++; 1742 pattern = Pattern.compile("[@\\[^]+"); 1743 matcher = pattern.matcher("@@@@[[[[^^^^"); 1744 if (!matcher.find()) 1745 failCount++; 1746 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1747 failCount++; 1748 pattern = Pattern.compile("[@\\[^@]+"); 1749 matcher = pattern.matcher("@@@@[[[[^^^^"); 1750 if (!matcher.find()) 1751 failCount++; 1752 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1753 failCount++; 1754 1755 pattern = Pattern.compile("\\)"); 1756 matcher = pattern.matcher("xxx)xxx"); 1757 if (!matcher.find()) 1758 failCount++; 1759 1760 report("Negation"); 1761 } 1762 1763 private static void ampersandTest() { 1764 Pattern pattern = Pattern.compile("[&@]+"); 1765 check(pattern, "@@@@&&&&", true); 1766 1767 pattern = Pattern.compile("[@&]+"); 1768 check(pattern, "@@@@&&&&", true); 1769 1770 pattern = Pattern.compile("[@\\&]+"); 1771 check(pattern, "@@@@&&&&", true); 1772 1773 report("Ampersand"); 1774 } 1775 1776 private static void octalTest() throws Exception { 1777 Pattern pattern = Pattern.compile("\\u0007"); 1778 Matcher matcher = pattern.matcher("\u0007"); 1779 if (!matcher.matches()) 1780 failCount++; 1781 pattern = Pattern.compile("\\07"); 1782 matcher = pattern.matcher("\u0007"); 1783 if (!matcher.matches()) 1784 failCount++; 1785 pattern = Pattern.compile("\\007"); 1786 matcher = pattern.matcher("\u0007"); 1787 if (!matcher.matches()) 1788 failCount++; 1789 pattern = Pattern.compile("\\0007"); 1790 matcher = pattern.matcher("\u0007"); 1791 if (!matcher.matches()) 1792 failCount++; 1793 pattern = Pattern.compile("\\040"); 1794 matcher = pattern.matcher("\u0020"); 1795 if (!matcher.matches()) 1796 failCount++; 1797 pattern = Pattern.compile("\\0403"); 1798 matcher = pattern.matcher("\u00203"); 1799 if (!matcher.matches()) 1800 failCount++; 1801 pattern = Pattern.compile("\\0103"); 1802 matcher = pattern.matcher("\u0043"); 1803 if (!matcher.matches()) 1804 failCount++; 1805 1806 report("Octal"); 1807 } 1808 1809 private static void longPatternTest() throws Exception { 1810 try { 1811 Pattern pattern = Pattern.compile( 1812 "a 32-character-long pattern xxxx"); 1813 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 1814 pattern = Pattern.compile("a thirty four character long regex"); 1815 StringBuffer patternToBe = new StringBuffer(101); 1816 for (int i=0; i<100; i++) 1817 patternToBe.append((char)(97 + i%26)); 1818 pattern = Pattern.compile(patternToBe.toString()); 1819 } catch (PatternSyntaxException e) { 1820 failCount++; 1821 } 1822 1823 // Supplementary character test 1824 try { 1825 Pattern pattern = Pattern.compile( 1826 toSupplementaries("a 32-character-long pattern xxxx")); 1827 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 1828 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 1829 StringBuffer patternToBe = new StringBuffer(101*2); 1830 for (int i=0; i<100; i++) 1831 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 1832 + 97 + i%26)); 1833 pattern = Pattern.compile(patternToBe.toString()); 1834 } catch (PatternSyntaxException e) { 1835 failCount++; 1836 } 1837 report("LongPattern"); 1838 } 1839 1840 private static void group0Test() throws Exception { 1841 Pattern pattern = Pattern.compile("(tes)ting"); 1842 Matcher matcher = pattern.matcher("testing"); 1843 check(matcher, "testing"); 1844 1845 matcher.reset("testing"); 1846 if (matcher.lookingAt()) { 1847 if (!matcher.group(0).equals("testing")) 1848 failCount++; 1849 } else { 1850 failCount++; 1851 } 1852 1853 matcher.reset("testing"); 1854 if (matcher.matches()) { 1855 if (!matcher.group(0).equals("testing")) 1856 failCount++; 1857 } else { 1858 failCount++; 1859 } 1860 1861 pattern = Pattern.compile("(tes)ting"); 1862 matcher = pattern.matcher("testing"); 1863 if (matcher.lookingAt()) { 1864 if (!matcher.group(0).equals("testing")) 1865 failCount++; 1866 } else { 1867 failCount++; 1868 } 1869 1870 pattern = Pattern.compile("^(tes)ting"); 1871 matcher = pattern.matcher("testing"); 1872 if (matcher.matches()) { 1873 if (!matcher.group(0).equals("testing")) 1874 failCount++; 1875 } else { 1876 failCount++; 1877 } 1878 1879 // Supplementary character test 1880 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 1881 matcher = pattern.matcher(toSupplementaries("testing")); 1882 check(matcher, toSupplementaries("testing")); 1883 1884 matcher.reset(toSupplementaries("testing")); 1885 if (matcher.lookingAt()) { 1886 if (!matcher.group(0).equals(toSupplementaries("testing"))) 1887 failCount++; 1888 } else { 1889 failCount++; 1890 } 1891 1892 matcher.reset(toSupplementaries("testing")); 1893 if (matcher.matches()) { 1894 if (!matcher.group(0).equals(toSupplementaries("testing"))) 1895 failCount++; 1896 } else { 1897 failCount++; 1898 } 1899 1900 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 1901 matcher = pattern.matcher(toSupplementaries("testing")); 1902 if (matcher.lookingAt()) { 1903 if (!matcher.group(0).equals(toSupplementaries("testing"))) 1904 failCount++; 1905 } else { 1906 failCount++; 1907 } 1908 1909 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 1910 matcher = pattern.matcher(toSupplementaries("testing")); 1911 if (matcher.matches()) { 1912 if (!matcher.group(0).equals(toSupplementaries("testing"))) 1913 failCount++; 1914 } else { 1915 failCount++; 1916 } 1917 1918 report("Group0"); 1919 } 1920 1921 private static void findIntTest() throws Exception { 1922 Pattern p = Pattern.compile("blah"); 1923 Matcher m = p.matcher("zzzzblahzzzzzblah"); 1924 boolean result = m.find(2); 1925 if (!result) 1926 failCount++; 1927 1928 p = Pattern.compile("$"); 1929 m = p.matcher("1234567890"); 1930 result = m.find(10); 1931 if (!result) 1932 failCount++; 1933 try { 1934 result = m.find(11); 1935 failCount++; 1936 } catch (IndexOutOfBoundsException e) { 1937 // correct result 1938 } 1939 1940 // Supplementary character test 1941 p = Pattern.compile(toSupplementaries("blah")); 1942 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 1943 result = m.find(2); 1944 if (!result) 1945 failCount++; 1946 1947 report("FindInt"); 1948 } 1949 1950 private static void emptyPatternTest() throws Exception { 1951 Pattern p = Pattern.compile(""); 1952 Matcher m = p.matcher("foo"); 1953 1954 // Should find empty pattern at beginning of input 1955 boolean result = m.find(); 1956 if (result != true) 1957 failCount++; 1958 if (m.start() != 0) 1959 failCount++; 1960 1961 // Should not match entire input if input is not empty 1962 m.reset(); 1963 result = m.matches(); 1964 if (result == true) 1965 failCount++; 1966 1967 try { 1968 m.start(0); 1969 failCount++; 1970 } catch (IllegalStateException e) { 1971 // Correct result 1972 } 1973 1974 // Should match entire input if input is empty 1975 m.reset(""); 1976 result = m.matches(); 1977 if (result != true) 1978 failCount++; 1979 1980 result = Pattern.matches("", ""); 1981 if (result != true) 1982 failCount++; 1983 1984 result = Pattern.matches("", "foo"); 1985 if (result == true) 1986 failCount++; 1987 report("EmptyPattern"); 1988 } 1989 1990 private static void charClassTest() throws Exception { 1991 Pattern pattern = Pattern.compile("blah[ab]]blech"); 1992 check(pattern, "blahb]blech", true); 1993 1994 pattern = Pattern.compile("[abc[def]]"); 1995 check(pattern, "b", true); 1996 1997 // Supplementary character tests 1998 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 1999 check(pattern, toSupplementaries("blahb]blech"), true); 2000 2001 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2002 check(pattern, toSupplementaries("b"), true); 2003 2004 try { 2005 // u00ff when UNICODE_CASE 2006 pattern = Pattern.compile("[ab\u00ffcd]", 2007 Pattern.CASE_INSENSITIVE| 2008 Pattern.UNICODE_CASE); 2009 check(pattern, "ab\u00ffcd", true); 2010 check(pattern, "Ab\u0178Cd", true); 2011 2012 // u00b5 when UNICODE_CASE 2013 pattern = Pattern.compile("[ab\u00b5cd]", 2014 Pattern.CASE_INSENSITIVE| 2015 Pattern.UNICODE_CASE); 2016 check(pattern, "ab\u00b5cd", true); 2017 check(pattern, "Ab\u039cCd", true); 2018 } catch (Exception e) { failCount++; } 2019 2020 /* Special cases 2021 (1)LatinSmallLetterLongS u+017f 2022 (2)LatinSmallLetterDotlessI u+0131 2023 (3)LatineCapitalLetterIWithDotAbove u+0130 2024 (4)KelvinSign u+212a 2025 (5)AngstromSign u+212b 2026 */ 2027 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2028 pattern = Pattern.compile("[sik\u00c5]+", flags); 2029 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2030 failCount++; 2031 2032 report("CharClass"); 2033 } 2034 2035 private static void caretTest() throws Exception { 2036 Pattern pattern = Pattern.compile("\\w*"); 2037 Matcher matcher = pattern.matcher("a#bc#def##g"); 2038 check(matcher, "a"); 2039 check(matcher, ""); 2040 check(matcher, "bc"); 2041 check(matcher, ""); 2042 check(matcher, "def"); 2043 check(matcher, ""); 2044 check(matcher, ""); 2045 check(matcher, "g"); 2046 check(matcher, ""); 2047 if (matcher.find()) 2048 failCount++; 2049 2050 pattern = Pattern.compile("^\\w*"); 2051 matcher = pattern.matcher("a#bc#def##g"); 2052 check(matcher, "a"); 2053 if (matcher.find()) 2054 failCount++; 2055 2056 pattern = Pattern.compile("\\w"); 2057 matcher = pattern.matcher("abc##x"); 2058 check(matcher, "a"); 2059 check(matcher, "b"); 2060 check(matcher, "c"); 2061 check(matcher, "x"); 2062 if (matcher.find()) 2063 failCount++; 2064 2065 pattern = Pattern.compile("^\\w"); 2066 matcher = pattern.matcher("abc##x"); 2067 check(matcher, "a"); 2068 if (matcher.find()) 2069 failCount++; 2070 2071 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2072 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2073 check(matcher, "abc"); 2074 if (matcher.find()) 2075 failCount++; 2076 2077 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2078 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2079 check(matcher, "abc"); 2080 check(matcher, "jkl"); 2081 if (matcher.find()) 2082 failCount++; 2083 2084 pattern = Pattern.compile("^", Pattern.MULTILINE); 2085 matcher = pattern.matcher("this is some text"); 2086 String result = matcher.replaceAll("X"); 2087 if (!result.equals("Xthis is some text")) 2088 failCount++; 2089 2090 pattern = Pattern.compile("^"); 2091 matcher = pattern.matcher("this is some text"); 2092 result = matcher.replaceAll("X"); 2093 if (!result.equals("Xthis is some text")) 2094 failCount++; 2095 2096 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2097 matcher = pattern.matcher("this is some text\n"); 2098 result = matcher.replaceAll("X"); 2099 if (!result.equals("Xthis is some text\n")) 2100 failCount++; 2101 2102 report("Caret"); 2103 } 2104 2105 private static void groupCaptureTest() throws Exception { 2106 // Independent group 2107 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2108 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2109 matcher.find(); 2110 try { 2111 String blah = matcher.group(1); 2112 failCount++; 2113 } catch (IndexOutOfBoundsException ioobe) { 2114 // Good result 2115 } 2116 // Pure group 2117 pattern = Pattern.compile("x+(?:y+)z+"); 2118 matcher = pattern.matcher("xxxyyyzzz"); 2119 matcher.find(); 2120 try { 2121 String blah = matcher.group(1); 2122 failCount++; 2123 } catch (IndexOutOfBoundsException ioobe) { 2124 // Good result 2125 } 2126 2127 // Supplementary character tests 2128 // Independent group 2129 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2130 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2131 matcher.find(); 2132 try { 2133 String blah = matcher.group(1); 2134 failCount++; 2135 } catch (IndexOutOfBoundsException ioobe) { 2136 // Good result 2137 } 2138 // Pure group 2139 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2140 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2141 matcher.find(); 2142 try { 2143 String blah = matcher.group(1); 2144 failCount++; 2145 } catch (IndexOutOfBoundsException ioobe) { 2146 // Good result 2147 } 2148 2149 report("GroupCapture"); 2150 } 2151 2152 private static void backRefTest() throws Exception { 2153 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2154 check(pattern, "zzzaabcazzz", true); 2155 2156 pattern = Pattern.compile("(a*)bc\\1"); 2157 check(pattern, "zzzaabcaazzz", true); 2158 2159 pattern = Pattern.compile("(abc)(def)\\1"); 2160 check(pattern, "abcdefabc", true); 2161 2162 pattern = Pattern.compile("(abc)(def)\\3"); 2163 check(pattern, "abcdefabc", false); 2164 2165 try { 2166 for (int i = 1; i < 10; i++) { 2167 // Make sure backref 1-9 are always accepted 2168 pattern = Pattern.compile("abcdef\\" + i); 2169 // and fail to match if the target group does not exit 2170 check(pattern, "abcdef", false); 2171 } 2172 } catch(PatternSyntaxException e) { 2173 failCount++; 2174 } 2175 2176 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2177 check(pattern, "abcdefghija", false); 2178 check(pattern, "abcdefghija1", true); 2179 2180 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2181 check(pattern, "abcdefghijkk", true); 2182 2183 pattern = Pattern.compile("(a)bcdefghij\\11"); 2184 check(pattern, "abcdefghija1", true); 2185 2186 // Supplementary character tests 2187 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2188 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2189 2190 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2191 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2192 2193 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2194 check(pattern, toSupplementaries("abcdefabc"), true); 2195 2196 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2197 check(pattern, toSupplementaries("abcdefabc"), false); 2198 2199 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2200 check(pattern, toSupplementaries("abcdefghija"), false); 2201 check(pattern, toSupplementaries("abcdefghija1"), true); 2202 2203 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2204 check(pattern, toSupplementaries("abcdefghijkk"), true); 2205 2206 report("BackRef"); 2207 } 2208 2209 /** 2210 * Unicode Technical Report #18, section 2.6 End of Line 2211 * There is no empty line to be matched in the sequence \u000D\u000A 2212 * but there is an empty line in the sequence \u000A\u000D. 2213 */ 2214 private static void anchorTest() throws Exception { 2215 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2216 Matcher m = p.matcher("blah1\r\nblah2"); 2217 m.find(); 2218 m.find(); 2219 if (!m.group().equals("blah2")) 2220 failCount++; 2221 2222 m.reset("blah1\n\rblah2"); 2223 m.find(); 2224 m.find(); 2225 m.find(); 2226 if (!m.group().equals("blah2")) 2227 failCount++; 2228 2229 // Test behavior of $ with \r\n at end of input 2230 p = Pattern.compile(".+$"); 2231 m = p.matcher("blah1\r\n"); 2232 if (!m.find()) 2233 failCount++; 2234 if (!m.group().equals("blah1")) 2235 failCount++; 2236 if (m.find()) 2237 failCount++; 2238 2239 // Test behavior of $ with \r\n at end of input in multiline 2240 p = Pattern.compile(".+$", Pattern.MULTILINE); 2241 m = p.matcher("blah1\r\n"); 2242 if (!m.find()) 2243 failCount++; 2244 if (m.find()) 2245 failCount++; 2246 2247 // Test for $ recognition of \u0085 for bug 4527731 2248 p = Pattern.compile(".+$", Pattern.MULTILINE); 2249 m = p.matcher("blah1\u0085"); 2250 if (!m.find()) 2251 failCount++; 2252 2253 // Supplementary character test 2254 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2255 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2256 m.find(); 2257 m.find(); 2258 if (!m.group().equals(toSupplementaries("blah2"))) 2259 failCount++; 2260 2261 m.reset(toSupplementaries("blah1\n\rblah2")); 2262 m.find(); 2263 m.find(); 2264 m.find(); 2265 if (!m.group().equals(toSupplementaries("blah2"))) 2266 failCount++; 2267 2268 // Test behavior of $ with \r\n at end of input 2269 p = Pattern.compile(".+$"); 2270 m = p.matcher(toSupplementaries("blah1\r\n")); 2271 if (!m.find()) 2272 failCount++; 2273 if (!m.group().equals(toSupplementaries("blah1"))) 2274 failCount++; 2275 if (m.find()) 2276 failCount++; 2277 2278 // Test behavior of $ with \r\n at end of input in multiline 2279 p = Pattern.compile(".+$", Pattern.MULTILINE); 2280 m = p.matcher(toSupplementaries("blah1\r\n")); 2281 if (!m.find()) 2282 failCount++; 2283 if (m.find()) 2284 failCount++; 2285 2286 // Test for $ recognition of \u0085 for bug 4527731 2287 p = Pattern.compile(".+$", Pattern.MULTILINE); 2288 m = p.matcher(toSupplementaries("blah1\u0085")); 2289 if (!m.find()) 2290 failCount++; 2291 2292 report("Anchors"); 2293 } 2294 2295 /** 2296 * A basic sanity test of Matcher.lookingAt(). 2297 */ 2298 private static void lookingAtTest() throws Exception { 2299 Pattern p = Pattern.compile("(ab)(c*)"); 2300 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2301 2302 if (!m.lookingAt()) 2303 failCount++; 2304 2305 if (!m.group().equals(m.group(0))) 2306 failCount++; 2307 2308 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2309 if (m.lookingAt()) 2310 failCount++; 2311 2312 // Supplementary character test 2313 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2314 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2315 2316 if (!m.lookingAt()) 2317 failCount++; 2318 2319 if (!m.group().equals(m.group(0))) 2320 failCount++; 2321 2322 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2323 if (m.lookingAt()) 2324 failCount++; 2325 2326 report("Looking At"); 2327 } 2328 2329 /** 2330 * A basic sanity test of Matcher.matches(). 2331 */ 2332 private static void matchesTest() throws Exception { 2333 // matches() 2334 Pattern p = Pattern.compile("ulb(c*)"); 2335 Matcher m = p.matcher("ulbcccccc"); 2336 if (!m.matches()) 2337 failCount++; 2338 2339 // find() but not matches() 2340 m.reset("zzzulbcccccc"); 2341 if (m.matches()) 2342 failCount++; 2343 2344 // lookingAt() but not matches() 2345 m.reset("ulbccccccdef"); 2346 if (m.matches()) 2347 failCount++; 2348 2349 // matches() 2350 p = Pattern.compile("a|ad"); 2351 m = p.matcher("ad"); 2352 if (!m.matches()) 2353 failCount++; 2354 2355 // Supplementary character test 2356 // matches() 2357 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2358 m = p.matcher(toSupplementaries("ulbcccccc")); 2359 if (!m.matches()) 2360 failCount++; 2361 2362 // find() but not matches() 2363 m.reset(toSupplementaries("zzzulbcccccc")); 2364 if (m.matches()) 2365 failCount++; 2366 2367 // lookingAt() but not matches() 2368 m.reset(toSupplementaries("ulbccccccdef")); 2369 if (m.matches()) 2370 failCount++; 2371 2372 // matches() 2373 p = Pattern.compile(toSupplementaries("a|ad")); 2374 m = p.matcher(toSupplementaries("ad")); 2375 if (!m.matches()) 2376 failCount++; 2377 2378 report("Matches"); 2379 } 2380 2381 /** 2382 * A basic sanity test of Pattern.matches(). 2383 */ 2384 private static void patternMatchesTest() throws Exception { 2385 // matches() 2386 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2387 toSupplementaries("ulbcccccc"))) 2388 failCount++; 2389 2390 // find() but not matches() 2391 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2392 toSupplementaries("zzzulbcccccc"))) 2393 failCount++; 2394 2395 // lookingAt() but not matches() 2396 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2397 toSupplementaries("ulbccccccdef"))) 2398 failCount++; 2399 2400 // Supplementary character test 2401 // matches() 2402 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2403 toSupplementaries("ulbcccccc"))) 2404 failCount++; 2405 2406 // find() but not matches() 2407 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2408 toSupplementaries("zzzulbcccccc"))) 2409 failCount++; 2410 2411 // lookingAt() but not matches() 2412 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2413 toSupplementaries("ulbccccccdef"))) 2414 failCount++; 2415 2416 report("Pattern Matches"); 2417 } 2418 2419 /** 2420 * Canonical equivalence testing. Tests the ability of the engine 2421 * to match sequences that are not explicitly specified in the 2422 * pattern when they are considered equivalent by the Unicode Standard. 2423 */ 2424 private static void ceTest() throws Exception { 2425 // Decomposed char outside char classes 2426 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2427 Matcher m = p.matcher("test\u00e5"); 2428 if (!m.matches()) 2429 failCount++; 2430 2431 m.reset("testa\u030a"); 2432 if (!m.matches()) 2433 failCount++; 2434 2435 // Composed char outside char classes 2436 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2437 m = p.matcher("test\u00e5"); 2438 if (!m.matches()) 2439 failCount++; 2440 2441 m.reset("testa\u030a"); 2442 if (!m.find()) 2443 failCount++; 2444 2445 // Decomposed char inside a char class 2446 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2447 m = p.matcher("test\u00e5"); 2448 if (!m.find()) 2449 failCount++; 2450 2451 m.reset("testa\u030a"); 2452 if (!m.find()) 2453 failCount++; 2454 2455 // Composed char inside a char class 2456 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2457 m = p.matcher("test\u00e5"); 2458 if (!m.find()) 2459 failCount++; 2460 2461 m.reset("testa\u0300"); 2462 if (!m.find()) 2463 failCount++; 2464 2465 m.reset("testa\u030a"); 2466 if (!m.find()) 2467 failCount++; 2468 2469 // Marks that cannot legally change order and be equivalent 2470 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2471 check(p, "testa\u0308\u0300", true); 2472 check(p, "testa\u0300\u0308", false); 2473 2474 // Marks that can legally change order and be equivalent 2475 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2476 check(p, "testa\u0308\u0323", true); 2477 check(p, "testa\u0323\u0308", true); 2478 2479 // Test all equivalences of the sequence a\u0308\u0323\u0300 2480 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2481 check(p, "testa\u0308\u0323\u0300", true); 2482 check(p, "testa\u0323\u0308\u0300", true); 2483 check(p, "testa\u0308\u0300\u0323", true); 2484 check(p, "test\u00e4\u0323\u0300", true); 2485 check(p, "test\u00e4\u0300\u0323", true); 2486 2487 /* 2488 * The following canonical equivalence tests don't work. Bug id: 4916384. 2489 * 2490 // Decomposed hangul (jamos) 2491 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ); 2492 m = p.matcher("\u1100\u1161"); 2493 if (!m.matches()) 2494 failCount++; 2495 2496 m.reset("\uac00"); 2497 if (!m.matches()) 2498 failCount++; 2499 2500 // Composed hangul 2501 p = Pattern.compile("\uac00", Pattern.CANON_EQ); 2502 m = p.matcher("\u1100\u1161"); 2503 if (!m.matches()) 2504 failCount++; 2505 2506 m.reset("\uac00"); 2507 if (!m.matches()) 2508 failCount++; 2509 2510 // Decomposed supplementary outside char classes 2511 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ); 2512 m = p.matcher("test\ud834\uddc0"); 2513 if (!m.matches()) 2514 failCount++; 2515 2516 m.reset("test\ud834\uddbc\ud834\udd6f"); 2517 if (!m.matches()) 2518 failCount++; 2519 2520 // Composed supplementary outside char classes 2521 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ); 2522 m.reset("test\ud834\uddbc\ud834\udd6f"); 2523 if (!m.matches()) 2524 failCount++; 2525 2526 m = p.matcher("test\ud834\uddc0"); 2527 if (!m.matches()) 2528 failCount++; 2529 2530 */ 2531 2532 report("Canonical Equivalence"); 2533 } 2534 2535 /** 2536 * A basic sanity test of Matcher.replaceAll(). 2537 */ 2538 private static void globalSubstitute() throws Exception { 2539 // Global substitution with a literal 2540 Pattern p = Pattern.compile("(ab)(c*)"); 2541 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2542 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2543 failCount++; 2544 2545 m.reset("zzzabccczzzabcczzzabccczzz"); 2546 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2547 failCount++; 2548 2549 // Global substitution with groups 2550 m.reset("zzzabccczzzabcczzzabccczzz"); 2551 String result = m.replaceAll("$1"); 2552 if (!result.equals("zzzabzzzabzzzabzzz")) 2553 failCount++; 2554 2555 // Supplementary character test 2556 // Global substitution with a literal 2557 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2558 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2559 if (!m.replaceAll(toSupplementaries("test")). 2560 equals(toSupplementaries("testzzztestzzztest"))) 2561 failCount++; 2562 2563 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2564 if (!m.replaceAll(toSupplementaries("test")). 2565 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2566 failCount++; 2567 2568 // Global substitution with groups 2569 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2570 result = m.replaceAll("$1"); 2571 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2572 failCount++; 2573 2574 report("Global Substitution"); 2575 } 2576 2577 /** 2578 * Tests the usage of Matcher.appendReplacement() with literal 2579 * and group substitutions. 2580 */ 2581 private static void stringbufferSubstitute() throws Exception { 2582 // SB substitution with literal 2583 String blah = "zzzblahzzz"; 2584 Pattern p = Pattern.compile("blah"); 2585 Matcher m = p.matcher(blah); 2586 StringBuffer result = new StringBuffer(); 2587 try { 2588 m.appendReplacement(result, "blech"); 2589 failCount++; 2590 } catch (IllegalStateException e) { 2591 } 2592 m.find(); 2593 m.appendReplacement(result, "blech"); 2594 if (!result.toString().equals("zzzblech")) 2595 failCount++; 2596 2597 m.appendTail(result); 2598 if (!result.toString().equals("zzzblechzzz")) 2599 failCount++; 2600 2601 // SB substitution with groups 2602 blah = "zzzabcdzzz"; 2603 p = Pattern.compile("(ab)(cd)*"); 2604 m = p.matcher(blah); 2605 result = new StringBuffer(); 2606 try { 2607 m.appendReplacement(result, "$1"); 2608 failCount++; 2609 } catch (IllegalStateException e) { 2610 } 2611 m.find(); 2612 m.appendReplacement(result, "$1"); 2613 if (!result.toString().equals("zzzab")) 2614 failCount++; 2615 2616 m.appendTail(result); 2617 if (!result.toString().equals("zzzabzzz")) 2618 failCount++; 2619 2620 // SB substitution with 3 groups 2621 blah = "zzzabcdcdefzzz"; 2622 p = Pattern.compile("(ab)(cd)*(ef)"); 2623 m = p.matcher(blah); 2624 result = new StringBuffer(); 2625 try { 2626 m.appendReplacement(result, "$1w$2w$3"); 2627 failCount++; 2628 } catch (IllegalStateException e) { 2629 } 2630 m.find(); 2631 m.appendReplacement(result, "$1w$2w$3"); 2632 if (!result.toString().equals("zzzabwcdwef")) 2633 failCount++; 2634 2635 m.appendTail(result); 2636 if (!result.toString().equals("zzzabwcdwefzzz")) 2637 failCount++; 2638 2639 // SB substitution with groups and three matches 2640 // skipping middle match 2641 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2642 p = Pattern.compile("(ab)(cd*)"); 2643 m = p.matcher(blah); 2644 result = new StringBuffer(); 2645 try { 2646 m.appendReplacement(result, "$1"); 2647 failCount++; 2648 } catch (IllegalStateException e) { 2649 } 2650 m.find(); 2651 m.appendReplacement(result, "$1"); 2652 if (!result.toString().equals("zzzab")) 2653 failCount++; 2654 2655 m.find(); 2656 m.find(); 2657 m.appendReplacement(result, "$2"); 2658 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2659 failCount++; 2660 2661 m.appendTail(result); 2662 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2663 failCount++; 2664 2665 // Check to make sure escaped $ is ignored 2666 blah = "zzzabcdcdefzzz"; 2667 p = Pattern.compile("(ab)(cd)*(ef)"); 2668 m = p.matcher(blah); 2669 result = new StringBuffer(); 2670 m.find(); 2671 m.appendReplacement(result, "$1w\\$2w$3"); 2672 if (!result.toString().equals("zzzabw$2wef")) 2673 failCount++; 2674 2675 m.appendTail(result); 2676 if (!result.toString().equals("zzzabw$2wefzzz")) 2677 failCount++; 2678 2679 // Check to make sure a reference to nonexistent group causes error 2680 blah = "zzzabcdcdefzzz"; 2681 p = Pattern.compile("(ab)(cd)*(ef)"); 2682 m = p.matcher(blah); 2683 result = new StringBuffer(); 2684 m.find(); 2685 try { 2686 m.appendReplacement(result, "$1w$5w$3"); 2687 failCount++; 2688 } catch (IndexOutOfBoundsException ioobe) { 2689 // Correct result 2690 } 2691 2692 // Check double digit group references 2693 blah = "zzz123456789101112zzz"; 2694 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2695 m = p.matcher(blah); 2696 result = new StringBuffer(); 2697 m.find(); 2698 m.appendReplacement(result, "$1w$11w$3"); 2699 if (!result.toString().equals("zzz1w11w3")) 2700 failCount++; 2701 2702 // Check to make sure it backs off $15 to $1 if only three groups 2703 blah = "zzzabcdcdefzzz"; 2704 p = Pattern.compile("(ab)(cd)*(ef)"); 2705 m = p.matcher(blah); 2706 result = new StringBuffer(); 2707 m.find(); 2708 m.appendReplacement(result, "$1w$15w$3"); 2709 if (!result.toString().equals("zzzabwab5wef")) 2710 failCount++; 2711 2712 2713 // Supplementary character test 2714 // SB substitution with literal 2715 blah = toSupplementaries("zzzblahzzz"); 2716 p = Pattern.compile(toSupplementaries("blah")); 2717 m = p.matcher(blah); 2718 result = new StringBuffer(); 2719 try { 2720 m.appendReplacement(result, toSupplementaries("blech")); 2721 failCount++; 2722 } catch (IllegalStateException e) { 2723 } 2724 m.find(); 2725 m.appendReplacement(result, toSupplementaries("blech")); 2726 if (!result.toString().equals(toSupplementaries("zzzblech"))) 2727 failCount++; 2728 2729 m.appendTail(result); 2730 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 2731 failCount++; 2732 2733 // SB substitution with groups 2734 blah = toSupplementaries("zzzabcdzzz"); 2735 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 2736 m = p.matcher(blah); 2737 result = new StringBuffer(); 2738 try { 2739 m.appendReplacement(result, "$1"); 2740 failCount++; 2741 } catch (IllegalStateException e) { 2742 } 2743 m.find(); 2744 m.appendReplacement(result, "$1"); 2745 if (!result.toString().equals(toSupplementaries("zzzab"))) 2746 failCount++; 2747 2748 m.appendTail(result); 2749 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 2750 failCount++; 2751 2752 // SB substitution with 3 groups 2753 blah = toSupplementaries("zzzabcdcdefzzz"); 2754 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2755 m = p.matcher(blah); 2756 result = new StringBuffer(); 2757 try { 2758 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2759 failCount++; 2760 } catch (IllegalStateException e) { 2761 } 2762 m.find(); 2763 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2764 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 2765 failCount++; 2766 2767 m.appendTail(result); 2768 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 2769 failCount++; 2770 2771 // SB substitution with groups and three matches 2772 // skipping middle match 2773 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 2774 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 2775 m = p.matcher(blah); 2776 result = new StringBuffer(); 2777 try { 2778 m.appendReplacement(result, "$1"); 2779 failCount++; 2780 } catch (IllegalStateException e) { 2781 } 2782 m.find(); 2783 m.appendReplacement(result, "$1"); 2784 if (!result.toString().equals(toSupplementaries("zzzab"))) 2785 failCount++; 2786 2787 m.find(); 2788 m.find(); 2789 m.appendReplacement(result, "$2"); 2790 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 2791 failCount++; 2792 2793 m.appendTail(result); 2794 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 2795 failCount++; 2796 2797 // Check to make sure escaped $ is ignored 2798 blah = toSupplementaries("zzzabcdcdefzzz"); 2799 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2800 m = p.matcher(blah); 2801 result = new StringBuffer(); 2802 m.find(); 2803 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 2804 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 2805 failCount++; 2806 2807 m.appendTail(result); 2808 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 2809 failCount++; 2810 2811 // Check to make sure a reference to nonexistent group causes error 2812 blah = toSupplementaries("zzzabcdcdefzzz"); 2813 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2814 m = p.matcher(blah); 2815 result = new StringBuffer(); 2816 m.find(); 2817 try { 2818 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 2819 failCount++; 2820 } catch (IndexOutOfBoundsException ioobe) { 2821 // Correct result 2822 } 2823 2824 // Check double digit group references 2825 blah = toSupplementaries("zzz123456789101112zzz"); 2826 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2827 m = p.matcher(blah); 2828 result = new StringBuffer(); 2829 m.find(); 2830 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 2831 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 2832 failCount++; 2833 2834 // Check to make sure it backs off $15 to $1 if only three groups 2835 blah = toSupplementaries("zzzabcdcdefzzz"); 2836 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2837 m = p.matcher(blah); 2838 result = new StringBuffer(); 2839 m.find(); 2840 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 2841 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 2842 failCount++; 2843 2844 // Check nothing has been appended into the output buffer if 2845 // the replacement string triggers IllegalArgumentException. 2846 p = Pattern.compile("(abc)"); 2847 m = p.matcher("abcd"); 2848 result = new StringBuffer(); 2849 m.find(); 2850 try { 2851 m.appendReplacement(result, ("xyz$g")); 2852 failCount++; 2853 } catch (IllegalArgumentException iae) { 2854 if (result.length() != 0) 2855 failCount++; 2856 } 2857 2858 report("SB Substitution"); 2859 } 2860 2861 /* 2862 * 5 groups of characters are created to make a substitution string. 2863 * A base string will be created including random lead chars, the 2864 * substitution string, and random trailing chars. 2865 * A pattern containing the 5 groups is searched for and replaced with: 2866 * random group + random string + random group. 2867 * The results are checked for correctness. 2868 */ 2869 private static void substitutionBasher() { 2870 for (int runs = 0; runs<1000; runs++) { 2871 // Create a base string to work in 2872 int leadingChars = generator.nextInt(10); 2873 StringBuffer baseBuffer = new StringBuffer(100); 2874 String leadingString = getRandomAlphaString(leadingChars); 2875 baseBuffer.append(leadingString); 2876 2877 // Create 5 groups of random number of random chars 2878 // Create the string to substitute 2879 // Create the pattern string to search for 2880 StringBuffer bufferToSub = new StringBuffer(25); 2881 StringBuffer bufferToPat = new StringBuffer(50); 2882 String[] groups = new String[5]; 2883 for(int i=0; i<5; i++) { 2884 int aGroupSize = generator.nextInt(5)+1; 2885 groups[i] = getRandomAlphaString(aGroupSize); 2886 bufferToSub.append(groups[i]); 2887 bufferToPat.append('('); 2888 bufferToPat.append(groups[i]); 2889 bufferToPat.append(')'); 2890 } 2891 String stringToSub = bufferToSub.toString(); 2892 String pattern = bufferToPat.toString(); 2893 2894 // Place sub string into working string at random index 2895 baseBuffer.append(stringToSub); 2896 2897 // Append random chars to end 2898 int trailingChars = generator.nextInt(10); 2899 String trailingString = getRandomAlphaString(trailingChars); 2900 baseBuffer.append(trailingString); 2901 String baseString = baseBuffer.toString(); 2902 2903 // Create test pattern and matcher 2904 Pattern p = Pattern.compile(pattern); 2905 Matcher m = p.matcher(baseString); 2906 2907 // Reject candidate if pattern happens to start early 2908 m.find(); 2909 if (m.start() < leadingChars) 2910 continue; 2911 2912 // Reject candidate if more than one match 2913 if (m.find()) 2914 continue; 2915 2916 // Construct a replacement string with : 2917 // random group + random string + random group 2918 StringBuffer bufferToRep = new StringBuffer(); 2919 int groupIndex1 = generator.nextInt(5); 2920 bufferToRep.append("$" + (groupIndex1 + 1)); 2921 String randomMidString = getRandomAlphaString(5); 2922 bufferToRep.append(randomMidString); 2923 int groupIndex2 = generator.nextInt(5); 2924 bufferToRep.append("$" + (groupIndex2 + 1)); 2925 String replacement = bufferToRep.toString(); 2926 2927 // Do the replacement 2928 String result = m.replaceAll(replacement); 2929 2930 // Construct expected result 2931 StringBuffer bufferToRes = new StringBuffer(); 2932 bufferToRes.append(leadingString); 2933 bufferToRes.append(groups[groupIndex1]); 2934 bufferToRes.append(randomMidString); 2935 bufferToRes.append(groups[groupIndex2]); 2936 bufferToRes.append(trailingString); 2937 String expectedResult = bufferToRes.toString(); 2938 2939 // Check results 2940 if (!result.equals(expectedResult)) 2941 failCount++; 2942 } 2943 2944 report("Substitution Basher"); 2945 } 2946 2947 /** 2948 * Checks the handling of some escape sequences that the Pattern 2949 * class should process instead of the java compiler. These are 2950 * not in the file because the escapes should be be processed 2951 * by the Pattern class when the regex is compiled. 2952 */ 2953 private static void escapes() throws Exception { 2954 Pattern p = Pattern.compile("\\043"); 2955 Matcher m = p.matcher("#"); 2956 if (!m.find()) 2957 failCount++; 2958 2959 p = Pattern.compile("\\x23"); 2960 m = p.matcher("#"); 2961 if (!m.find()) 2962 failCount++; 2963 2964 p = Pattern.compile("\\u0023"); 2965 m = p.matcher("#"); 2966 if (!m.find()) 2967 failCount++; 2968 2969 report("Escape sequences"); 2970 } 2971 2972 /** 2973 * Checks the handling of blank input situations. These 2974 * tests are incompatible with my test file format. 2975 */ 2976 private static void blankInput() throws Exception { 2977 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 2978 Matcher m = p.matcher(""); 2979 if (m.find()) 2980 failCount++; 2981 2982 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 2983 m = p.matcher(""); 2984 if (!m.find()) 2985 failCount++; 2986 2987 p = Pattern.compile("abc"); 2988 m = p.matcher(""); 2989 if (m.find()) 2990 failCount++; 2991 2992 p = Pattern.compile("a*"); 2993 m = p.matcher(""); 2994 if (!m.find()) 2995 failCount++; 2996 2997 report("Blank input"); 2998 } 2999 3000 /** 3001 * Tests the Boyer-Moore pattern matching of a character sequence 3002 * on randomly generated patterns. 3003 */ 3004 private static void bm() throws Exception { 3005 doBnM('a'); 3006 report("Boyer Moore (ASCII)"); 3007 3008 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3009 report("Boyer Moore (Supplementary)"); 3010 } 3011 3012 private static void doBnM(int baseCharacter) throws Exception { 3013 int achar=0; 3014 3015 for (int i=0; i<100; i++) { 3016 // Create a short pattern to search for 3017 int patternLength = generator.nextInt(7) + 4; 3018 StringBuffer patternBuffer = new StringBuffer(patternLength); 3019 for (int x=0; x<patternLength; x++) { 3020 int ch = baseCharacter + generator.nextInt(26); 3021 if (Character.isSupplementaryCodePoint(ch)) { 3022 patternBuffer.append(Character.toChars(ch)); 3023 } else { 3024 patternBuffer.append((char)ch); 3025 } 3026 } 3027 String pattern = patternBuffer.toString(); 3028 Pattern p = Pattern.compile(pattern); 3029 3030 // Create a buffer with random ASCII chars that does 3031 // not match the sample 3032 String toSearch = null; 3033 StringBuffer s = null; 3034 Matcher m = p.matcher(""); 3035 do { 3036 s = new StringBuffer(100); 3037 for (int x=0; x<100; x++) { 3038 int ch = baseCharacter + generator.nextInt(26); 3039 if (Character.isSupplementaryCodePoint(ch)) { 3040 s.append(Character.toChars(ch)); 3041 } else { 3042 s.append((char)ch); 3043 } 3044 } 3045 toSearch = s.toString(); 3046 m.reset(toSearch); 3047 } while (m.find()); 3048 3049 // Insert the pattern at a random spot 3050 int insertIndex = generator.nextInt(99); 3051 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3052 insertIndex++; 3053 s = s.insert(insertIndex, pattern); 3054 toSearch = s.toString(); 3055 3056 // Make sure that the pattern is found 3057 m.reset(toSearch); 3058 if (!m.find()) 3059 failCount++; 3060 3061 // Make sure that the match text is the pattern 3062 if (!m.group().equals(pattern)) 3063 failCount++; 3064 3065 // Make sure match occured at insertion point 3066 if (m.start() != insertIndex) 3067 failCount++; 3068 } 3069 } 3070 3071 /** 3072 * Tests the matching of slices on randomly generated patterns. 3073 * The Boyer-Moore optimization is not done on these patterns 3074 * because it uses unicode case folding. 3075 */ 3076 private static void slice() throws Exception { 3077 doSlice(Character.MAX_VALUE); 3078 report("Slice"); 3079 3080 doSlice(Character.MAX_CODE_POINT); 3081 report("Slice (Supplementary)"); 3082 } 3083 3084 private static void doSlice(int maxCharacter) throws Exception { 3085 Random generator = new Random(); 3086 int achar=0; 3087 3088 for (int i=0; i<100; i++) { 3089 // Create a short pattern to search for 3090 int patternLength = generator.nextInt(7) + 4; 3091 StringBuffer patternBuffer = new StringBuffer(patternLength); 3092 for (int x=0; x<patternLength; x++) { 3093 int randomChar = 0; 3094 while (!Character.isLetterOrDigit(randomChar)) 3095 randomChar = generator.nextInt(maxCharacter); 3096 if (Character.isSupplementaryCodePoint(randomChar)) { 3097 patternBuffer.append(Character.toChars(randomChar)); 3098 } else { 3099 patternBuffer.append((char) randomChar); 3100 } 3101 } 3102 String pattern = patternBuffer.toString(); 3103 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3104 3105 // Create a buffer with random chars that does not match the sample 3106 String toSearch = null; 3107 StringBuffer s = null; 3108 Matcher m = p.matcher(""); 3109 do { 3110 s = new StringBuffer(100); 3111 for (int x=0; x<100; x++) { 3112 int randomChar = 0; 3113 while (!Character.isLetterOrDigit(randomChar)) 3114 randomChar = generator.nextInt(maxCharacter); 3115 if (Character.isSupplementaryCodePoint(randomChar)) { 3116 s.append(Character.toChars(randomChar)); 3117 } else { 3118 s.append((char) randomChar); 3119 } 3120 } 3121 toSearch = s.toString(); 3122 m.reset(toSearch); 3123 } while (m.find()); 3124 3125 // Insert the pattern at a random spot 3126 int insertIndex = generator.nextInt(99); 3127 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3128 insertIndex++; 3129 s = s.insert(insertIndex, pattern); 3130 toSearch = s.toString(); 3131 3132 // Make sure that the pattern is found 3133 m.reset(toSearch); 3134 if (!m.find()) 3135 failCount++; 3136 3137 // Make sure that the match text is the pattern 3138 if (!m.group().equals(pattern)) 3139 failCount++; 3140 3141 // Make sure match occured at insertion point 3142 if (m.start() != insertIndex) 3143 failCount++; 3144 } 3145 } 3146 3147 private static void explainFailure(String pattern, String data, 3148 String expected, String actual) { 3149 System.err.println("----------------------------------------"); 3150 System.err.println("Pattern = "+pattern); 3151 System.err.println("Data = "+data); 3152 System.err.println("Expected = " + expected); 3153 System.err.println("Actual = " + actual); 3154 } 3155 3156 private static void explainFailure(String pattern, String data, 3157 Throwable t) { 3158 System.err.println("----------------------------------------"); 3159 System.err.println("Pattern = "+pattern); 3160 System.err.println("Data = "+data); 3161 t.printStackTrace(System.err); 3162 } 3163 3164 // Testing examples from a file 3165 3166 /** 3167 * Goes through the file "TestCases.txt" and creates many patterns 3168 * described in the file, matching the patterns against input lines in 3169 * the file, and comparing the results against the correct results 3170 * also found in the file. The file format is described in comments 3171 * at the head of the file. 3172 */ 3173 private static void processFile(String fileName) throws Exception { 3174 File testCases = new File(System.getProperty("test.src", "."), 3175 fileName); 3176 FileInputStream in = new FileInputStream(testCases); 3177 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3178 3179 // Process next test case. 3180 String aLine; 3181 while((aLine = r.readLine()) != null) { 3182 // Read a line for pattern 3183 String patternString = grabLine(r); 3184 Pattern p = null; 3185 try { 3186 p = compileTestPattern(patternString); 3187 } catch (PatternSyntaxException e) { 3188 String dataString = grabLine(r); 3189 String expectedResult = grabLine(r); 3190 if (expectedResult.startsWith("error")) 3191 continue; 3192 explainFailure(patternString, dataString, e); 3193 failCount++; 3194 continue; 3195 } 3196 3197 // Read a line for input string 3198 String dataString = grabLine(r); 3199 Matcher m = p.matcher(dataString); 3200 StringBuffer result = new StringBuffer(); 3201 3202 // Check for IllegalStateExceptions before a match 3203 failCount += preMatchInvariants(m); 3204 3205 boolean found = m.find(); 3206 3207 if (found) 3208 failCount += postTrueMatchInvariants(m); 3209 else 3210 failCount += postFalseMatchInvariants(m); 3211 3212 if (found) { 3213 result.append("true "); 3214 result.append(m.group(0) + " "); 3215 } else { 3216 result.append("false "); 3217 } 3218 3219 result.append(m.groupCount()); 3220 3221 if (found) { 3222 for (int i=1; i<m.groupCount()+1; i++) 3223 if (m.group(i) != null) 3224 result.append(" " +m.group(i)); 3225 } 3226 3227 // Read a line for the expected result 3228 String expectedResult = grabLine(r); 3229 3230 if (!result.toString().equals(expectedResult)) { 3231 explainFailure(patternString, dataString, expectedResult, result.toString()); 3232 failCount++; 3233 } 3234 } 3235 3236 report(fileName); 3237 } 3238 3239 private static int preMatchInvariants(Matcher m) { 3240 int failCount = 0; 3241 try { 3242 m.start(); 3243 failCount++; 3244 } catch (IllegalStateException ise) {} 3245 try { 3246 m.end(); 3247 failCount++; 3248 } catch (IllegalStateException ise) {} 3249 try { 3250 m.group(); 3251 failCount++; 3252 } catch (IllegalStateException ise) {} 3253 return failCount; 3254 } 3255 3256 private static int postFalseMatchInvariants(Matcher m) { 3257 int failCount = 0; 3258 try { 3259 m.group(); 3260 failCount++; 3261 } catch (IllegalStateException ise) {} 3262 try { 3263 m.start(); 3264 failCount++; 3265 } catch (IllegalStateException ise) {} 3266 try { 3267 m.end(); 3268 failCount++; 3269 } catch (IllegalStateException ise) {} 3270 return failCount; 3271 } 3272 3273 private static int postTrueMatchInvariants(Matcher m) { 3274 int failCount = 0; 3275 //assert(m.start() = m.start(0); 3276 if (m.start() != m.start(0)) 3277 failCount++; 3278 //assert(m.end() = m.end(0); 3279 if (m.start() != m.start(0)) 3280 failCount++; 3281 //assert(m.group() = m.group(0); 3282 if (!m.group().equals(m.group(0))) 3283 failCount++; 3284 try { 3285 m.group(50); 3286 failCount++; 3287 } catch (IndexOutOfBoundsException ise) {} 3288 3289 return failCount; 3290 } 3291 3292 private static Pattern compileTestPattern(String patternString) { 3293 if (!patternString.startsWith("'")) { 3294 return Pattern.compile(patternString); 3295 } 3296 3297 int break1 = patternString.lastIndexOf("'"); 3298 String flagString = patternString.substring( 3299 break1+1, patternString.length()); 3300 patternString = patternString.substring(1, break1); 3301 3302 if (flagString.equals("i")) 3303 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3304 3305 if (flagString.equals("m")) 3306 return Pattern.compile(patternString, Pattern.MULTILINE); 3307 3308 return Pattern.compile(patternString); 3309 } 3310 3311 /** 3312 * Reads a line from the input file. Keeps reading lines until a non 3313 * empty non comment line is read. If the line contains a \n then 3314 * these two characters are replaced by a newline char. If a \\uxxxx 3315 * sequence is read then the sequence is replaced by the unicode char. 3316 */ 3317 private static String grabLine(BufferedReader r) throws Exception { 3318 int index = 0; 3319 String line = r.readLine(); 3320 while (line.startsWith("//") || line.length() < 1) 3321 line = r.readLine(); 3322 while ((index = line.indexOf("\\n")) != -1) { 3323 StringBuffer temp = new StringBuffer(line); 3324 temp.replace(index, index+2, "\n"); 3325 line = temp.toString(); 3326 } 3327 while ((index = line.indexOf("\\u")) != -1) { 3328 StringBuffer temp = new StringBuffer(line); 3329 String value = temp.substring(index+2, index+6); 3330 char aChar = (char)Integer.parseInt(value, 16); 3331 String unicodeChar = "" + aChar; 3332 temp.replace(index, index+6, unicodeChar); 3333 line = temp.toString(); 3334 } 3335 3336 return line; 3337 } 3338 3339 private static void check(Pattern p, String s, String g, String expected) { 3340 Matcher m = p.matcher(s); 3341 m.find(); 3342 if (!m.group(g).equals(expected)) 3343 failCount++; 3344 } 3345 3346 private static void checkReplaceFirst(String p, String s, String r, String expected) 3347 { 3348 if (!expected.equals(Pattern.compile(p) 3349 .matcher(s) 3350 .replaceFirst(r))) 3351 failCount++; 3352 } 3353 3354 private static void checkReplaceAll(String p, String s, String r, String expected) 3355 { 3356 if (!expected.equals(Pattern.compile(p) 3357 .matcher(s) 3358 .replaceAll(r))) 3359 failCount++; 3360 } 3361 3362 private static void checkExpectedFail(String p) { 3363 try { 3364 Pattern.compile(p); 3365 } catch (PatternSyntaxException pse) { 3366 //pse.printStackTrace(); 3367 return; 3368 } 3369 failCount++; 3370 } 3371 3372 private static void checkExpectedFail(Matcher m, String g) { 3373 m.find(); 3374 try { 3375 m.group(g); 3376 } catch (IllegalArgumentException iae) { 3377 //iae.printStackTrace(); 3378 return; 3379 } catch (NullPointerException npe) { 3380 return; 3381 } 3382 failCount++; 3383 } 3384 3385 3386 private static void namedGroupCaptureTest() throws Exception { 3387 check(Pattern.compile("x+(?<gname>y+)z+"), 3388 "xxxyyyzzz", 3389 "gname", 3390 "yyy"); 3391 3392 check(Pattern.compile("x+(?<gname8>y+)z+"), 3393 "xxxyyyzzz", 3394 "gname8", 3395 "yyy"); 3396 3397 //backref 3398 Pattern pattern = Pattern.compile("(a*)bc\\1"); 3399 check(pattern, "zzzaabcazzz", true); // found "abca" 3400 3401 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 3402 "zzzaabcaazzz", true); 3403 3404 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 3405 "abcdefabc", true); 3406 3407 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 3408 "abcdefghijkk", true); 3409 3410 // Supplementary character tests 3411 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3412 toSupplementaries("zzzaabcazzz"), true); 3413 3414 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3415 toSupplementaries("zzzaabcaazzz"), true); 3416 3417 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 3418 toSupplementaries("abcdefabc"), true); 3419 3420 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 3421 "(?<gname>" + 3422 toSupplementaries("k)") + "\\k<gname>"), 3423 toSupplementaries("abcdefghijkk"), true); 3424 3425 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 3426 "xxxyyyzzzyyy", 3427 "gname", 3428 "yyy"); 3429 3430 //replaceFirst/All 3431 checkReplaceFirst("(?<gn>ab)(c*)", 3432 "abccczzzabcczzzabccc", 3433 "${gn}", 3434 "abzzzabcczzzabccc"); 3435 3436 checkReplaceAll("(?<gn>ab)(c*)", 3437 "abccczzzabcczzzabccc", 3438 "${gn}", 3439 "abzzzabzzzab"); 3440 3441 3442 checkReplaceFirst("(?<gn>ab)(c*)", 3443 "zzzabccczzzabcczzzabccczzz", 3444 "${gn}", 3445 "zzzabzzzabcczzzabccczzz"); 3446 3447 checkReplaceAll("(?<gn>ab)(c*)", 3448 "zzzabccczzzabcczzzabccczzz", 3449 "${gn}", 3450 "zzzabzzzabzzzabzzz"); 3451 3452 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 3453 "zzzabccczzzabcczzzabccczzz", 3454 "${gn2}", 3455 "zzzccczzzabcczzzabccczzz"); 3456 3457 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 3458 "zzzabccczzzabcczzzabccczzz", 3459 "${gn2}", 3460 "zzzccczzzcczzzccczzz"); 3461 3462 //toSupplementaries("(ab)(c*)")); 3463 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 3464 ")(?<gn2>" + toSupplementaries("c") + "*)", 3465 toSupplementaries("abccczzzabcczzzabccc"), 3466 "${gn1}", 3467 toSupplementaries("abzzzabcczzzabccc")); 3468 3469 3470 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 3471 ")(?<gn2>" + toSupplementaries("c") + "*)", 3472 toSupplementaries("abccczzzabcczzzabccc"), 3473 "${gn1}", 3474 toSupplementaries("abzzzabzzzab")); 3475 3476 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 3477 ")(?<gn2>" + toSupplementaries("c") + "*)", 3478 toSupplementaries("abccczzzabcczzzabccc"), 3479 "${gn2}", 3480 toSupplementaries("ccczzzabcczzzabccc")); 3481 3482 3483 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 3484 ")(?<gn2>" + toSupplementaries("c") + "*)", 3485 toSupplementaries("abccczzzabcczzzabccc"), 3486 "${gn2}", 3487 toSupplementaries("ccczzzcczzzccc")); 3488 3489 checkReplaceFirst("(?<dog>Dog)AndCat", 3490 "zzzDogAndCatzzzDogAndCatzzz", 3491 "${dog}", 3492 "zzzDogzzzDogAndCatzzz"); 3493 3494 3495 checkReplaceAll("(?<dog>Dog)AndCat", 3496 "zzzDogAndCatzzzDogAndCatzzz", 3497 "${dog}", 3498 "zzzDogzzzDogzzz"); 3499 3500 // backref in Matcher & String 3501 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") || 3502 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 3503 failCount++; 3504 3505 // negative 3506 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 3507 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 3508 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 3509 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 3510 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 3511 checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 3512 "gnameX"); 3513 checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 3514 null); 3515 report("NamedGroupCapture"); 3516 } 3517 3518 // This is for bug 6919132 3519 private static void nonBmpClassComplementTest() throws Exception { 3520 Pattern p = Pattern.compile("\\P{Lu}"); 3521 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 3522 if (m.find() && m.start() == 1) 3523 failCount++; 3524 3525 // from a unicode category 3526 p = Pattern.compile("\\P{Lu}"); 3527 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 3528 if (m.find()) 3529 failCount++; 3530 if (!m.hitEnd()) 3531 failCount++; 3532 3533 // block 3534 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 3535 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 3536 if (m.find() && m.start() == 1) 3537 failCount++; 3538 3539 report("NonBmpClassComplement"); 3540 } 3541 3542 }