1 /* 2 * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
24 */ 25 26 /** 27 * @test 28 * @summary tests RegExp framework 29 * @author Mike McCloskey 30 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 31 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 32 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 33 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 34 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 35 * 6350801 6676425 6878475 6919132 6931676 6948903 7014645 36 */ 37 38 import java.util.regex.*; 39 import java.util.Random; 40 import java.io.*; 41 import java.util.*; 42 import java.nio.CharBuffer; 43 44 /** 45 * This is a test class created to check the operation of 46 * the Pattern and Matcher classes. 47 */ 48 public class RegExTest { 49 50 private static Random generator = new Random(); 51 private static boolean failure = false; 52 private static int failCount = 0; 53 54 /** 55 * Main to interpret arguments and run several tests. 
56 * 57 */ 58 public static void main(String[] args) throws Exception { 59 // Most of the tests are in a file 60 processFile("TestCases.txt"); 61 //processFile("PerlCases.txt"); 62 processFile("BMPTestCases.txt"); 63 processFile("SupplementaryTestCases.txt"); 64 65 // These test many randomly generated char patterns 66 bm(); 67 slice(); 68 69 // These are hard to put into the file 70 escapes(); 71 blankInput(); 72 73 // Substitition tests on randomly generated sequences 74 globalSubstitute(); 75 stringbufferSubstitute(); 76 substitutionBasher(); 77 78 // Canonical Equivalence 79 ceTest(); 80 81 // Anchors 82 anchorTest(); 83 84 // boolean match calls 85 matchesTest(); 86 lookingAtTest(); 87 88 // Pattern API 89 patternMatchesTest(); 90 91 // Misc 92 lookbehindTest(); 93 nullArgumentTest(); 94 backRefTest(); 95 groupCaptureTest(); 96 caretTest(); 97 charClassTest(); 98 emptyPatternTest(); 99 findIntTest(); 100 group0Test(); 101 longPatternTest(); 102 octalTest(); 103 ampersandTest(); 104 negationTest(); 105 splitTest(); 106 appendTest(); 107 caseFoldingTest(); 108 commentsTest(); 109 unixLinesTest(); 110 replaceFirstTest(); 111 gTest(); 112 zTest(); 113 serializeTest(); 114 reluctantRepetitionTest(); 115 multilineDollarTest(); 116 dollarAtEndTest(); 117 caretBetweenTerminatorsTest(); 118 // This RFE rejected in Tiger numOccurrencesTest(); 119 javaCharClassTest(); 120 nonCaptureRepetitionTest(); 121 notCapturedGroupCurlyMatchTest(); 122 escapedSegmentTest(); 123 literalPatternTest(); 124 literalReplacementTest(); 125 regionTest(); 126 toStringTest(); 127 negatedCharClassTest(); 128 findFromTest(); 129 boundsTest(); 130 unicodeWordBoundsTest(); 131 caretAtEndTest(); 132 wordSearchTest(); 133 hitEndTest(); 134 toMatchResultTest(); 135 surrogatesInClassTest(); 136 namedGroupCaptureTest(); 137 nonBmpClassComplementTest(); 138 unicodePropertiesTest(); 139 unicodeHexNotationTest(); 140 if (failure) 141 throw new RuntimeException("Failure in the RE handling."); 142 else 143 
System.err.println("OKAY: All tests passed."); 144 } 145 146 // Utility functions 147 148 private static String getRandomAlphaString(int length) { 149 StringBuffer buf = new StringBuffer(length); 150 for (int i=0; i<length; i++) { 151 char randChar = (char)(97 + generator.nextInt(26)); 152 buf.append(randChar); 153 } 154 return buf.toString(); 155 } 156 157 private static void check(Matcher m, String expected) { 158 m.find(); 159 if (!m.group().equals(expected)) 160 failCount++; 161 } 162 163 private static void check(Matcher m, String result, boolean expected) { 164 m.find(); 165 if (m.group().equals(result) != expected) 166 failCount++; 167 } 168 169 private static void check(Pattern p, String s, boolean expected) { 170 if (p.matcher(s).find() != expected) 171 failCount++; 172 } 173 174 private static void check(String p, String s, boolean expected) { 175 Matcher matcher = Pattern.compile(p).matcher(s); 176 if (matcher.find() != expected) 177 failCount++; 178 } 179 180 private static void check(String p, char c, boolean expected) { 181 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 182 Pattern pattern = Pattern.compile(propertyPattern); 183 char[] ca = new char[1]; ca[0] = c; 184 Matcher matcher = pattern.matcher(new String(ca)); 185 if (!matcher.find()) 186 failCount++; 187 } 188 189 private static void check(String p, int codePoint, boolean expected) { 190 String propertyPattern = expected ? 
"\\p" + p : "\\P" + p; 191 Pattern pattern = Pattern.compile(propertyPattern); 192 char[] ca = Character.toChars(codePoint); 193 Matcher matcher = pattern.matcher(new String(ca)); 194 if (!matcher.find()) 195 failCount++; 196 } 197 198 private static void check(String p, int flag, String input, String s, 199 boolean expected) 200 { 201 Pattern pattern = Pattern.compile(p, flag); 202 Matcher matcher = pattern.matcher(input); 203 if (expected) 204 check(matcher, s, expected); 205 else 206 check(pattern, input, false); 207 } 208 209 private static void report(String testName) { 210 int spacesToAdd = 30 - testName.length(); 211 StringBuffer paddedNameBuffer = new StringBuffer(testName); 212 for (int i=0; i<spacesToAdd; i++) 213 paddedNameBuffer.append(" "); 214 String paddedName = paddedNameBuffer.toString(); 215 System.err.println(paddedName + ": " + 216 (failCount==0 ? "Passed":"Failed("+failCount+")")); 217 if (failCount > 0) 218 failure = true; 219 failCount = 0; 220 } 221 222 /** 223 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 224 * supplementary characters. This method does NOT fully take care 225 * of the regex syntax. 
226 */ 227 private static String toSupplementaries(String s) { 228 int length = s.length(); 229 StringBuffer sb = new StringBuffer(length * 2); 230 231 for (int i = 0; i < length; ) { 232 char c = s.charAt(i++); 233 if (c == '\\') { 234 sb.append(c); 235 if (i < length) { 236 c = s.charAt(i++); 237 sb.append(c); 238 if (c == 'u') { 239 // assume no syntax error 240 sb.append(s.charAt(i++)); 241 sb.append(s.charAt(i++)); 242 sb.append(s.charAt(i++)); 243 sb.append(s.charAt(i++)); 244 } 245 } 246 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 247 sb.append('\ud800').append((char)('\udc00'+c)); 248 } else { 249 sb.append(c); 250 } 251 } 252 return sb.toString(); 253 } 254 255 // Regular expression tests 256 257 // This is for bug 6178785 258 // Test if an expected NPE gets thrown when passing in a null argument 259 private static boolean check(Runnable test) { 260 try { 261 test.run(); 262 failCount++; 263 return false; 264 } catch (NullPointerException npe) { 265 return true; 266 } 267 } 268 269 private static void nullArgumentTest() { 270 check(new Runnable() { public void run() { Pattern.compile(null); }}); 271 check(new Runnable() { public void run() { Pattern.matches(null, null); }}); 272 check(new Runnable() { public void run() { Pattern.matches("xyz", null);}}); 273 check(new Runnable() { public void run() { Pattern.quote(null);}}); 274 check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}}); 275 check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}}); 276 277 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 278 m.matches(); 279 check(new Runnable() { public void run() { m.appendTail(null);}}); 280 check(new Runnable() { public void run() { m.replaceAll(null);}}); 281 check(new Runnable() { public void run() { m.replaceFirst(null);}}); 282 check(new Runnable() { public void run() { m.appendReplacement(null, null);}}); 283 check(new Runnable() { public void run() { m.reset(null);}}); 
284 check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}}); 285 //check(new Runnable() { public void run() { m.usePattern(null);}}); 286 287 report("Null Argument"); 288 } 289 290 // This is for bug6635133 291 // Test if surrogate pair in Unicode escapes can be handled correctly. 292 private static void surrogatesInClassTest() throws Exception { 293 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 294 Matcher matcher = pattern.matcher("\ud834\udd22"); 295 if (!matcher.find()) 296 failCount++; 297 } 298 299 // This is for bug 4988891 300 // Test toMatchResult to see that it is a copy of the Matcher 301 // that is not affected by subsequent operations on the original 302 private static void toMatchResultTest() throws Exception { 303 Pattern pattern = Pattern.compile("squid"); 304 Matcher matcher = pattern.matcher( 305 "agiantsquidofdestinyasmallsquidoffate"); 306 matcher.find(); 307 int matcherStart1 = matcher.start(); 308 MatchResult mr = matcher.toMatchResult(); 309 if (mr == matcher) 310 failCount++; 311 int resultStart1 = mr.start(); 312 if (matcherStart1 != resultStart1) 313 failCount++; 314 matcher.find(); 315 int matcherStart2 = matcher.start(); 316 int resultStart2 = mr.start(); 317 if (matcherStart2 == resultStart2) 318 failCount++; 319 if (resultStart1 != resultStart2) 320 failCount++; 321 MatchResult mr2 = matcher.toMatchResult(); 322 if (mr == mr2) 323 failCount++; 324 if (mr2.start() != matcherStart2) 325 failCount++; 326 report("toMatchResult is a copy"); 327 } 328 329 // This is for bug 5013885 330 // Must test a slice to see if it reports hitEnd correctly 331 private static void hitEndTest() throws Exception { 332 // Basic test of Slice node 333 Pattern p = Pattern.compile("^squidattack"); 334 Matcher m = p.matcher("squack"); 335 m.find(); 336 if (m.hitEnd()) 337 failCount++; 338 m.reset("squid"); 339 m.find(); 340 if (!m.hitEnd()) 341 failCount++; 342 343 // Test Slice, SliceA and SliceU nodes 344 for (int 
i=0; i<3; i++) { 345 int flags = 0; 346 if (i==1) flags = Pattern.CASE_INSENSITIVE; 347 if (i==2) flags = Pattern.UNICODE_CASE; 348 p = Pattern.compile("^abc", flags); 349 m = p.matcher("ad"); 350 m.find(); 351 if (m.hitEnd()) 352 failCount++; 353 m.reset("ab"); 354 m.find(); 355 if (!m.hitEnd()) 356 failCount++; 357 } 358 359 // Test Boyer-Moore node 360 p = Pattern.compile("catattack"); 361 m = p.matcher("attack"); 362 m.find(); 363 if (!m.hitEnd()) 364 failCount++; 365 366 p = Pattern.compile("catattack"); 367 m = p.matcher("attackattackattackcatatta"); 368 m.find(); 369 if (!m.hitEnd()) 370 failCount++; 371 report("hitEnd from a Slice"); 372 } 373 374 // This is for bug 4997476 375 // It is weird code submitted by customer demonstrating a regression 376 private static void wordSearchTest() throws Exception { 377 String testString = new String("word1 word2 word3"); 378 Pattern p = Pattern.compile("\\b"); 379 Matcher m = p.matcher(testString); 380 int position = 0; 381 int start = 0; 382 while (m.find(position)) { 383 start = m.start(); 384 if (start == testString.length()) 385 break; 386 if (m.find(start+1)) { 387 position = m.start(); 388 } else { 389 position = testString.length(); 390 } 391 if (testString.substring(start, position).equals(" ")) 392 continue; 393 if (!testString.substring(start, position-1).startsWith("word")) 394 failCount++; 395 } 396 report("Customer word search"); 397 } 398 399 // This is for bug 4994840 400 private static void caretAtEndTest() throws Exception { 401 // Problem only occurs with multiline patterns 402 // containing a beginning-of-line caret "^" followed 403 // by an expression that also matches the empty string. 
404 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 405 Matcher matcher = pattern.matcher("\r"); 406 matcher.find(); 407 matcher.find(); 408 report("Caret at end"); 409 } 410 411 // This test is for 4979006 412 // Check to see if word boundary construct properly handles unicode 413 // non spacing marks 414 private static void unicodeWordBoundsTest() throws Exception { 415 String spaces = " "; 416 String wordChar = "a"; 417 String nsm = "\u030a"; 418 419 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 420 421 Pattern pattern = Pattern.compile("\\b"); 422 Matcher matcher = pattern.matcher(""); 423 // S=other B=word character N=non spacing mark .=word boundary 424 // SS.BB.SS 425 String input = spaces + wordChar + wordChar + spaces; 426 twoFindIndexes(input, matcher, 2, 4); 427 // SS.BBN.SS 428 input = spaces + wordChar +wordChar + nsm + spaces; 429 twoFindIndexes(input, matcher, 2, 5); 430 // SS.BN.SS 431 input = spaces + wordChar + nsm + spaces; 432 twoFindIndexes(input, matcher, 2, 4); 433 // SS.BNN.SS 434 input = spaces + wordChar + nsm + nsm + spaces; 435 twoFindIndexes(input, matcher, 2, 5); 436 // SSN.BB.SS 437 input = spaces + nsm + wordChar + wordChar + spaces; 438 twoFindIndexes(input, matcher, 3, 5); 439 // SS.BNB.SS 440 input = spaces + wordChar + nsm + wordChar + spaces; 441 twoFindIndexes(input, matcher, 2, 5); 442 // SSNNSS 443 input = spaces + nsm + nsm + spaces; 444 matcher.reset(input); 445 if (matcher.find()) 446 failCount++; 447 // SSN.BBN.SS 448 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 449 twoFindIndexes(input, matcher, 3, 6); 450 451 report("Unicode word boundary"); 452 } 453 454 private static void twoFindIndexes(String input, Matcher matcher, int a, 455 int b) throws Exception 456 { 457 matcher.reset(input); 458 matcher.find(); 459 if (matcher.start() != a) 460 failCount++; 461 matcher.find(); 462 if (matcher.start() != b) 463 failCount++; 464 } 465 466 // This test is for 6284152 467 static 
void check(String regex, String input, String[] expected) { 468 List<String> result = new ArrayList<String>(); 469 Pattern p = Pattern.compile(regex); 470 Matcher m = p.matcher(input); 471 while (m.find()) { 472 result.add(m.group()); 473 } 474 if (!Arrays.asList(expected).equals(result)) 475 failCount++; 476 } 477 478 private static void lookbehindTest() throws Exception { 479 //Positive 480 check("(?<=%.{0,5})foo\\d", 481 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 482 new String[]{"foo1", "foo2", "foo3"}); 483 484 //boundary at end of the lookbehind sub-regex should work consistently 485 //with the boundary just after the lookbehind sub-regex 486 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 487 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 488 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 489 check("(?<!abc \\b)foo", "abc foo", new String[0]); 490 491 //Negative 492 check("(?<!%.{0,5})foo\\d", 493 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 494 new String[] {"foo4", "foo5"}); 495 496 //Positive greedy 497 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 498 499 //Positive reluctant 500 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 501 502 //supplementary 503 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 504 new String[] {"fo\ud800\udc00o"}); 505 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 506 new String[] {"fo\ud800\udc00o"}); 507 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 508 new String[] {"fo\ud800\udc00o"}); 509 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 510 new String[] {"fo\ud800\udc00o"}); 511 report("Lookbehind"); 512 } 513 514 // This test is for 4938995 515 // Check to see if weak region boundaries are transparent to 516 // lookahead and lookbehind constructs 517 private static void boundsTest() throws Exception { 518 String fullMessage = "catdogcat"; 519 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 520 
Matcher matcher = pattern.matcher("catdogca");
        matcher.useTransparentBounds(true);
        if (matcher.find()) {
            failCount++;
        }
        matcher.reset("atdogcat");
        if (matcher.find()) {
            failCount++;
        }
        matcher.reset(fullMessage);
        if (!matcher.find()) {
            failCount++;
        }
        matcher.reset(fullMessage);
        matcher.region(0,9);
        if (!matcher.find()) {
            failCount++;
        }
        matcher.reset(fullMessage);
        matcher.region(0,6);
        if (!matcher.find()) {
            failCount++;
        }
        matcher.reset(fullMessage);
        matcher.region(3,6);
        if (!matcher.find()) {
            failCount++;
        }
        // With opaque bounds the lookaround can no longer see past the region
        matcher.useTransparentBounds(false);
        if (matcher.find()) {
            failCount++;
        }

        // Negative lookahead/lookbehind
        pattern = Pattern.compile("(?<!cat)dog(?!cat)");
        matcher = pattern.matcher("dogcat");
        matcher.useTransparentBounds(true);
        matcher.region(0,3);
        if (matcher.find()) {
            failCount++;
        }
        matcher.reset("catdog");
        matcher.region(3,6);
        if (matcher.find()) {
            failCount++;
        }
        matcher.useTransparentBounds(false);
        matcher.reset("dogcat");
        matcher.region(0,3);
        if (!matcher.find()) {
            failCount++;
        }
        matcher.reset("catdog");
        matcher.region(3,6);
        if (!matcher.find()) {
            failCount++;
        }

        report("Region bounds transparency");
    }

    // This test is for 4945394
    // "$0" occurs once in the message: the first find() must succeed and
    // the following two must both fail.
    private static void findFromTest() throws Exception {
        String message = "This is 40 $0 message.";
        Pattern pat = Pattern.compile("\\$0");
        Matcher match = pat.matcher(message);
        if (!match.find()) {
            failCount++;
        }
        if (match.find()) {
            failCount++;
        }
        if (match.find()) {
            failCount++;
        }
        report("Check for alternating find");
    }

    // This test is for 4872664 and 4892980
    private static void negatedCharClassTest() throws Exception {
        Pattern pattern = Pattern.compile("[^>]");
        Matcher matcher = pattern.matcher("\u203A");
        if (!matcher.matches()) {
            failCount++;
        }
        pattern = Pattern.compile("[^fr]");
        matcher = pattern.matcher("a");
        if (!matcher.find()) {
            failCount++;
        }
        matcher.reset("\u203A");
        if (!matcher.find()) {
            failCount++;
        }
        String s = "for";
        String[] result = s.split("[^fr]");
        if (!result[0].equals("f")) {
            failCount++;
        }
        if (!result[1].equals("r")) {
            failCount++;
        }
        s = "f\u203Ar";
        result = s.split("[^fr]");
        if (!result[0].equals("f")) {
            failCount++;
        }
        if (!result[1].equals("r")) {
            failCount++;
        }

        // Test adding to bits, subtracting a node, then adding to bits again
        pattern = Pattern.compile("[^f\u203Ar]");
        matcher = pattern.matcher("a");
        if (!matcher.find()) {
            failCount++;
        }
        matcher.reset("f");
        if (matcher.find()) {
            failCount++;
        }
        matcher.reset("\u203A");
        if (matcher.find()) {
            failCount++;
        }
        matcher.reset("r");
        if (matcher.find()) {
            failCount++;
        }
        matcher.reset("\u203B");
        if (!matcher.find()) {
            failCount++;
        }

        // Test subtracting a node, adding to bits, subtracting again
        pattern = Pattern.compile("[^\u203Ar\u203B]");
        matcher = pattern.matcher("a");
        if (!matcher.find()) {
            failCount++;
        }
        matcher.reset("\u203A");
        if (matcher.find()) {
            failCount++;
        }
        matcher.reset("r");
        if (matcher.find()) {
            failCount++;
        }
        matcher.reset("\u203B");
        if (matcher.find()) {
            failCount++;
        }
        matcher.reset("\u203C");
        if (!matcher.find()) {
            failCount++;
        }

        report("Negated Character Class");
    }

    // This test is for 4628291
    private static void toStringTest() throws Exception {
        Pattern pattern = Pattern.compile("b+");
        // Compare contents, not references: a reference comparison only
        // passes when toString() happens to return the very same String
        // object that was passed to compile().
        if (!pattern.toString().equals("b+")) {
            failCount++;
        }
        // Matcher.toString() output is unspecified; the calls below only
        // have to complete in each matcher state.
        Matcher matcher = pattern.matcher("aaabbbccc");
        String matcherString = matcher.toString(); // unspecified
        matcher.find();
        matcherString = matcher.toString(); // unspecified
        matcher.region(0,3);
        matcherString = matcher.toString(); // unspecified
        matcher.reset();
        matcherString = matcher.toString(); // unspecified
        report("toString");
    }

    //
This test is for 4808962 666 private static void literalPatternTest() throws Exception { 667 int flags = Pattern.LITERAL; 668 669 Pattern pattern = Pattern.compile("abc\\t$^", flags); 670 check(pattern, "abc\\t$^", true); 671 672 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 673 check(pattern, "abc\\t$^", true); 674 675 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 676 check(pattern, "\\Qa^$bcabc\\E", true); 677 check(pattern, "a^$bcabc", false); 678 679 pattern = Pattern.compile("\\\\Q\\\\E"); 680 check(pattern, "\\Q\\E", true); 681 682 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 683 check(pattern, "abcefg\\Q\\Ehij", true); 684 685 pattern = Pattern.compile("\\\\\\Q\\\\E"); 686 check(pattern, "\\\\\\\\", true); 687 688 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 689 check(pattern, "\\Qa^$bcabc\\E", true); 690 check(pattern, "a^$bcabc", false); 691 692 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 693 check(pattern, "\\Qabc\\Edef", true); 694 check(pattern, "abcdef", false); 695 696 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 697 check(pattern, "abc\\Edef", true); 698 check(pattern, "abcdef", false); 699 700 pattern = Pattern.compile(Pattern.quote("\\E")); 701 check(pattern, "\\E", true); 702 703 pattern = Pattern.compile("((((abc.+?:)", flags); 704 check(pattern, "((((abc.+?:)", true); 705 706 flags |= Pattern.MULTILINE; 707 708 pattern = Pattern.compile("^cat$", flags); 709 check(pattern, "abc^cat$def", true); 710 check(pattern, "cat", false); 711 712 flags |= Pattern.CASE_INSENSITIVE; 713 714 pattern = Pattern.compile("abcdef", flags); 715 check(pattern, "ABCDEF", true); 716 check(pattern, "AbCdEf", true); 717 718 flags |= Pattern.DOTALL; 719 720 pattern = Pattern.compile("a...b", flags); 721 check(pattern, "A...b", true); 722 check(pattern, "Axxxb", false); 723 724 flags |= Pattern.CANON_EQ; 725 726 Pattern p = Pattern.compile("testa\u030a", flags); 727 check(pattern, "testa\u030a", false); 728 
check(pattern, "test\u00e5", false); 729 730 // Supplementary character test 731 flags = Pattern.LITERAL; 732 733 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 734 check(pattern, toSupplementaries("abc\\t$^"), true); 735 736 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 737 check(pattern, toSupplementaries("abc\\t$^"), true); 738 739 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 740 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 741 check(pattern, toSupplementaries("a^$bcabc"), false); 742 743 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 744 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 745 check(pattern, toSupplementaries("a^$bcabc"), false); 746 747 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 748 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 749 check(pattern, toSupplementaries("abcdef"), false); 750 751 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 752 check(pattern, toSupplementaries("abc\\Edef"), true); 753 check(pattern, toSupplementaries("abcdef"), false); 754 755 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 756 check(pattern, toSupplementaries("((((abc.+?:)"), true); 757 758 flags |= Pattern.MULTILINE; 759 760 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 761 check(pattern, toSupplementaries("abc^cat$def"), true); 762 check(pattern, toSupplementaries("cat"), false); 763 764 flags |= Pattern.DOTALL; 765 766 // note: this is case-sensitive. 
767 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 768 check(pattern, toSupplementaries("a...b"), true); 769 check(pattern, toSupplementaries("axxxb"), false); 770 771 flags |= Pattern.CANON_EQ; 772 773 String t = toSupplementaries("test"); 774 p = Pattern.compile(t + "a\u030a", flags); 775 check(pattern, t + "a\u030a", false); 776 check(pattern, t + "\u00e5", false); 777 778 report("Literal pattern"); 779 } 780 781 // This test is for 4803179 782 // This test is also for 4808962, replacement parts 783 private static void literalReplacementTest() throws Exception { 784 int flags = Pattern.LITERAL; 785 786 Pattern pattern = Pattern.compile("abc", flags); 787 Matcher matcher = pattern.matcher("zzzabczzz"); 788 String replaceTest = "$0"; 789 String result = matcher.replaceAll(replaceTest); 790 if (!result.equals("zzzabczzz")) 791 failCount++; 792 793 matcher.reset(); 794 String literalReplacement = matcher.quoteReplacement(replaceTest); 795 result = matcher.replaceAll(literalReplacement); 796 if (!result.equals("zzz$0zzz")) 797 failCount++; 798 799 matcher.reset(); 800 replaceTest = "\\t$\\$"; 801 literalReplacement = matcher.quoteReplacement(replaceTest); 802 result = matcher.replaceAll(literalReplacement); 803 if (!result.equals("zzz\\t$\\$zzz")) 804 failCount++; 805 806 // Supplementary character test 807 pattern = Pattern.compile(toSupplementaries("abc"), flags); 808 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 809 replaceTest = "$0"; 810 result = matcher.replaceAll(replaceTest); 811 if (!result.equals(toSupplementaries("zzzabczzz"))) 812 failCount++; 813 814 matcher.reset(); 815 literalReplacement = matcher.quoteReplacement(replaceTest); 816 result = matcher.replaceAll(literalReplacement); 817 if (!result.equals(toSupplementaries("zzz$0zzz"))) 818 failCount++; 819 820 matcher.reset(); 821 replaceTest = "\\t$\\$"; 822 literalReplacement = matcher.quoteReplacement(replaceTest); 823 result = matcher.replaceAll(literalReplacement); 824 if 
(!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 825 failCount++; 826 827 report("Literal replacement"); 828 } 829 830 // This test is for 4757029 831 private static void regionTest() throws Exception { 832 Pattern pattern = Pattern.compile("abc"); 833 Matcher matcher = pattern.matcher("abcdefabc"); 834 835 matcher.region(0,9); 836 if (!matcher.find()) 837 failCount++; 838 if (!matcher.find()) 839 failCount++; 840 matcher.region(0,3); 841 if (!matcher.find()) 842 failCount++; 843 matcher.region(3,6); 844 if (matcher.find()) 845 failCount++; 846 matcher.region(0,2); 847 if (matcher.find()) 848 failCount++; 849 850 expectRegionFail(matcher, 1, -1); 851 expectRegionFail(matcher, -1, -1); 852 expectRegionFail(matcher, -1, 1); 853 expectRegionFail(matcher, 5, 3); 854 expectRegionFail(matcher, 5, 12); 855 expectRegionFail(matcher, 12, 12); 856 857 pattern = Pattern.compile("^abc$"); 858 matcher = pattern.matcher("zzzabczzz"); 859 matcher.region(0,9); 860 if (matcher.find()) 861 failCount++; 862 matcher.region(3,6); 863 if (!matcher.find()) 864 failCount++; 865 matcher.region(3,6); 866 matcher.useAnchoringBounds(false); 867 if (matcher.find()) 868 failCount++; 869 870 // Supplementary character test 871 pattern = Pattern.compile(toSupplementaries("abc")); 872 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 873 matcher.region(0,9*2); 874 if (!matcher.find()) 875 failCount++; 876 if (!matcher.find()) 877 failCount++; 878 matcher.region(0,3*2); 879 if (!matcher.find()) 880 failCount++; 881 matcher.region(1,3*2); 882 if (matcher.find()) 883 failCount++; 884 matcher.region(3*2,6*2); 885 if (matcher.find()) 886 failCount++; 887 matcher.region(0,2*2); 888 if (matcher.find()) 889 failCount++; 890 matcher.region(0,2*2+1); 891 if (matcher.find()) 892 failCount++; 893 894 expectRegionFail(matcher, 1*2, -1); 895 expectRegionFail(matcher, -1, -1); 896 expectRegionFail(matcher, -1, 1*2); 897 expectRegionFail(matcher, 5*2, 3*2); 898 expectRegionFail(matcher, 5*2, 12*2); 
899 expectRegionFail(matcher, 12*2, 12*2); 900 901 pattern = Pattern.compile(toSupplementaries("^abc$")); 902 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 903 matcher.region(0,9*2); 904 if (matcher.find()) 905 failCount++; 906 matcher.region(3*2,6*2); 907 if (!matcher.find()) 908 failCount++; 909 matcher.region(3*2+1,6*2); 910 if (matcher.find()) 911 failCount++; 912 matcher.region(3*2,6*2-1); 913 if (matcher.find()) 914 failCount++; 915 matcher.region(3*2,6*2); 916 matcher.useAnchoringBounds(false); 917 if (matcher.find()) 918 failCount++; 919 report("Regions"); 920 } 921 922 private static void expectRegionFail(Matcher matcher, int index1, 923 int index2) 924 { 925 try { 926 matcher.region(index1, index2); 927 failCount++; 928 } catch (IndexOutOfBoundsException ioobe) { 929 // Correct result 930 } catch (IllegalStateException ise) { 931 // Correct result 932 } 933 } 934 935 // This test is for 4803197 936 private static void escapedSegmentTest() throws Exception { 937 938 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 939 check(pattern, "dir1\\dir2", true); 940 941 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 942 check(pattern, "dir1\\dir2\\", true); 943 944 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 945 check(pattern, "dir1\\dir2\\", true); 946 947 // Supplementary character test 948 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 949 check(pattern, toSupplementaries("dir1\\dir2"), true); 950 951 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 952 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 953 954 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 955 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 956 957 report("Escaped segment"); 958 } 959 960 // This test is for 4792284 961 private static void nonCaptureRepetitionTest() throws Exception { 962 String input = "abcdefgh;"; 963 964 String[] patterns = new String[] { 965 "(?:\\w{4})+;", 966 
"(?:\\w{8})*;", 967 "(?:\\w{2}){2,4};", 968 "(?:\\w{4}){2,};", // only matches the 969 ".*?(?:\\w{5})+;", // specified minimum 970 ".*?(?:\\w{9})*;", // number of reps - OK 971 "(?:\\w{4})+?;", // lazy repetition - OK 972 "(?:\\w{4})++;", // possessive repetition - OK 973 "(?:\\w{2,}?)+;", // non-deterministic - OK 974 "(\\w{4})+;", // capturing group - OK 975 }; 976 977 for (int i = 0; i < patterns.length; i++) { 978 // Check find() 979 check(patterns[i], 0, input, input, true); 980 // Check matches() 981 Pattern p = Pattern.compile(patterns[i]); 982 Matcher m = p.matcher(input); 983 984 if (m.matches()) { 985 if (!m.group(0).equals(input)) 986 failCount++; 987 } else { 988 failCount++; 989 } 990 } 991 992 report("Non capturing repetition"); 993 } 994 995 // This test is for 6358731 996 private static void notCapturedGroupCurlyMatchTest() throws Exception { 997 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 998 Matcher matcher = pattern.matcher("abcd"); 999 if (!matcher.matches() || 1000 matcher.group(1) != null || 1001 !matcher.group(2).equals("abcd")) { 1002 failCount++; 1003 } 1004 report("Not captured GroupCurly"); 1005 } 1006 1007 // This test is for 4706545 1008 private static void javaCharClassTest() throws Exception { 1009 for (int i=0; i<1000; i++) { 1010 char c = (char)generator.nextInt(); 1011 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1012 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1013 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1014 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1015 check("{javaDigit}", c, Character.isDigit(c)); 1016 check("{javaDefined}", c, Character.isDefined(c)); 1017 check("{javaLetter}", c, Character.isLetter(c)); 1018 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1019 check("{javaJavaIdentifierStart}", c, 1020 Character.isJavaIdentifierStart(c)); 1021 check("{javaJavaIdentifierPart}", c, 1022 Character.isJavaIdentifierPart(c)); 1023 
check("{javaUnicodeIdentifierStart}", c, 1024 Character.isUnicodeIdentifierStart(c)); 1025 check("{javaUnicodeIdentifierPart}", c, 1026 Character.isUnicodeIdentifierPart(c)); 1027 check("{javaIdentifierIgnorable}", c, 1028 Character.isIdentifierIgnorable(c)); 1029 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1030 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1031 check("{javaISOControl}", c, Character.isISOControl(c)); 1032 check("{javaMirrored}", c, Character.isMirrored(c)); 1033 1034 } 1035 1036 // Supplementary character test 1037 for (int i=0; i<1000; i++) { 1038 int c = generator.nextInt(Character.MAX_CODE_POINT 1039 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1040 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1041 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1042 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1043 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1044 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1045 check("{javaDigit}", c, Character.isDigit(c)); 1046 check("{javaDefined}", c, Character.isDefined(c)); 1047 check("{javaLetter}", c, Character.isLetter(c)); 1048 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1049 check("{javaJavaIdentifierStart}", c, 1050 Character.isJavaIdentifierStart(c)); 1051 check("{javaJavaIdentifierPart}", c, 1052 Character.isJavaIdentifierPart(c)); 1053 check("{javaUnicodeIdentifierStart}", c, 1054 Character.isUnicodeIdentifierStart(c)); 1055 check("{javaUnicodeIdentifierPart}", c, 1056 Character.isUnicodeIdentifierPart(c)); 1057 check("{javaIdentifierIgnorable}", c, 1058 Character.isIdentifierIgnorable(c)); 1059 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1060 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1061 check("{javaISOControl}", c, Character.isISOControl(c)); 1062 check("{javaMirrored}", c, Character.isMirrored(c)); 1063 } 1064 1065 report("Java character classes"); 1066 } 1067 1068 // This test is for 4523620 1069 /* 1070 
private static void numOccurrencesTest() throws Exception { 1071 Pattern pattern = Pattern.compile("aaa"); 1072 1073 if (pattern.numOccurrences("aaaaaa", false) != 2) 1074 failCount++; 1075 if (pattern.numOccurrences("aaaaaa", true) != 4) 1076 failCount++; 1077 1078 pattern = Pattern.compile("^"); 1079 if (pattern.numOccurrences("aaaaaa", false) != 1) 1080 failCount++; 1081 if (pattern.numOccurrences("aaaaaa", true) != 1) 1082 failCount++; 1083 1084 report("Number of Occurrences"); 1085 } 1086 */ 1087 1088 // This test is for 4776374 1089 private static void caretBetweenTerminatorsTest() throws Exception { 1090 int flags1 = Pattern.DOTALL; 1091 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1092 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1093 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1094 1095 check("^....", flags1, "test\ntest", "test", true); 1096 check(".....^", flags1, "test\ntest", "test", false); 1097 check(".....^", flags1, "test\n", "test", false); 1098 check("....^", flags1, "test\r\n", "test", false); 1099 1100 check("^....", flags2, "test\ntest", "test", true); 1101 check("....^", flags2, "test\ntest", "test", false); 1102 check(".....^", flags2, "test\n", "test", false); 1103 check("....^", flags2, "test\r\n", "test", false); 1104 1105 check("^....", flags3, "test\ntest", "test", true); 1106 check(".....^", flags3, "test\ntest", "test\n", true); 1107 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1108 check(".....^", flags3, "test\n", "test", false); 1109 check(".....^", flags3, "test\r\n", "test", false); 1110 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1111 1112 check("^....", flags4, "test\ntest", "test", true); 1113 check(".....^", flags3, "test\ntest", "test\n", true); 1114 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1115 check(".....^", flags4, "test\n", "test\n", false); 1116 check(".....^", flags4, "test\r\n", "test\r", false); 1117 1118 // Supplementary 
character test 1119 String t = toSupplementaries("test"); 1120 check("^....", flags1, t+"\n"+t, t, true); 1121 check(".....^", flags1, t+"\n"+t, t, false); 1122 check(".....^", flags1, t+"\n", t, false); 1123 check("....^", flags1, t+"\r\n", t, false); 1124 1125 check("^....", flags2, t+"\n"+t, t, true); 1126 check("....^", flags2, t+"\n"+t, t, false); 1127 check(".....^", flags2, t+"\n", t, false); 1128 check("....^", flags2, t+"\r\n", t, false); 1129 1130 check("^....", flags3, t+"\n"+t, t, true); 1131 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1132 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1133 check(".....^", flags3, t+"\n", t, false); 1134 check(".....^", flags3, t+"\r\n", t, false); 1135 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1136 1137 check("^....", flags4, t+"\n"+t, t, true); 1138 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1139 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1140 check(".....^", flags4, t+"\n", t+"\n", false); 1141 check(".....^", flags4, t+"\r\n", t+"\r", false); 1142 1143 report("Caret between terminators"); 1144 } 1145 1146 // This test is for 4727935 1147 private static void dollarAtEndTest() throws Exception { 1148 int flags1 = Pattern.DOTALL; 1149 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1150 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1151 1152 check("....$", flags1, "test\n", "test", true); 1153 check("....$", flags1, "test\r\n", "test", true); 1154 check(".....$", flags1, "test\n", "test\n", true); 1155 check(".....$", flags1, "test\u0085", "test\u0085", true); 1156 check("....$", flags1, "test\u0085", "test", true); 1157 1158 check("....$", flags2, "test\n", "test", true); 1159 check(".....$", flags2, "test\n", "test\n", true); 1160 check(".....$", flags2, "test\u0085", "test\u0085", true); 1161 check("....$", flags2, "test\u0085", "est\u0085", true); 1162 1163 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1164 check(".....$.blah", flags3, 
"test\n\nblah", "test\n\nblah", true); 1165 check("....$blah", flags3, "test\nblah", "!!!!", false); 1166 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1167 1168 // Supplementary character test 1169 String t = toSupplementaries("test"); 1170 String b = toSupplementaries("blah"); 1171 check("....$", flags1, t+"\n", t, true); 1172 check("....$", flags1, t+"\r\n", t, true); 1173 check(".....$", flags1, t+"\n", t+"\n", true); 1174 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1175 check("....$", flags1, t+"\u0085", t, true); 1176 1177 check("....$", flags2, t+"\n", t, true); 1178 check(".....$", flags2, t+"\n", t+"\n", true); 1179 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1180 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1181 1182 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1183 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1184 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1185 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1186 1187 report("Dollar at End"); 1188 } 1189 1190 // This test is for 4711773 1191 private static void multilineDollarTest() throws Exception { 1192 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1193 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1194 matcher.find(); 1195 if (matcher.start(0) != 9) 1196 failCount++; 1197 matcher.find(); 1198 if (matcher.start(0) != 20) 1199 failCount++; 1200 1201 // Supplementary character test 1202 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1203 matcher.find(); 1204 if (matcher.start(0) != 9*2) 1205 failCount++; 1206 matcher.find(); 1207 if (matcher.start(0) != 20*2) 1208 failCount++; 1209 1210 report("Multiline Dollar"); 1211 } 1212 1213 private static void reluctantRepetitionTest() throws Exception { 1214 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1215 check(p, "1 word word word 2", true); 1216 check(p, "1 wor wo w 2", true); 1217 
check(p, "1 word word 2", true); 1218 check(p, "1 word 2", true); 1219 check(p, "1 wo w w 2", true); 1220 check(p, "1 wo w 2", true); 1221 check(p, "1 wor w 2", true); 1222 1223 p = Pattern.compile("([a-z])+?c"); 1224 Matcher m = p.matcher("ababcdefdec"); 1225 check(m, "ababc"); 1226 1227 // Supplementary character test 1228 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1229 m = p.matcher(toSupplementaries("ababcdefdec")); 1230 check(m, toSupplementaries("ababc")); 1231 1232 report("Reluctant Repetition"); 1233 } 1234 1235 private static void serializeTest() throws Exception { 1236 String patternStr = "(b)"; 1237 String matchStr = "b"; 1238 Pattern pattern = Pattern.compile(patternStr); 1239 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1240 ObjectOutputStream oos = new ObjectOutputStream(baos); 1241 oos.writeObject(pattern); 1242 oos.close(); 1243 ObjectInputStream ois = new ObjectInputStream( 1244 new ByteArrayInputStream(baos.toByteArray())); 1245 Pattern serializedPattern = (Pattern)ois.readObject(); 1246 ois.close(); 1247 Matcher matcher = serializedPattern.matcher(matchStr); 1248 if (!matcher.matches()) 1249 failCount++; 1250 if (matcher.groupCount() != 1) 1251 failCount++; 1252 1253 report("Serialization"); 1254 } 1255 1256 private static void gTest() { 1257 Pattern pattern = Pattern.compile("\\G\\w"); 1258 Matcher matcher = pattern.matcher("abc#x#x"); 1259 matcher.find(); 1260 matcher.find(); 1261 matcher.find(); 1262 if (matcher.find()) 1263 failCount++; 1264 1265 pattern = Pattern.compile("\\GA*"); 1266 matcher = pattern.matcher("1A2AA3"); 1267 matcher.find(); 1268 if (matcher.find()) 1269 failCount++; 1270 1271 pattern = Pattern.compile("\\GA*"); 1272 matcher = pattern.matcher("1A2AA3"); 1273 if (!matcher.find(1)) 1274 failCount++; 1275 matcher.find(); 1276 if (matcher.find()) 1277 failCount++; 1278 1279 report("\\G"); 1280 } 1281 1282 private static void zTest() { 1283 Pattern pattern = Pattern.compile("foo\\Z"); 1284 // Positives 
1285 check(pattern, "foo\u0085", true); 1286 check(pattern, "foo\u2028", true); 1287 check(pattern, "foo\u2029", true); 1288 check(pattern, "foo\n", true); 1289 check(pattern, "foo\r", true); 1290 check(pattern, "foo\r\n", true); 1291 // Negatives 1292 check(pattern, "fooo", false); 1293 check(pattern, "foo\n\r", false); 1294 1295 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1296 // Positives 1297 check(pattern, "foo", true); 1298 check(pattern, "foo\n", true); 1299 // Negatives 1300 check(pattern, "foo\r", false); 1301 check(pattern, "foo\u0085", false); 1302 check(pattern, "foo\u2028", false); 1303 check(pattern, "foo\u2029", false); 1304 1305 report("\\Z"); 1306 } 1307 1308 private static void replaceFirstTest() { 1309 Pattern pattern = Pattern.compile("(ab)(c*)"); 1310 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1311 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1312 failCount++; 1313 1314 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1315 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1316 failCount++; 1317 1318 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1319 String result = matcher.replaceFirst("$1"); 1320 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1321 failCount++; 1322 1323 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1324 result = matcher.replaceFirst("$2"); 1325 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1326 failCount++; 1327 1328 pattern = Pattern.compile("a*"); 1329 matcher = pattern.matcher("aaaaaaaaaa"); 1330 if (!matcher.replaceFirst("test").equals("test")) 1331 failCount++; 1332 1333 pattern = Pattern.compile("a+"); 1334 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1335 if (!matcher.replaceFirst("test").equals("zzztest")) 1336 failCount++; 1337 1338 // Supplementary character test 1339 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1340 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1341 if 
(!matcher.replaceFirst(toSupplementaries("test")) 1342 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1343 failCount++; 1344 1345 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1346 if (!matcher.replaceFirst(toSupplementaries("test")). 1347 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1348 failCount++; 1349 1350 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1351 result = matcher.replaceFirst("$1"); 1352 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1353 failCount++; 1354 1355 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1356 result = matcher.replaceFirst("$2"); 1357 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1358 failCount++; 1359 1360 pattern = Pattern.compile(toSupplementaries("a*")); 1361 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1362 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1363 failCount++; 1364 1365 pattern = Pattern.compile(toSupplementaries("a+")); 1366 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1367 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1368 failCount++; 1369 1370 report("Replace First"); 1371 } 1372 1373 private static void unixLinesTest() { 1374 Pattern pattern = Pattern.compile(".*"); 1375 Matcher matcher = pattern.matcher("aa\u2028blah"); 1376 matcher.find(); 1377 if (!matcher.group(0).equals("aa")) 1378 failCount++; 1379 1380 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1381 matcher = pattern.matcher("aa\u2028blah"); 1382 matcher.find(); 1383 if (!matcher.group(0).equals("aa\u2028blah")) 1384 failCount++; 1385 1386 pattern = Pattern.compile("[az]$", 1387 Pattern.MULTILINE | Pattern.UNIX_LINES); 1388 matcher = pattern.matcher("aa\u2028zz"); 1389 check(matcher, "a\u2028", false); 1390 1391 // Supplementary character test 1392 pattern = Pattern.compile(".*"); 1393 matcher = 
pattern.matcher(toSupplementaries("aa\u2028blah")); 1394 matcher.find(); 1395 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1396 failCount++; 1397 1398 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1399 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1400 matcher.find(); 1401 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1402 failCount++; 1403 1404 pattern = Pattern.compile(toSupplementaries("[az]$"), 1405 Pattern.MULTILINE | Pattern.UNIX_LINES); 1406 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1407 check(matcher, toSupplementaries("a\u2028"), false); 1408 1409 report("Unix Lines"); 1410 } 1411 1412 private static void commentsTest() { 1413 int flags = Pattern.COMMENTS; 1414 1415 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1416 Matcher matcher = pattern.matcher("aa#aa"); 1417 if (!matcher.matches()) 1418 failCount++; 1419 1420 pattern = Pattern.compile("aa # blah", flags); 1421 matcher = pattern.matcher("aa"); 1422 if (!matcher.matches()) 1423 failCount++; 1424 1425 pattern = Pattern.compile("aa blah", flags); 1426 matcher = pattern.matcher("aablah"); 1427 if (!matcher.matches()) 1428 failCount++; 1429 1430 pattern = Pattern.compile("aa # blah blech ", flags); 1431 matcher = pattern.matcher("aa"); 1432 if (!matcher.matches()) 1433 failCount++; 1434 1435 pattern = Pattern.compile("aa # blah\n ", flags); 1436 matcher = pattern.matcher("aa"); 1437 if (!matcher.matches()) 1438 failCount++; 1439 1440 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1441 matcher = pattern.matcher("aabc"); 1442 if (!matcher.matches()) 1443 failCount++; 1444 1445 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1446 matcher = pattern.matcher("aabc"); 1447 if (!matcher.matches()) 1448 failCount++; 1449 1450 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1451 matcher = pattern.matcher("aabc#blech"); 1452 if (!matcher.matches()) 1453 failCount++; 1454 1455 // Supplementary character 
test 1456 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1457 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1458 if (!matcher.matches()) 1459 failCount++; 1460 1461 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1462 matcher = pattern.matcher(toSupplementaries("aa")); 1463 if (!matcher.matches()) 1464 failCount++; 1465 1466 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1467 matcher = pattern.matcher(toSupplementaries("aablah")); 1468 if (!matcher.matches()) 1469 failCount++; 1470 1471 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1472 matcher = pattern.matcher(toSupplementaries("aa")); 1473 if (!matcher.matches()) 1474 failCount++; 1475 1476 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1477 matcher = pattern.matcher(toSupplementaries("aa")); 1478 if (!matcher.matches()) 1479 failCount++; 1480 1481 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1482 matcher = pattern.matcher(toSupplementaries("aabc")); 1483 if (!matcher.matches()) 1484 failCount++; 1485 1486 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1487 matcher = pattern.matcher(toSupplementaries("aabc")); 1488 if (!matcher.matches()) 1489 failCount++; 1490 1491 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1492 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1493 if (!matcher.matches()) 1494 failCount++; 1495 1496 report("Comments"); 1497 } 1498 1499 private static void caseFoldingTest() { // bug 4504687 1500 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1501 Pattern pattern = Pattern.compile("aa", flags); 1502 Matcher matcher = pattern.matcher("ab"); 1503 if (matcher.matches()) 1504 failCount++; 1505 1506 pattern = Pattern.compile("aA", flags); 1507 matcher = pattern.matcher("ab"); 1508 if (matcher.matches()) 1509 failCount++; 1510 1511 pattern = Pattern.compile("aa", flags); 
1512 matcher = pattern.matcher("aB"); 1513 if (matcher.matches()) 1514 failCount++; 1515 matcher = pattern.matcher("Ab"); 1516 if (matcher.matches()) 1517 failCount++; 1518 1519 // ASCII "a" 1520 // Latin-1 Supplement "a" + grave 1521 // Cyrillic "a" 1522 String[] patterns = new String[] { 1523 //single 1524 "a", "\u00e0", "\u0430", 1525 //slice 1526 "ab", "\u00e0\u00e1", "\u0430\u0431", 1527 //class single 1528 "[a]", "[\u00e0]", "[\u0430]", 1529 //class range 1530 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1531 //back reference 1532 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1533 }; 1534 1535 String[] texts = new String[] { 1536 "A", "\u00c0", "\u0410", 1537 "AB", "\u00c0\u00c1", "\u0410\u0411", 1538 "A", "\u00c0", "\u0410", 1539 "B", "\u00c2", "\u0411", 1540 "aA", "\u00e0\u00c0", "\u0430\u0410" 1541 }; 1542 1543 boolean[] expected = new boolean[] { 1544 true, false, false, 1545 true, false, false, 1546 true, false, false, 1547 true, false, false, 1548 true, false, false 1549 }; 1550 1551 flags = Pattern.CASE_INSENSITIVE; 1552 for (int i = 0; i < patterns.length; i++) { 1553 pattern = Pattern.compile(patterns[i], flags); 1554 matcher = pattern.matcher(texts[i]); 1555 if (matcher.matches() != expected[i]) { 1556 System.out.println("<1> Failed at " + i); 1557 failCount++; 1558 } 1559 } 1560 1561 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1562 for (int i = 0; i < patterns.length; i++) { 1563 pattern = Pattern.compile(patterns[i], flags); 1564 matcher = pattern.matcher(texts[i]); 1565 if (!matcher.matches()) { 1566 System.out.println("<2> Failed at " + i); 1567 failCount++; 1568 } 1569 } 1570 // flag unicode_case alone should do nothing 1571 flags = Pattern.UNICODE_CASE; 1572 for (int i = 0; i < patterns.length; i++) { 1573 pattern = Pattern.compile(patterns[i], flags); 1574 matcher = pattern.matcher(texts[i]); 1575 if (matcher.matches()) { 1576 System.out.println("<3> Failed at " + i); 1577 failCount++; 1578 } 1579 } 1580 1581 // Special cases: i, I, 
u+0131 and u+0130 1582 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1583 pattern = Pattern.compile("[h-j]+", flags); 1584 if (!pattern.matcher("\u0131\u0130").matches()) 1585 failCount++; 1586 report("Case Folding"); 1587 } 1588 1589 private static void appendTest() { 1590 Pattern pattern = Pattern.compile("(ab)(cd)"); 1591 Matcher matcher = pattern.matcher("abcd"); 1592 String result = matcher.replaceAll("$2$1"); 1593 if (!result.equals("cdab")) 1594 failCount++; 1595 1596 String s1 = "Swap all: first = 123, second = 456"; 1597 String s2 = "Swap one: first = 123, second = 456"; 1598 String r = "$3$2$1"; 1599 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1600 matcher = pattern.matcher(s1); 1601 1602 result = matcher.replaceAll(r); 1603 if (!result.equals("Swap all: 123 = first, 456 = second")) 1604 failCount++; 1605 1606 matcher = pattern.matcher(s2); 1607 1608 if (matcher.find()) { 1609 StringBuffer sb = new StringBuffer(); 1610 matcher.appendReplacement(sb, r); 1611 matcher.appendTail(sb); 1612 result = sb.toString(); 1613 if (!result.equals("Swap one: 123 = first, second = 456")) 1614 failCount++; 1615 } 1616 1617 // Supplementary character test 1618 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1619 matcher = pattern.matcher(toSupplementaries("abcd")); 1620 result = matcher.replaceAll("$2$1"); 1621 if (!result.equals(toSupplementaries("cdab"))) 1622 failCount++; 1623 1624 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1625 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1626 r = toSupplementaries("$3$2$1"); 1627 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1628 matcher = pattern.matcher(s1); 1629 1630 result = matcher.replaceAll(r); 1631 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1632 failCount++; 1633 1634 matcher = pattern.matcher(s2); 1635 1636 if (matcher.find()) { 1637 StringBuffer sb = new StringBuffer(); 1638 
matcher.appendReplacement(sb, r); 1639 matcher.appendTail(sb); 1640 result = sb.toString(); 1641 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1642 failCount++; 1643 } 1644 report("Append"); 1645 } 1646 1647 private static void splitTest() { 1648 Pattern pattern = Pattern.compile(":"); 1649 String[] result = pattern.split("foo:and:boo", 2); 1650 if (!result[0].equals("foo")) 1651 failCount++; 1652 if (!result[1].equals("and:boo")) 1653 failCount++; 1654 // Supplementary character test 1655 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1656 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1657 if (!result[0].equals(toSupplementaries("foo"))) 1658 failCount++; 1659 if (!result[1].equals(toSupplementaries("andXboo"))) 1660 failCount++; 1661 1662 CharBuffer cb = CharBuffer.allocate(100); 1663 cb.put("foo:and:boo"); 1664 cb.flip(); 1665 result = pattern.split(cb); 1666 if (!result[0].equals("foo")) 1667 failCount++; 1668 if (!result[1].equals("and")) 1669 failCount++; 1670 if (!result[2].equals("boo")) 1671 failCount++; 1672 1673 // Supplementary character test 1674 CharBuffer cbs = CharBuffer.allocate(100); 1675 cbs.put(toSupplementaries("fooXandXboo")); 1676 cbs.flip(); 1677 result = patternX.split(cbs); 1678 if (!result[0].equals(toSupplementaries("foo"))) 1679 failCount++; 1680 if (!result[1].equals(toSupplementaries("and"))) 1681 failCount++; 1682 if (!result[2].equals(toSupplementaries("boo"))) 1683 failCount++; 1684 1685 String source = "0123456789"; 1686 for (int limit=-2; limit<3; limit++) { 1687 for (int x=0; x<10; x++) { 1688 result = source.split(Integer.toString(x), limit); 1689 int expectedLength = limit < 1 ? 
2 : limit; 1690 1691 if ((limit == 0) && (x == 9)) { 1692 // expected dropping of "" 1693 if (result.length != 1) 1694 failCount++; 1695 if (!result[0].equals("012345678")) { 1696 failCount++; 1697 } 1698 } else { 1699 if (result.length != expectedLength) { 1700 failCount++; 1701 } 1702 if (!result[0].equals(source.substring(0,x))) { 1703 if (limit != 1) { 1704 failCount++; 1705 } else { 1706 if (!result[0].equals(source.substring(0,10))) { 1707 failCount++; 1708 } 1709 } 1710 } 1711 if (expectedLength > 1) { // Check segment 2 1712 if (!result[1].equals(source.substring(x+1,10))) 1713 failCount++; 1714 } 1715 } 1716 } 1717 } 1718 // Check the case for no match found 1719 for (int limit=-2; limit<3; limit++) { 1720 result = source.split("e", limit); 1721 if (result.length != 1) 1722 failCount++; 1723 if (!result[0].equals(source)) 1724 failCount++; 1725 } 1726 // Check the case for limit == 0, source = ""; 1727 source = ""; 1728 result = source.split("e", 0); 1729 if (result.length != 1) 1730 failCount++; 1731 if (!result[0].equals(source)) 1732 failCount++; 1733 1734 report("Split"); 1735 } 1736 1737 private static void negationTest() { 1738 Pattern pattern = Pattern.compile("[\\[@^]+"); 1739 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1740 if (!matcher.find()) 1741 failCount++; 1742 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1743 failCount++; 1744 pattern = Pattern.compile("[@\\[^]+"); 1745 matcher = pattern.matcher("@@@@[[[[^^^^"); 1746 if (!matcher.find()) 1747 failCount++; 1748 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1749 failCount++; 1750 pattern = Pattern.compile("[@\\[^@]+"); 1751 matcher = pattern.matcher("@@@@[[[[^^^^"); 1752 if (!matcher.find()) 1753 failCount++; 1754 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1755 failCount++; 1756 1757 pattern = Pattern.compile("\\)"); 1758 matcher = pattern.matcher("xxx)xxx"); 1759 if (!matcher.find()) 1760 failCount++; 1761 1762 report("Negation"); 1763 } 1764 1765 private static void 
ampersandTest() { 1766 Pattern pattern = Pattern.compile("[&@]+"); 1767 check(pattern, "@@@@&&&&", true); 1768 1769 pattern = Pattern.compile("[@&]+"); 1770 check(pattern, "@@@@&&&&", true); 1771 1772 pattern = Pattern.compile("[@\\&]+"); 1773 check(pattern, "@@@@&&&&", true); 1774 1775 report("Ampersand"); 1776 } 1777 1778 private static void octalTest() throws Exception { 1779 Pattern pattern = Pattern.compile("\\u0007"); 1780 Matcher matcher = pattern.matcher("\u0007"); 1781 if (!matcher.matches()) 1782 failCount++; 1783 pattern = Pattern.compile("\\07"); 1784 matcher = pattern.matcher("\u0007"); 1785 if (!matcher.matches()) 1786 failCount++; 1787 pattern = Pattern.compile("\\007"); 1788 matcher = pattern.matcher("\u0007"); 1789 if (!matcher.matches()) 1790 failCount++; 1791 pattern = Pattern.compile("\\0007"); 1792 matcher = pattern.matcher("\u0007"); 1793 if (!matcher.matches()) 1794 failCount++; 1795 pattern = Pattern.compile("\\040"); 1796 matcher = pattern.matcher("\u0020"); 1797 if (!matcher.matches()) 1798 failCount++; 1799 pattern = Pattern.compile("\\0403"); 1800 matcher = pattern.matcher("\u00203"); 1801 if (!matcher.matches()) 1802 failCount++; 1803 pattern = Pattern.compile("\\0103"); 1804 matcher = pattern.matcher("\u0043"); 1805 if (!matcher.matches()) 1806 failCount++; 1807 1808 report("Octal"); 1809 } 1810 1811 private static void longPatternTest() throws Exception { 1812 try { 1813 Pattern pattern = Pattern.compile( 1814 "a 32-character-long pattern xxxx"); 1815 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 1816 pattern = Pattern.compile("a thirty four character long regex"); 1817 StringBuffer patternToBe = new StringBuffer(101); 1818 for (int i=0; i<100; i++) 1819 patternToBe.append((char)(97 + i%26)); 1820 pattern = Pattern.compile(patternToBe.toString()); 1821 } catch (PatternSyntaxException e) { 1822 failCount++; 1823 } 1824 1825 // Supplementary character test 1826 try { 1827 Pattern pattern = Pattern.compile( 1828 
toSupplementaries("a 32-character-long pattern xxxx")); 1829 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 1830 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 1831 StringBuffer patternToBe = new StringBuffer(101*2); 1832 for (int i=0; i<100; i++) 1833 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 1834 + 97 + i%26)); 1835 pattern = Pattern.compile(patternToBe.toString()); 1836 } catch (PatternSyntaxException e) { 1837 failCount++; 1838 } 1839 report("LongPattern"); 1840 } 1841 1842 private static void group0Test() throws Exception { 1843 Pattern pattern = Pattern.compile("(tes)ting"); 1844 Matcher matcher = pattern.matcher("testing"); 1845 check(matcher, "testing"); 1846 1847 matcher.reset("testing"); 1848 if (matcher.lookingAt()) { 1849 if (!matcher.group(0).equals("testing")) 1850 failCount++; 1851 } else { 1852 failCount++; 1853 } 1854 1855 matcher.reset("testing"); 1856 if (matcher.matches()) { 1857 if (!matcher.group(0).equals("testing")) 1858 failCount++; 1859 } else { 1860 failCount++; 1861 } 1862 1863 pattern = Pattern.compile("(tes)ting"); 1864 matcher = pattern.matcher("testing"); 1865 if (matcher.lookingAt()) { 1866 if (!matcher.group(0).equals("testing")) 1867 failCount++; 1868 } else { 1869 failCount++; 1870 } 1871 1872 pattern = Pattern.compile("^(tes)ting"); 1873 matcher = pattern.matcher("testing"); 1874 if (matcher.matches()) { 1875 if (!matcher.group(0).equals("testing")) 1876 failCount++; 1877 } else { 1878 failCount++; 1879 } 1880 1881 // Supplementary character test 1882 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 1883 matcher = pattern.matcher(toSupplementaries("testing")); 1884 check(matcher, toSupplementaries("testing")); 1885 1886 matcher.reset(toSupplementaries("testing")); 1887 if (matcher.lookingAt()) { 1888 if (!matcher.group(0).equals(toSupplementaries("testing"))) 1889 failCount++; 1890 } else { 1891 failCount++; 1892 
} 1893 1894 matcher.reset(toSupplementaries("testing")); 1895 if (matcher.matches()) { 1896 if (!matcher.group(0).equals(toSupplementaries("testing"))) 1897 failCount++; 1898 } else { 1899 failCount++; 1900 } 1901 1902 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 1903 matcher = pattern.matcher(toSupplementaries("testing")); 1904 if (matcher.lookingAt()) { 1905 if (!matcher.group(0).equals(toSupplementaries("testing"))) 1906 failCount++; 1907 } else { 1908 failCount++; 1909 } 1910 1911 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 1912 matcher = pattern.matcher(toSupplementaries("testing")); 1913 if (matcher.matches()) { 1914 if (!matcher.group(0).equals(toSupplementaries("testing"))) 1915 failCount++; 1916 } else { 1917 failCount++; 1918 } 1919 1920 report("Group0"); 1921 } 1922 1923 private static void findIntTest() throws Exception { 1924 Pattern p = Pattern.compile("blah"); 1925 Matcher m = p.matcher("zzzzblahzzzzzblah"); 1926 boolean result = m.find(2); 1927 if (!result) 1928 failCount++; 1929 1930 p = Pattern.compile("$"); 1931 m = p.matcher("1234567890"); 1932 result = m.find(10); 1933 if (!result) 1934 failCount++; 1935 try { 1936 result = m.find(11); 1937 failCount++; 1938 } catch (IndexOutOfBoundsException e) { 1939 // correct result 1940 } 1941 1942 // Supplementary character test 1943 p = Pattern.compile(toSupplementaries("blah")); 1944 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 1945 result = m.find(2); 1946 if (!result) 1947 failCount++; 1948 1949 report("FindInt"); 1950 } 1951 1952 private static void emptyPatternTest() throws Exception { 1953 Pattern p = Pattern.compile(""); 1954 Matcher m = p.matcher("foo"); 1955 1956 // Should find empty pattern at beginning of input 1957 boolean result = m.find(); 1958 if (result != true) 1959 failCount++; 1960 if (m.start() != 0) 1961 failCount++; 1962 1963 // Should not match entire input if input is not empty 1964 m.reset(); 1965 result = m.matches(); 1966 if (result == 
true) 1967 failCount++; 1968 1969 try { 1970 m.start(0); 1971 failCount++; 1972 } catch (IllegalStateException e) { 1973 // Correct result 1974 } 1975 1976 // Should match entire input if input is empty 1977 m.reset(""); 1978 result = m.matches(); 1979 if (result != true) 1980 failCount++; 1981 1982 result = Pattern.matches("", ""); 1983 if (result != true) 1984 failCount++; 1985 1986 result = Pattern.matches("", "foo"); 1987 if (result == true) 1988 failCount++; 1989 report("EmptyPattern"); 1990 } 1991 1992 private static void charClassTest() throws Exception { 1993 Pattern pattern = Pattern.compile("blah[ab]]blech"); 1994 check(pattern, "blahb]blech", true); 1995 1996 pattern = Pattern.compile("[abc[def]]"); 1997 check(pattern, "b", true); 1998 1999 // Supplementary character tests 2000 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2001 check(pattern, toSupplementaries("blahb]blech"), true); 2002 2003 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2004 check(pattern, toSupplementaries("b"), true); 2005 2006 try { 2007 // u00ff when UNICODE_CASE 2008 pattern = Pattern.compile("[ab\u00ffcd]", 2009 Pattern.CASE_INSENSITIVE| 2010 Pattern.UNICODE_CASE); 2011 check(pattern, "ab\u00ffcd", true); 2012 check(pattern, "Ab\u0178Cd", true); 2013 2014 // u00b5 when UNICODE_CASE 2015 pattern = Pattern.compile("[ab\u00b5cd]", 2016 Pattern.CASE_INSENSITIVE| 2017 Pattern.UNICODE_CASE); 2018 check(pattern, "ab\u00b5cd", true); 2019 check(pattern, "Ab\u039cCd", true); 2020 } catch (Exception e) { failCount++; } 2021 2022 /* Special cases 2023 (1)LatinSmallLetterLongS u+017f 2024 (2)LatinSmallLetterDotlessI u+0131 2025 (3)LatineCapitalLetterIWithDotAbove u+0130 2026 (4)KelvinSign u+212a 2027 (5)AngstromSign u+212b 2028 */ 2029 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2030 pattern = Pattern.compile("[sik\u00c5]+", flags); 2031 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2032 failCount++; 2033 2034 
report("CharClass"); 2035 } 2036 2037 private static void caretTest() throws Exception { 2038 Pattern pattern = Pattern.compile("\\w*"); 2039 Matcher matcher = pattern.matcher("a#bc#def##g"); 2040 check(matcher, "a"); 2041 check(matcher, ""); 2042 check(matcher, "bc"); 2043 check(matcher, ""); 2044 check(matcher, "def"); 2045 check(matcher, ""); 2046 check(matcher, ""); 2047 check(matcher, "g"); 2048 check(matcher, ""); 2049 if (matcher.find()) 2050 failCount++; 2051 2052 pattern = Pattern.compile("^\\w*"); 2053 matcher = pattern.matcher("a#bc#def##g"); 2054 check(matcher, "a"); 2055 if (matcher.find()) 2056 failCount++; 2057 2058 pattern = Pattern.compile("\\w"); 2059 matcher = pattern.matcher("abc##x"); 2060 check(matcher, "a"); 2061 check(matcher, "b"); 2062 check(matcher, "c"); 2063 check(matcher, "x"); 2064 if (matcher.find()) 2065 failCount++; 2066 2067 pattern = Pattern.compile("^\\w"); 2068 matcher = pattern.matcher("abc##x"); 2069 check(matcher, "a"); 2070 if (matcher.find()) 2071 failCount++; 2072 2073 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2074 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2075 check(matcher, "abc"); 2076 if (matcher.find()) 2077 failCount++; 2078 2079 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2080 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2081 check(matcher, "abc"); 2082 check(matcher, "jkl"); 2083 if (matcher.find()) 2084 failCount++; 2085 2086 pattern = Pattern.compile("^", Pattern.MULTILINE); 2087 matcher = pattern.matcher("this is some text"); 2088 String result = matcher.replaceAll("X"); 2089 if (!result.equals("Xthis is some text")) 2090 failCount++; 2091 2092 pattern = Pattern.compile("^"); 2093 matcher = pattern.matcher("this is some text"); 2094 result = matcher.replaceAll("X"); 2095 if (!result.equals("Xthis is some text")) 2096 failCount++; 2097 2098 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2099 matcher = pattern.matcher("this is some text\n"); 2100 result 
= matcher.replaceAll("X"); 2101 if (!result.equals("Xthis is some text\n")) 2102 failCount++; 2103 2104 report("Caret"); 2105 } 2106 2107 private static void groupCaptureTest() throws Exception { 2108 // Independent group 2109 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2110 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2111 matcher.find(); 2112 try { 2113 String blah = matcher.group(1); 2114 failCount++; 2115 } catch (IndexOutOfBoundsException ioobe) { 2116 // Good result 2117 } 2118 // Pure group 2119 pattern = Pattern.compile("x+(?:y+)z+"); 2120 matcher = pattern.matcher("xxxyyyzzz"); 2121 matcher.find(); 2122 try { 2123 String blah = matcher.group(1); 2124 failCount++; 2125 } catch (IndexOutOfBoundsException ioobe) { 2126 // Good result 2127 } 2128 2129 // Supplementary character tests 2130 // Independent group 2131 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2132 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2133 matcher.find(); 2134 try { 2135 String blah = matcher.group(1); 2136 failCount++; 2137 } catch (IndexOutOfBoundsException ioobe) { 2138 // Good result 2139 } 2140 // Pure group 2141 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2142 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2143 matcher.find(); 2144 try { 2145 String blah = matcher.group(1); 2146 failCount++; 2147 } catch (IndexOutOfBoundsException ioobe) { 2148 // Good result 2149 } 2150 2151 report("GroupCapture"); 2152 } 2153 2154 private static void backRefTest() throws Exception { 2155 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2156 check(pattern, "zzzaabcazzz", true); 2157 2158 pattern = Pattern.compile("(a*)bc\\1"); 2159 check(pattern, "zzzaabcaazzz", true); 2160 2161 pattern = Pattern.compile("(abc)(def)\\1"); 2162 check(pattern, "abcdefabc", true); 2163 2164 pattern = Pattern.compile("(abc)(def)\\3"); 2165 check(pattern, "abcdefabc", false); 2166 2167 try { 2168 for (int i = 1; i < 10; i++) { 2169 // Make sure backref 1-9 
are always accepted 2170 pattern = Pattern.compile("abcdef\\" + i); 2171 // and fail to match if the target group does not exit 2172 check(pattern, "abcdef", false); 2173 } 2174 } catch(PatternSyntaxException e) { 2175 failCount++; 2176 } 2177 2178 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2179 check(pattern, "abcdefghija", false); 2180 check(pattern, "abcdefghija1", true); 2181 2182 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2183 check(pattern, "abcdefghijkk", true); 2184 2185 pattern = Pattern.compile("(a)bcdefghij\\11"); 2186 check(pattern, "abcdefghija1", true); 2187 2188 // Supplementary character tests 2189 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2190 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2191 2192 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2193 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2194 2195 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2196 check(pattern, toSupplementaries("abcdefabc"), true); 2197 2198 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2199 check(pattern, toSupplementaries("abcdefabc"), false); 2200 2201 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2202 check(pattern, toSupplementaries("abcdefghija"), false); 2203 check(pattern, toSupplementaries("abcdefghija1"), true); 2204 2205 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2206 check(pattern, toSupplementaries("abcdefghijkk"), true); 2207 2208 report("BackRef"); 2209 } 2210 2211 /** 2212 * Unicode Technical Report #18, section 2.6 End of Line 2213 * There is no empty line to be matched in the sequence \u000D\u000A 2214 * but there is an empty line in the sequence \u000A\u000D. 
2215 */ 2216 private static void anchorTest() throws Exception { 2217 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2218 Matcher m = p.matcher("blah1\r\nblah2"); 2219 m.find(); 2220 m.find(); 2221 if (!m.group().equals("blah2")) 2222 failCount++; 2223 2224 m.reset("blah1\n\rblah2"); 2225 m.find(); 2226 m.find(); 2227 m.find(); 2228 if (!m.group().equals("blah2")) 2229 failCount++; 2230 2231 // Test behavior of $ with \r\n at end of input 2232 p = Pattern.compile(".+$"); 2233 m = p.matcher("blah1\r\n"); 2234 if (!m.find()) 2235 failCount++; 2236 if (!m.group().equals("blah1")) 2237 failCount++; 2238 if (m.find()) 2239 failCount++; 2240 2241 // Test behavior of $ with \r\n at end of input in multiline 2242 p = Pattern.compile(".+$", Pattern.MULTILINE); 2243 m = p.matcher("blah1\r\n"); 2244 if (!m.find()) 2245 failCount++; 2246 if (m.find()) 2247 failCount++; 2248 2249 // Test for $ recognition of \u0085 for bug 4527731 2250 p = Pattern.compile(".+$", Pattern.MULTILINE); 2251 m = p.matcher("blah1\u0085"); 2252 if (!m.find()) 2253 failCount++; 2254 2255 // Supplementary character test 2256 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2257 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2258 m.find(); 2259 m.find(); 2260 if (!m.group().equals(toSupplementaries("blah2"))) 2261 failCount++; 2262 2263 m.reset(toSupplementaries("blah1\n\rblah2")); 2264 m.find(); 2265 m.find(); 2266 m.find(); 2267 if (!m.group().equals(toSupplementaries("blah2"))) 2268 failCount++; 2269 2270 // Test behavior of $ with \r\n at end of input 2271 p = Pattern.compile(".+$"); 2272 m = p.matcher(toSupplementaries("blah1\r\n")); 2273 if (!m.find()) 2274 failCount++; 2275 if (!m.group().equals(toSupplementaries("blah1"))) 2276 failCount++; 2277 if (m.find()) 2278 failCount++; 2279 2280 // Test behavior of $ with \r\n at end of input in multiline 2281 p = Pattern.compile(".+$", Pattern.MULTILINE); 2282 m = p.matcher(toSupplementaries("blah1\r\n")); 2283 if (!m.find()) 2284 
failCount++; 2285 if (m.find()) 2286 failCount++; 2287 2288 // Test for $ recognition of \u0085 for bug 4527731 2289 p = Pattern.compile(".+$", Pattern.MULTILINE); 2290 m = p.matcher(toSupplementaries("blah1\u0085")); 2291 if (!m.find()) 2292 failCount++; 2293 2294 report("Anchors"); 2295 } 2296 2297 /** 2298 * A basic sanity test of Matcher.lookingAt(). 2299 */ 2300 private static void lookingAtTest() throws Exception { 2301 Pattern p = Pattern.compile("(ab)(c*)"); 2302 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2303 2304 if (!m.lookingAt()) 2305 failCount++; 2306 2307 if (!m.group().equals(m.group(0))) 2308 failCount++; 2309 2310 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2311 if (m.lookingAt()) 2312 failCount++; 2313 2314 // Supplementary character test 2315 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2316 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2317 2318 if (!m.lookingAt()) 2319 failCount++; 2320 2321 if (!m.group().equals(m.group(0))) 2322 failCount++; 2323 2324 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2325 if (m.lookingAt()) 2326 failCount++; 2327 2328 report("Looking At"); 2329 } 2330 2331 /** 2332 * A basic sanity test of Matcher.matches(). 
2333 */ 2334 private static void matchesTest() throws Exception { 2335 // matches() 2336 Pattern p = Pattern.compile("ulb(c*)"); 2337 Matcher m = p.matcher("ulbcccccc"); 2338 if (!m.matches()) 2339 failCount++; 2340 2341 // find() but not matches() 2342 m.reset("zzzulbcccccc"); 2343 if (m.matches()) 2344 failCount++; 2345 2346 // lookingAt() but not matches() 2347 m.reset("ulbccccccdef"); 2348 if (m.matches()) 2349 failCount++; 2350 2351 // matches() 2352 p = Pattern.compile("a|ad"); 2353 m = p.matcher("ad"); 2354 if (!m.matches()) 2355 failCount++; 2356 2357 // Supplementary character test 2358 // matches() 2359 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2360 m = p.matcher(toSupplementaries("ulbcccccc")); 2361 if (!m.matches()) 2362 failCount++; 2363 2364 // find() but not matches() 2365 m.reset(toSupplementaries("zzzulbcccccc")); 2366 if (m.matches()) 2367 failCount++; 2368 2369 // lookingAt() but not matches() 2370 m.reset(toSupplementaries("ulbccccccdef")); 2371 if (m.matches()) 2372 failCount++; 2373 2374 // matches() 2375 p = Pattern.compile(toSupplementaries("a|ad")); 2376 m = p.matcher(toSupplementaries("ad")); 2377 if (!m.matches()) 2378 failCount++; 2379 2380 report("Matches"); 2381 } 2382 2383 /** 2384 * A basic sanity test of Pattern.matches(). 
2385 */ 2386 private static void patternMatchesTest() throws Exception { 2387 // matches() 2388 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2389 toSupplementaries("ulbcccccc"))) 2390 failCount++; 2391 2392 // find() but not matches() 2393 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2394 toSupplementaries("zzzulbcccccc"))) 2395 failCount++; 2396 2397 // lookingAt() but not matches() 2398 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2399 toSupplementaries("ulbccccccdef"))) 2400 failCount++; 2401 2402 // Supplementary character test 2403 // matches() 2404 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2405 toSupplementaries("ulbcccccc"))) 2406 failCount++; 2407 2408 // find() but not matches() 2409 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2410 toSupplementaries("zzzulbcccccc"))) 2411 failCount++; 2412 2413 // lookingAt() but not matches() 2414 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2415 toSupplementaries("ulbccccccdef"))) 2416 failCount++; 2417 2418 report("Pattern Matches"); 2419 } 2420 2421 /** 2422 * Canonical equivalence testing. Tests the ability of the engine 2423 * to match sequences that are not explicitly specified in the 2424 * pattern when they are considered equivalent by the Unicode Standard. 
2425 */ 2426 private static void ceTest() throws Exception { 2427 // Decomposed char outside char classes 2428 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2429 Matcher m = p.matcher("test\u00e5"); 2430 if (!m.matches()) 2431 failCount++; 2432 2433 m.reset("testa\u030a"); 2434 if (!m.matches()) 2435 failCount++; 2436 2437 // Composed char outside char classes 2438 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2439 m = p.matcher("test\u00e5"); 2440 if (!m.matches()) 2441 failCount++; 2442 2443 m.reset("testa\u030a"); 2444 if (!m.find()) 2445 failCount++; 2446 2447 // Decomposed char inside a char class 2448 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2449 m = p.matcher("test\u00e5"); 2450 if (!m.find()) 2451 failCount++; 2452 2453 m.reset("testa\u030a"); 2454 if (!m.find()) 2455 failCount++; 2456 2457 // Composed char inside a char class 2458 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2459 m = p.matcher("test\u00e5"); 2460 if (!m.find()) 2461 failCount++; 2462 2463 m.reset("testa\u0300"); 2464 if (!m.find()) 2465 failCount++; 2466 2467 m.reset("testa\u030a"); 2468 if (!m.find()) 2469 failCount++; 2470 2471 // Marks that cannot legally change order and be equivalent 2472 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2473 check(p, "testa\u0308\u0300", true); 2474 check(p, "testa\u0300\u0308", false); 2475 2476 // Marks that can legally change order and be equivalent 2477 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2478 check(p, "testa\u0308\u0323", true); 2479 check(p, "testa\u0323\u0308", true); 2480 2481 // Test all equivalences of the sequence a\u0308\u0323\u0300 2482 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2483 check(p, "testa\u0308\u0323\u0300", true); 2484 check(p, "testa\u0323\u0308\u0300", true); 2485 check(p, "testa\u0308\u0300\u0323", true); 2486 check(p, "test\u00e4\u0323\u0300", true); 2487 check(p, "test\u00e4\u0300\u0323", true); 2488 
2489 /* 2490 * The following canonical equivalence tests don't work. Bug id: 4916384. 2491 * 2492 // Decomposed hangul (jamos) 2493 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ); 2494 m = p.matcher("\u1100\u1161"); 2495 if (!m.matches()) 2496 failCount++; 2497 2498 m.reset("\uac00"); 2499 if (!m.matches()) 2500 failCount++; 2501 2502 // Composed hangul 2503 p = Pattern.compile("\uac00", Pattern.CANON_EQ); 2504 m = p.matcher("\u1100\u1161"); 2505 if (!m.matches()) 2506 failCount++; 2507 2508 m.reset("\uac00"); 2509 if (!m.matches()) 2510 failCount++; 2511 2512 // Decomposed supplementary outside char classes 2513 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ); 2514 m = p.matcher("test\ud834\uddc0"); 2515 if (!m.matches()) 2516 failCount++; 2517 2518 m.reset("test\ud834\uddbc\ud834\udd6f"); 2519 if (!m.matches()) 2520 failCount++; 2521 2522 // Composed supplementary outside char classes 2523 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ); 2524 m.reset("test\ud834\uddbc\ud834\udd6f"); 2525 if (!m.matches()) 2526 failCount++; 2527 2528 m = p.matcher("test\ud834\uddc0"); 2529 if (!m.matches()) 2530 failCount++; 2531 2532 */ 2533 2534 report("Canonical Equivalence"); 2535 } 2536 2537 /** 2538 * A basic sanity test of Matcher.replaceAll(). 
2539 */ 2540 private static void globalSubstitute() throws Exception { 2541 // Global substitution with a literal 2542 Pattern p = Pattern.compile("(ab)(c*)"); 2543 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2544 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2545 failCount++; 2546 2547 m.reset("zzzabccczzzabcczzzabccczzz"); 2548 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2549 failCount++; 2550 2551 // Global substitution with groups 2552 m.reset("zzzabccczzzabcczzzabccczzz"); 2553 String result = m.replaceAll("$1"); 2554 if (!result.equals("zzzabzzzabzzzabzzz")) 2555 failCount++; 2556 2557 // Supplementary character test 2558 // Global substitution with a literal 2559 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2560 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2561 if (!m.replaceAll(toSupplementaries("test")). 2562 equals(toSupplementaries("testzzztestzzztest"))) 2563 failCount++; 2564 2565 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2566 if (!m.replaceAll(toSupplementaries("test")). 2567 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2568 failCount++; 2569 2570 // Global substitution with groups 2571 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2572 result = m.replaceAll("$1"); 2573 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2574 failCount++; 2575 2576 report("Global Substitution"); 2577 } 2578 2579 /** 2580 * Tests the usage of Matcher.appendReplacement() with literal 2581 * and group substitutions. 
2582 */ 2583 private static void stringbufferSubstitute() throws Exception { 2584 // SB substitution with literal 2585 String blah = "zzzblahzzz"; 2586 Pattern p = Pattern.compile("blah"); 2587 Matcher m = p.matcher(blah); 2588 StringBuffer result = new StringBuffer(); 2589 try { 2590 m.appendReplacement(result, "blech"); 2591 failCount++; 2592 } catch (IllegalStateException e) { 2593 } 2594 m.find(); 2595 m.appendReplacement(result, "blech"); 2596 if (!result.toString().equals("zzzblech")) 2597 failCount++; 2598 2599 m.appendTail(result); 2600 if (!result.toString().equals("zzzblechzzz")) 2601 failCount++; 2602 2603 // SB substitution with groups 2604 blah = "zzzabcdzzz"; 2605 p = Pattern.compile("(ab)(cd)*"); 2606 m = p.matcher(blah); 2607 result = new StringBuffer(); 2608 try { 2609 m.appendReplacement(result, "$1"); 2610 failCount++; 2611 } catch (IllegalStateException e) { 2612 } 2613 m.find(); 2614 m.appendReplacement(result, "$1"); 2615 if (!result.toString().equals("zzzab")) 2616 failCount++; 2617 2618 m.appendTail(result); 2619 if (!result.toString().equals("zzzabzzz")) 2620 failCount++; 2621 2622 // SB substitution with 3 groups 2623 blah = "zzzabcdcdefzzz"; 2624 p = Pattern.compile("(ab)(cd)*(ef)"); 2625 m = p.matcher(blah); 2626 result = new StringBuffer(); 2627 try { 2628 m.appendReplacement(result, "$1w$2w$3"); 2629 failCount++; 2630 } catch (IllegalStateException e) { 2631 } 2632 m.find(); 2633 m.appendReplacement(result, "$1w$2w$3"); 2634 if (!result.toString().equals("zzzabwcdwef")) 2635 failCount++; 2636 2637 m.appendTail(result); 2638 if (!result.toString().equals("zzzabwcdwefzzz")) 2639 failCount++; 2640 2641 // SB substitution with groups and three matches 2642 // skipping middle match 2643 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2644 p = Pattern.compile("(ab)(cd*)"); 2645 m = p.matcher(blah); 2646 result = new StringBuffer(); 2647 try { 2648 m.appendReplacement(result, "$1"); 2649 failCount++; 2650 } catch (IllegalStateException e) { 2651 } 2652 
m.find(); 2653 m.appendReplacement(result, "$1"); 2654 if (!result.toString().equals("zzzab")) 2655 failCount++; 2656 2657 m.find(); 2658 m.find(); 2659 m.appendReplacement(result, "$2"); 2660 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2661 failCount++; 2662 2663 m.appendTail(result); 2664 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2665 failCount++; 2666 2667 // Check to make sure escaped $ is ignored 2668 blah = "zzzabcdcdefzzz"; 2669 p = Pattern.compile("(ab)(cd)*(ef)"); 2670 m = p.matcher(blah); 2671 result = new StringBuffer(); 2672 m.find(); 2673 m.appendReplacement(result, "$1w\\$2w$3"); 2674 if (!result.toString().equals("zzzabw$2wef")) 2675 failCount++; 2676 2677 m.appendTail(result); 2678 if (!result.toString().equals("zzzabw$2wefzzz")) 2679 failCount++; 2680 2681 // Check to make sure a reference to nonexistent group causes error 2682 blah = "zzzabcdcdefzzz"; 2683 p = Pattern.compile("(ab)(cd)*(ef)"); 2684 m = p.matcher(blah); 2685 result = new StringBuffer(); 2686 m.find(); 2687 try { 2688 m.appendReplacement(result, "$1w$5w$3"); 2689 failCount++; 2690 } catch (IndexOutOfBoundsException ioobe) { 2691 // Correct result 2692 } 2693 2694 // Check double digit group references 2695 blah = "zzz123456789101112zzz"; 2696 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2697 m = p.matcher(blah); 2698 result = new StringBuffer(); 2699 m.find(); 2700 m.appendReplacement(result, "$1w$11w$3"); 2701 if (!result.toString().equals("zzz1w11w3")) 2702 failCount++; 2703 2704 // Check to make sure it backs off $15 to $1 if only three groups 2705 blah = "zzzabcdcdefzzz"; 2706 p = Pattern.compile("(ab)(cd)*(ef)"); 2707 m = p.matcher(blah); 2708 result = new StringBuffer(); 2709 m.find(); 2710 m.appendReplacement(result, "$1w$15w$3"); 2711 if (!result.toString().equals("zzzabwab5wef")) 2712 failCount++; 2713 2714 2715 // Supplementary character test 2716 // SB substitution with literal 2717 blah = toSupplementaries("zzzblahzzz"); 2718 p = 
Pattern.compile(toSupplementaries("blah")); 2719 m = p.matcher(blah); 2720 result = new StringBuffer(); 2721 try { 2722 m.appendReplacement(result, toSupplementaries("blech")); 2723 failCount++; 2724 } catch (IllegalStateException e) { 2725 } 2726 m.find(); 2727 m.appendReplacement(result, toSupplementaries("blech")); 2728 if (!result.toString().equals(toSupplementaries("zzzblech"))) 2729 failCount++; 2730 2731 m.appendTail(result); 2732 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 2733 failCount++; 2734 2735 // SB substitution with groups 2736 blah = toSupplementaries("zzzabcdzzz"); 2737 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 2738 m = p.matcher(blah); 2739 result = new StringBuffer(); 2740 try { 2741 m.appendReplacement(result, "$1"); 2742 failCount++; 2743 } catch (IllegalStateException e) { 2744 } 2745 m.find(); 2746 m.appendReplacement(result, "$1"); 2747 if (!result.toString().equals(toSupplementaries("zzzab"))) 2748 failCount++; 2749 2750 m.appendTail(result); 2751 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 2752 failCount++; 2753 2754 // SB substitution with 3 groups 2755 blah = toSupplementaries("zzzabcdcdefzzz"); 2756 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2757 m = p.matcher(blah); 2758 result = new StringBuffer(); 2759 try { 2760 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2761 failCount++; 2762 } catch (IllegalStateException e) { 2763 } 2764 m.find(); 2765 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2766 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 2767 failCount++; 2768 2769 m.appendTail(result); 2770 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 2771 failCount++; 2772 2773 // SB substitution with groups and three matches 2774 // skipping middle match 2775 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 2776 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 2777 m = p.matcher(blah); 2778 result = new 
StringBuffer(); 2779 try { 2780 m.appendReplacement(result, "$1"); 2781 failCount++; 2782 } catch (IllegalStateException e) { 2783 } 2784 m.find(); 2785 m.appendReplacement(result, "$1"); 2786 if (!result.toString().equals(toSupplementaries("zzzab"))) 2787 failCount++; 2788 2789 m.find(); 2790 m.find(); 2791 m.appendReplacement(result, "$2"); 2792 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 2793 failCount++; 2794 2795 m.appendTail(result); 2796 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 2797 failCount++; 2798 2799 // Check to make sure escaped $ is ignored 2800 blah = toSupplementaries("zzzabcdcdefzzz"); 2801 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2802 m = p.matcher(blah); 2803 result = new StringBuffer(); 2804 m.find(); 2805 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 2806 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 2807 failCount++; 2808 2809 m.appendTail(result); 2810 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 2811 failCount++; 2812 2813 // Check to make sure a reference to nonexistent group causes error 2814 blah = toSupplementaries("zzzabcdcdefzzz"); 2815 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2816 m = p.matcher(blah); 2817 result = new StringBuffer(); 2818 m.find(); 2819 try { 2820 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 2821 failCount++; 2822 } catch (IndexOutOfBoundsException ioobe) { 2823 // Correct result 2824 } 2825 2826 // Check double digit group references 2827 blah = toSupplementaries("zzz123456789101112zzz"); 2828 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2829 m = p.matcher(blah); 2830 result = new StringBuffer(); 2831 m.find(); 2832 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 2833 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 2834 failCount++; 2835 2836 // Check to make sure it backs off $15 to $1 if only three groups 
2837 blah = toSupplementaries("zzzabcdcdefzzz"); 2838 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2839 m = p.matcher(blah); 2840 result = new StringBuffer(); 2841 m.find(); 2842 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 2843 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 2844 failCount++; 2845 2846 // Check nothing has been appended into the output buffer if 2847 // the replacement string triggers IllegalArgumentException. 2848 p = Pattern.compile("(abc)"); 2849 m = p.matcher("abcd"); 2850 result = new StringBuffer(); 2851 m.find(); 2852 try { 2853 m.appendReplacement(result, ("xyz$g")); 2854 failCount++; 2855 } catch (IllegalArgumentException iae) { 2856 if (result.length() != 0) 2857 failCount++; 2858 } 2859 2860 report("SB Substitution"); 2861 } 2862 2863 /* 2864 * 5 groups of characters are created to make a substitution string. 2865 * A base string will be created including random lead chars, the 2866 * substitution string, and random trailing chars. 2867 * A pattern containing the 5 groups is searched for and replaced with: 2868 * random group + random string + random group. 2869 * The results are checked for correctness. 
2870 */ 2871 private static void substitutionBasher() { 2872 for (int runs = 0; runs<1000; runs++) { 2873 // Create a base string to work in 2874 int leadingChars = generator.nextInt(10); 2875 StringBuffer baseBuffer = new StringBuffer(100); 2876 String leadingString = getRandomAlphaString(leadingChars); 2877 baseBuffer.append(leadingString); 2878 2879 // Create 5 groups of random number of random chars 2880 // Create the string to substitute 2881 // Create the pattern string to search for 2882 StringBuffer bufferToSub = new StringBuffer(25); 2883 StringBuffer bufferToPat = new StringBuffer(50); 2884 String[] groups = new String[5]; 2885 for(int i=0; i<5; i++) { 2886 int aGroupSize = generator.nextInt(5)+1; 2887 groups[i] = getRandomAlphaString(aGroupSize); 2888 bufferToSub.append(groups[i]); 2889 bufferToPat.append('('); 2890 bufferToPat.append(groups[i]); 2891 bufferToPat.append(')'); 2892 } 2893 String stringToSub = bufferToSub.toString(); 2894 String pattern = bufferToPat.toString(); 2895 2896 // Place sub string into working string at random index 2897 baseBuffer.append(stringToSub); 2898 2899 // Append random chars to end 2900 int trailingChars = generator.nextInt(10); 2901 String trailingString = getRandomAlphaString(trailingChars); 2902 baseBuffer.append(trailingString); 2903 String baseString = baseBuffer.toString(); 2904 2905 // Create test pattern and matcher 2906 Pattern p = Pattern.compile(pattern); 2907 Matcher m = p.matcher(baseString); 2908 2909 // Reject candidate if pattern happens to start early 2910 m.find(); 2911 if (m.start() < leadingChars) 2912 continue; 2913 2914 // Reject candidate if more than one match 2915 if (m.find()) 2916 continue; 2917 2918 // Construct a replacement string with : 2919 // random group + random string + random group 2920 StringBuffer bufferToRep = new StringBuffer(); 2921 int groupIndex1 = generator.nextInt(5); 2922 bufferToRep.append("$" + (groupIndex1 + 1)); 2923 String randomMidString = getRandomAlphaString(5); 
2924 bufferToRep.append(randomMidString); 2925 int groupIndex2 = generator.nextInt(5); 2926 bufferToRep.append("$" + (groupIndex2 + 1)); 2927 String replacement = bufferToRep.toString(); 2928 2929 // Do the replacement 2930 String result = m.replaceAll(replacement); 2931 2932 // Construct expected result 2933 StringBuffer bufferToRes = new StringBuffer(); 2934 bufferToRes.append(leadingString); 2935 bufferToRes.append(groups[groupIndex1]); 2936 bufferToRes.append(randomMidString); 2937 bufferToRes.append(groups[groupIndex2]); 2938 bufferToRes.append(trailingString); 2939 String expectedResult = bufferToRes.toString(); 2940 2941 // Check results 2942 if (!result.equals(expectedResult)) 2943 failCount++; 2944 } 2945 2946 report("Substitution Basher"); 2947 } 2948 2949 /** 2950 * Checks the handling of some escape sequences that the Pattern 2951 * class should process instead of the java compiler. These are 2952 * not in the file because the escapes should be be processed 2953 * by the Pattern class when the regex is compiled. 2954 */ 2955 private static void escapes() throws Exception { 2956 Pattern p = Pattern.compile("\\043"); 2957 Matcher m = p.matcher("#"); 2958 if (!m.find()) 2959 failCount++; 2960 2961 p = Pattern.compile("\\x23"); 2962 m = p.matcher("#"); 2963 if (!m.find()) 2964 failCount++; 2965 2966 p = Pattern.compile("\\u0023"); 2967 m = p.matcher("#"); 2968 if (!m.find()) 2969 failCount++; 2970 2971 report("Escape sequences"); 2972 } 2973 2974 /** 2975 * Checks the handling of blank input situations. These 2976 * tests are incompatible with my test file format. 
2977 */ 2978 private static void blankInput() throws Exception { 2979 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 2980 Matcher m = p.matcher(""); 2981 if (m.find()) 2982 failCount++; 2983 2984 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 2985 m = p.matcher(""); 2986 if (!m.find()) 2987 failCount++; 2988 2989 p = Pattern.compile("abc"); 2990 m = p.matcher(""); 2991 if (m.find()) 2992 failCount++; 2993 2994 p = Pattern.compile("a*"); 2995 m = p.matcher(""); 2996 if (!m.find()) 2997 failCount++; 2998 2999 report("Blank input"); 3000 } 3001 3002 /** 3003 * Tests the Boyer-Moore pattern matching of a character sequence 3004 * on randomly generated patterns. 3005 */ 3006 private static void bm() throws Exception { 3007 doBnM('a'); 3008 report("Boyer Moore (ASCII)"); 3009 3010 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3011 report("Boyer Moore (Supplementary)"); 3012 } 3013 3014 private static void doBnM(int baseCharacter) throws Exception { 3015 int achar=0; 3016 3017 for (int i=0; i<100; i++) { 3018 // Create a short pattern to search for 3019 int patternLength = generator.nextInt(7) + 4; 3020 StringBuffer patternBuffer = new StringBuffer(patternLength); 3021 for (int x=0; x<patternLength; x++) { 3022 int ch = baseCharacter + generator.nextInt(26); 3023 if (Character.isSupplementaryCodePoint(ch)) { 3024 patternBuffer.append(Character.toChars(ch)); 3025 } else { 3026 patternBuffer.append((char)ch); 3027 } 3028 } 3029 String pattern = patternBuffer.toString(); 3030 Pattern p = Pattern.compile(pattern); 3031 3032 // Create a buffer with random ASCII chars that does 3033 // not match the sample 3034 String toSearch = null; 3035 StringBuffer s = null; 3036 Matcher m = p.matcher(""); 3037 do { 3038 s = new StringBuffer(100); 3039 for (int x=0; x<100; x++) { 3040 int ch = baseCharacter + generator.nextInt(26); 3041 if (Character.isSupplementaryCodePoint(ch)) { 3042 s.append(Character.toChars(ch)); 3043 } else { 3044 s.append((char)ch); 3045 } 
3046 } 3047 toSearch = s.toString(); 3048 m.reset(toSearch); 3049 } while (m.find()); 3050 3051 // Insert the pattern at a random spot 3052 int insertIndex = generator.nextInt(99); 3053 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3054 insertIndex++; 3055 s = s.insert(insertIndex, pattern); 3056 toSearch = s.toString(); 3057 3058 // Make sure that the pattern is found 3059 m.reset(toSearch); 3060 if (!m.find()) 3061 failCount++; 3062 3063 // Make sure that the match text is the pattern 3064 if (!m.group().equals(pattern)) 3065 failCount++; 3066 3067 // Make sure match occured at insertion point 3068 if (m.start() != insertIndex) 3069 failCount++; 3070 } 3071 } 3072 3073 /** 3074 * Tests the matching of slices on randomly generated patterns. 3075 * The Boyer-Moore optimization is not done on these patterns 3076 * because it uses unicode case folding. 3077 */ 3078 private static void slice() throws Exception { 3079 doSlice(Character.MAX_VALUE); 3080 report("Slice"); 3081 3082 doSlice(Character.MAX_CODE_POINT); 3083 report("Slice (Supplementary)"); 3084 } 3085 3086 private static void doSlice(int maxCharacter) throws Exception { 3087 Random generator = new Random(); 3088 int achar=0; 3089 3090 for (int i=0; i<100; i++) { 3091 // Create a short pattern to search for 3092 int patternLength = generator.nextInt(7) + 4; 3093 StringBuffer patternBuffer = new StringBuffer(patternLength); 3094 for (int x=0; x<patternLength; x++) { 3095 int randomChar = 0; 3096 while (!Character.isLetterOrDigit(randomChar)) 3097 randomChar = generator.nextInt(maxCharacter); 3098 if (Character.isSupplementaryCodePoint(randomChar)) { 3099 patternBuffer.append(Character.toChars(randomChar)); 3100 } else { 3101 patternBuffer.append((char) randomChar); 3102 } 3103 } 3104 String pattern = patternBuffer.toString(); 3105 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3106 3107 // Create a buffer with random chars that does not match the sample 3108 String toSearch = null; 3109 
StringBuffer s = null; 3110 Matcher m = p.matcher(""); 3111 do { 3112 s = new StringBuffer(100); 3113 for (int x=0; x<100; x++) { 3114 int randomChar = 0; 3115 while (!Character.isLetterOrDigit(randomChar)) 3116 randomChar = generator.nextInt(maxCharacter); 3117 if (Character.isSupplementaryCodePoint(randomChar)) { 3118 s.append(Character.toChars(randomChar)); 3119 } else { 3120 s.append((char) randomChar); 3121 } 3122 } 3123 toSearch = s.toString(); 3124 m.reset(toSearch); 3125 } while (m.find()); 3126 3127 // Insert the pattern at a random spot 3128 int insertIndex = generator.nextInt(99); 3129 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3130 insertIndex++; 3131 s = s.insert(insertIndex, pattern); 3132 toSearch = s.toString(); 3133 3134 // Make sure that the pattern is found 3135 m.reset(toSearch); 3136 if (!m.find()) 3137 failCount++; 3138 3139 // Make sure that the match text is the pattern 3140 if (!m.group().equals(pattern)) 3141 failCount++; 3142 3143 // Make sure match occured at insertion point 3144 if (m.start() != insertIndex) 3145 failCount++; 3146 } 3147 } 3148 3149 private static void explainFailure(String pattern, String data, 3150 String expected, String actual) { 3151 System.err.println("----------------------------------------"); 3152 System.err.println("Pattern = "+pattern); 3153 System.err.println("Data = "+data); 3154 System.err.println("Expected = " + expected); 3155 System.err.println("Actual = " + actual); 3156 } 3157 3158 private static void explainFailure(String pattern, String data, 3159 Throwable t) { 3160 System.err.println("----------------------------------------"); 3161 System.err.println("Pattern = "+pattern); 3162 System.err.println("Data = "+data); 3163 t.printStackTrace(System.err); 3164 } 3165 3166 // Testing examples from a file 3167 3168 /** 3169 * Goes through the file "TestCases.txt" and creates many patterns 3170 * described in the file, matching the patterns against input lines in 3171 * the file, and comparing 
the results against the correct results 3172 * also found in the file. The file format is described in comments 3173 * at the head of the file. 3174 */ 3175 private static void processFile(String fileName) throws Exception { 3176 File testCases = new File(System.getProperty("test.src", "."), 3177 fileName); 3178 FileInputStream in = new FileInputStream(testCases); 3179 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3180 3181 // Process next test case. 3182 String aLine; 3183 while((aLine = r.readLine()) != null) { 3184 // Read a line for pattern 3185 String patternString = grabLine(r); 3186 Pattern p = null; 3187 try { 3188 p = compileTestPattern(patternString); 3189 } catch (PatternSyntaxException e) { 3190 String dataString = grabLine(r); 3191 String expectedResult = grabLine(r); 3192 if (expectedResult.startsWith("error")) 3193 continue; 3194 explainFailure(patternString, dataString, e); 3195 failCount++; 3196 continue; 3197 } 3198 3199 // Read a line for input string 3200 String dataString = grabLine(r); 3201 Matcher m = p.matcher(dataString); 3202 StringBuffer result = new StringBuffer(); 3203 3204 // Check for IllegalStateExceptions before a match 3205 failCount += preMatchInvariants(m); 3206 3207 boolean found = m.find(); 3208 3209 if (found) 3210 failCount += postTrueMatchInvariants(m); 3211 else 3212 failCount += postFalseMatchInvariants(m); 3213 3214 if (found) { 3215 result.append("true "); 3216 result.append(m.group(0) + " "); 3217 } else { 3218 result.append("false "); 3219 } 3220 3221 result.append(m.groupCount()); 3222 3223 if (found) { 3224 for (int i=1; i<m.groupCount()+1; i++) 3225 if (m.group(i) != null) 3226 result.append(" " +m.group(i)); 3227 } 3228 3229 // Read a line for the expected result 3230 String expectedResult = grabLine(r); 3231 3232 if (!result.toString().equals(expectedResult)) { 3233 explainFailure(patternString, dataString, expectedResult, result.toString()); 3234 failCount++; 3235 } 3236 } 3237 3238 
report(fileName); 3239 } 3240 3241 private static int preMatchInvariants(Matcher m) { 3242 int failCount = 0; 3243 try { 3244 m.start(); 3245 failCount++; 3246 } catch (IllegalStateException ise) {} 3247 try { 3248 m.end(); 3249 failCount++; 3250 } catch (IllegalStateException ise) {} 3251 try { 3252 m.group(); 3253 failCount++; 3254 } catch (IllegalStateException ise) {} 3255 return failCount; 3256 } 3257 3258 private static int postFalseMatchInvariants(Matcher m) { 3259 int failCount = 0; 3260 try { 3261 m.group(); 3262 failCount++; 3263 } catch (IllegalStateException ise) {} 3264 try { 3265 m.start(); 3266 failCount++; 3267 } catch (IllegalStateException ise) {} 3268 try { 3269 m.end(); 3270 failCount++; 3271 } catch (IllegalStateException ise) {} 3272 return failCount; 3273 } 3274 3275 private static int postTrueMatchInvariants(Matcher m) { 3276 int failCount = 0; 3277 //assert(m.start() = m.start(0); 3278 if (m.start() != m.start(0)) 3279 failCount++; 3280 //assert(m.end() = m.end(0); 3281 if (m.start() != m.start(0)) 3282 failCount++; 3283 //assert(m.group() = m.group(0); 3284 if (!m.group().equals(m.group(0))) 3285 failCount++; 3286 try { 3287 m.group(50); 3288 failCount++; 3289 } catch (IndexOutOfBoundsException ise) {} 3290 3291 return failCount; 3292 } 3293 3294 private static Pattern compileTestPattern(String patternString) { 3295 if (!patternString.startsWith("'")) { 3296 return Pattern.compile(patternString); 3297 } 3298 3299 int break1 = patternString.lastIndexOf("'"); 3300 String flagString = patternString.substring( 3301 break1+1, patternString.length()); 3302 patternString = patternString.substring(1, break1); 3303 3304 if (flagString.equals("i")) 3305 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3306 3307 if (flagString.equals("m")) 3308 return Pattern.compile(patternString, Pattern.MULTILINE); 3309 3310 return Pattern.compile(patternString); 3311 } 3312 3313 /** 3314 * Reads a line from the input file. 
Keeps reading lines until a non 3315 * empty non comment line is read. If the line contains a \n then 3316 * these two characters are replaced by a newline char. If a \\uxxxx 3317 * sequence is read then the sequence is replaced by the unicode char. 3318 */ 3319 private static String grabLine(BufferedReader r) throws Exception { 3320 int index = 0; 3321 String line = r.readLine(); 3322 while (line.startsWith("//") || line.length() < 1) 3323 line = r.readLine(); 3324 while ((index = line.indexOf("\\n")) != -1) { 3325 StringBuffer temp = new StringBuffer(line); 3326 temp.replace(index, index+2, "\n"); 3327 line = temp.toString(); 3328 } 3329 while ((index = line.indexOf("\\u")) != -1) { 3330 StringBuffer temp = new StringBuffer(line); 3331 String value = temp.substring(index+2, index+6); 3332 char aChar = (char)Integer.parseInt(value, 16); 3333 String unicodeChar = "" + aChar; 3334 temp.replace(index, index+6, unicodeChar); 3335 line = temp.toString(); 3336 } 3337 3338 return line; 3339 } 3340 3341 private static void check(Pattern p, String s, String g, String expected) { 3342 Matcher m = p.matcher(s); 3343 m.find(); 3344 if (!m.group(g).equals(expected)) 3345 failCount++; 3346 } 3347 3348 private static void checkReplaceFirst(String p, String s, String r, String expected) 3349 { 3350 if (!expected.equals(Pattern.compile(p) 3351 .matcher(s) 3352 .replaceFirst(r))) 3353 failCount++; 3354 } 3355 3356 private static void checkReplaceAll(String p, String s, String r, String expected) 3357 { 3358 if (!expected.equals(Pattern.compile(p) 3359 .matcher(s) 3360 .replaceAll(r))) 3361 failCount++; 3362 } 3363 3364 private static void checkExpectedFail(String p) { 3365 try { 3366 Pattern.compile(p); 3367 } catch (PatternSyntaxException pse) { 3368 //pse.printStackTrace(); 3369 return; 3370 } 3371 failCount++; 3372 } 3373 3374 private static void checkExpectedFail(Matcher m, String g) { 3375 m.find(); 3376 try { 3377 m.group(g); 3378 } catch (IllegalArgumentException iae) { 3379 
//iae.printStackTrace(); 3380 return; 3381 } catch (NullPointerException npe) { 3382 return; 3383 } 3384 failCount++; 3385 } 3386 3387 3388 private static void namedGroupCaptureTest() throws Exception { 3389 check(Pattern.compile("x+(?<gname>y+)z+"), 3390 "xxxyyyzzz", 3391 "gname", 3392 "yyy"); 3393 3394 check(Pattern.compile("x+(?<gname8>y+)z+"), 3395 "xxxyyyzzz", 3396 "gname8", 3397 "yyy"); 3398 3399 //backref 3400 Pattern pattern = Pattern.compile("(a*)bc\\1"); 3401 check(pattern, "zzzaabcazzz", true); // found "abca" 3402 3403 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 3404 "zzzaabcaazzz", true); 3405 3406 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 3407 "abcdefabc", true); 3408 3409 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 3410 "abcdefghijkk", true); 3411 3412 // Supplementary character tests 3413 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3414 toSupplementaries("zzzaabcazzz"), true); 3415 3416 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3417 toSupplementaries("zzzaabcaazzz"), true); 3418 3419 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 3420 toSupplementaries("abcdefabc"), true); 3421 3422 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 3423 "(?<gname>" + 3424 toSupplementaries("k)") + "\\k<gname>"), 3425 toSupplementaries("abcdefghijkk"), true); 3426 3427 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 3428 "xxxyyyzzzyyy", 3429 "gname", 3430 "yyy"); 3431 3432 //replaceFirst/All 3433 checkReplaceFirst("(?<gn>ab)(c*)", 3434 "abccczzzabcczzzabccc", 3435 "${gn}", 3436 "abzzzabcczzzabccc"); 3437 3438 checkReplaceAll("(?<gn>ab)(c*)", 3439 "abccczzzabcczzzabccc", 3440 "${gn}", 3441 "abzzzabzzzab"); 3442 3443 3444 checkReplaceFirst("(?<gn>ab)(c*)", 3445 "zzzabccczzzabcczzzabccczzz", 3446 "${gn}", 3447 "zzzabzzzabcczzzabccczzz"); 3448 3449 
checkReplaceAll("(?<gn>ab)(c*)", 3450 "zzzabccczzzabcczzzabccczzz", 3451 "${gn}", 3452 "zzzabzzzabzzzabzzz"); 3453 3454 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 3455 "zzzabccczzzabcczzzabccczzz", 3456 "${gn2}", 3457 "zzzccczzzabcczzzabccczzz"); 3458 3459 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 3460 "zzzabccczzzabcczzzabccczzz", 3461 "${gn2}", 3462 "zzzccczzzcczzzccczzz"); 3463 3464 //toSupplementaries("(ab)(c*)")); 3465 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 3466 ")(?<gn2>" + toSupplementaries("c") + "*)", 3467 toSupplementaries("abccczzzabcczzzabccc"), 3468 "${gn1}", 3469 toSupplementaries("abzzzabcczzzabccc")); 3470 3471 3472 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 3473 ")(?<gn2>" + toSupplementaries("c") + "*)", 3474 toSupplementaries("abccczzzabcczzzabccc"), 3475 "${gn1}", 3476 toSupplementaries("abzzzabzzzab")); 3477 3478 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 3479 ")(?<gn2>" + toSupplementaries("c") + "*)", 3480 toSupplementaries("abccczzzabcczzzabccc"), 3481 "${gn2}", 3482 toSupplementaries("ccczzzabcczzzabccc")); 3483 3484 3485 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 3486 ")(?<gn2>" + toSupplementaries("c") + "*)", 3487 toSupplementaries("abccczzzabcczzzabccc"), 3488 "${gn2}", 3489 toSupplementaries("ccczzzcczzzccc")); 3490 3491 checkReplaceFirst("(?<dog>Dog)AndCat", 3492 "zzzDogAndCatzzzDogAndCatzzz", 3493 "${dog}", 3494 "zzzDogzzzDogAndCatzzz"); 3495 3496 3497 checkReplaceAll("(?<dog>Dog)AndCat", 3498 "zzzDogAndCatzzzDogAndCatzzz", 3499 "${dog}", 3500 "zzzDogzzzDogzzz"); 3501 3502 // backref in Matcher & String 3503 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") || 3504 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 3505 failCount++; 3506 3507 // negative 3508 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 3509 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 3510 
checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 3511 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 3512 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 3513 checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 3514 "gnameX"); 3515 checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 3516 null); 3517 report("NamedGroupCapture"); 3518 } 3519 3520 // This is for bug 6969132 3521 private static void nonBmpClassComplementTest() throws Exception { 3522 Pattern p = Pattern.compile("\\P{Lu}"); 3523 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 3524 if (m.find() && m.start() == 1) 3525 failCount++; 3526 3527 // from a unicode category 3528 p = Pattern.compile("\\P{Lu}"); 3529 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 3530 if (m.find()) 3531 failCount++; 3532 if (!m.hitEnd()) 3533 failCount++; 3534 3535 // block 3536 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 3537 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 3538 if (m.find() && m.start() == 1) 3539 failCount++; 3540 3541 report("NonBmpClassComplement"); 3542 } 3543 3544 private static void unicodePropertiesTest() throws Exception { 3545 // different forms 3546 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 3547 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 3548 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 3549 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 3550 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 3551 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 3552 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 3553 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 3554 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 3555 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 3556 failCount++; 3557 3558 Matcher common = 
Pattern.compile("\\p{script=Common}").matcher(""); 3559 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 3560 Matcher lastSM = common; 3561 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 3562 3563 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 3564 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 3565 Matcher lastBM = latin; 3566 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 3567 3568 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 3569 if (cp >= 0x30000 && (cp & 0x70) == 0){ 3570 continue; // only pick couple code points, they are the same 3571 } 3572 3573 // Unicode Script 3574 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 3575 Matcher m; 3576 String str = new String(Character.toChars(cp)); 3577 if (script == lastScript) { 3578 m = lastSM; 3579 m.reset(str); 3580 } else { 3581 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 3582 } 3583 if (!m.matches()) { 3584 failCount++; 3585 } 3586 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common; 3587 other.reset(str); 3588 if (other.matches()) { 3589 failCount++; 3590 } 3591 lastSM = m; 3592 lastScript = script; 3593 3594 // Unicode Block 3595 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 3596 if (block == null) { 3597 //System.out.printf("Not a Block: cp=%x%n", cp); 3598 continue; 3599 } 3600 if (block == lastBlock) { 3601 m = lastBM; 3602 m.reset(str); 3603 } else { 3604 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 3605 } 3606 if (!m.matches()) { 3607 failCount++; 3608 } 3609 other = (block == Character.UnicodeBlock.BASIC_LATIN)? 
greek : latin; 3610 other.reset(str); 3611 if (other.matches()) { 3612 failCount++; 3613 } 3614 lastBM = m; 3615 lastBlock = block; 3616 } 3617 report("unicodeProperties"); 3618 } 3619 3620 private static void unicodeHexNotationTest() throws Exception { 3621 3622 // negative 3623 checkExpectedFail("\\x{-23}"); 3624 checkExpectedFail("\\x{110000}"); 3625 checkExpectedFail("\\x{}"); 3626 checkExpectedFail("\\x{AB[ef]"); 3627 3628 // codepoint 3629 check("^\\x{1033c}$", "\uD800\uDF3C", true); 3630 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 3631 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 3632 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 3633 3634 // in class 3635 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 3636 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 3637 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 3638 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 3639 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 3640 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 3641 3642 for (int cp = 0; cp <= 0x10FFFF; cp++) { 3643 String s = "A" + new String(Character.toChars(cp)) + "B"; 3644 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp) 3645 : String.format("\\u%04x\\u%04x", 3646 (int) Character.toChars(cp)[0], 3647 (int) Character.toChars(cp)[1]); 3648 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 3649 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 3650 failCount++; 3651 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 3652 failCount++; 3653 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 3654 failCount++; 3655 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 3656 failCount++; 3657 } 3658 report("unicodeHexNotation"); 3659 } 3660 }