1 /* 2 * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 */ 23 24 /** 25 * @test 26 * @summary tests RegExp framework 27 * @author Mike McCloskey 28 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 29 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 30 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 31 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 32 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 33 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 34 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 35 * 8027645 6854417 36 */ 37 38 import java.util.regex.*; 39 import java.util.Random; 40 import java.io.*; 41 import java.util.*; 42 import java.nio.CharBuffer; 43 import java.util.function.Predicate; 44 45 /** 46 * This is a test class created to check the operation of 47 * the Pattern and Matcher classes. 48 */ 49 public class RegExTest { 50 51 private static Random generator = new Random(); 52 private static boolean failure = false; 53 private static int failCount = 0; 54 private static String firstFailure = null; 55 56 /** 57 * Main to interpret arguments and run several tests. 
58 * 59 */ 60 public static void main(String[] args) throws Exception { 61 // Most of the tests are in a file 62 processFile("TestCases.txt"); 63 //processFile("PerlCases.txt"); 64 processFile("BMPTestCases.txt"); 65 processFile("SupplementaryTestCases.txt"); 66 67 // These test many randomly generated char patterns 68 bm(); 69 slice(); 70 71 // These are hard to put into the file 72 escapes(); 73 blankInput(); 74 75 // Substitition tests on randomly generated sequences 76 globalSubstitute(); 77 stringbufferSubstitute(); 78 substitutionBasher(); 79 80 // Canonical Equivalence 81 ceTest(); 82 83 // Anchors 84 anchorTest(); 85 86 // boolean match calls 87 matchesTest(); 88 lookingAtTest(); 89 90 // Pattern API 91 patternMatchesTest(); 92 93 // Misc 94 lookbehindTest(); 95 nullArgumentTest(); 96 backRefTest(); 97 groupCaptureTest(); 98 caretTest(); 99 charClassTest(); 100 emptyPatternTest(); 101 findIntTest(); 102 group0Test(); 103 longPatternTest(); 104 octalTest(); 105 ampersandTest(); 106 negationTest(); 107 splitTest(); 108 appendTest(); 109 caseFoldingTest(); 110 commentsTest(); 111 unixLinesTest(); 112 replaceFirstTest(); 113 gTest(); 114 zTest(); 115 serializeTest(); 116 reluctantRepetitionTest(); 117 multilineDollarTest(); 118 dollarAtEndTest(); 119 caretBetweenTerminatorsTest(); 120 // This RFE rejected in Tiger numOccurrencesTest(); 121 javaCharClassTest(); 122 nonCaptureRepetitionTest(); 123 notCapturedGroupCurlyMatchTest(); 124 escapedSegmentTest(); 125 literalPatternTest(); 126 literalReplacementTest(); 127 regionTest(); 128 toStringTest(); 129 negatedCharClassTest(); 130 findFromTest(); 131 boundsTest(); 132 unicodeWordBoundsTest(); 133 caretAtEndTest(); 134 wordSearchTest(); 135 hitEndTest(); 136 toMatchResultTest(); 137 surrogatesInClassTest(); 138 removeQEQuotingTest(); 139 namedGroupCaptureTest(); 140 nonBmpClassComplementTest(); 141 unicodePropertiesTest(); 142 unicodeHexNotationTest(); 143 unicodeClassesTest(); 144 horizontalAndVerticalWSTest(); 
145 linebreakTest(); 146 branchTest(); 147 groupCurlyNotFoundSuppTest(); 148 groupCurlyBackoffTest(); 149 patternAsPredicate(); 150 151 if (failure) { 152 throw new 153 RuntimeException("RegExTest failed, 1st failure: " + 154 firstFailure); 155 } else { 156 System.err.println("OKAY: All tests passed."); 157 } 158 } 159 160 // Utility functions 161 162 private static String getRandomAlphaString(int length) { 163 StringBuffer buf = new StringBuffer(length); 164 for (int i=0; i<length; i++) { 165 char randChar = (char)(97 + generator.nextInt(26)); 166 buf.append(randChar); 167 } 168 return buf.toString(); 169 } 170 171 private static void check(Matcher m, String expected) { 172 m.find(); 173 if (!m.group().equals(expected)) 174 failCount++; 175 } 176 177 private static void check(Matcher m, String result, boolean expected) { 178 m.find(); 179 if (m.group().equals(result) != expected) 180 failCount++; 181 } 182 183 private static void check(Pattern p, String s, boolean expected) { 184 if (p.matcher(s).find() != expected) 185 failCount++; 186 } 187 188 private static void check(String p, String s, boolean expected) { 189 Matcher matcher = Pattern.compile(p).matcher(s); 190 if (matcher.find() != expected) 191 failCount++; 192 } 193 194 private static void check(String p, char c, boolean expected) { 195 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 196 Pattern pattern = Pattern.compile(propertyPattern); 197 char[] ca = new char[1]; ca[0] = c; 198 Matcher matcher = pattern.matcher(new String(ca)); 199 if (!matcher.find()) 200 failCount++; 201 } 202 203 private static void check(String p, int codePoint, boolean expected) { 204 String propertyPattern = expected ? 
"\\p" + p : "\\P" + p; 205 Pattern pattern = Pattern.compile(propertyPattern); 206 char[] ca = Character.toChars(codePoint); 207 Matcher matcher = pattern.matcher(new String(ca)); 208 if (!matcher.find()) 209 failCount++; 210 } 211 212 private static void check(String p, int flag, String input, String s, 213 boolean expected) 214 { 215 Pattern pattern = Pattern.compile(p, flag); 216 Matcher matcher = pattern.matcher(input); 217 if (expected) 218 check(matcher, s, expected); 219 else 220 check(pattern, input, false); 221 } 222 223 private static void report(String testName) { 224 int spacesToAdd = 30 - testName.length(); 225 StringBuffer paddedNameBuffer = new StringBuffer(testName); 226 for (int i=0; i<spacesToAdd; i++) 227 paddedNameBuffer.append(" "); 228 String paddedName = paddedNameBuffer.toString(); 229 System.err.println(paddedName + ": " + 230 (failCount==0 ? "Passed":"Failed("+failCount+")")); 231 if (failCount > 0) { 232 failure = true; 233 234 if (firstFailure == null) { 235 firstFailure = testName; 236 } 237 } 238 239 failCount = 0; 240 } 241 242 /** 243 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 244 * supplementary characters. This method does NOT fully take care 245 * of the regex syntax. 
246 */ 247 private static String toSupplementaries(String s) { 248 int length = s.length(); 249 StringBuffer sb = new StringBuffer(length * 2); 250 251 for (int i = 0; i < length; ) { 252 char c = s.charAt(i++); 253 if (c == '\\') { 254 sb.append(c); 255 if (i < length) { 256 c = s.charAt(i++); 257 sb.append(c); 258 if (c == 'u') { 259 // assume no syntax error 260 sb.append(s.charAt(i++)); 261 sb.append(s.charAt(i++)); 262 sb.append(s.charAt(i++)); 263 sb.append(s.charAt(i++)); 264 } 265 } 266 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 267 sb.append('\ud800').append((char)('\udc00'+c)); 268 } else { 269 sb.append(c); 270 } 271 } 272 return sb.toString(); 273 } 274 275 // Regular expression tests 276 277 // This is for bug 6178785 278 // Test if an expected NPE gets thrown when passing in a null argument 279 private static boolean check(Runnable test) { 280 try { 281 test.run(); 282 failCount++; 283 return false; 284 } catch (NullPointerException npe) { 285 return true; 286 } 287 } 288 289 private static void nullArgumentTest() { 290 check(new Runnable() { public void run() { Pattern.compile(null); }}); 291 check(new Runnable() { public void run() { Pattern.matches(null, null); }}); 292 check(new Runnable() { public void run() { Pattern.matches("xyz", null);}}); 293 check(new Runnable() { public void run() { Pattern.quote(null);}}); 294 check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}}); 295 check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}}); 296 297 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 298 m.matches(); 299 check(new Runnable() { public void run() { m.appendTail(null);}}); 300 check(new Runnable() { public void run() { m.replaceAll(null);}}); 301 check(new Runnable() { public void run() { m.replaceFirst(null);}}); 302 check(new Runnable() { public void run() { m.appendReplacement(null, null);}}); 303 check(new Runnable() { public void run() { m.reset(null);}}); 
304 check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}}); 305 //check(new Runnable() { public void run() { m.usePattern(null);}}); 306 307 report("Null Argument"); 308 } 309 310 // This is for bug6635133 311 // Test if surrogate pair in Unicode escapes can be handled correctly. 312 private static void surrogatesInClassTest() throws Exception { 313 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 314 Matcher matcher = pattern.matcher("\ud834\udd22"); 315 if (!matcher.find()) 316 failCount++; 317 318 report("Surrogate pair in Unicode escape"); 319 } 320 321 // This is for bug6990617 322 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 323 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 324 // char is an octal digit. 325 private static void removeQEQuotingTest() throws Exception { 326 Pattern pattern = 327 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 328 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 329 if (!matcher.find()) 330 failCount++; 331 332 report("Remove Q/E Quoting"); 333 } 334 335 // This is for bug 4988891 336 // Test toMatchResult to see that it is a copy of the Matcher 337 // that is not affected by subsequent operations on the original 338 private static void toMatchResultTest() throws Exception { 339 Pattern pattern = Pattern.compile("squid"); 340 Matcher matcher = pattern.matcher( 341 "agiantsquidofdestinyasmallsquidoffate"); 342 matcher.find(); 343 int matcherStart1 = matcher.start(); 344 MatchResult mr = matcher.toMatchResult(); 345 if (mr == matcher) 346 failCount++; 347 int resultStart1 = mr.start(); 348 if (matcherStart1 != resultStart1) 349 failCount++; 350 matcher.find(); 351 int matcherStart2 = matcher.start(); 352 int resultStart2 = mr.start(); 353 if (matcherStart2 == resultStart2) 354 failCount++; 355 if (resultStart1 != resultStart2) 356 failCount++; 357 MatchResult mr2 = matcher.toMatchResult(); 358 if (mr == mr2) 
359 failCount++; 360 if (mr2.start() != matcherStart2) 361 failCount++; 362 report("toMatchResult is a copy"); 363 } 364 365 // This is for bug 5013885 366 // Must test a slice to see if it reports hitEnd correctly 367 private static void hitEndTest() throws Exception { 368 // Basic test of Slice node 369 Pattern p = Pattern.compile("^squidattack"); 370 Matcher m = p.matcher("squack"); 371 m.find(); 372 if (m.hitEnd()) 373 failCount++; 374 m.reset("squid"); 375 m.find(); 376 if (!m.hitEnd()) 377 failCount++; 378 379 // Test Slice, SliceA and SliceU nodes 380 for (int i=0; i<3; i++) { 381 int flags = 0; 382 if (i==1) flags = Pattern.CASE_INSENSITIVE; 383 if (i==2) flags = Pattern.UNICODE_CASE; 384 p = Pattern.compile("^abc", flags); 385 m = p.matcher("ad"); 386 m.find(); 387 if (m.hitEnd()) 388 failCount++; 389 m.reset("ab"); 390 m.find(); 391 if (!m.hitEnd()) 392 failCount++; 393 } 394 395 // Test Boyer-Moore node 396 p = Pattern.compile("catattack"); 397 m = p.matcher("attack"); 398 m.find(); 399 if (!m.hitEnd()) 400 failCount++; 401 402 p = Pattern.compile("catattack"); 403 m = p.matcher("attackattackattackcatatta"); 404 m.find(); 405 if (!m.hitEnd()) 406 failCount++; 407 report("hitEnd from a Slice"); 408 } 409 410 // This is for bug 4997476 411 // It is weird code submitted by customer demonstrating a regression 412 private static void wordSearchTest() throws Exception { 413 String testString = new String("word1 word2 word3"); 414 Pattern p = Pattern.compile("\\b"); 415 Matcher m = p.matcher(testString); 416 int position = 0; 417 int start = 0; 418 while (m.find(position)) { 419 start = m.start(); 420 if (start == testString.length()) 421 break; 422 if (m.find(start+1)) { 423 position = m.start(); 424 } else { 425 position = testString.length(); 426 } 427 if (testString.substring(start, position).equals(" ")) 428 continue; 429 if (!testString.substring(start, position-1).startsWith("word")) 430 failCount++; 431 } 432 report("Customer word search"); 433 } 434 
435 // This is for bug 4994840 436 private static void caretAtEndTest() throws Exception { 437 // Problem only occurs with multiline patterns 438 // containing a beginning-of-line caret "^" followed 439 // by an expression that also matches the empty string. 440 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 441 Matcher matcher = pattern.matcher("\r"); 442 matcher.find(); 443 matcher.find(); 444 report("Caret at end"); 445 } 446 447 // This test is for 4979006 448 // Check to see if word boundary construct properly handles unicode 449 // non spacing marks 450 private static void unicodeWordBoundsTest() throws Exception { 451 String spaces = " "; 452 String wordChar = "a"; 453 String nsm = "\u030a"; 454 455 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 456 457 Pattern pattern = Pattern.compile("\\b"); 458 Matcher matcher = pattern.matcher(""); 459 // S=other B=word character N=non spacing mark .=word boundary 460 // SS.BB.SS 461 String input = spaces + wordChar + wordChar + spaces; 462 twoFindIndexes(input, matcher, 2, 4); 463 // SS.BBN.SS 464 input = spaces + wordChar +wordChar + nsm + spaces; 465 twoFindIndexes(input, matcher, 2, 5); 466 // SS.BN.SS 467 input = spaces + wordChar + nsm + spaces; 468 twoFindIndexes(input, matcher, 2, 4); 469 // SS.BNN.SS 470 input = spaces + wordChar + nsm + nsm + spaces; 471 twoFindIndexes(input, matcher, 2, 5); 472 // SSN.BB.SS 473 input = spaces + nsm + wordChar + wordChar + spaces; 474 twoFindIndexes(input, matcher, 3, 5); 475 // SS.BNB.SS 476 input = spaces + wordChar + nsm + wordChar + spaces; 477 twoFindIndexes(input, matcher, 2, 5); 478 // SSNNSS 479 input = spaces + nsm + nsm + spaces; 480 matcher.reset(input); 481 if (matcher.find()) 482 failCount++; 483 // SSN.BBN.SS 484 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 485 twoFindIndexes(input, matcher, 3, 6); 486 487 report("Unicode word boundary"); 488 } 489 490 private static void twoFindIndexes(String input, Matcher 
matcher, int a, 491 int b) throws Exception 492 { 493 matcher.reset(input); 494 matcher.find(); 495 if (matcher.start() != a) 496 failCount++; 497 matcher.find(); 498 if (matcher.start() != b) 499 failCount++; 500 } 501 502 // This test is for 6284152 503 static void check(String regex, String input, String[] expected) { 504 List<String> result = new ArrayList<String>(); 505 Pattern p = Pattern.compile(regex); 506 Matcher m = p.matcher(input); 507 while (m.find()) { 508 result.add(m.group()); 509 } 510 if (!Arrays.asList(expected).equals(result)) 511 failCount++; 512 } 513 514 private static void lookbehindTest() throws Exception { 515 //Positive 516 check("(?<=%.{0,5})foo\\d", 517 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 518 new String[]{"foo1", "foo2", "foo3"}); 519 520 //boundary at end of the lookbehind sub-regex should work consistently 521 //with the boundary just after the lookbehind sub-regex 522 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 523 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 524 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 525 check("(?<!abc \\b)foo", "abc foo", new String[0]); 526 527 //Negative 528 check("(?<!%.{0,5})foo\\d", 529 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 530 new String[] {"foo4", "foo5"}); 531 532 //Positive greedy 533 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 534 535 //Positive reluctant 536 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 537 538 //supplementary 539 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 540 new String[] {"fo\ud800\udc00o"}); 541 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 542 new String[] {"fo\ud800\udc00o"}); 543 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 544 new String[] {"fo\ud800\udc00o"}); 545 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 546 new String[] {"fo\ud800\udc00o"}); 547 report("Lookbehind"); 548 } 549 550 // This test is for 4938995 551 // 
Check to see if weak region boundaries are transparent to 552 // lookahead and lookbehind constructs 553 private static void boundsTest() throws Exception { 554 String fullMessage = "catdogcat"; 555 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 556 Matcher matcher = pattern.matcher("catdogca"); 557 matcher.useTransparentBounds(true); 558 if (matcher.find()) 559 failCount++; 560 matcher.reset("atdogcat"); 561 if (matcher.find()) 562 failCount++; 563 matcher.reset(fullMessage); 564 if (!matcher.find()) 565 failCount++; 566 matcher.reset(fullMessage); 567 matcher.region(0,9); 568 if (!matcher.find()) 569 failCount++; 570 matcher.reset(fullMessage); 571 matcher.region(0,6); 572 if (!matcher.find()) 573 failCount++; 574 matcher.reset(fullMessage); 575 matcher.region(3,6); 576 if (!matcher.find()) 577 failCount++; 578 matcher.useTransparentBounds(false); 579 if (matcher.find()) 580 failCount++; 581 582 // Negative lookahead/lookbehind 583 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 584 matcher = pattern.matcher("dogcat"); 585 matcher.useTransparentBounds(true); 586 matcher.region(0,3); 587 if (matcher.find()) 588 failCount++; 589 matcher.reset("catdog"); 590 matcher.region(3,6); 591 if (matcher.find()) 592 failCount++; 593 matcher.useTransparentBounds(false); 594 matcher.reset("dogcat"); 595 matcher.region(0,3); 596 if (!matcher.find()) 597 failCount++; 598 matcher.reset("catdog"); 599 matcher.region(3,6); 600 if (!matcher.find()) 601 failCount++; 602 603 report("Region bounds transparency"); 604 } 605 606 // This test is for 4945394 607 private static void findFromTest() throws Exception { 608 String message = "This is 40 $0 message."; 609 Pattern pat = Pattern.compile("\\$0"); 610 Matcher match = pat.matcher(message); 611 if (!match.find()) 612 failCount++; 613 if (match.find()) 614 failCount++; 615 if (match.find()) 616 failCount++; 617 report("Check for alternating find"); 618 } 619 620 // This test is for 4872664 and 4892980 621 private static void 
negatedCharClassTest() throws Exception { 622 Pattern pattern = Pattern.compile("[^>]"); 623 Matcher matcher = pattern.matcher("\u203A"); 624 if (!matcher.matches()) 625 failCount++; 626 pattern = Pattern.compile("[^fr]"); 627 matcher = pattern.matcher("a"); 628 if (!matcher.find()) 629 failCount++; 630 matcher.reset("\u203A"); 631 if (!matcher.find()) 632 failCount++; 633 String s = "for"; 634 String result[] = s.split("[^fr]"); 635 if (!result[0].equals("f")) 636 failCount++; 637 if (!result[1].equals("r")) 638 failCount++; 639 s = "f\u203Ar"; 640 result = s.split("[^fr]"); 641 if (!result[0].equals("f")) 642 failCount++; 643 if (!result[1].equals("r")) 644 failCount++; 645 646 // Test adding to bits, subtracting a node, then adding to bits again 647 pattern = Pattern.compile("[^f\u203Ar]"); 648 matcher = pattern.matcher("a"); 649 if (!matcher.find()) 650 failCount++; 651 matcher.reset("f"); 652 if (matcher.find()) 653 failCount++; 654 matcher.reset("\u203A"); 655 if (matcher.find()) 656 failCount++; 657 matcher.reset("r"); 658 if (matcher.find()) 659 failCount++; 660 matcher.reset("\u203B"); 661 if (!matcher.find()) 662 failCount++; 663 664 // Test subtracting a node, adding to bits, subtracting again 665 pattern = Pattern.compile("[^\u203Ar\u203B]"); 666 matcher = pattern.matcher("a"); 667 if (!matcher.find()) 668 failCount++; 669 matcher.reset("\u203A"); 670 if (matcher.find()) 671 failCount++; 672 matcher.reset("r"); 673 if (matcher.find()) 674 failCount++; 675 matcher.reset("\u203B"); 676 if (matcher.find()) 677 failCount++; 678 matcher.reset("\u203C"); 679 if (!matcher.find()) 680 failCount++; 681 682 report("Negated Character Class"); 683 } 684 685 // This test is for 4628291 686 private static void toStringTest() throws Exception { 687 Pattern pattern = Pattern.compile("b+"); 688 if (pattern.toString() != "b+") 689 failCount++; 690 Matcher matcher = pattern.matcher("aaabbbccc"); 691 String matcherString = matcher.toString(); // unspecified 692 
matcher.find(); 693 matcherString = matcher.toString(); // unspecified 694 matcher.region(0,3); 695 matcherString = matcher.toString(); // unspecified 696 matcher.reset(); 697 matcherString = matcher.toString(); // unspecified 698 report("toString"); 699 } 700 701 // This test is for 4808962 702 private static void literalPatternTest() throws Exception { 703 int flags = Pattern.LITERAL; 704 705 Pattern pattern = Pattern.compile("abc\\t$^", flags); 706 check(pattern, "abc\\t$^", true); 707 708 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 709 check(pattern, "abc\\t$^", true); 710 711 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 712 check(pattern, "\\Qa^$bcabc\\E", true); 713 check(pattern, "a^$bcabc", false); 714 715 pattern = Pattern.compile("\\\\Q\\\\E"); 716 check(pattern, "\\Q\\E", true); 717 718 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 719 check(pattern, "abcefg\\Q\\Ehij", true); 720 721 pattern = Pattern.compile("\\\\\\Q\\\\E"); 722 check(pattern, "\\\\\\\\", true); 723 724 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 725 check(pattern, "\\Qa^$bcabc\\E", true); 726 check(pattern, "a^$bcabc", false); 727 728 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 729 check(pattern, "\\Qabc\\Edef", true); 730 check(pattern, "abcdef", false); 731 732 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 733 check(pattern, "abc\\Edef", true); 734 check(pattern, "abcdef", false); 735 736 pattern = Pattern.compile(Pattern.quote("\\E")); 737 check(pattern, "\\E", true); 738 739 pattern = Pattern.compile("((((abc.+?:)", flags); 740 check(pattern, "((((abc.+?:)", true); 741 742 flags |= Pattern.MULTILINE; 743 744 pattern = Pattern.compile("^cat$", flags); 745 check(pattern, "abc^cat$def", true); 746 check(pattern, "cat", false); 747 748 flags |= Pattern.CASE_INSENSITIVE; 749 750 pattern = Pattern.compile("abcdef", flags); 751 check(pattern, "ABCDEF", true); 752 check(pattern, "AbCdEf", true); 753 754 flags |= 
Pattern.DOTALL; 755 756 pattern = Pattern.compile("a...b", flags); 757 check(pattern, "A...b", true); 758 check(pattern, "Axxxb", false); 759 760 flags |= Pattern.CANON_EQ; 761 762 Pattern p = Pattern.compile("testa\u030a", flags); 763 check(pattern, "testa\u030a", false); 764 check(pattern, "test\u00e5", false); 765 766 // Supplementary character test 767 flags = Pattern.LITERAL; 768 769 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 770 check(pattern, toSupplementaries("abc\\t$^"), true); 771 772 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 773 check(pattern, toSupplementaries("abc\\t$^"), true); 774 775 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 776 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 777 check(pattern, toSupplementaries("a^$bcabc"), false); 778 779 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 780 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 781 check(pattern, toSupplementaries("a^$bcabc"), false); 782 783 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 784 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 785 check(pattern, toSupplementaries("abcdef"), false); 786 787 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 788 check(pattern, toSupplementaries("abc\\Edef"), true); 789 check(pattern, toSupplementaries("abcdef"), false); 790 791 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 792 check(pattern, toSupplementaries("((((abc.+?:)"), true); 793 794 flags |= Pattern.MULTILINE; 795 796 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 797 check(pattern, toSupplementaries("abc^cat$def"), true); 798 check(pattern, toSupplementaries("cat"), false); 799 800 flags |= Pattern.DOTALL; 801 802 // note: this is case-sensitive. 
803 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 804 check(pattern, toSupplementaries("a...b"), true); 805 check(pattern, toSupplementaries("axxxb"), false); 806 807 flags |= Pattern.CANON_EQ; 808 809 String t = toSupplementaries("test"); 810 p = Pattern.compile(t + "a\u030a", flags); 811 check(pattern, t + "a\u030a", false); 812 check(pattern, t + "\u00e5", false); 813 814 report("Literal pattern"); 815 } 816 817 // This test is for 4803179 818 // This test is also for 4808962, replacement parts 819 private static void literalReplacementTest() throws Exception { 820 int flags = Pattern.LITERAL; 821 822 Pattern pattern = Pattern.compile("abc", flags); 823 Matcher matcher = pattern.matcher("zzzabczzz"); 824 String replaceTest = "$0"; 825 String result = matcher.replaceAll(replaceTest); 826 if (!result.equals("zzzabczzz")) 827 failCount++; 828 829 matcher.reset(); 830 String literalReplacement = matcher.quoteReplacement(replaceTest); 831 result = matcher.replaceAll(literalReplacement); 832 if (!result.equals("zzz$0zzz")) 833 failCount++; 834 835 matcher.reset(); 836 replaceTest = "\\t$\\$"; 837 literalReplacement = matcher.quoteReplacement(replaceTest); 838 result = matcher.replaceAll(literalReplacement); 839 if (!result.equals("zzz\\t$\\$zzz")) 840 failCount++; 841 842 // Supplementary character test 843 pattern = Pattern.compile(toSupplementaries("abc"), flags); 844 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 845 replaceTest = "$0"; 846 result = matcher.replaceAll(replaceTest); 847 if (!result.equals(toSupplementaries("zzzabczzz"))) 848 failCount++; 849 850 matcher.reset(); 851 literalReplacement = matcher.quoteReplacement(replaceTest); 852 result = matcher.replaceAll(literalReplacement); 853 if (!result.equals(toSupplementaries("zzz$0zzz"))) 854 failCount++; 855 856 matcher.reset(); 857 replaceTest = "\\t$\\$"; 858 literalReplacement = matcher.quoteReplacement(replaceTest); 859 result = matcher.replaceAll(literalReplacement); 860 if 
(!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 861 failCount++; 862 863 // IAE should be thrown if backslash or '$' is the last character 864 // in replacement string 865 try { 866 "\uac00".replaceAll("\uac00", "$"); 867 failCount++; 868 } catch (IllegalArgumentException iie) { 869 } catch (Exception e) { 870 failCount++; 871 } 872 try { 873 "\uac00".replaceAll("\uac00", "\\"); 874 failCount++; 875 } catch (IllegalArgumentException iie) { 876 } catch (Exception e) { 877 failCount++; 878 } 879 report("Literal replacement"); 880 } 881 882 // This test is for 4757029 883 private static void regionTest() throws Exception { 884 Pattern pattern = Pattern.compile("abc"); 885 Matcher matcher = pattern.matcher("abcdefabc"); 886 887 matcher.region(0,9); 888 if (!matcher.find()) 889 failCount++; 890 if (!matcher.find()) 891 failCount++; 892 matcher.region(0,3); 893 if (!matcher.find()) 894 failCount++; 895 matcher.region(3,6); 896 if (matcher.find()) 897 failCount++; 898 matcher.region(0,2); 899 if (matcher.find()) 900 failCount++; 901 902 expectRegionFail(matcher, 1, -1); 903 expectRegionFail(matcher, -1, -1); 904 expectRegionFail(matcher, -1, 1); 905 expectRegionFail(matcher, 5, 3); 906 expectRegionFail(matcher, 5, 12); 907 expectRegionFail(matcher, 12, 12); 908 909 pattern = Pattern.compile("^abc$"); 910 matcher = pattern.matcher("zzzabczzz"); 911 matcher.region(0,9); 912 if (matcher.find()) 913 failCount++; 914 matcher.region(3,6); 915 if (!matcher.find()) 916 failCount++; 917 matcher.region(3,6); 918 matcher.useAnchoringBounds(false); 919 if (matcher.find()) 920 failCount++; 921 922 // Supplementary character test 923 pattern = Pattern.compile(toSupplementaries("abc")); 924 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 925 matcher.region(0,9*2); 926 if (!matcher.find()) 927 failCount++; 928 if (!matcher.find()) 929 failCount++; 930 matcher.region(0,3*2); 931 if (!matcher.find()) 932 failCount++; 933 matcher.region(1,3*2); 934 if (matcher.find()) 935 
failCount++; 936 matcher.region(3*2,6*2); 937 if (matcher.find()) 938 failCount++; 939 matcher.region(0,2*2); 940 if (matcher.find()) 941 failCount++; 942 matcher.region(0,2*2+1); 943 if (matcher.find()) 944 failCount++; 945 946 expectRegionFail(matcher, 1*2, -1); 947 expectRegionFail(matcher, -1, -1); 948 expectRegionFail(matcher, -1, 1*2); 949 expectRegionFail(matcher, 5*2, 3*2); 950 expectRegionFail(matcher, 5*2, 12*2); 951 expectRegionFail(matcher, 12*2, 12*2); 952 953 pattern = Pattern.compile(toSupplementaries("^abc$")); 954 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 955 matcher.region(0,9*2); 956 if (matcher.find()) 957 failCount++; 958 matcher.region(3*2,6*2); 959 if (!matcher.find()) 960 failCount++; 961 matcher.region(3*2+1,6*2); 962 if (matcher.find()) 963 failCount++; 964 matcher.region(3*2,6*2-1); 965 if (matcher.find()) 966 failCount++; 967 matcher.region(3*2,6*2); 968 matcher.useAnchoringBounds(false); 969 if (matcher.find()) 970 failCount++; 971 report("Regions"); 972 } 973 974 private static void expectRegionFail(Matcher matcher, int index1, 975 int index2) 976 { 977 try { 978 matcher.region(index1, index2); 979 failCount++; 980 } catch (IndexOutOfBoundsException ioobe) { 981 // Correct result 982 } catch (IllegalStateException ise) { 983 // Correct result 984 } 985 } 986 987 // This test is for 4803197 988 private static void escapedSegmentTest() throws Exception { 989 990 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 991 check(pattern, "dir1\\dir2", true); 992 993 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 994 check(pattern, "dir1\\dir2\\", true); 995 996 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 997 check(pattern, "dir1\\dir2\\", true); 998 999 // Supplementary character test 1000 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1001 check(pattern, toSupplementaries("dir1\\dir2"), true); 1002 1003 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1004 check(pattern, 
toSupplementaries("dir1\\dir2\\"), true); 1005 1006 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1007 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1008 1009 report("Escaped segment"); 1010 } 1011 1012 // This test is for 4792284 1013 private static void nonCaptureRepetitionTest() throws Exception { 1014 String input = "abcdefgh;"; 1015 1016 String[] patterns = new String[] { 1017 "(?:\\w{4})+;", 1018 "(?:\\w{8})*;", 1019 "(?:\\w{2}){2,4};", 1020 "(?:\\w{4}){2,};", // only matches the 1021 ".*?(?:\\w{5})+;", // specified minimum 1022 ".*?(?:\\w{9})*;", // number of reps - OK 1023 "(?:\\w{4})+?;", // lazy repetition - OK 1024 "(?:\\w{4})++;", // possessive repetition - OK 1025 "(?:\\w{2,}?)+;", // non-deterministic - OK 1026 "(\\w{4})+;", // capturing group - OK 1027 }; 1028 1029 for (int i = 0; i < patterns.length; i++) { 1030 // Check find() 1031 check(patterns[i], 0, input, input, true); 1032 // Check matches() 1033 Pattern p = Pattern.compile(patterns[i]); 1034 Matcher m = p.matcher(input); 1035 1036 if (m.matches()) { 1037 if (!m.group(0).equals(input)) 1038 failCount++; 1039 } else { 1040 failCount++; 1041 } 1042 } 1043 1044 report("Non capturing repetition"); 1045 } 1046 1047 // This test is for 6358731 1048 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1049 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1050 Matcher matcher = pattern.matcher("abcd"); 1051 if (!matcher.matches() || 1052 matcher.group(1) != null || 1053 !matcher.group(2).equals("abcd")) { 1054 failCount++; 1055 } 1056 report("Not captured GroupCurly"); 1057 } 1058 1059 // This test is for 4706545 1060 private static void javaCharClassTest() throws Exception { 1061 for (int i=0; i<1000; i++) { 1062 char c = (char)generator.nextInt(); 1063 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1064 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1065 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1066 
check("{javaTitleCase}", c, Character.isTitleCase(c)); 1067 check("{javaDigit}", c, Character.isDigit(c)); 1068 check("{javaDefined}", c, Character.isDefined(c)); 1069 check("{javaLetter}", c, Character.isLetter(c)); 1070 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1071 check("{javaJavaIdentifierStart}", c, 1072 Character.isJavaIdentifierStart(c)); 1073 check("{javaJavaIdentifierPart}", c, 1074 Character.isJavaIdentifierPart(c)); 1075 check("{javaUnicodeIdentifierStart}", c, 1076 Character.isUnicodeIdentifierStart(c)); 1077 check("{javaUnicodeIdentifierPart}", c, 1078 Character.isUnicodeIdentifierPart(c)); 1079 check("{javaIdentifierIgnorable}", c, 1080 Character.isIdentifierIgnorable(c)); 1081 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1082 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1083 check("{javaISOControl}", c, Character.isISOControl(c)); 1084 check("{javaMirrored}", c, Character.isMirrored(c)); 1085 1086 } 1087 1088 // Supplementary character test 1089 for (int i=0; i<1000; i++) { 1090 int c = generator.nextInt(Character.MAX_CODE_POINT 1091 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1092 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1093 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1094 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1095 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1096 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1097 check("{javaDigit}", c, Character.isDigit(c)); 1098 check("{javaDefined}", c, Character.isDefined(c)); 1099 check("{javaLetter}", c, Character.isLetter(c)); 1100 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1101 check("{javaJavaIdentifierStart}", c, 1102 Character.isJavaIdentifierStart(c)); 1103 check("{javaJavaIdentifierPart}", c, 1104 Character.isJavaIdentifierPart(c)); 1105 check("{javaUnicodeIdentifierStart}", c, 1106 Character.isUnicodeIdentifierStart(c)); 1107 check("{javaUnicodeIdentifierPart}", c, 1108 
Character.isUnicodeIdentifierPart(c)); 1109 check("{javaIdentifierIgnorable}", c, 1110 Character.isIdentifierIgnorable(c)); 1111 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1112 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1113 check("{javaISOControl}", c, Character.isISOControl(c)); 1114 check("{javaMirrored}", c, Character.isMirrored(c)); 1115 } 1116 1117 report("Java character classes"); 1118 } 1119 1120 // This test is for 4523620 1121 /* 1122 private static void numOccurrencesTest() throws Exception { 1123 Pattern pattern = Pattern.compile("aaa"); 1124 1125 if (pattern.numOccurrences("aaaaaa", false) != 2) 1126 failCount++; 1127 if (pattern.numOccurrences("aaaaaa", true) != 4) 1128 failCount++; 1129 1130 pattern = Pattern.compile("^"); 1131 if (pattern.numOccurrences("aaaaaa", false) != 1) 1132 failCount++; 1133 if (pattern.numOccurrences("aaaaaa", true) != 1) 1134 failCount++; 1135 1136 report("Number of Occurrences"); 1137 } 1138 */ 1139 1140 // This test is for 4776374 1141 private static void caretBetweenTerminatorsTest() throws Exception { 1142 int flags1 = Pattern.DOTALL; 1143 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1144 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1145 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1146 1147 check("^....", flags1, "test\ntest", "test", true); 1148 check(".....^", flags1, "test\ntest", "test", false); 1149 check(".....^", flags1, "test\n", "test", false); 1150 check("....^", flags1, "test\r\n", "test", false); 1151 1152 check("^....", flags2, "test\ntest", "test", true); 1153 check("....^", flags2, "test\ntest", "test", false); 1154 check(".....^", flags2, "test\n", "test", false); 1155 check("....^", flags2, "test\r\n", "test", false); 1156 1157 check("^....", flags3, "test\ntest", "test", true); 1158 check(".....^", flags3, "test\ntest", "test\n", true); 1159 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1160 check(".....^", flags3, "test\n", 
"test", false); 1161 check(".....^", flags3, "test\r\n", "test", false); 1162 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1163 1164 check("^....", flags4, "test\ntest", "test", true); 1165 check(".....^", flags3, "test\ntest", "test\n", true); 1166 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1167 check(".....^", flags4, "test\n", "test\n", false); 1168 check(".....^", flags4, "test\r\n", "test\r", false); 1169 1170 // Supplementary character test 1171 String t = toSupplementaries("test"); 1172 check("^....", flags1, t+"\n"+t, t, true); 1173 check(".....^", flags1, t+"\n"+t, t, false); 1174 check(".....^", flags1, t+"\n", t, false); 1175 check("....^", flags1, t+"\r\n", t, false); 1176 1177 check("^....", flags2, t+"\n"+t, t, true); 1178 check("....^", flags2, t+"\n"+t, t, false); 1179 check(".....^", flags2, t+"\n", t, false); 1180 check("....^", flags2, t+"\r\n", t, false); 1181 1182 check("^....", flags3, t+"\n"+t, t, true); 1183 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1184 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1185 check(".....^", flags3, t+"\n", t, false); 1186 check(".....^", flags3, t+"\r\n", t, false); 1187 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1188 1189 check("^....", flags4, t+"\n"+t, t, true); 1190 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1191 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1192 check(".....^", flags4, t+"\n", t+"\n", false); 1193 check(".....^", flags4, t+"\r\n", t+"\r", false); 1194 1195 report("Caret between terminators"); 1196 } 1197 1198 // This test is for 4727935 1199 private static void dollarAtEndTest() throws Exception { 1200 int flags1 = Pattern.DOTALL; 1201 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1202 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1203 1204 check("....$", flags1, "test\n", "test", true); 1205 check("....$", flags1, "test\r\n", "test", true); 1206 check(".....$", flags1, "test\n", "test\n", true); 1207 
check(".....$", flags1, "test\u0085", "test\u0085", true); 1208 check("....$", flags1, "test\u0085", "test", true); 1209 1210 check("....$", flags2, "test\n", "test", true); 1211 check(".....$", flags2, "test\n", "test\n", true); 1212 check(".....$", flags2, "test\u0085", "test\u0085", true); 1213 check("....$", flags2, "test\u0085", "est\u0085", true); 1214 1215 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1216 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1217 check("....$blah", flags3, "test\nblah", "!!!!", false); 1218 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1219 1220 // Supplementary character test 1221 String t = toSupplementaries("test"); 1222 String b = toSupplementaries("blah"); 1223 check("....$", flags1, t+"\n", t, true); 1224 check("....$", flags1, t+"\r\n", t, true); 1225 check(".....$", flags1, t+"\n", t+"\n", true); 1226 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1227 check("....$", flags1, t+"\u0085", t, true); 1228 1229 check("....$", flags2, t+"\n", t, true); 1230 check(".....$", flags2, t+"\n", t+"\n", true); 1231 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1232 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1233 1234 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1235 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1236 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1237 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1238 1239 report("Dollar at End"); 1240 } 1241 1242 // This test is for 4711773 1243 private static void multilineDollarTest() throws Exception { 1244 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1245 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1246 matcher.find(); 1247 if (matcher.start(0) != 9) 1248 failCount++; 1249 matcher.find(); 1250 if (matcher.start(0) != 20) 1251 failCount++; 1252 1253 // Supplementary character test 1254 matcher = 
findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1255 matcher.find(); 1256 if (matcher.start(0) != 9*2) 1257 failCount++; 1258 matcher.find(); 1259 if (matcher.start(0) != 20*2) 1260 failCount++; 1261 1262 report("Multiline Dollar"); 1263 } 1264 1265 private static void reluctantRepetitionTest() throws Exception { 1266 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1267 check(p, "1 word word word 2", true); 1268 check(p, "1 wor wo w 2", true); 1269 check(p, "1 word word 2", true); 1270 check(p, "1 word 2", true); 1271 check(p, "1 wo w w 2", true); 1272 check(p, "1 wo w 2", true); 1273 check(p, "1 wor w 2", true); 1274 1275 p = Pattern.compile("([a-z])+?c"); 1276 Matcher m = p.matcher("ababcdefdec"); 1277 check(m, "ababc"); 1278 1279 // Supplementary character test 1280 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1281 m = p.matcher(toSupplementaries("ababcdefdec")); 1282 check(m, toSupplementaries("ababc")); 1283 1284 report("Reluctant Repetition"); 1285 } 1286 1287 private static void serializeTest() throws Exception { 1288 String patternStr = "(b)"; 1289 String matchStr = "b"; 1290 Pattern pattern = Pattern.compile(patternStr); 1291 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1292 ObjectOutputStream oos = new ObjectOutputStream(baos); 1293 oos.writeObject(pattern); 1294 oos.close(); 1295 ObjectInputStream ois = new ObjectInputStream( 1296 new ByteArrayInputStream(baos.toByteArray())); 1297 Pattern serializedPattern = (Pattern)ois.readObject(); 1298 ois.close(); 1299 Matcher matcher = serializedPattern.matcher(matchStr); 1300 if (!matcher.matches()) 1301 failCount++; 1302 if (matcher.groupCount() != 1) 1303 failCount++; 1304 1305 report("Serialization"); 1306 } 1307 1308 private static void gTest() { 1309 Pattern pattern = Pattern.compile("\\G\\w"); 1310 Matcher matcher = pattern.matcher("abc#x#x"); 1311 matcher.find(); 1312 matcher.find(); 1313 matcher.find(); 1314 if (matcher.find()) 1315 
failCount++; 1316 1317 pattern = Pattern.compile("\\GA*"); 1318 matcher = pattern.matcher("1A2AA3"); 1319 matcher.find(); 1320 if (matcher.find()) 1321 failCount++; 1322 1323 pattern = Pattern.compile("\\GA*"); 1324 matcher = pattern.matcher("1A2AA3"); 1325 if (!matcher.find(1)) 1326 failCount++; 1327 matcher.find(); 1328 if (matcher.find()) 1329 failCount++; 1330 1331 report("\\G"); 1332 } 1333 1334 private static void zTest() { 1335 Pattern pattern = Pattern.compile("foo\\Z"); 1336 // Positives 1337 check(pattern, "foo\u0085", true); 1338 check(pattern, "foo\u2028", true); 1339 check(pattern, "foo\u2029", true); 1340 check(pattern, "foo\n", true); 1341 check(pattern, "foo\r", true); 1342 check(pattern, "foo\r\n", true); 1343 // Negatives 1344 check(pattern, "fooo", false); 1345 check(pattern, "foo\n\r", false); 1346 1347 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1348 // Positives 1349 check(pattern, "foo", true); 1350 check(pattern, "foo\n", true); 1351 // Negatives 1352 check(pattern, "foo\r", false); 1353 check(pattern, "foo\u0085", false); 1354 check(pattern, "foo\u2028", false); 1355 check(pattern, "foo\u2029", false); 1356 1357 report("\\Z"); 1358 } 1359 1360 private static void replaceFirstTest() { 1361 Pattern pattern = Pattern.compile("(ab)(c*)"); 1362 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1363 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1364 failCount++; 1365 1366 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1367 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1368 failCount++; 1369 1370 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1371 String result = matcher.replaceFirst("$1"); 1372 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1373 failCount++; 1374 1375 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1376 result = matcher.replaceFirst("$2"); 1377 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1378 failCount++; 1379 1380 pattern = Pattern.compile("a*"); 1381 matcher = 
pattern.matcher("aaaaaaaaaa"); 1382 if (!matcher.replaceFirst("test").equals("test")) 1383 failCount++; 1384 1385 pattern = Pattern.compile("a+"); 1386 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1387 if (!matcher.replaceFirst("test").equals("zzztest")) 1388 failCount++; 1389 1390 // Supplementary character test 1391 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1392 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1393 if (!matcher.replaceFirst(toSupplementaries("test")) 1394 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1395 failCount++; 1396 1397 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1398 if (!matcher.replaceFirst(toSupplementaries("test")). 1399 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1400 failCount++; 1401 1402 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1403 result = matcher.replaceFirst("$1"); 1404 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1405 failCount++; 1406 1407 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1408 result = matcher.replaceFirst("$2"); 1409 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1410 failCount++; 1411 1412 pattern = Pattern.compile(toSupplementaries("a*")); 1413 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1414 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1415 failCount++; 1416 1417 pattern = Pattern.compile(toSupplementaries("a+")); 1418 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1419 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1420 failCount++; 1421 1422 report("Replace First"); 1423 } 1424 1425 private static void unixLinesTest() { 1426 Pattern pattern = Pattern.compile(".*"); 1427 Matcher matcher = pattern.matcher("aa\u2028blah"); 1428 matcher.find(); 1429 if (!matcher.group(0).equals("aa")) 1430 failCount++; 1431 1432 pattern = 
Pattern.compile(".*", Pattern.UNIX_LINES); 1433 matcher = pattern.matcher("aa\u2028blah"); 1434 matcher.find(); 1435 if (!matcher.group(0).equals("aa\u2028blah")) 1436 failCount++; 1437 1438 pattern = Pattern.compile("[az]$", 1439 Pattern.MULTILINE | Pattern.UNIX_LINES); 1440 matcher = pattern.matcher("aa\u2028zz"); 1441 check(matcher, "a\u2028", false); 1442 1443 // Supplementary character test 1444 pattern = Pattern.compile(".*"); 1445 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1446 matcher.find(); 1447 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1448 failCount++; 1449 1450 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1451 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1452 matcher.find(); 1453 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1454 failCount++; 1455 1456 pattern = Pattern.compile(toSupplementaries("[az]$"), 1457 Pattern.MULTILINE | Pattern.UNIX_LINES); 1458 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1459 check(matcher, toSupplementaries("a\u2028"), false); 1460 1461 report("Unix Lines"); 1462 } 1463 1464 private static void commentsTest() { 1465 int flags = Pattern.COMMENTS; 1466 1467 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1468 Matcher matcher = pattern.matcher("aa#aa"); 1469 if (!matcher.matches()) 1470 failCount++; 1471 1472 pattern = Pattern.compile("aa # blah", flags); 1473 matcher = pattern.matcher("aa"); 1474 if (!matcher.matches()) 1475 failCount++; 1476 1477 pattern = Pattern.compile("aa blah", flags); 1478 matcher = pattern.matcher("aablah"); 1479 if (!matcher.matches()) 1480 failCount++; 1481 1482 pattern = Pattern.compile("aa # blah blech ", flags); 1483 matcher = pattern.matcher("aa"); 1484 if (!matcher.matches()) 1485 failCount++; 1486 1487 pattern = Pattern.compile("aa # blah\n ", flags); 1488 matcher = pattern.matcher("aa"); 1489 if (!matcher.matches()) 1490 failCount++; 1491 1492 pattern = Pattern.compile("aa # blah\nbc # blech", 
flags); 1493 matcher = pattern.matcher("aabc"); 1494 if (!matcher.matches()) 1495 failCount++; 1496 1497 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1498 matcher = pattern.matcher("aabc"); 1499 if (!matcher.matches()) 1500 failCount++; 1501 1502 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1503 matcher = pattern.matcher("aabc#blech"); 1504 if (!matcher.matches()) 1505 failCount++; 1506 1507 // Supplementary character test 1508 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1509 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1510 if (!matcher.matches()) 1511 failCount++; 1512 1513 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1514 matcher = pattern.matcher(toSupplementaries("aa")); 1515 if (!matcher.matches()) 1516 failCount++; 1517 1518 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1519 matcher = pattern.matcher(toSupplementaries("aablah")); 1520 if (!matcher.matches()) 1521 failCount++; 1522 1523 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1524 matcher = pattern.matcher(toSupplementaries("aa")); 1525 if (!matcher.matches()) 1526 failCount++; 1527 1528 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1529 matcher = pattern.matcher(toSupplementaries("aa")); 1530 if (!matcher.matches()) 1531 failCount++; 1532 1533 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1534 matcher = pattern.matcher(toSupplementaries("aabc")); 1535 if (!matcher.matches()) 1536 failCount++; 1537 1538 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1539 matcher = pattern.matcher(toSupplementaries("aabc")); 1540 if (!matcher.matches()) 1541 failCount++; 1542 1543 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1544 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1545 if (!matcher.matches()) 1546 failCount++; 1547 1548 report("Comments"); 1549 } 1550 1551 
private static void caseFoldingTest() { // bug 4504687 1552 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1553 Pattern pattern = Pattern.compile("aa", flags); 1554 Matcher matcher = pattern.matcher("ab"); 1555 if (matcher.matches()) 1556 failCount++; 1557 1558 pattern = Pattern.compile("aA", flags); 1559 matcher = pattern.matcher("ab"); 1560 if (matcher.matches()) 1561 failCount++; 1562 1563 pattern = Pattern.compile("aa", flags); 1564 matcher = pattern.matcher("aB"); 1565 if (matcher.matches()) 1566 failCount++; 1567 matcher = pattern.matcher("Ab"); 1568 if (matcher.matches()) 1569 failCount++; 1570 1571 // ASCII "a" 1572 // Latin-1 Supplement "a" + grave 1573 // Cyrillic "a" 1574 String[] patterns = new String[] { 1575 //single 1576 "a", "\u00e0", "\u0430", 1577 //slice 1578 "ab", "\u00e0\u00e1", "\u0430\u0431", 1579 //class single 1580 "[a]", "[\u00e0]", "[\u0430]", 1581 //class range 1582 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1583 //back reference 1584 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1585 }; 1586 1587 String[] texts = new String[] { 1588 "A", "\u00c0", "\u0410", 1589 "AB", "\u00c0\u00c1", "\u0410\u0411", 1590 "A", "\u00c0", "\u0410", 1591 "B", "\u00c2", "\u0411", 1592 "aA", "\u00e0\u00c0", "\u0430\u0410" 1593 }; 1594 1595 boolean[] expected = new boolean[] { 1596 true, false, false, 1597 true, false, false, 1598 true, false, false, 1599 true, false, false, 1600 true, false, false 1601 }; 1602 1603 flags = Pattern.CASE_INSENSITIVE; 1604 for (int i = 0; i < patterns.length; i++) { 1605 pattern = Pattern.compile(patterns[i], flags); 1606 matcher = pattern.matcher(texts[i]); 1607 if (matcher.matches() != expected[i]) { 1608 System.out.println("<1> Failed at " + i); 1609 failCount++; 1610 } 1611 } 1612 1613 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1614 for (int i = 0; i < patterns.length; i++) { 1615 pattern = Pattern.compile(patterns[i], flags); 1616 matcher = pattern.matcher(texts[i]); 1617 if (!matcher.matches()) { 
1618 System.out.println("<2> Failed at " + i); 1619 failCount++; 1620 } 1621 } 1622 // flag unicode_case alone should do nothing 1623 flags = Pattern.UNICODE_CASE; 1624 for (int i = 0; i < patterns.length; i++) { 1625 pattern = Pattern.compile(patterns[i], flags); 1626 matcher = pattern.matcher(texts[i]); 1627 if (matcher.matches()) { 1628 System.out.println("<3> Failed at " + i); 1629 failCount++; 1630 } 1631 } 1632 1633 // Special cases: i, I, u+0131 and u+0130 1634 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1635 pattern = Pattern.compile("[h-j]+", flags); 1636 if (!pattern.matcher("\u0131\u0130").matches()) 1637 failCount++; 1638 report("Case Folding"); 1639 } 1640 1641 private static void appendTest() { 1642 Pattern pattern = Pattern.compile("(ab)(cd)"); 1643 Matcher matcher = pattern.matcher("abcd"); 1644 String result = matcher.replaceAll("$2$1"); 1645 if (!result.equals("cdab")) 1646 failCount++; 1647 1648 String s1 = "Swap all: first = 123, second = 456"; 1649 String s2 = "Swap one: first = 123, second = 456"; 1650 String r = "$3$2$1"; 1651 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1652 matcher = pattern.matcher(s1); 1653 1654 result = matcher.replaceAll(r); 1655 if (!result.equals("Swap all: 123 = first, 456 = second")) 1656 failCount++; 1657 1658 matcher = pattern.matcher(s2); 1659 1660 if (matcher.find()) { 1661 StringBuffer sb = new StringBuffer(); 1662 matcher.appendReplacement(sb, r); 1663 matcher.appendTail(sb); 1664 result = sb.toString(); 1665 if (!result.equals("Swap one: 123 = first, second = 456")) 1666 failCount++; 1667 } 1668 1669 // Supplementary character test 1670 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1671 matcher = pattern.matcher(toSupplementaries("abcd")); 1672 result = matcher.replaceAll("$2$1"); 1673 if (!result.equals(toSupplementaries("cdab"))) 1674 failCount++; 1675 1676 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1677 s2 = toSupplementaries("Swap one: first = 123, 
second = 456"); 1678 r = toSupplementaries("$3$2$1"); 1679 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1680 matcher = pattern.matcher(s1); 1681 1682 result = matcher.replaceAll(r); 1683 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1684 failCount++; 1685 1686 matcher = pattern.matcher(s2); 1687 1688 if (matcher.find()) { 1689 StringBuffer sb = new StringBuffer(); 1690 matcher.appendReplacement(sb, r); 1691 matcher.appendTail(sb); 1692 result = sb.toString(); 1693 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1694 failCount++; 1695 } 1696 report("Append"); 1697 } 1698 1699 private static void splitTest() { 1700 Pattern pattern = Pattern.compile(":"); 1701 String[] result = pattern.split("foo:and:boo", 2); 1702 if (!result[0].equals("foo")) 1703 failCount++; 1704 if (!result[1].equals("and:boo")) 1705 failCount++; 1706 // Supplementary character test 1707 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1708 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1709 if (!result[0].equals(toSupplementaries("foo"))) 1710 failCount++; 1711 if (!result[1].equals(toSupplementaries("andXboo"))) 1712 failCount++; 1713 1714 CharBuffer cb = CharBuffer.allocate(100); 1715 cb.put("foo:and:boo"); 1716 cb.flip(); 1717 result = pattern.split(cb); 1718 if (!result[0].equals("foo")) 1719 failCount++; 1720 if (!result[1].equals("and")) 1721 failCount++; 1722 if (!result[2].equals("boo")) 1723 failCount++; 1724 1725 // Supplementary character test 1726 CharBuffer cbs = CharBuffer.allocate(100); 1727 cbs.put(toSupplementaries("fooXandXboo")); 1728 cbs.flip(); 1729 result = patternX.split(cbs); 1730 if (!result[0].equals(toSupplementaries("foo"))) 1731 failCount++; 1732 if (!result[1].equals(toSupplementaries("and"))) 1733 failCount++; 1734 if (!result[2].equals(toSupplementaries("boo"))) 1735 failCount++; 1736 1737 String source = "0123456789"; 1738 for (int limit=-2; 
limit<3; limit++) { 1739 for (int x=0; x<10; x++) { 1740 result = source.split(Integer.toString(x), limit); 1741 int expectedLength = limit < 1 ? 2 : limit; 1742 1743 if ((limit == 0) && (x == 9)) { 1744 // expected dropping of "" 1745 if (result.length != 1) 1746 failCount++; 1747 if (!result[0].equals("012345678")) { 1748 failCount++; 1749 } 1750 } else { 1751 if (result.length != expectedLength) { 1752 failCount++; 1753 } 1754 if (!result[0].equals(source.substring(0,x))) { 1755 if (limit != 1) { 1756 failCount++; 1757 } else { 1758 if (!result[0].equals(source.substring(0,10))) { 1759 failCount++; 1760 } 1761 } 1762 } 1763 if (expectedLength > 1) { // Check segment 2 1764 if (!result[1].equals(source.substring(x+1,10))) 1765 failCount++; 1766 } 1767 } 1768 } 1769 } 1770 // Check the case for no match found 1771 for (int limit=-2; limit<3; limit++) { 1772 result = source.split("e", limit); 1773 if (result.length != 1) 1774 failCount++; 1775 if (!result[0].equals(source)) 1776 failCount++; 1777 } 1778 // Check the case for limit == 0, source = ""; 1779 // split() now returns 0-length for empty source "" see #6559590 1780 source = ""; 1781 result = source.split("e", 0); 1782 if (result.length != 1) 1783 failCount++; 1784 if (!result[0].equals(source)) 1785 failCount++; 1786 1787 // Check both split() and splitAsStraem(), especially for zero-lenth 1788 // input and zero-lenth match cases 1789 String[][] input = new String[][] { 1790 { " ", "Abc Efg Hij" }, // normal non-zero-match 1791 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1792 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1793 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1794 { "(?=\\p{Lu})", "AbcEfg" }, 1795 { "(?=\\p{Lu})", "Abc" }, 1796 { " ", "" }, // zero-length input 1797 { ".*", "" }, 1798 1799 // some tests from PatternStreamTest.java 1800 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1801 { "\u00a3a", 
"afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1802 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1803 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1804 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1805 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1806 { "\u56da", "" }, 1807 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1808 { "o", "boo:and:foo" }, 1809 { "o", "booooo:and:fooooo" }, 1810 { "o", "fooooo:" }, 1811 }; 1812 1813 String[][] expected = new String[][] { 1814 { "Abc", "Efg", "Hij" }, 1815 { "", "Abc", "Efg", "Hij" }, 1816 { "Abc", "", "Efg", "Hij" }, 1817 { "Abc", "Efg", "Hij" }, 1818 { "Abc", "Efg" }, 1819 { "Abc" }, 1820 { "" }, 1821 { "" }, 1822 1823 { "awgqwefg1fefw", "vssv1vvv1" }, 1824 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1825 { "awgqwefg", "fefw4vssv", "vvv" }, 1826 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1827 { "1", "23", "456", "7890" }, 1828 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1829 { "" }, 1830 { "This", "is", "testing", "", "with", "different", "separators" }, 1831 { "b", "", ":and:f" }, 1832 { "b", "", "", "", "", ":and:f" }, 1833 { "f", "", "", "", "", ":" }, 1834 }; 1835 for (int i = 0; i < input.length; i++) { 1836 pattern = Pattern.compile(input[i][0]); 1837 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) { 1838 failCount++; 1839 } 1840 if (input[i][1].length() > 0 && // splitAsStream() return empty resulting 1841 // array for zero-length input for now 1842 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1843 expected[i])) { 1844 failCount++; 1845 } 1846 } 1847 report("Split"); 1848 } 1849 1850 private static void negationTest() { 1851 Pattern pattern = Pattern.compile("[\\[@^]+"); 1852 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1853 if (!matcher.find()) 1854 failCount++; 1855 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1856 failCount++; 1857 pattern 
= Pattern.compile("[@\\[^]+"); 1858 matcher = pattern.matcher("@@@@[[[[^^^^"); 1859 if (!matcher.find()) 1860 failCount++; 1861 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1862 failCount++; 1863 pattern = Pattern.compile("[@\\[^@]+"); 1864 matcher = pattern.matcher("@@@@[[[[^^^^"); 1865 if (!matcher.find()) 1866 failCount++; 1867 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1868 failCount++; 1869 1870 pattern = Pattern.compile("\\)"); 1871 matcher = pattern.matcher("xxx)xxx"); 1872 if (!matcher.find()) 1873 failCount++; 1874 1875 report("Negation"); 1876 } 1877 1878 private static void ampersandTest() { 1879 Pattern pattern = Pattern.compile("[&@]+"); 1880 check(pattern, "@@@@&&&&", true); 1881 1882 pattern = Pattern.compile("[@&]+"); 1883 check(pattern, "@@@@&&&&", true); 1884 1885 pattern = Pattern.compile("[@\\&]+"); 1886 check(pattern, "@@@@&&&&", true); 1887 1888 report("Ampersand"); 1889 } 1890 1891 private static void octalTest() throws Exception { 1892 Pattern pattern = Pattern.compile("\\u0007"); 1893 Matcher matcher = pattern.matcher("\u0007"); 1894 if (!matcher.matches()) 1895 failCount++; 1896 pattern = Pattern.compile("\\07"); 1897 matcher = pattern.matcher("\u0007"); 1898 if (!matcher.matches()) 1899 failCount++; 1900 pattern = Pattern.compile("\\007"); 1901 matcher = pattern.matcher("\u0007"); 1902 if (!matcher.matches()) 1903 failCount++; 1904 pattern = Pattern.compile("\\0007"); 1905 matcher = pattern.matcher("\u0007"); 1906 if (!matcher.matches()) 1907 failCount++; 1908 pattern = Pattern.compile("\\040"); 1909 matcher = pattern.matcher("\u0020"); 1910 if (!matcher.matches()) 1911 failCount++; 1912 pattern = Pattern.compile("\\0403"); 1913 matcher = pattern.matcher("\u00203"); 1914 if (!matcher.matches()) 1915 failCount++; 1916 pattern = Pattern.compile("\\0103"); 1917 matcher = pattern.matcher("\u0043"); 1918 if (!matcher.matches()) 1919 failCount++; 1920 1921 report("Octal"); 1922 } 1923 1924 private static void longPatternTest() throws 
Exception { 1925 try { 1926 Pattern pattern = Pattern.compile( 1927 "a 32-character-long pattern xxxx"); 1928 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 1929 pattern = Pattern.compile("a thirty four character long regex"); 1930 StringBuffer patternToBe = new StringBuffer(101); 1931 for (int i=0; i<100; i++) 1932 patternToBe.append((char)(97 + i%26)); 1933 pattern = Pattern.compile(patternToBe.toString()); 1934 } catch (PatternSyntaxException e) { 1935 failCount++; 1936 } 1937 1938 // Supplementary character test 1939 try { 1940 Pattern pattern = Pattern.compile( 1941 toSupplementaries("a 32-character-long pattern xxxx")); 1942 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 1943 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 1944 StringBuffer patternToBe = new StringBuffer(101*2); 1945 for (int i=0; i<100; i++) 1946 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 1947 + 97 + i%26)); 1948 pattern = Pattern.compile(patternToBe.toString()); 1949 } catch (PatternSyntaxException e) { 1950 failCount++; 1951 } 1952 report("LongPattern"); 1953 } 1954 1955 private static void group0Test() throws Exception { 1956 Pattern pattern = Pattern.compile("(tes)ting"); 1957 Matcher matcher = pattern.matcher("testing"); 1958 check(matcher, "testing"); 1959 1960 matcher.reset("testing"); 1961 if (matcher.lookingAt()) { 1962 if (!matcher.group(0).equals("testing")) 1963 failCount++; 1964 } else { 1965 failCount++; 1966 } 1967 1968 matcher.reset("testing"); 1969 if (matcher.matches()) { 1970 if (!matcher.group(0).equals("testing")) 1971 failCount++; 1972 } else { 1973 failCount++; 1974 } 1975 1976 pattern = Pattern.compile("(tes)ting"); 1977 matcher = pattern.matcher("testing"); 1978 if (matcher.lookingAt()) { 1979 if (!matcher.group(0).equals("testing")) 1980 failCount++; 1981 } else { 1982 failCount++; 1983 } 1984 1985 pattern = Pattern.compile("^(tes)ting"); 1986 
matcher = pattern.matcher("testing"); 1987 if (matcher.matches()) { 1988 if (!matcher.group(0).equals("testing")) 1989 failCount++; 1990 } else { 1991 failCount++; 1992 } 1993 1994 // Supplementary character test 1995 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 1996 matcher = pattern.matcher(toSupplementaries("testing")); 1997 check(matcher, toSupplementaries("testing")); 1998 1999 matcher.reset(toSupplementaries("testing")); 2000 if (matcher.lookingAt()) { 2001 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2002 failCount++; 2003 } else { 2004 failCount++; 2005 } 2006 2007 matcher.reset(toSupplementaries("testing")); 2008 if (matcher.matches()) { 2009 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2010 failCount++; 2011 } else { 2012 failCount++; 2013 } 2014 2015 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2016 matcher = pattern.matcher(toSupplementaries("testing")); 2017 if (matcher.lookingAt()) { 2018 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2019 failCount++; 2020 } else { 2021 failCount++; 2022 } 2023 2024 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2025 matcher = pattern.matcher(toSupplementaries("testing")); 2026 if (matcher.matches()) { 2027 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2028 failCount++; 2029 } else { 2030 failCount++; 2031 } 2032 2033 report("Group0"); 2034 } 2035 2036 private static void findIntTest() throws Exception { 2037 Pattern p = Pattern.compile("blah"); 2038 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2039 boolean result = m.find(2); 2040 if (!result) 2041 failCount++; 2042 2043 p = Pattern.compile("$"); 2044 m = p.matcher("1234567890"); 2045 result = m.find(10); 2046 if (!result) 2047 failCount++; 2048 try { 2049 result = m.find(11); 2050 failCount++; 2051 } catch (IndexOutOfBoundsException e) { 2052 // correct result 2053 } 2054 2055 // Supplementary character test 2056 p = Pattern.compile(toSupplementaries("blah")); 2057 m = 
p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2058 result = m.find(2); 2059 if (!result) 2060 failCount++; 2061 2062 report("FindInt"); 2063 } 2064 2065 private static void emptyPatternTest() throws Exception { 2066 Pattern p = Pattern.compile(""); 2067 Matcher m = p.matcher("foo"); 2068 2069 // Should find empty pattern at beginning of input 2070 boolean result = m.find(); 2071 if (result != true) 2072 failCount++; 2073 if (m.start() != 0) 2074 failCount++; 2075 2076 // Should not match entire input if input is not empty 2077 m.reset(); 2078 result = m.matches(); 2079 if (result == true) 2080 failCount++; 2081 2082 try { 2083 m.start(0); 2084 failCount++; 2085 } catch (IllegalStateException e) { 2086 // Correct result 2087 } 2088 2089 // Should match entire input if input is empty 2090 m.reset(""); 2091 result = m.matches(); 2092 if (result != true) 2093 failCount++; 2094 2095 result = Pattern.matches("", ""); 2096 if (result != true) 2097 failCount++; 2098 2099 result = Pattern.matches("", "foo"); 2100 if (result == true) 2101 failCount++; 2102 report("EmptyPattern"); 2103 } 2104 2105 private static void charClassTest() throws Exception { 2106 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2107 check(pattern, "blahb]blech", true); 2108 2109 pattern = Pattern.compile("[abc[def]]"); 2110 check(pattern, "b", true); 2111 2112 // Supplementary character tests 2113 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2114 check(pattern, toSupplementaries("blahb]blech"), true); 2115 2116 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2117 check(pattern, toSupplementaries("b"), true); 2118 2119 try { 2120 // u00ff when UNICODE_CASE 2121 pattern = Pattern.compile("[ab\u00ffcd]", 2122 Pattern.CASE_INSENSITIVE| 2123 Pattern.UNICODE_CASE); 2124 check(pattern, "ab\u00ffcd", true); 2125 check(pattern, "Ab\u0178Cd", true); 2126 2127 // u00b5 when UNICODE_CASE 2128 pattern = Pattern.compile("[ab\u00b5cd]", 2129 Pattern.CASE_INSENSITIVE| 2130 
Pattern.UNICODE_CASE); 2131 check(pattern, "ab\u00b5cd", true); 2132 check(pattern, "Ab\u039cCd", true); 2133 } catch (Exception e) { failCount++; } 2134 2135 /* Special cases 2136 (1)LatinSmallLetterLongS u+017f 2137 (2)LatinSmallLetterDotlessI u+0131 2138 (3)LatineCapitalLetterIWithDotAbove u+0130 2139 (4)KelvinSign u+212a 2140 (5)AngstromSign u+212b 2141 */ 2142 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2143 pattern = Pattern.compile("[sik\u00c5]+", flags); 2144 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2145 failCount++; 2146 2147 report("CharClass"); 2148 } 2149 2150 private static void caretTest() throws Exception { 2151 Pattern pattern = Pattern.compile("\\w*"); 2152 Matcher matcher = pattern.matcher("a#bc#def##g"); 2153 check(matcher, "a"); 2154 check(matcher, ""); 2155 check(matcher, "bc"); 2156 check(matcher, ""); 2157 check(matcher, "def"); 2158 check(matcher, ""); 2159 check(matcher, ""); 2160 check(matcher, "g"); 2161 check(matcher, ""); 2162 if (matcher.find()) 2163 failCount++; 2164 2165 pattern = Pattern.compile("^\\w*"); 2166 matcher = pattern.matcher("a#bc#def##g"); 2167 check(matcher, "a"); 2168 if (matcher.find()) 2169 failCount++; 2170 2171 pattern = Pattern.compile("\\w"); 2172 matcher = pattern.matcher("abc##x"); 2173 check(matcher, "a"); 2174 check(matcher, "b"); 2175 check(matcher, "c"); 2176 check(matcher, "x"); 2177 if (matcher.find()) 2178 failCount++; 2179 2180 pattern = Pattern.compile("^\\w"); 2181 matcher = pattern.matcher("abc##x"); 2182 check(matcher, "a"); 2183 if (matcher.find()) 2184 failCount++; 2185 2186 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2187 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2188 check(matcher, "abc"); 2189 if (matcher.find()) 2190 failCount++; 2191 2192 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2193 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2194 check(matcher, "abc"); 2195 check(matcher, "jkl"); 2196 if (matcher.find()) 2197 
failCount++; 2198 2199 pattern = Pattern.compile("^", Pattern.MULTILINE); 2200 matcher = pattern.matcher("this is some text"); 2201 String result = matcher.replaceAll("X"); 2202 if (!result.equals("Xthis is some text")) 2203 failCount++; 2204 2205 pattern = Pattern.compile("^"); 2206 matcher = pattern.matcher("this is some text"); 2207 result = matcher.replaceAll("X"); 2208 if (!result.equals("Xthis is some text")) 2209 failCount++; 2210 2211 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2212 matcher = pattern.matcher("this is some text\n"); 2213 result = matcher.replaceAll("X"); 2214 if (!result.equals("Xthis is some text\n")) 2215 failCount++; 2216 2217 report("Caret"); 2218 } 2219 2220 private static void groupCaptureTest() throws Exception { 2221 // Independent group 2222 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2223 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2224 matcher.find(); 2225 try { 2226 String blah = matcher.group(1); 2227 failCount++; 2228 } catch (IndexOutOfBoundsException ioobe) { 2229 // Good result 2230 } 2231 // Pure group 2232 pattern = Pattern.compile("x+(?:y+)z+"); 2233 matcher = pattern.matcher("xxxyyyzzz"); 2234 matcher.find(); 2235 try { 2236 String blah = matcher.group(1); 2237 failCount++; 2238 } catch (IndexOutOfBoundsException ioobe) { 2239 // Good result 2240 } 2241 2242 // Supplementary character tests 2243 // Independent group 2244 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2245 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2246 matcher.find(); 2247 try { 2248 String blah = matcher.group(1); 2249 failCount++; 2250 } catch (IndexOutOfBoundsException ioobe) { 2251 // Good result 2252 } 2253 // Pure group 2254 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2255 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2256 matcher.find(); 2257 try { 2258 String blah = matcher.group(1); 2259 failCount++; 2260 } catch (IndexOutOfBoundsException ioobe) { 
2261 // Good result 2262 } 2263 2264 report("GroupCapture"); 2265 } 2266 2267 private static void backRefTest() throws Exception { 2268 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2269 check(pattern, "zzzaabcazzz", true); 2270 2271 pattern = Pattern.compile("(a*)bc\\1"); 2272 check(pattern, "zzzaabcaazzz", true); 2273 2274 pattern = Pattern.compile("(abc)(def)\\1"); 2275 check(pattern, "abcdefabc", true); 2276 2277 pattern = Pattern.compile("(abc)(def)\\3"); 2278 check(pattern, "abcdefabc", false); 2279 2280 try { 2281 for (int i = 1; i < 10; i++) { 2282 // Make sure backref 1-9 are always accepted 2283 pattern = Pattern.compile("abcdef\\" + i); 2284 // and fail to match if the target group does not exit 2285 check(pattern, "abcdef", false); 2286 } 2287 } catch(PatternSyntaxException e) { 2288 failCount++; 2289 } 2290 2291 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2292 check(pattern, "abcdefghija", false); 2293 check(pattern, "abcdefghija1", true); 2294 2295 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2296 check(pattern, "abcdefghijkk", true); 2297 2298 pattern = Pattern.compile("(a)bcdefghij\\11"); 2299 check(pattern, "abcdefghija1", true); 2300 2301 // Supplementary character tests 2302 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2303 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2304 2305 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2306 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2307 2308 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2309 check(pattern, toSupplementaries("abcdefabc"), true); 2310 2311 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2312 check(pattern, toSupplementaries("abcdefabc"), false); 2313 2314 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2315 check(pattern, toSupplementaries("abcdefghija"), false); 2316 check(pattern, toSupplementaries("abcdefghija1"), true); 2317 
2318 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2319 check(pattern, toSupplementaries("abcdefghijkk"), true); 2320 2321 report("BackRef"); 2322 } 2323 2324 /** 2325 * Unicode Technical Report #18, section 2.6 End of Line 2326 * There is no empty line to be matched in the sequence \u000D\u000A 2327 * but there is an empty line in the sequence \u000A\u000D. 2328 */ 2329 private static void anchorTest() throws Exception { 2330 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2331 Matcher m = p.matcher("blah1\r\nblah2"); 2332 m.find(); 2333 m.find(); 2334 if (!m.group().equals("blah2")) 2335 failCount++; 2336 2337 m.reset("blah1\n\rblah2"); 2338 m.find(); 2339 m.find(); 2340 m.find(); 2341 if (!m.group().equals("blah2")) 2342 failCount++; 2343 2344 // Test behavior of $ with \r\n at end of input 2345 p = Pattern.compile(".+$"); 2346 m = p.matcher("blah1\r\n"); 2347 if (!m.find()) 2348 failCount++; 2349 if (!m.group().equals("blah1")) 2350 failCount++; 2351 if (m.find()) 2352 failCount++; 2353 2354 // Test behavior of $ with \r\n at end of input in multiline 2355 p = Pattern.compile(".+$", Pattern.MULTILINE); 2356 m = p.matcher("blah1\r\n"); 2357 if (!m.find()) 2358 failCount++; 2359 if (m.find()) 2360 failCount++; 2361 2362 // Test for $ recognition of \u0085 for bug 4527731 2363 p = Pattern.compile(".+$", Pattern.MULTILINE); 2364 m = p.matcher("blah1\u0085"); 2365 if (!m.find()) 2366 failCount++; 2367 2368 // Supplementary character test 2369 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2370 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2371 m.find(); 2372 m.find(); 2373 if (!m.group().equals(toSupplementaries("blah2"))) 2374 failCount++; 2375 2376 m.reset(toSupplementaries("blah1\n\rblah2")); 2377 m.find(); 2378 m.find(); 2379 m.find(); 2380 if (!m.group().equals(toSupplementaries("blah2"))) 2381 failCount++; 2382 2383 // Test behavior of $ with \r\n at end of input 2384 p = Pattern.compile(".+$"); 2385 m = 
p.matcher(toSupplementaries("blah1\r\n")); 2386 if (!m.find()) 2387 failCount++; 2388 if (!m.group().equals(toSupplementaries("blah1"))) 2389 failCount++; 2390 if (m.find()) 2391 failCount++; 2392 2393 // Test behavior of $ with \r\n at end of input in multiline 2394 p = Pattern.compile(".+$", Pattern.MULTILINE); 2395 m = p.matcher(toSupplementaries("blah1\r\n")); 2396 if (!m.find()) 2397 failCount++; 2398 if (m.find()) 2399 failCount++; 2400 2401 // Test for $ recognition of \u0085 for bug 4527731 2402 p = Pattern.compile(".+$", Pattern.MULTILINE); 2403 m = p.matcher(toSupplementaries("blah1\u0085")); 2404 if (!m.find()) 2405 failCount++; 2406 2407 report("Anchors"); 2408 } 2409 2410 /** 2411 * A basic sanity test of Matcher.lookingAt(). 2412 */ 2413 private static void lookingAtTest() throws Exception { 2414 Pattern p = Pattern.compile("(ab)(c*)"); 2415 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2416 2417 if (!m.lookingAt()) 2418 failCount++; 2419 2420 if (!m.group().equals(m.group(0))) 2421 failCount++; 2422 2423 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2424 if (m.lookingAt()) 2425 failCount++; 2426 2427 // Supplementary character test 2428 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2429 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2430 2431 if (!m.lookingAt()) 2432 failCount++; 2433 2434 if (!m.group().equals(m.group(0))) 2435 failCount++; 2436 2437 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2438 if (m.lookingAt()) 2439 failCount++; 2440 2441 report("Looking At"); 2442 } 2443 2444 /** 2445 * A basic sanity test of Matcher.matches(). 
2446 */ 2447 private static void matchesTest() throws Exception { 2448 // matches() 2449 Pattern p = Pattern.compile("ulb(c*)"); 2450 Matcher m = p.matcher("ulbcccccc"); 2451 if (!m.matches()) 2452 failCount++; 2453 2454 // find() but not matches() 2455 m.reset("zzzulbcccccc"); 2456 if (m.matches()) 2457 failCount++; 2458 2459 // lookingAt() but not matches() 2460 m.reset("ulbccccccdef"); 2461 if (m.matches()) 2462 failCount++; 2463 2464 // matches() 2465 p = Pattern.compile("a|ad"); 2466 m = p.matcher("ad"); 2467 if (!m.matches()) 2468 failCount++; 2469 2470 // Supplementary character test 2471 // matches() 2472 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2473 m = p.matcher(toSupplementaries("ulbcccccc")); 2474 if (!m.matches()) 2475 failCount++; 2476 2477 // find() but not matches() 2478 m.reset(toSupplementaries("zzzulbcccccc")); 2479 if (m.matches()) 2480 failCount++; 2481 2482 // lookingAt() but not matches() 2483 m.reset(toSupplementaries("ulbccccccdef")); 2484 if (m.matches()) 2485 failCount++; 2486 2487 // matches() 2488 p = Pattern.compile(toSupplementaries("a|ad")); 2489 m = p.matcher(toSupplementaries("ad")); 2490 if (!m.matches()) 2491 failCount++; 2492 2493 report("Matches"); 2494 } 2495 2496 /** 2497 * A basic sanity test of Pattern.matches(). 
2498 */ 2499 private static void patternMatchesTest() throws Exception { 2500 // matches() 2501 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2502 toSupplementaries("ulbcccccc"))) 2503 failCount++; 2504 2505 // find() but not matches() 2506 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2507 toSupplementaries("zzzulbcccccc"))) 2508 failCount++; 2509 2510 // lookingAt() but not matches() 2511 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2512 toSupplementaries("ulbccccccdef"))) 2513 failCount++; 2514 2515 // Supplementary character test 2516 // matches() 2517 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2518 toSupplementaries("ulbcccccc"))) 2519 failCount++; 2520 2521 // find() but not matches() 2522 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2523 toSupplementaries("zzzulbcccccc"))) 2524 failCount++; 2525 2526 // lookingAt() but not matches() 2527 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2528 toSupplementaries("ulbccccccdef"))) 2529 failCount++; 2530 2531 report("Pattern Matches"); 2532 } 2533 2534 /** 2535 * Canonical equivalence testing. Tests the ability of the engine 2536 * to match sequences that are not explicitly specified in the 2537 * pattern when they are considered equivalent by the Unicode Standard. 
2538 */ 2539 private static void ceTest() throws Exception { 2540 // Decomposed char outside char classes 2541 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2542 Matcher m = p.matcher("test\u00e5"); 2543 if (!m.matches()) 2544 failCount++; 2545 2546 m.reset("testa\u030a"); 2547 if (!m.matches()) 2548 failCount++; 2549 2550 // Composed char outside char classes 2551 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2552 m = p.matcher("test\u00e5"); 2553 if (!m.matches()) 2554 failCount++; 2555 2556 m.reset("testa\u030a"); 2557 if (!m.find()) 2558 failCount++; 2559 2560 // Decomposed char inside a char class 2561 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2562 m = p.matcher("test\u00e5"); 2563 if (!m.find()) 2564 failCount++; 2565 2566 m.reset("testa\u030a"); 2567 if (!m.find()) 2568 failCount++; 2569 2570 // Composed char inside a char class 2571 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2572 m = p.matcher("test\u00e5"); 2573 if (!m.find()) 2574 failCount++; 2575 2576 m.reset("testa\u0300"); 2577 if (!m.find()) 2578 failCount++; 2579 2580 m.reset("testa\u030a"); 2581 if (!m.find()) 2582 failCount++; 2583 2584 // Marks that cannot legally change order and be equivalent 2585 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2586 check(p, "testa\u0308\u0300", true); 2587 check(p, "testa\u0300\u0308", false); 2588 2589 // Marks that can legally change order and be equivalent 2590 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2591 check(p, "testa\u0308\u0323", true); 2592 check(p, "testa\u0323\u0308", true); 2593 2594 // Test all equivalences of the sequence a\u0308\u0323\u0300 2595 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2596 check(p, "testa\u0308\u0323\u0300", true); 2597 check(p, "testa\u0323\u0308\u0300", true); 2598 check(p, "testa\u0308\u0300\u0323", true); 2599 check(p, "test\u00e4\u0323\u0300", true); 2600 check(p, "test\u00e4\u0300\u0323", true); 2601 
2602 /* 2603 * The following canonical equivalence tests don't work. Bug id: 4916384. 2604 * 2605 // Decomposed hangul (jamos) 2606 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ); 2607 m = p.matcher("\u1100\u1161"); 2608 if (!m.matches()) 2609 failCount++; 2610 2611 m.reset("\uac00"); 2612 if (!m.matches()) 2613 failCount++; 2614 2615 // Composed hangul 2616 p = Pattern.compile("\uac00", Pattern.CANON_EQ); 2617 m = p.matcher("\u1100\u1161"); 2618 if (!m.matches()) 2619 failCount++; 2620 2621 m.reset("\uac00"); 2622 if (!m.matches()) 2623 failCount++; 2624 2625 // Decomposed supplementary outside char classes 2626 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ); 2627 m = p.matcher("test\ud834\uddc0"); 2628 if (!m.matches()) 2629 failCount++; 2630 2631 m.reset("test\ud834\uddbc\ud834\udd6f"); 2632 if (!m.matches()) 2633 failCount++; 2634 2635 // Composed supplementary outside char classes 2636 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ); 2637 m.reset("test\ud834\uddbc\ud834\udd6f"); 2638 if (!m.matches()) 2639 failCount++; 2640 2641 m = p.matcher("test\ud834\uddc0"); 2642 if (!m.matches()) 2643 failCount++; 2644 2645 */ 2646 2647 report("Canonical Equivalence"); 2648 } 2649 2650 /** 2651 * A basic sanity test of Matcher.replaceAll(). 
2652 */ 2653 private static void globalSubstitute() throws Exception { 2654 // Global substitution with a literal 2655 Pattern p = Pattern.compile("(ab)(c*)"); 2656 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2657 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2658 failCount++; 2659 2660 m.reset("zzzabccczzzabcczzzabccczzz"); 2661 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2662 failCount++; 2663 2664 // Global substitution with groups 2665 m.reset("zzzabccczzzabcczzzabccczzz"); 2666 String result = m.replaceAll("$1"); 2667 if (!result.equals("zzzabzzzabzzzabzzz")) 2668 failCount++; 2669 2670 // Supplementary character test 2671 // Global substitution with a literal 2672 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2673 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2674 if (!m.replaceAll(toSupplementaries("test")). 2675 equals(toSupplementaries("testzzztestzzztest"))) 2676 failCount++; 2677 2678 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2679 if (!m.replaceAll(toSupplementaries("test")). 2680 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2681 failCount++; 2682 2683 // Global substitution with groups 2684 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2685 result = m.replaceAll("$1"); 2686 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2687 failCount++; 2688 2689 report("Global Substitution"); 2690 } 2691 2692 /** 2693 * Tests the usage of Matcher.appendReplacement() with literal 2694 * and group substitutions. 
*/
private static void stringbufferSubstitute() throws Exception {
    // Each scenario below builds a fresh matcher and output buffer. Where a
    // scenario starts with a try block, appendReplacement() is called before
    // any find() and is expected to throw IllegalStateException.

    // SB substitution with literal
    String blah = "zzzblahzzz";
    Pattern p = Pattern.compile("blah");
    Matcher m = p.matcher(blah);
    StringBuffer result = new StringBuffer();
    try {
        m.appendReplacement(result, "blech");
        failCount++;
    } catch (IllegalStateException e) {
        // Expected: no match has been attempted yet
    }
    m.find();
    m.appendReplacement(result, "blech");
    if (!result.toString().equals("zzzblech"))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals("zzzblechzzz"))
        failCount++;

    // SB substitution with groups
    blah = "zzzabcdzzz";
    p = Pattern.compile("(ab)(cd)*");
    m = p.matcher(blah);
    result = new StringBuffer();
    try {
        m.appendReplacement(result, "$1");
        failCount++;
    } catch (IllegalStateException e) {
        // Expected: no match has been attempted yet
    }
    m.find();
    m.appendReplacement(result, "$1");
    if (!result.toString().equals("zzzab"))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals("zzzabzzz"))
        failCount++;

    // SB substitution with 3 groups
    blah = "zzzabcdcdefzzz";
    p = Pattern.compile("(ab)(cd)*(ef)");
    m = p.matcher(blah);
    result = new StringBuffer();
    try {
        m.appendReplacement(result, "$1w$2w$3");
        failCount++;
    } catch (IllegalStateException e) {
        // Expected: no match has been attempted yet
    }
    m.find();
    m.appendReplacement(result, "$1w$2w$3");
    if (!result.toString().equals("zzzabwcdwef"))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals("zzzabwcdwefzzz"))
        failCount++;

    // SB substitution with groups and three matches
    // skipping middle match
    blah = "zzzabcdzzzabcddzzzabcdzzz";
    p = Pattern.compile("(ab)(cd*)");
    m = p.matcher(blah);
    result = new StringBuffer();
    try {
        m.appendReplacement(result, "$1");
        failCount++;
    } catch (IllegalStateException e) {
        // Expected: no match has been attempted yet
    }
    m.find();
    m.appendReplacement(result, "$1");
    if (!result.toString().equals("zzzab"))
        failCount++;

    // Two find() calls with no appendReplacement() in between: the second
    // match is skipped, so its text is copied to the output unchanged.
    m.find();
    m.find();
    m.appendReplacement(result, "$2");
    if (!result.toString().equals("zzzabzzzabcddzzzcd"))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
        failCount++;

    // Check to make sure escaped $ is ignored
    blah = "zzzabcdcdefzzz";
    p = Pattern.compile("(ab)(cd)*(ef)");
    m = p.matcher(blah);
    result = new StringBuffer();
    m.find();
    m.appendReplacement(result, "$1w\\$2w$3");
    if (!result.toString().equals("zzzabw$2wef"))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals("zzzabw$2wefzzz"))
        failCount++;

    // Check to make sure a reference to nonexistent group causes error
    blah = "zzzabcdcdefzzz";
    p = Pattern.compile("(ab)(cd)*(ef)");
    m = p.matcher(blah);
    result = new StringBuffer();
    m.find();
    try {
        m.appendReplacement(result, "$1w$5w$3");
        failCount++;
    } catch (IndexOutOfBoundsException ioobe) {
        // Correct result
    }

    // Check double digit group references
    blah = "zzz123456789101112zzz";
    p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
    m = p.matcher(blah);
    result = new StringBuffer();
    m.find();
    m.appendReplacement(result, "$1w$11w$3");
    if (!result.toString().equals("zzz1w11w3"))
        failCount++;

    // Check to make sure it backs off $15 to $1 if only three groups
    blah = "zzzabcdcdefzzz";
    p = Pattern.compile("(ab)(cd)*(ef)");
    m = p.matcher(blah);
    result = new StringBuffer();
    m.find();
    m.appendReplacement(result, "$1w$15w$3");
    if (!result.toString().equals("zzzabwab5wef"))
        failCount++;


    // Supplementary character test
    // The scenarios above are repeated with input, pattern and expected
    // output passed through toSupplementaries().

    // SB substitution with literal
    blah = toSupplementaries("zzzblahzzz");
    p = Pattern.compile(toSupplementaries("blah"));
    m = p.matcher(blah);
    result = new StringBuffer();
    try {
        m.appendReplacement(result, toSupplementaries("blech"));
        failCount++;
    } catch (IllegalStateException e) {
        // Expected: no match has been attempted yet
    }
    m.find();
    m.appendReplacement(result, toSupplementaries("blech"));
    if (!result.toString().equals(toSupplementaries("zzzblech")))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
        failCount++;

    // SB substitution with groups
    blah = toSupplementaries("zzzabcdzzz");
    p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
    m = p.matcher(blah);
    result = new StringBuffer();
    try {
        m.appendReplacement(result, "$1");
        failCount++;
    } catch (IllegalStateException e) {
        // Expected: no match has been attempted yet
    }
    m.find();
    m.appendReplacement(result, "$1");
    if (!result.toString().equals(toSupplementaries("zzzab")))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals(toSupplementaries("zzzabzzz")))
        failCount++;

    // SB substitution with 3 groups
    blah = toSupplementaries("zzzabcdcdefzzz");
    p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
    m = p.matcher(blah);
    result = new StringBuffer();
    try {
        m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
        failCount++;
    } catch (IllegalStateException e) {
        // Expected: no match has been attempted yet
    }
    m.find();
    m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
    if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
        failCount++;

    // SB substitution with groups and three matches
    // skipping middle match
    blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
    p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
    m = p.matcher(blah);
    result = new StringBuffer();
    try {
        m.appendReplacement(result, "$1");
        failCount++;
    } catch (IllegalStateException e) {
        // Expected: no match has been attempted yet
    }
    m.find();
    m.appendReplacement(result, "$1");
    if (!result.toString().equals(toSupplementaries("zzzab")))
        failCount++;

    m.find();
    m.find();
    m.appendReplacement(result, "$2");
    if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
        failCount++;

    // Check to make sure escaped $ is ignored
    blah = toSupplementaries("zzzabcdcdefzzz");
    p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
    m = p.matcher(blah);
    result = new StringBuffer();
    m.find();
    m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
    if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
        failCount++;

    m.appendTail(result);
    if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
        failCount++;

    // Check to make sure a reference to nonexistent group causes error
    blah = toSupplementaries("zzzabcdcdefzzz");
    p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
    m = p.matcher(blah);
    result = new StringBuffer();
    m.find();
    try {
        m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
        failCount++;
    } catch (IndexOutOfBoundsException ioobe) {
        // Correct result
    }

    // Check double digit group references
    // NOTE(review): the pattern here is not passed through
    // toSupplementaries() although the input is; presumably the helper
    // leaves digits unchanged -- confirm against its definition.
    blah = toSupplementaries("zzz123456789101112zzz");
    p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
    m = p.matcher(blah);
    result = new StringBuffer();
    m.find();
    m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
    if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
        failCount++;

    // Check to make sure it backs off $15 to $1 if only three groups
    blah = toSupplementaries("zzzabcdcdefzzz");
    p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
    m = p.matcher(blah);
    result = new StringBuffer();
    m.find();
    m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
    if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
        failCount++;

    // Check nothing has been appended into the output buffer if
    // the replacement string triggers IllegalArgumentException.
    p = Pattern.compile("(abc)");
    m = p.matcher("abcd");
    result = new StringBuffer();
    m.find();
    try {
        m.appendReplacement(result, ("xyz$g"));
        failCount++;
    } catch (IllegalArgumentException iae) {
        // The exception alone is not enough: the buffer must still be empty
        if (result.length() != 0)
            failCount++;
    }

    report("SB Substitution");
}

/*
 * 5 groups of characters are created to make a substitution string.
 * A base string will be created including random lead chars, the
 * substitution string, and random trailing chars.
 * A pattern containing the 5 groups is searched for and replaced with:
 * random group + random string + random group.
 * The results are checked for correctness.
2983 */ 2984 private static void substitutionBasher() { 2985 for (int runs = 0; runs<1000; runs++) { 2986 // Create a base string to work in 2987 int leadingChars = generator.nextInt(10); 2988 StringBuffer baseBuffer = new StringBuffer(100); 2989 String leadingString = getRandomAlphaString(leadingChars); 2990 baseBuffer.append(leadingString); 2991 2992 // Create 5 groups of random number of random chars 2993 // Create the string to substitute 2994 // Create the pattern string to search for 2995 StringBuffer bufferToSub = new StringBuffer(25); 2996 StringBuffer bufferToPat = new StringBuffer(50); 2997 String[] groups = new String[5]; 2998 for(int i=0; i<5; i++) { 2999 int aGroupSize = generator.nextInt(5)+1; 3000 groups[i] = getRandomAlphaString(aGroupSize); 3001 bufferToSub.append(groups[i]); 3002 bufferToPat.append('('); 3003 bufferToPat.append(groups[i]); 3004 bufferToPat.append(')'); 3005 } 3006 String stringToSub = bufferToSub.toString(); 3007 String pattern = bufferToPat.toString(); 3008 3009 // Place sub string into working string at random index 3010 baseBuffer.append(stringToSub); 3011 3012 // Append random chars to end 3013 int trailingChars = generator.nextInt(10); 3014 String trailingString = getRandomAlphaString(trailingChars); 3015 baseBuffer.append(trailingString); 3016 String baseString = baseBuffer.toString(); 3017 3018 // Create test pattern and matcher 3019 Pattern p = Pattern.compile(pattern); 3020 Matcher m = p.matcher(baseString); 3021 3022 // Reject candidate if pattern happens to start early 3023 m.find(); 3024 if (m.start() < leadingChars) 3025 continue; 3026 3027 // Reject candidate if more than one match 3028 if (m.find()) 3029 continue; 3030 3031 // Construct a replacement string with : 3032 // random group + random string + random group 3033 StringBuffer bufferToRep = new StringBuffer(); 3034 int groupIndex1 = generator.nextInt(5); 3035 bufferToRep.append("$" + (groupIndex1 + 1)); 3036 String randomMidString = getRandomAlphaString(5); 
3037 bufferToRep.append(randomMidString); 3038 int groupIndex2 = generator.nextInt(5); 3039 bufferToRep.append("$" + (groupIndex2 + 1)); 3040 String replacement = bufferToRep.toString(); 3041 3042 // Do the replacement 3043 String result = m.replaceAll(replacement); 3044 3045 // Construct expected result 3046 StringBuffer bufferToRes = new StringBuffer(); 3047 bufferToRes.append(leadingString); 3048 bufferToRes.append(groups[groupIndex1]); 3049 bufferToRes.append(randomMidString); 3050 bufferToRes.append(groups[groupIndex2]); 3051 bufferToRes.append(trailingString); 3052 String expectedResult = bufferToRes.toString(); 3053 3054 // Check results 3055 if (!result.equals(expectedResult)) 3056 failCount++; 3057 } 3058 3059 report("Substitution Basher"); 3060 } 3061 3062 /** 3063 * Checks the handling of some escape sequences that the Pattern 3064 * class should process instead of the java compiler. These are 3065 * not in the file because the escapes should be be processed 3066 * by the Pattern class when the regex is compiled. 3067 */ 3068 private static void escapes() throws Exception { 3069 Pattern p = Pattern.compile("\\043"); 3070 Matcher m = p.matcher("#"); 3071 if (!m.find()) 3072 failCount++; 3073 3074 p = Pattern.compile("\\x23"); 3075 m = p.matcher("#"); 3076 if (!m.find()) 3077 failCount++; 3078 3079 p = Pattern.compile("\\u0023"); 3080 m = p.matcher("#"); 3081 if (!m.find()) 3082 failCount++; 3083 3084 report("Escape sequences"); 3085 } 3086 3087 /** 3088 * Checks the handling of blank input situations. These 3089 * tests are incompatible with my test file format. 
3090 */ 3091 private static void blankInput() throws Exception { 3092 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3093 Matcher m = p.matcher(""); 3094 if (m.find()) 3095 failCount++; 3096 3097 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3098 m = p.matcher(""); 3099 if (!m.find()) 3100 failCount++; 3101 3102 p = Pattern.compile("abc"); 3103 m = p.matcher(""); 3104 if (m.find()) 3105 failCount++; 3106 3107 p = Pattern.compile("a*"); 3108 m = p.matcher(""); 3109 if (!m.find()) 3110 failCount++; 3111 3112 report("Blank input"); 3113 } 3114 3115 /** 3116 * Tests the Boyer-Moore pattern matching of a character sequence 3117 * on randomly generated patterns. 3118 */ 3119 private static void bm() throws Exception { 3120 doBnM('a'); 3121 report("Boyer Moore (ASCII)"); 3122 3123 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3124 report("Boyer Moore (Supplementary)"); 3125 } 3126 3127 private static void doBnM(int baseCharacter) throws Exception { 3128 int achar=0; 3129 3130 for (int i=0; i<100; i++) { 3131 // Create a short pattern to search for 3132 int patternLength = generator.nextInt(7) + 4; 3133 StringBuffer patternBuffer = new StringBuffer(patternLength); 3134 String pattern; 3135 retry: for (;;) { 3136 for (int x=0; x<patternLength; x++) { 3137 int ch = baseCharacter + generator.nextInt(26); 3138 if (Character.isSupplementaryCodePoint(ch)) { 3139 patternBuffer.append(Character.toChars(ch)); 3140 } else { 3141 patternBuffer.append((char)ch); 3142 } 3143 } 3144 pattern = patternBuffer.toString(); 3145 3146 // Avoid patterns that start and end with the same substring 3147 // See JDK-6854417 3148 for (int x=1; x <patternLength; x++) { 3149 if (pattern.startsWith(pattern.substring(x))) 3150 continue retry; 3151 } 3152 break; 3153 } 3154 Pattern p = Pattern.compile(pattern); 3155 3156 // Create a buffer with random ASCII chars that does 3157 // not match the sample 3158 String toSearch = null; 3159 StringBuffer s = null; 3160 Matcher m = 
p.matcher(""); 3161 do { 3162 s = new StringBuffer(100); 3163 for (int x=0; x<100; x++) { 3164 int ch = baseCharacter + generator.nextInt(26); 3165 if (Character.isSupplementaryCodePoint(ch)) { 3166 s.append(Character.toChars(ch)); 3167 } else { 3168 s.append((char)ch); 3169 } 3170 } 3171 toSearch = s.toString(); 3172 m.reset(toSearch); 3173 } while (m.find()); 3174 3175 // Insert the pattern at a random spot 3176 int insertIndex = generator.nextInt(99); 3177 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3178 insertIndex++; 3179 s = s.insert(insertIndex, pattern); 3180 toSearch = s.toString(); 3181 3182 // Make sure that the pattern is found 3183 m.reset(toSearch); 3184 if (!m.find()) 3185 failCount++; 3186 3187 // Make sure that the match text is the pattern 3188 if (!m.group().equals(pattern)) 3189 failCount++; 3190 3191 // Make sure match occured at insertion point 3192 if (m.start() != insertIndex) 3193 failCount++; 3194 } 3195 } 3196 3197 /** 3198 * Tests the matching of slices on randomly generated patterns. 3199 * The Boyer-Moore optimization is not done on these patterns 3200 * because it uses unicode case folding. 
3201 */ 3202 private static void slice() throws Exception { 3203 doSlice(Character.MAX_VALUE); 3204 report("Slice"); 3205 3206 doSlice(Character.MAX_CODE_POINT); 3207 report("Slice (Supplementary)"); 3208 } 3209 3210 private static void doSlice(int maxCharacter) throws Exception { 3211 Random generator = new Random(); 3212 int achar=0; 3213 3214 for (int i=0; i<100; i++) { 3215 // Create a short pattern to search for 3216 int patternLength = generator.nextInt(7) + 4; 3217 StringBuffer patternBuffer = new StringBuffer(patternLength); 3218 for (int x=0; x<patternLength; x++) { 3219 int randomChar = 0; 3220 while (!Character.isLetterOrDigit(randomChar)) 3221 randomChar = generator.nextInt(maxCharacter); 3222 if (Character.isSupplementaryCodePoint(randomChar)) { 3223 patternBuffer.append(Character.toChars(randomChar)); 3224 } else { 3225 patternBuffer.append((char) randomChar); 3226 } 3227 } 3228 String pattern = patternBuffer.toString(); 3229 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3230 3231 // Create a buffer with random chars that does not match the sample 3232 String toSearch = null; 3233 StringBuffer s = null; 3234 Matcher m = p.matcher(""); 3235 do { 3236 s = new StringBuffer(100); 3237 for (int x=0; x<100; x++) { 3238 int randomChar = 0; 3239 while (!Character.isLetterOrDigit(randomChar)) 3240 randomChar = generator.nextInt(maxCharacter); 3241 if (Character.isSupplementaryCodePoint(randomChar)) { 3242 s.append(Character.toChars(randomChar)); 3243 } else { 3244 s.append((char) randomChar); 3245 } 3246 } 3247 toSearch = s.toString(); 3248 m.reset(toSearch); 3249 } while (m.find()); 3250 3251 // Insert the pattern at a random spot 3252 int insertIndex = generator.nextInt(99); 3253 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3254 insertIndex++; 3255 s = s.insert(insertIndex, pattern); 3256 toSearch = s.toString(); 3257 3258 // Make sure that the pattern is found 3259 m.reset(toSearch); 3260 if (!m.find()) 3261 failCount++; 3262 3263 // 
Make sure that the match text is the pattern 3264 if (!m.group().equals(pattern)) 3265 failCount++; 3266 3267 // Make sure match occured at insertion point 3268 if (m.start() != insertIndex) 3269 failCount++; 3270 } 3271 } 3272 3273 private static void explainFailure(String pattern, String data, 3274 String expected, String actual) { 3275 System.err.println("----------------------------------------"); 3276 System.err.println("Pattern = "+pattern); 3277 System.err.println("Data = "+data); 3278 System.err.println("Expected = " + expected); 3279 System.err.println("Actual = " + actual); 3280 } 3281 3282 private static void explainFailure(String pattern, String data, 3283 Throwable t) { 3284 System.err.println("----------------------------------------"); 3285 System.err.println("Pattern = "+pattern); 3286 System.err.println("Data = "+data); 3287 t.printStackTrace(System.err); 3288 } 3289 3290 // Testing examples from a file 3291 3292 /** 3293 * Goes through the file "TestCases.txt" and creates many patterns 3294 * described in the file, matching the patterns against input lines in 3295 * the file, and comparing the results against the correct results 3296 * also found in the file. The file format is described in comments 3297 * at the head of the file. 3298 */ 3299 private static void processFile(String fileName) throws Exception { 3300 File testCases = new File(System.getProperty("test.src", "."), 3301 fileName); 3302 FileInputStream in = new FileInputStream(testCases); 3303 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3304 3305 // Process next test case. 
3306 String aLine; 3307 while((aLine = r.readLine()) != null) { 3308 // Read a line for pattern 3309 String patternString = grabLine(r); 3310 Pattern p = null; 3311 try { 3312 p = compileTestPattern(patternString); 3313 } catch (PatternSyntaxException e) { 3314 String dataString = grabLine(r); 3315 String expectedResult = grabLine(r); 3316 if (expectedResult.startsWith("error")) 3317 continue; 3318 explainFailure(patternString, dataString, e); 3319 failCount++; 3320 continue; 3321 } 3322 3323 // Read a line for input string 3324 String dataString = grabLine(r); 3325 Matcher m = p.matcher(dataString); 3326 StringBuffer result = new StringBuffer(); 3327 3328 // Check for IllegalStateExceptions before a match 3329 failCount += preMatchInvariants(m); 3330 3331 boolean found = m.find(); 3332 3333 if (found) 3334 failCount += postTrueMatchInvariants(m); 3335 else 3336 failCount += postFalseMatchInvariants(m); 3337 3338 if (found) { 3339 result.append("true "); 3340 result.append(m.group(0) + " "); 3341 } else { 3342 result.append("false "); 3343 } 3344 3345 result.append(m.groupCount()); 3346 3347 if (found) { 3348 for (int i=1; i<m.groupCount()+1; i++) 3349 if (m.group(i) != null) 3350 result.append(" " +m.group(i)); 3351 } 3352 3353 // Read a line for the expected result 3354 String expectedResult = grabLine(r); 3355 3356 if (!result.toString().equals(expectedResult)) { 3357 explainFailure(patternString, dataString, expectedResult, result.toString()); 3358 failCount++; 3359 } 3360 } 3361 3362 report(fileName); 3363 } 3364 3365 private static int preMatchInvariants(Matcher m) { 3366 int failCount = 0; 3367 try { 3368 m.start(); 3369 failCount++; 3370 } catch (IllegalStateException ise) {} 3371 try { 3372 m.end(); 3373 failCount++; 3374 } catch (IllegalStateException ise) {} 3375 try { 3376 m.group(); 3377 failCount++; 3378 } catch (IllegalStateException ise) {} 3379 return failCount; 3380 } 3381 3382 private static int postFalseMatchInvariants(Matcher m) { 3383 int 
failCount = 0; 3384 try { 3385 m.group(); 3386 failCount++; 3387 } catch (IllegalStateException ise) {} 3388 try { 3389 m.start(); 3390 failCount++; 3391 } catch (IllegalStateException ise) {} 3392 try { 3393 m.end(); 3394 failCount++; 3395 } catch (IllegalStateException ise) {} 3396 return failCount; 3397 } 3398 3399 private static int postTrueMatchInvariants(Matcher m) { 3400 int failCount = 0; 3401 //assert(m.start() = m.start(0); 3402 if (m.start() != m.start(0)) 3403 failCount++; 3404 //assert(m.end() = m.end(0); 3405 if (m.start() != m.start(0)) 3406 failCount++; 3407 //assert(m.group() = m.group(0); 3408 if (!m.group().equals(m.group(0))) 3409 failCount++; 3410 try { 3411 m.group(50); 3412 failCount++; 3413 } catch (IndexOutOfBoundsException ise) {} 3414 3415 return failCount; 3416 } 3417 3418 private static Pattern compileTestPattern(String patternString) { 3419 if (!patternString.startsWith("'")) { 3420 return Pattern.compile(patternString); 3421 } 3422 3423 int break1 = patternString.lastIndexOf("'"); 3424 String flagString = patternString.substring( 3425 break1+1, patternString.length()); 3426 patternString = patternString.substring(1, break1); 3427 3428 if (flagString.equals("i")) 3429 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3430 3431 if (flagString.equals("m")) 3432 return Pattern.compile(patternString, Pattern.MULTILINE); 3433 3434 return Pattern.compile(patternString); 3435 } 3436 3437 /** 3438 * Reads a line from the input file. Keeps reading lines until a non 3439 * empty non comment line is read. If the line contains a \n then 3440 * these two characters are replaced by a newline char. If a \\uxxxx 3441 * sequence is read then the sequence is replaced by the unicode char. 
3442 */ 3443 private static String grabLine(BufferedReader r) throws Exception { 3444 int index = 0; 3445 String line = r.readLine(); 3446 while (line.startsWith("//") || line.length() < 1) 3447 line = r.readLine(); 3448 while ((index = line.indexOf("\\n")) != -1) { 3449 StringBuffer temp = new StringBuffer(line); 3450 temp.replace(index, index+2, "\n"); 3451 line = temp.toString(); 3452 } 3453 while ((index = line.indexOf("\\u")) != -1) { 3454 StringBuffer temp = new StringBuffer(line); 3455 String value = temp.substring(index+2, index+6); 3456 char aChar = (char)Integer.parseInt(value, 16); 3457 String unicodeChar = "" + aChar; 3458 temp.replace(index, index+6, unicodeChar); 3459 line = temp.toString(); 3460 } 3461 3462 return line; 3463 } 3464 3465 private static void check(Pattern p, String s, String g, String expected) { 3466 Matcher m = p.matcher(s); 3467 m.find(); 3468 if (!m.group(g).equals(expected) || 3469 s.charAt(m.start(g)) != expected.charAt(0) || 3470 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 3471 failCount++; 3472 } 3473 3474 private static void checkReplaceFirst(String p, String s, String r, String expected) 3475 { 3476 if (!expected.equals(Pattern.compile(p) 3477 .matcher(s) 3478 .replaceFirst(r))) 3479 failCount++; 3480 } 3481 3482 private static void checkReplaceAll(String p, String s, String r, String expected) 3483 { 3484 if (!expected.equals(Pattern.compile(p) 3485 .matcher(s) 3486 .replaceAll(r))) 3487 failCount++; 3488 } 3489 3490 private static void checkExpectedFail(String p) { 3491 try { 3492 Pattern.compile(p); 3493 } catch (PatternSyntaxException pse) { 3494 //pse.printStackTrace(); 3495 return; 3496 } 3497 failCount++; 3498 } 3499 3500 private static void checkExpectedIAE(Matcher m, String g) { 3501 m.find(); 3502 try { 3503 m.group(g); 3504 } catch (IllegalArgumentException x) { 3505 //iae.printStackTrace(); 3506 try { 3507 m.start(g); 3508 } catch (IllegalArgumentException xx) { 3509 try { 3510 m.start(g); 
3511 } catch (IllegalArgumentException xxx) { 3512 return; 3513 } 3514 } 3515 } 3516 failCount++; 3517 } 3518 3519 private static void checkExpectedNPE(Matcher m) { 3520 m.find(); 3521 try { 3522 m.group(null); 3523 } catch (NullPointerException x) { 3524 try { 3525 m.start(null); 3526 } catch (NullPointerException xx) { 3527 try { 3528 m.end(null); 3529 } catch (NullPointerException xxx) { 3530 return; 3531 } 3532 } 3533 } 3534 failCount++; 3535 } 3536 3537 private static void namedGroupCaptureTest() throws Exception { 3538 check(Pattern.compile("x+(?<gname>y+)z+"), 3539 "xxxyyyzzz", 3540 "gname", 3541 "yyy"); 3542 3543 check(Pattern.compile("x+(?<gname8>y+)z+"), 3544 "xxxyyyzzz", 3545 "gname8", 3546 "yyy"); 3547 3548 //backref 3549 Pattern pattern = Pattern.compile("(a*)bc\\1"); 3550 check(pattern, "zzzaabcazzz", true); // found "abca" 3551 3552 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 3553 "zzzaabcaazzz", true); 3554 3555 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 3556 "abcdefabc", true); 3557 3558 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 3559 "abcdefghijkk", true); 3560 3561 // Supplementary character tests 3562 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3563 toSupplementaries("zzzaabcazzz"), true); 3564 3565 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3566 toSupplementaries("zzzaabcaazzz"), true); 3567 3568 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 3569 toSupplementaries("abcdefabc"), true); 3570 3571 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 3572 "(?<gname>" + 3573 toSupplementaries("k)") + "\\k<gname>"), 3574 toSupplementaries("abcdefghijkk"), true); 3575 3576 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 3577 "xxxyyyzzzyyy", 3578 "gname", 3579 "yyy"); 3580 3581 //replaceFirst/All 3582 checkReplaceFirst("(?<gn>ab)(c*)", 3583 
"abccczzzabcczzzabccc", 3584 "${gn}", 3585 "abzzzabcczzzabccc"); 3586 3587 checkReplaceAll("(?<gn>ab)(c*)", 3588 "abccczzzabcczzzabccc", 3589 "${gn}", 3590 "abzzzabzzzab"); 3591 3592 3593 checkReplaceFirst("(?<gn>ab)(c*)", 3594 "zzzabccczzzabcczzzabccczzz", 3595 "${gn}", 3596 "zzzabzzzabcczzzabccczzz"); 3597 3598 checkReplaceAll("(?<gn>ab)(c*)", 3599 "zzzabccczzzabcczzzabccczzz", 3600 "${gn}", 3601 "zzzabzzzabzzzabzzz"); 3602 3603 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 3604 "zzzabccczzzabcczzzabccczzz", 3605 "${gn2}", 3606 "zzzccczzzabcczzzabccczzz"); 3607 3608 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 3609 "zzzabccczzzabcczzzabccczzz", 3610 "${gn2}", 3611 "zzzccczzzcczzzccczzz"); 3612 3613 //toSupplementaries("(ab)(c*)")); 3614 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 3615 ")(?<gn2>" + toSupplementaries("c") + "*)", 3616 toSupplementaries("abccczzzabcczzzabccc"), 3617 "${gn1}", 3618 toSupplementaries("abzzzabcczzzabccc")); 3619 3620 3621 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 3622 ")(?<gn2>" + toSupplementaries("c") + "*)", 3623 toSupplementaries("abccczzzabcczzzabccc"), 3624 "${gn1}", 3625 toSupplementaries("abzzzabzzzab")); 3626 3627 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 3628 ")(?<gn2>" + toSupplementaries("c") + "*)", 3629 toSupplementaries("abccczzzabcczzzabccc"), 3630 "${gn2}", 3631 toSupplementaries("ccczzzabcczzzabccc")); 3632 3633 3634 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 3635 ")(?<gn2>" + toSupplementaries("c") + "*)", 3636 toSupplementaries("abccczzzabcczzzabccc"), 3637 "${gn2}", 3638 toSupplementaries("ccczzzcczzzccc")); 3639 3640 checkReplaceFirst("(?<dog>Dog)AndCat", 3641 "zzzDogAndCatzzzDogAndCatzzz", 3642 "${dog}", 3643 "zzzDogzzzDogAndCatzzz"); 3644 3645 3646 checkReplaceAll("(?<dog>Dog)AndCat", 3647 "zzzDogAndCatzzzDogAndCatzzz", 3648 "${dog}", 3649 "zzzDogzzzDogzzz"); 3650 3651 // backref in Matcher & String 3652 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", 
"${gn}").equals("abefij") || 3653 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 3654 failCount++; 3655 3656 // negative 3657 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 3658 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 3659 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 3660 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 3661 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 3662 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 3663 "gnameX"); 3664 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 3665 report("NamedGroupCapture"); 3666 } 3667 3668 // This is for bug 6969132 3669 private static void nonBmpClassComplementTest() throws Exception { 3670 Pattern p = Pattern.compile("\\P{Lu}"); 3671 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 3672 if (m.find() && m.start() == 1) 3673 failCount++; 3674 3675 // from a unicode category 3676 p = Pattern.compile("\\P{Lu}"); 3677 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 3678 if (m.find()) 3679 failCount++; 3680 if (!m.hitEnd()) 3681 failCount++; 3682 3683 // block 3684 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 3685 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 3686 if (m.find() && m.start() == 1) 3687 failCount++; 3688 3689 report("NonBmpClassComplement"); 3690 } 3691 3692 private static void unicodePropertiesTest() throws Exception { 3693 // different forms 3694 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 3695 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 3696 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 3697 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 3698 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 3699 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 3700 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 3701 
!Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 3702 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 3703 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 3704 failCount++; 3705 3706 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 3707 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 3708 Matcher lastSM = common; 3709 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 3710 3711 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 3712 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 3713 Matcher lastBM = latin; 3714 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 3715 3716 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 3717 if (cp >= 0x30000 && (cp & 0x70) == 0){ 3718 continue; // only pick couple code points, they are the same 3719 } 3720 3721 // Unicode Script 3722 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 3723 Matcher m; 3724 String str = new String(Character.toChars(cp)); 3725 if (script == lastScript) { 3726 m = lastSM; 3727 m.reset(str); 3728 } else { 3729 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 3730 } 3731 if (!m.matches()) { 3732 failCount++; 3733 } 3734 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common; 3735 other.reset(str); 3736 if (other.matches()) { 3737 failCount++; 3738 } 3739 lastSM = m; 3740 lastScript = script; 3741 3742 // Unicode Block 3743 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 3744 if (block == null) { 3745 //System.out.printf("Not a Block: cp=%x%n", cp); 3746 continue; 3747 } 3748 if (block == lastBlock) { 3749 m = lastBM; 3750 m.reset(str); 3751 } else { 3752 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 3753 } 3754 if (!m.matches()) { 3755 failCount++; 3756 } 3757 other = (block == Character.UnicodeBlock.BASIC_LATIN)? 
greek : latin; 3758 other.reset(str); 3759 if (other.matches()) { 3760 failCount++; 3761 } 3762 lastBM = m; 3763 lastBlock = block; 3764 } 3765 report("unicodeProperties"); 3766 } 3767 3768 private static void unicodeHexNotationTest() throws Exception { 3769 3770 // negative 3771 checkExpectedFail("\\x{-23}"); 3772 checkExpectedFail("\\x{110000}"); 3773 checkExpectedFail("\\x{}"); 3774 checkExpectedFail("\\x{AB[ef]"); 3775 3776 // codepoint 3777 check("^\\x{1033c}$", "\uD800\uDF3C", true); 3778 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 3779 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 3780 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 3781 3782 // in class 3783 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 3784 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 3785 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 3786 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 3787 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 3788 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 3789 3790 for (int cp = 0; cp <= 0x10FFFF; cp++) { 3791 String s = "A" + new String(Character.toChars(cp)) + "B"; 3792 String hexUTF16 = (cp <= 0xFFFF)? 
String.format("\\u%04x", cp) 3793 : String.format("\\u%04x\\u%04x", 3794 (int) Character.toChars(cp)[0], 3795 (int) Character.toChars(cp)[1]); 3796 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 3797 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 3798 failCount++; 3799 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 3800 failCount++; 3801 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 3802 failCount++; 3803 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 3804 failCount++; 3805 } 3806 report("unicodeHexNotation"); 3807 } 3808 3809 private static void unicodeClassesTest() throws Exception { 3810 3811 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 3812 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 3813 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 3814 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 3815 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 3816 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 3817 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 3818 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 3819 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 3820 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 3821 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 3822 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 3823 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 3824 Matcher bound = Pattern.compile("\\b").matcher(""); 3825 Matcher word = Pattern.compile("\\w++").matcher(""); 3826 // UNICODE_CHARACTER_CLASS 3827 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3828 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3829 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3830 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 
3831 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3832 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3833 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3834 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3835 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3836 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3837 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3838 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3839 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3840 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3841 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3842 // embedded flag (?U) 3843 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3844 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3845 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3846 3847 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 3848 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3849 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3850 // properties 3851 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 3852 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 3853 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 3854 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 3855 Matcher alphaP = 
Pattern.compile("\\p{IsAlphabetic}").matcher(""); 3856 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 3857 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 3858 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 3859 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 3860 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 3861 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 3862 3863 // javaMethod 3864 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 3865 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 3866 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 3867 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 3868 3869 for (int cp = 1; cp < 0x30000; cp++) { 3870 String str = new String(Character.toChars(cp)); 3871 int type = Character.getType(cp); 3872 if (// lower 3873 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 3874 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 3875 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 3876 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 3877 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 3878 // upper 3879 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 3880 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 3881 Character.isUpperCase(cp) != upperP.reset(str).matches() || 3882 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 3883 // alpha 3884 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 3885 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 3886 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 3887 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 3888 // digit 3889 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 3890 Character.isDigit(cp) != digitU.reset(str).matches() || 3891 // alnum 3892 
POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 3893 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 3894 // punct 3895 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 3896 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 3897 // graph 3898 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 3899 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 3900 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 3901 // blank 3902 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 3903 != blank.reset(str).matches() || 3904 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 3905 // print 3906 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 3907 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 3908 // cntrl 3909 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 3910 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 3911 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 3912 // hexdigit 3913 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 3914 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 3915 // space 3916 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 3917 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 3918 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 3919 // word 3920 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 3921 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 3922 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 3923 // bwordb 3924 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 3925 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 3926 // properties 3927 Character.isTitleCase(cp) != titleP.reset(str).matches() || 3928 Character.isLetter(cp) != letterP.reset(str).matches()|| 3929 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 3930 Character.isIdeographic(cp) != ideogJ.reset(str).matches() || 3931 (Character.UNASSIGNED == type) == 
definedP.reset(str).matches() || 3932 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 3933 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches()) 3934 failCount++; 3935 } 3936 3937 // bounds/word align 3938 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 3939 if (!bwbU.reset("\u0180sherman\u0400").matches()) 3940 failCount++; 3941 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 3942 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 3943 failCount++; 3944 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 3945 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 3946 failCount++; 3947 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 3948 failCount++; 3949 report("unicodePredefinedClasses"); 3950 } 3951 3952 private static void horizontalAndVerticalWSTest() throws Exception { 3953 String hws = new String (new char[] { 3954 0x09, 0x20, 0xa0, 0x1680, 0x180e, 3955 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 3956 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 3957 0x202f, 0x205f, 0x3000 }); 3958 String vws = new String (new char[] { 3959 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 3960 if (!Pattern.compile("\\h+").matcher(hws).matches() || 3961 !Pattern.compile("[\\h]+").matcher(hws).matches()) 3962 failCount++; 3963 if (Pattern.compile("\\H").matcher(hws).find() || 3964 Pattern.compile("[\\H]").matcher(hws).find()) 3965 failCount++; 3966 if (!Pattern.compile("\\v+").matcher(vws).matches() || 3967 !Pattern.compile("[\\v]+").matcher(vws).matches()) 3968 failCount++; 3969 if (Pattern.compile("\\V").matcher(vws).find() || 3970 Pattern.compile("[\\V]").matcher(vws).find()) 3971 failCount++; 3972 String prefix = "abcd"; 3973 String suffix = "efgh"; 3974 String ng = "A"; 3975 for (int i = 0; i < hws.length(); i++) { 3976 String c = String.valueOf(hws.charAt(i)); 3977 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 3978 if (!m.find() || !c.equals(m.group())) 3979 failCount++; 3980 m = 
Pattern.compile("[\\h]").matcher(prefix + c + suffix); 3981 if (!m.find() || !c.equals(m.group())) 3982 failCount++; 3983 3984 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 3985 if (!m.find() || !ng.equals(m.group())) 3986 failCount++; 3987 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 3988 if (!m.find() || !ng.equals(m.group())) 3989 failCount++; 3990 } 3991 for (int i = 0; i < vws.length(); i++) { 3992 String c = String.valueOf(vws.charAt(i)); 3993 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 3994 if (!m.find() || !c.equals(m.group())) 3995 failCount++; 3996 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 3997 if (!m.find() || !c.equals(m.group())) 3998 failCount++; 3999 4000 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4001 if (!m.find() || !ng.equals(m.group())) 4002 failCount++; 4003 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 4004 if (!m.find() || !ng.equals(m.group())) 4005 failCount++; 4006 } 4007 // \v in range is interpreted as 0x0B. 
This is the undocumented behavior 4008 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 4009 failCount++; 4010 report("horizontalAndVerticalWSTest"); 4011 } 4012 4013 private static void linebreakTest() throws Exception { 4014 String linebreaks = new String (new char[] { 4015 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4016 String crnl = "\r\n"; 4017 if (!Pattern.compile("\\R+").matcher(linebreaks).matches() || 4018 !Pattern.compile("\\R").matcher(crnl).matches() || 4019 Pattern.compile("\\R\\R").matcher(crnl).matches()) 4020 failCount++; 4021 report("linebreakTest"); 4022 } 4023 4024 // #7189363 4025 private static void branchTest() throws Exception { 4026 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4027 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4028 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4029 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4030 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4031 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4032 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4033 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4034 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4035 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4036 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4037 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4038 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4039 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4040 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4041 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4042 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4043 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4044 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4045 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4046 
!Pattern.compile("(a)?bc|de").matcher("de").matches() || 4047 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4048 failCount++; 4049 report("branchTest"); 4050 } 4051 4052 // This test is for 8007395 4053 private static void groupCurlyNotFoundSuppTest() throws Exception { 4054 String input = "test this as \ud83d\ude0d"; 4055 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4056 "test(.)*(@[a-zA-Z.]+)", 4057 "test([^B])+(@[a-zA-Z.]+)", 4058 "test([^B])*(@[a-zA-Z.]+)", 4059 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4060 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4061 }) { 4062 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4063 .matcher(input); 4064 try { 4065 if (m.find()) { 4066 failCount++; 4067 } 4068 } catch (Exception x) { 4069 failCount++; 4070 } 4071 } 4072 report("GroupCurly NotFoundSupp"); 4073 } 4074 4075 // This test is for 8023647 4076 private static void groupCurlyBackoffTest() throws Exception { 4077 if (!"abc1c".matches("(\\w)+1\\1") || 4078 "abc11".matches("(\\w)+1\\1")) { 4079 failCount++; 4080 } 4081 report("GroupCurly backoff"); 4082 } 4083 4084 // This test is for 8012646 4085 private static void patternAsPredicate() throws Exception { 4086 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4087 4088 if (p.test("")) { 4089 failCount++; 4090 } 4091 if (!p.test("word")) { 4092 failCount++; 4093 } 4094 if (p.test("1234")) { 4095 failCount++; 4096 } 4097 report("Pattern.asPredicate"); 4098 } 4099 }