1 /* 2 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 /** 27 * @test 28 * @summary tests RegExp framework 29 * @author Mike McCloskey 30 * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345 31 * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962 32 * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476 33 * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940 34 * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133 35 * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066 36 * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590 37 * 8027645 38 */ 39 40 import java.util.regex.*; 41 import java.util.Random; 42 import java.io.*; 43 import java.util.*; 44 import java.nio.CharBuffer; 45 import java.util.function.Predicate; 46 47 /** 48 * This is a test class created to check the operation of 49 * the Pattern and Matcher classes. 50 */ 51 public class RegExTest { 52 53 private static Random generator = new Random(); 54 private static boolean failure = false; 55 private static int failCount = 0; 56 private static String firstFailure = null; 57 58 /** 59 * Main to interpret arguments and run several tests. 60 * 61 */ 62 public static void main(String[] args) throws Exception { 63 // Most of the tests are in a file 64 processFile("TestCases.txt"); 65 //processFile("PerlCases.txt"); 66 processFile("BMPTestCases.txt"); 67 processFile("SupplementaryTestCases.txt"); 68 69 // These test many randomly generated char patterns 70 bm(); 71 slice(); 72 73 // These are hard to put into the file 74 escapes(); 75 blankInput(); 76 77 // Substitition tests on randomly generated sequences 78 globalSubstitute(); 79 stringbufferSubstitute(); 80 substitutionBasher(); 81 82 // Canonical Equivalence 83 ceTest(); 84 85 // Anchors 86 anchorTest(); 87 88 // boolean match calls 89 matchesTest(); 90 lookingAtTest(); 91 92 // Pattern API 93 patternMatchesTest(); 94 95 // Misc 96 lookbehindTest(); 97 nullArgumentTest(); 98 backRefTest(); 99 groupCaptureTest(); 100 caretTest(); 101 charClassTest(); 102 emptyPatternTest(); 103 findIntTest(); 104 group0Test(); 105 longPatternTest(); 106 octalTest(); 107 ampersandTest(); 108 negationTest(); 109 splitTest(); 110 appendTest(); 111 caseFoldingTest(); 112 commentsTest(); 113 unixLinesTest(); 114 replaceFirstTest(); 115 gTest(); 116 zTest(); 117 serializeTest(); 118 reluctantRepetitionTest(); 119 multilineDollarTest(); 120 dollarAtEndTest(); 121 caretBetweenTerminatorsTest(); 122 // This RFE rejected in Tiger numOccurrencesTest(); 123 javaCharClassTest(); 124 nonCaptureRepetitionTest(); 125 notCapturedGroupCurlyMatchTest(); 126 escapedSegmentTest(); 127 literalPatternTest(); 128 literalReplacementTest(); 129 regionTest(); 130 toStringTest(); 131 negatedCharClassTest(); 132 findFromTest(); 133 boundsTest(); 134 unicodeWordBoundsTest(); 135 caretAtEndTest(); 136 wordSearchTest(); 137 hitEndTest(); 138 toMatchResultTest(); 139 surrogatesInClassTest(); 140 removeQEQuotingTest(); 141 namedGroupCaptureTest(); 142 nonBmpClassComplementTest(); 143 unicodePropertiesTest(); 144 unicodeHexNotationTest(); 145 unicodeClassesTest(); 146 horizontalAndVerticalWSTest(); 147 linebreakTest(); 148 branchTest(); 149 groupCurlyNotFoundSuppTest(); 150 groupCurlyBackoffTest(); 151 patternAsPredicate(); 152 153 if (failure) { 154 throw new 155 RuntimeException("RegExTest failed, 1st failure: " + 156 firstFailure); 157 } else { 158 System.err.println("OKAY: All tests passed."); 159 } 160 } 161 162 // Utility functions 163 164 private static String getRandomAlphaString(int length) { 165 StringBuffer buf = new StringBuffer(length); 166 for (int i=0; i<length; i++) { 167 char randChar = (char)(97 + generator.nextInt(26)); 168 buf.append(randChar); 169 } 170 return buf.toString(); 171 } 172 173 private static void check(Matcher m, String expected) { 174 m.find(); 175 if (!m.group().equals(expected)) 176 failCount++; 177 } 178 179 private static void check(Matcher m, String result, boolean expected) { 180 m.find(); 181 if (m.group().equals(result) != expected) 182 failCount++; 183 } 184 185 private static void check(Pattern p, String s, boolean expected) { 186 if (p.matcher(s).find() != expected) 187 failCount++; 188 } 189 190 private static void check(String p, String s, boolean expected) { 191 Matcher matcher = Pattern.compile(p).matcher(s); 192 if (matcher.find() != expected) 193 failCount++; 194 } 195 196 private static void check(String p, char c, boolean expected) { 197 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 198 Pattern pattern = Pattern.compile(propertyPattern); 199 char[] ca = new char[1]; ca[0] = c; 200 Matcher matcher = pattern.matcher(new String(ca)); 201 if (!matcher.find()) 202 failCount++; 203 } 204 205 private static void check(String p, int codePoint, boolean expected) { 206 String propertyPattern = expected ? "\\p" + p : "\\P" + p; 207 Pattern pattern = Pattern.compile(propertyPattern); 208 char[] ca = Character.toChars(codePoint); 209 Matcher matcher = pattern.matcher(new String(ca)); 210 if (!matcher.find()) 211 failCount++; 212 } 213 214 private static void check(String p, int flag, String input, String s, 215 boolean expected) 216 { 217 Pattern pattern = Pattern.compile(p, flag); 218 Matcher matcher = pattern.matcher(input); 219 if (expected) 220 check(matcher, s, expected); 221 else 222 check(pattern, input, false); 223 } 224 225 private static void report(String testName) { 226 int spacesToAdd = 30 - testName.length(); 227 StringBuffer paddedNameBuffer = new StringBuffer(testName); 228 for (int i=0; i<spacesToAdd; i++) 229 paddedNameBuffer.append(" "); 230 String paddedName = paddedNameBuffer.toString(); 231 System.err.println(paddedName + ": " + 232 (failCount==0 ? "Passed":"Failed("+failCount+")")); 233 if (failCount > 0) { 234 failure = true; 235 236 if (firstFailure == null) { 237 firstFailure = testName; 238 } 239 } 240 241 failCount = 0; 242 } 243 244 /** 245 * Converts ASCII alphabet characters [A-Za-z] in the given 's' to 246 * supplementary characters. This method does NOT fully take care 247 * of the regex syntax. 248 */ 249 private static String toSupplementaries(String s) { 250 int length = s.length(); 251 StringBuffer sb = new StringBuffer(length * 2); 252 253 for (int i = 0; i < length; ) { 254 char c = s.charAt(i++); 255 if (c == '\\') { 256 sb.append(c); 257 if (i < length) { 258 c = s.charAt(i++); 259 sb.append(c); 260 if (c == 'u') { 261 // assume no syntax error 262 sb.append(s.charAt(i++)); 263 sb.append(s.charAt(i++)); 264 sb.append(s.charAt(i++)); 265 sb.append(s.charAt(i++)); 266 } 267 } 268 } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 269 sb.append('\ud800').append((char)('\udc00'+c)); 270 } else { 271 sb.append(c); 272 } 273 } 274 return sb.toString(); 275 } 276 277 // Regular expression tests 278 279 // This is for bug 6178785 280 // Test if an expected NPE gets thrown when passing in a null argument 281 private static boolean check(Runnable test) { 282 try { 283 test.run(); 284 failCount++; 285 return false; 286 } catch (NullPointerException npe) { 287 return true; 288 } 289 } 290 291 private static void nullArgumentTest() { 292 check(new Runnable() { public void run() { Pattern.compile(null); }}); 293 check(new Runnable() { public void run() { Pattern.matches(null, null); }}); 294 check(new Runnable() { public void run() { Pattern.matches("xyz", null);}}); 295 check(new Runnable() { public void run() { Pattern.quote(null);}}); 296 check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}}); 297 check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}}); 298 299 final Matcher m = Pattern.compile("xyz").matcher("xyz"); 300 m.matches(); 301 check(new Runnable() { public void run() { m.appendTail(null);}}); 302 check(new Runnable() { public void run() { m.replaceAll(null);}}); 303 check(new Runnable() { public void run() { m.replaceFirst(null);}}); 304 check(new Runnable() { public void run() { m.appendReplacement(null, null);}}); 305 check(new Runnable() { public void run() { m.reset(null);}}); 306 check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}}); 307 //check(new Runnable() { public void run() { m.usePattern(null);}}); 308 309 report("Null Argument"); 310 } 311 312 // This is for bug6635133 313 // Test if surrogate pair in Unicode escapes can be handled correctly. 314 private static void surrogatesInClassTest() throws Exception { 315 Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]"); 316 Matcher matcher = pattern.matcher("\ud834\udd22"); 317 if (!matcher.find()) 318 failCount++; 319 320 report("Surrogate pair in Unicode escape"); 321 } 322 323 // This is for bug6990617 324 // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode 325 // char encoding is only 2 or 3 digits instead of 4 and the first quoted 326 // char is an octal digit. 327 private static void removeQEQuotingTest() throws Exception { 328 Pattern pattern = 329 Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E"); 330 Matcher matcher = pattern.matcher("\t1sometext\t2sometext"); 331 if (!matcher.find()) 332 failCount++; 333 334 report("Remove Q/E Quoting"); 335 } 336 337 // This is for bug 4988891 338 // Test toMatchResult to see that it is a copy of the Matcher 339 // that is not affected by subsequent operations on the original 340 private static void toMatchResultTest() throws Exception { 341 Pattern pattern = Pattern.compile("squid"); 342 Matcher matcher = pattern.matcher( 343 "agiantsquidofdestinyasmallsquidoffate"); 344 matcher.find(); 345 int matcherStart1 = matcher.start(); 346 MatchResult mr = matcher.toMatchResult(); 347 if (mr == matcher) 348 failCount++; 349 int resultStart1 = mr.start(); 350 if (matcherStart1 != resultStart1) 351 failCount++; 352 matcher.find(); 353 int matcherStart2 = matcher.start(); 354 int resultStart2 = mr.start(); 355 if (matcherStart2 == resultStart2) 356 failCount++; 357 if (resultStart1 != resultStart2) 358 failCount++; 359 MatchResult mr2 = matcher.toMatchResult(); 360 if (mr == mr2) 361 failCount++; 362 if (mr2.start() != matcherStart2) 363 failCount++; 364 report("toMatchResult is a copy"); 365 } 366 367 // This is for bug 5013885 368 // Must test a slice to see if it reports hitEnd correctly 369 private static void hitEndTest() throws Exception { 370 // Basic test of Slice node 371 Pattern p = Pattern.compile("^squidattack"); 372 Matcher m = p.matcher("squack"); 373 m.find(); 374 if (m.hitEnd()) 375 failCount++; 376 m.reset("squid"); 377 m.find(); 378 if (!m.hitEnd()) 379 failCount++; 380 381 // Test Slice, SliceA and SliceU nodes 382 for (int i=0; i<3; i++) { 383 int flags = 0; 384 if (i==1) flags = Pattern.CASE_INSENSITIVE; 385 if (i==2) flags = Pattern.UNICODE_CASE; 386 p = Pattern.compile("^abc", flags); 387 m = p.matcher("ad"); 388 m.find(); 389 if (m.hitEnd()) 390 failCount++; 391 m.reset("ab"); 392 m.find(); 393 if (!m.hitEnd()) 394 failCount++; 395 } 396 397 // Test Boyer-Moore node 398 p = Pattern.compile("catattack"); 399 m = p.matcher("attack"); 400 m.find(); 401 if (!m.hitEnd()) 402 failCount++; 403 404 p = Pattern.compile("catattack"); 405 m = p.matcher("attackattackattackcatatta"); 406 m.find(); 407 if (!m.hitEnd()) 408 failCount++; 409 report("hitEnd from a Slice"); 410 } 411 412 // This is for bug 4997476 413 // It is weird code submitted by customer demonstrating a regression 414 private static void wordSearchTest() throws Exception { 415 String testString = new String("word1 word2 word3"); 416 Pattern p = Pattern.compile("\\b"); 417 Matcher m = p.matcher(testString); 418 int position = 0; 419 int start = 0; 420 while (m.find(position)) { 421 start = m.start(); 422 if (start == testString.length()) 423 break; 424 if (m.find(start+1)) { 425 position = m.start(); 426 } else { 427 position = testString.length(); 428 } 429 if (testString.substring(start, position).equals(" ")) 430 continue; 431 if (!testString.substring(start, position-1).startsWith("word")) 432 failCount++; 433 } 434 report("Customer word search"); 435 } 436 437 // This is for bug 4994840 438 private static void caretAtEndTest() throws Exception { 439 // Problem only occurs with multiline patterns 440 // containing a beginning-of-line caret "^" followed 441 // by an expression that also matches the empty string. 442 Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE); 443 Matcher matcher = pattern.matcher("\r"); 444 matcher.find(); 445 matcher.find(); 446 report("Caret at end"); 447 } 448 449 // This test is for 4979006 450 // Check to see if word boundary construct properly handles unicode 451 // non spacing marks 452 private static void unicodeWordBoundsTest() throws Exception { 453 String spaces = " "; 454 String wordChar = "a"; 455 String nsm = "\u030a"; 456 457 assert (Character.getType('\u030a') == Character.NON_SPACING_MARK); 458 459 Pattern pattern = Pattern.compile("\\b"); 460 Matcher matcher = pattern.matcher(""); 461 // S=other B=word character N=non spacing mark .=word boundary 462 // SS.BB.SS 463 String input = spaces + wordChar + wordChar + spaces; 464 twoFindIndexes(input, matcher, 2, 4); 465 // SS.BBN.SS 466 input = spaces + wordChar +wordChar + nsm + spaces; 467 twoFindIndexes(input, matcher, 2, 5); 468 // SS.BN.SS 469 input = spaces + wordChar + nsm + spaces; 470 twoFindIndexes(input, matcher, 2, 4); 471 // SS.BNN.SS 472 input = spaces + wordChar + nsm + nsm + spaces; 473 twoFindIndexes(input, matcher, 2, 5); 474 // SSN.BB.SS 475 input = spaces + nsm + wordChar + wordChar + spaces; 476 twoFindIndexes(input, matcher, 3, 5); 477 // SS.BNB.SS 478 input = spaces + wordChar + nsm + wordChar + spaces; 479 twoFindIndexes(input, matcher, 2, 5); 480 // SSNNSS 481 input = spaces + nsm + nsm + spaces; 482 matcher.reset(input); 483 if (matcher.find()) 484 failCount++; 485 // SSN.BBN.SS 486 input = spaces + nsm + wordChar + wordChar + nsm + spaces; 487 twoFindIndexes(input, matcher, 3, 6); 488 489 report("Unicode word boundary"); 490 } 491 492 private static void twoFindIndexes(String input, Matcher matcher, int a, 493 int b) throws Exception 494 { 495 matcher.reset(input); 496 matcher.find(); 497 if (matcher.start() != a) 498 failCount++; 499 matcher.find(); 500 if (matcher.start() != b) 501 failCount++; 502 } 503 504 // This test is for 6284152 505 static void check(String regex, String input, String[] expected) { 506 List<String> result = new ArrayList<String>(); 507 Pattern p = Pattern.compile(regex); 508 Matcher m = p.matcher(input); 509 while (m.find()) { 510 result.add(m.group()); 511 } 512 if (!Arrays.asList(expected).equals(result)) 513 failCount++; 514 } 515 516 private static void lookbehindTest() throws Exception { 517 //Positive 518 check("(?<=%.{0,5})foo\\d", 519 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 520 new String[]{"foo1", "foo2", "foo3"}); 521 522 //boundary at end of the lookbehind sub-regex should work consistently 523 //with the boundary just after the lookbehind sub-regex 524 check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"}); 525 check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"}); 526 check("(?<!abc )\\bfoo", "abc foo", new String[0]); 527 check("(?<!abc \\b)foo", "abc foo", new String[0]); 528 529 //Negative 530 check("(?<!%.{0,5})foo\\d", 531 "%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5", 532 new String[] {"foo4", "foo5"}); 533 534 //Positive greedy 535 check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"}); 536 537 //Positive reluctant 538 check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"}); 539 540 //supplementary 541 check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 542 new String[] {"fo\ud800\udc00o"}); 543 check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o", 544 new String[] {"fo\ud800\udc00o"}); 545 check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o", 546 new String[] {"fo\ud800\udc00o"}); 547 check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o", 548 new String[] {"fo\ud800\udc00o"}); 549 report("Lookbehind"); 550 } 551 552 // This test is for 4938995 553 // Check to see if weak region boundaries are transparent to 554 // lookahead and lookbehind constructs 555 private static void boundsTest() throws Exception { 556 String fullMessage = "catdogcat"; 557 Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)"); 558 Matcher matcher = pattern.matcher("catdogca"); 559 matcher.useTransparentBounds(true); 560 if (matcher.find()) 561 failCount++; 562 matcher.reset("atdogcat"); 563 if (matcher.find()) 564 failCount++; 565 matcher.reset(fullMessage); 566 if (!matcher.find()) 567 failCount++; 568 matcher.reset(fullMessage); 569 matcher.region(0,9); 570 if (!matcher.find()) 571 failCount++; 572 matcher.reset(fullMessage); 573 matcher.region(0,6); 574 if (!matcher.find()) 575 failCount++; 576 matcher.reset(fullMessage); 577 matcher.region(3,6); 578 if (!matcher.find()) 579 failCount++; 580 matcher.useTransparentBounds(false); 581 if (matcher.find()) 582 failCount++; 583 584 // Negative lookahead/lookbehind 585 pattern = Pattern.compile("(?<!cat)dog(?!cat)"); 586 matcher = pattern.matcher("dogcat"); 587 matcher.useTransparentBounds(true); 588 matcher.region(0,3); 589 if (matcher.find()) 590 failCount++; 591 matcher.reset("catdog"); 592 matcher.region(3,6); 593 if (matcher.find()) 594 failCount++; 595 matcher.useTransparentBounds(false); 596 matcher.reset("dogcat"); 597 matcher.region(0,3); 598 if (!matcher.find()) 599 failCount++; 600 matcher.reset("catdog"); 601 matcher.region(3,6); 602 if (!matcher.find()) 603 failCount++; 604 605 report("Region bounds transparency"); 606 } 607 608 // This test is for 4945394 609 private static void findFromTest() throws Exception { 610 String message = "This is 40 $0 message."; 611 Pattern pat = Pattern.compile("\\$0"); 612 Matcher match = pat.matcher(message); 613 if (!match.find()) 614 failCount++; 615 if (match.find()) 616 failCount++; 617 if (match.find()) 618 failCount++; 619 report("Check for alternating find"); 620 } 621 622 // This test is for 4872664 and 4892980 623 private static void negatedCharClassTest() throws Exception { 624 Pattern pattern = Pattern.compile("[^>]"); 625 Matcher matcher = pattern.matcher("\u203A"); 626 if (!matcher.matches()) 627 failCount++; 628 pattern = Pattern.compile("[^fr]"); 629 matcher = pattern.matcher("a"); 630 if (!matcher.find()) 631 failCount++; 632 matcher.reset("\u203A"); 633 if (!matcher.find()) 634 failCount++; 635 String s = "for"; 636 String result[] = s.split("[^fr]"); 637 if (!result[0].equals("f")) 638 failCount++; 639 if (!result[1].equals("r")) 640 failCount++; 641 s = "f\u203Ar"; 642 result = s.split("[^fr]"); 643 if (!result[0].equals("f")) 644 failCount++; 645 if (!result[1].equals("r")) 646 failCount++; 647 648 // Test adding to bits, subtracting a node, then adding to bits again 649 pattern = Pattern.compile("[^f\u203Ar]"); 650 matcher = pattern.matcher("a"); 651 if (!matcher.find()) 652 failCount++; 653 matcher.reset("f"); 654 if (matcher.find()) 655 failCount++; 656 matcher.reset("\u203A"); 657 if (matcher.find()) 658 failCount++; 659 matcher.reset("r"); 660 if (matcher.find()) 661 failCount++; 662 matcher.reset("\u203B"); 663 if (!matcher.find()) 664 failCount++; 665 666 // Test subtracting a node, adding to bits, subtracting again 667 pattern = Pattern.compile("[^\u203Ar\u203B]"); 668 matcher = pattern.matcher("a"); 669 if (!matcher.find()) 670 failCount++; 671 matcher.reset("\u203A"); 672 if (matcher.find()) 673 failCount++; 674 matcher.reset("r"); 675 if (matcher.find()) 676 failCount++; 677 matcher.reset("\u203B"); 678 if (matcher.find()) 679 failCount++; 680 matcher.reset("\u203C"); 681 if (!matcher.find()) 682 failCount++; 683 684 report("Negated Character Class"); 685 } 686 687 // This test is for 4628291 688 private static void toStringTest() throws Exception { 689 Pattern pattern = Pattern.compile("b+"); 690 if (pattern.toString() != "b+") 691 failCount++; 692 Matcher matcher = pattern.matcher("aaabbbccc"); 693 String matcherString = matcher.toString(); // unspecified 694 matcher.find(); 695 matcherString = matcher.toString(); // unspecified 696 matcher.region(0,3); 697 matcherString = matcher.toString(); // unspecified 698 matcher.reset(); 699 matcherString = matcher.toString(); // unspecified 700 report("toString"); 701 } 702 703 // This test is for 4808962 704 private static void literalPatternTest() throws Exception { 705 int flags = Pattern.LITERAL; 706 707 Pattern pattern = Pattern.compile("abc\\t$^", flags); 708 check(pattern, "abc\\t$^", true); 709 710 pattern = Pattern.compile(Pattern.quote("abc\\t$^")); 711 check(pattern, "abc\\t$^", true); 712 713 pattern = Pattern.compile("\\Qa^$bcabc\\E", flags); 714 check(pattern, "\\Qa^$bcabc\\E", true); 715 check(pattern, "a^$bcabc", false); 716 717 pattern = Pattern.compile("\\\\Q\\\\E"); 718 check(pattern, "\\Q\\E", true); 719 720 pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij"); 721 check(pattern, "abcefg\\Q\\Ehij", true); 722 723 pattern = Pattern.compile("\\\\\\Q\\\\E"); 724 check(pattern, "\\\\\\\\", true); 725 726 pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E")); 727 check(pattern, "\\Qa^$bcabc\\E", true); 728 check(pattern, "a^$bcabc", false); 729 730 pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef")); 731 check(pattern, "\\Qabc\\Edef", true); 732 check(pattern, "abcdef", false); 733 734 pattern = Pattern.compile(Pattern.quote("abc\\Edef")); 735 check(pattern, "abc\\Edef", true); 736 check(pattern, "abcdef", false); 737 738 pattern = Pattern.compile(Pattern.quote("\\E")); 739 check(pattern, "\\E", true); 740 741 pattern = Pattern.compile("((((abc.+?:)", flags); 742 check(pattern, "((((abc.+?:)", true); 743 744 flags |= Pattern.MULTILINE; 745 746 pattern = Pattern.compile("^cat$", flags); 747 check(pattern, "abc^cat$def", true); 748 check(pattern, "cat", false); 749 750 flags |= Pattern.CASE_INSENSITIVE; 751 752 pattern = Pattern.compile("abcdef", flags); 753 check(pattern, "ABCDEF", true); 754 check(pattern, "AbCdEf", true); 755 756 flags |= Pattern.DOTALL; 757 758 pattern = Pattern.compile("a...b", flags); 759 check(pattern, "A...b", true); 760 check(pattern, "Axxxb", false); 761 762 flags |= Pattern.CANON_EQ; 763 764 Pattern p = Pattern.compile("testa\u030a", flags); 765 check(pattern, "testa\u030a", false); 766 check(pattern, "test\u00e5", false); 767 768 // Supplementary character test 769 flags = Pattern.LITERAL; 770 771 pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags); 772 check(pattern, toSupplementaries("abc\\t$^"), true); 773 774 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^"))); 775 check(pattern, toSupplementaries("abc\\t$^"), true); 776 777 pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags); 778 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 779 check(pattern, toSupplementaries("a^$bcabc"), false); 780 781 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E"))); 782 check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true); 783 check(pattern, toSupplementaries("a^$bcabc"), false); 784 785 pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef"))); 786 check(pattern, toSupplementaries("\\Qabc\\Edef"), true); 787 check(pattern, toSupplementaries("abcdef"), false); 788 789 pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef"))); 790 check(pattern, toSupplementaries("abc\\Edef"), true); 791 check(pattern, toSupplementaries("abcdef"), false); 792 793 pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags); 794 check(pattern, toSupplementaries("((((abc.+?:)"), true); 795 796 flags |= Pattern.MULTILINE; 797 798 pattern = Pattern.compile(toSupplementaries("^cat$"), flags); 799 check(pattern, toSupplementaries("abc^cat$def"), true); 800 check(pattern, toSupplementaries("cat"), false); 801 802 flags |= Pattern.DOTALL; 803 804 // note: this is case-sensitive. 805 pattern = Pattern.compile(toSupplementaries("a...b"), flags); 806 check(pattern, toSupplementaries("a...b"), true); 807 check(pattern, toSupplementaries("axxxb"), false); 808 809 flags |= Pattern.CANON_EQ; 810 811 String t = toSupplementaries("test"); 812 p = Pattern.compile(t + "a\u030a", flags); 813 check(pattern, t + "a\u030a", false); 814 check(pattern, t + "\u00e5", false); 815 816 report("Literal pattern"); 817 } 818 819 // This test is for 4803179 820 // This test is also for 4808962, replacement parts 821 private static void literalReplacementTest() throws Exception { 822 int flags = Pattern.LITERAL; 823 824 Pattern pattern = Pattern.compile("abc", flags); 825 Matcher matcher = pattern.matcher("zzzabczzz"); 826 String replaceTest = "$0"; 827 String result = matcher.replaceAll(replaceTest); 828 if (!result.equals("zzzabczzz")) 829 failCount++; 830 831 matcher.reset(); 832 String literalReplacement = matcher.quoteReplacement(replaceTest); 833 result = matcher.replaceAll(literalReplacement); 834 if (!result.equals("zzz$0zzz")) 835 failCount++; 836 837 matcher.reset(); 838 replaceTest = "\\t$\\$"; 839 literalReplacement = matcher.quoteReplacement(replaceTest); 840 result = matcher.replaceAll(literalReplacement); 841 if (!result.equals("zzz\\t$\\$zzz")) 842 failCount++; 843 844 // Supplementary character test 845 pattern = Pattern.compile(toSupplementaries("abc"), flags); 846 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 847 replaceTest = "$0"; 848 result = matcher.replaceAll(replaceTest); 849 if (!result.equals(toSupplementaries("zzzabczzz"))) 850 failCount++; 851 852 matcher.reset(); 853 literalReplacement = matcher.quoteReplacement(replaceTest); 854 result = matcher.replaceAll(literalReplacement); 855 if (!result.equals(toSupplementaries("zzz$0zzz"))) 856 failCount++; 857 858 matcher.reset(); 859 replaceTest = "\\t$\\$"; 860 literalReplacement = matcher.quoteReplacement(replaceTest); 861 result = matcher.replaceAll(literalReplacement); 862 if (!result.equals(toSupplementaries("zzz\\t$\\$zzz"))) 863 failCount++; 864 865 // IAE should be thrown if backslash or '$' is the last character 866 // in replacement string 867 try { 868 "\uac00".replaceAll("\uac00", "$"); 869 failCount++; 870 } catch (IllegalArgumentException iie) { 871 } catch (Exception e) { 872 failCount++; 873 } 874 try { 875 "\uac00".replaceAll("\uac00", "\\"); 876 failCount++; 877 } catch (IllegalArgumentException iie) { 878 } catch (Exception e) { 879 failCount++; 880 } 881 report("Literal replacement"); 882 } 883 884 // This test is for 4757029 885 private static void regionTest() throws Exception { 886 Pattern pattern = Pattern.compile("abc"); 887 Matcher matcher = pattern.matcher("abcdefabc"); 888 889 matcher.region(0,9); 890 if (!matcher.find()) 891 failCount++; 892 if (!matcher.find()) 893 failCount++; 894 matcher.region(0,3); 895 if (!matcher.find()) 896 failCount++; 897 matcher.region(3,6); 898 if (matcher.find()) 899 failCount++; 900 matcher.region(0,2); 901 if (matcher.find()) 902 failCount++; 903 904 expectRegionFail(matcher, 1, -1); 905 expectRegionFail(matcher, -1, -1); 906 expectRegionFail(matcher, -1, 1); 907 expectRegionFail(matcher, 5, 3); 908 expectRegionFail(matcher, 5, 12); 909 expectRegionFail(matcher, 12, 12); 910 911 pattern = Pattern.compile("^abc$"); 912 matcher = pattern.matcher("zzzabczzz"); 913 matcher.region(0,9); 914 if (matcher.find()) 915 failCount++; 916 matcher.region(3,6); 917 if (!matcher.find()) 918 failCount++; 919 matcher.region(3,6); 920 matcher.useAnchoringBounds(false); 921 if (matcher.find()) 922 failCount++; 923 924 // Supplementary character test 925 pattern = Pattern.compile(toSupplementaries("abc")); 926 matcher = pattern.matcher(toSupplementaries("abcdefabc")); 927 matcher.region(0,9*2); 928 if (!matcher.find()) 929 failCount++; 930 if (!matcher.find()) 931 failCount++; 932 matcher.region(0,3*2); 933 if (!matcher.find()) 934 failCount++; 935 matcher.region(1,3*2); 936 if (matcher.find()) 937 failCount++; 938 matcher.region(3*2,6*2); 939 if (matcher.find()) 940 failCount++; 941 matcher.region(0,2*2); 942 if (matcher.find()) 943 failCount++; 944 matcher.region(0,2*2+1); 945 if (matcher.find()) 946 failCount++; 947 948 expectRegionFail(matcher, 1*2, -1); 949 expectRegionFail(matcher, -1, -1); 950 expectRegionFail(matcher, -1, 1*2); 951 expectRegionFail(matcher, 5*2, 3*2); 952 expectRegionFail(matcher, 5*2, 12*2); 953 expectRegionFail(matcher, 12*2, 12*2); 954 955 pattern = Pattern.compile(toSupplementaries("^abc$")); 956 matcher = pattern.matcher(toSupplementaries("zzzabczzz")); 957 matcher.region(0,9*2); 958 if (matcher.find()) 959 failCount++; 960 matcher.region(3*2,6*2); 961 if (!matcher.find()) 962 failCount++; 963 matcher.region(3*2+1,6*2); 964 if (matcher.find()) 965 failCount++; 966 matcher.region(3*2,6*2-1); 967 if (matcher.find()) 968 failCount++; 969 matcher.region(3*2,6*2); 970 matcher.useAnchoringBounds(false); 971 if (matcher.find()) 972 failCount++; 973 report("Regions"); 974 } 975 976 private static void expectRegionFail(Matcher matcher, int index1, 977 int index2) 978 { 979 try { 980 matcher.region(index1, index2); 981 failCount++; 982 } catch (IndexOutOfBoundsException ioobe) { 983 // Correct result 984 } catch (IllegalStateException ise) { 985 // Correct result 986 } 987 } 988 989 // This test is for 4803197 990 private static void escapedSegmentTest() throws Exception { 991 992 Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E"); 993 check(pattern, "dir1\\dir2", true); 994 995 pattern = Pattern.compile("\\Qdir1\\dir2\\\\E"); 996 check(pattern, "dir1\\dir2\\", true); 997 998 pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)"); 999 check(pattern, "dir1\\dir2\\", true); 1000 1001 // Supplementary character test 1002 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E")); 1003 check(pattern, toSupplementaries("dir1\\dir2"), true); 1004 1005 pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E"); 1006 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1007 1008 pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)"); 1009 check(pattern, toSupplementaries("dir1\\dir2\\"), true); 1010 1011 report("Escaped segment"); 1012 } 1013 1014 // This test is for 4792284 1015 private static void nonCaptureRepetitionTest() throws Exception { 1016 String input = "abcdefgh;"; 1017 1018 String[] patterns = new String[] { 1019 "(?:\\w{4})+;", 1020 "(?:\\w{8})*;", 1021 "(?:\\w{2}){2,4};", 1022 "(?:\\w{4}){2,};", // only matches the 1023 ".*?(?:\\w{5})+;", // specified minimum 1024 ".*?(?:\\w{9})*;", // number of reps - OK 1025 "(?:\\w{4})+?;", // lazy repetition - OK 1026 "(?:\\w{4})++;", // possessive repetition - OK 1027 "(?:\\w{2,}?)+;", // non-deterministic - OK 1028 "(\\w{4})+;", // capturing group - OK 1029 }; 1030 1031 for (int i = 0; i < patterns.length; i++) { 1032 // Check find() 1033 check(patterns[i], 0, input, input, true); 1034 // Check matches() 1035 Pattern p = Pattern.compile(patterns[i]); 1036 Matcher m = p.matcher(input); 1037 1038 if (m.matches()) { 1039 if (!m.group(0).equals(input)) 1040 failCount++; 1041 } else { 1042 failCount++; 1043 } 1044 } 1045 1046 report("Non capturing repetition"); 1047 } 1048 1049 // This test is for 6358731 1050 private static void notCapturedGroupCurlyMatchTest() throws Exception { 1051 Pattern pattern = Pattern.compile("(abc)+|(abcd)+"); 1052 Matcher matcher = pattern.matcher("abcd"); 1053 if (!matcher.matches() || 1054 matcher.group(1) != null || 1055 !matcher.group(2).equals("abcd")) { 1056 failCount++; 1057 } 1058 report("Not captured GroupCurly"); 1059 } 1060 1061 // This test is for 4706545 1062 private static void javaCharClassTest() throws Exception { 1063 for (int i=0; i<1000; i++) { 1064 char c = (char)generator.nextInt(); 1065 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1066 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1067 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1068 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1069 check("{javaDigit}", c, Character.isDigit(c)); 1070 check("{javaDefined}", c, Character.isDefined(c)); 1071 check("{javaLetter}", c, Character.isLetter(c)); 1072 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1073 check("{javaJavaIdentifierStart}", c, 1074 Character.isJavaIdentifierStart(c)); 1075 check("{javaJavaIdentifierPart}", c, 1076 Character.isJavaIdentifierPart(c)); 1077 check("{javaUnicodeIdentifierStart}", c, 1078 Character.isUnicodeIdentifierStart(c)); 1079 check("{javaUnicodeIdentifierPart}", c, 1080 Character.isUnicodeIdentifierPart(c)); 1081 check("{javaIdentifierIgnorable}", c, 1082 Character.isIdentifierIgnorable(c)); 1083 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1084 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1085 check("{javaISOControl}", c, Character.isISOControl(c)); 1086 check("{javaMirrored}", c, Character.isMirrored(c)); 1087 1088 } 1089 1090 // Supplementary character test 1091 for (int i=0; i<1000; i++) { 1092 int c = generator.nextInt(Character.MAX_CODE_POINT 1093 - Character.MIN_SUPPLEMENTARY_CODE_POINT) 1094 + Character.MIN_SUPPLEMENTARY_CODE_POINT; 1095 check("{javaLowerCase}", c, Character.isLowerCase(c)); 1096 check("{javaUpperCase}", c, Character.isUpperCase(c)); 1097 check("{javaUpperCase}+", c, Character.isUpperCase(c)); 1098 check("{javaTitleCase}", c, Character.isTitleCase(c)); 1099 check("{javaDigit}", c, Character.isDigit(c)); 1100 check("{javaDefined}", c, Character.isDefined(c)); 1101 check("{javaLetter}", c, Character.isLetter(c)); 1102 check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c)); 1103 check("{javaJavaIdentifierStart}", c, 1104 Character.isJavaIdentifierStart(c)); 1105 check("{javaJavaIdentifierPart}", c, 1106 Character.isJavaIdentifierPart(c)); 1107 check("{javaUnicodeIdentifierStart}", c, 1108 Character.isUnicodeIdentifierStart(c)); 1109 check("{javaUnicodeIdentifierPart}", c, 1110 Character.isUnicodeIdentifierPart(c)); 1111 check("{javaIdentifierIgnorable}", c, 1112 Character.isIdentifierIgnorable(c)); 1113 check("{javaSpaceChar}", c, Character.isSpaceChar(c)); 1114 check("{javaWhitespace}", c, Character.isWhitespace(c)); 1115 check("{javaISOControl}", c, Character.isISOControl(c)); 1116 check("{javaMirrored}", c, Character.isMirrored(c)); 1117 } 1118 1119 report("Java character classes"); 1120 } 1121 1122 // This test is for 4523620 1123 /* 1124 private static void numOccurrencesTest() throws Exception { 1125 Pattern pattern = Pattern.compile("aaa"); 1126 1127 if (pattern.numOccurrences("aaaaaa", false) != 2) 1128 failCount++; 1129 if (pattern.numOccurrences("aaaaaa", true) != 4) 1130 failCount++; 1131 1132 pattern = Pattern.compile("^"); 1133 if (pattern.numOccurrences("aaaaaa", false) != 1) 1134 failCount++; 1135 if (pattern.numOccurrences("aaaaaa", true) != 1) 1136 failCount++; 1137 1138 report("Number of Occurrences"); 1139 } 1140 */ 1141 1142 // This test is for 4776374 1143 private static void caretBetweenTerminatorsTest() throws Exception { 1144 int flags1 = Pattern.DOTALL; 1145 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1146 int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE; 1147 int flags4 = Pattern.DOTALL | Pattern.MULTILINE; 1148 1149 check("^....", flags1, "test\ntest", "test", true); 1150 check(".....^", flags1, "test\ntest", "test", false); 1151 check(".....^", flags1, "test\n", "test", false); 1152 check("....^", flags1, "test\r\n", "test", false); 1153 1154 check("^....", flags2, "test\ntest", "test", true); 1155 check("....^", flags2, "test\ntest", "test", false); 1156 check(".....^", flags2, "test\n", "test", false); 1157 check("....^", flags2, "test\r\n", "test", false); 1158 1159 check("^....", flags3, "test\ntest", "test", true); 1160 check(".....^", flags3, "test\ntest", "test\n", true); 1161 check(".....^", flags3, "test\u0085test", "test\u0085", false); 1162 check(".....^", flags3, "test\n", "test", false); 1163 check(".....^", flags3, "test\r\n", "test", false); 1164 check("......^", flags3, "test\r\ntest", "test\r\n", true); 1165 1166 check("^....", flags4, "test\ntest", "test", true); 1167 check(".....^", flags3, "test\ntest", "test\n", true); 1168 check(".....^", flags4, "test\u0085test", "test\u0085", true); 1169 check(".....^", flags4, "test\n", "test\n", false); 1170 check(".....^", flags4, "test\r\n", "test\r", false); 1171 1172 // Supplementary character test 1173 String t = toSupplementaries("test"); 1174 check("^....", flags1, t+"\n"+t, t, true); 1175 check(".....^", flags1, t+"\n"+t, t, false); 1176 check(".....^", flags1, t+"\n", t, false); 1177 check("....^", flags1, t+"\r\n", t, false); 1178 1179 check("^....", flags2, t+"\n"+t, t, true); 1180 check("....^", flags2, t+"\n"+t, t, false); 1181 check(".....^", flags2, t+"\n", t, false); 1182 check("....^", flags2, t+"\r\n", t, false); 1183 1184 check("^....", flags3, t+"\n"+t, t, true); 1185 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1186 check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false); 1187 check(".....^", flags3, t+"\n", t, false); 1188 check(".....^", flags3, t+"\r\n", t, false); 1189 check("......^", flags3, t+"\r\n"+t, t+"\r\n", true); 1190 1191 check("^....", flags4, t+"\n"+t, t, true); 1192 check(".....^", flags3, t+"\n"+t, t+"\n", true); 1193 check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true); 1194 check(".....^", flags4, t+"\n", t+"\n", false); 1195 check(".....^", flags4, t+"\r\n", t+"\r", false); 1196 1197 report("Caret between terminators"); 1198 } 1199 1200 // This test is for 4727935 1201 private static void dollarAtEndTest() throws Exception { 1202 int flags1 = Pattern.DOTALL; 1203 int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES; 1204 int flags3 = Pattern.DOTALL | Pattern.MULTILINE; 1205 1206 check("....$", flags1, "test\n", "test", true); 1207 check("....$", flags1, "test\r\n", "test", true); 1208 check(".....$", flags1, "test\n", "test\n", true); 1209 check(".....$", flags1, "test\u0085", "test\u0085", true); 1210 check("....$", flags1, "test\u0085", "test", true); 1211 1212 check("....$", flags2, "test\n", "test", true); 1213 check(".....$", flags2, "test\n", "test\n", true); 1214 check(".....$", flags2, "test\u0085", "test\u0085", true); 1215 check("....$", flags2, "test\u0085", "est\u0085", true); 1216 1217 check("....$.blah", flags3, "test\nblah", "test\nblah", true); 1218 check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true); 1219 check("....$blah", flags3, "test\nblah", "!!!!", false); 1220 check(".....$blah", flags3, "test\nblah", "!!!!", false); 1221 1222 // Supplementary character test 1223 String t = toSupplementaries("test"); 1224 String b = toSupplementaries("blah"); 1225 check("....$", flags1, t+"\n", t, true); 1226 check("....$", flags1, t+"\r\n", t, true); 1227 check(".....$", flags1, t+"\n", t+"\n", true); 1228 check(".....$", flags1, t+"\u0085", t+"\u0085", true); 1229 check("....$", flags1, t+"\u0085", t, true); 1230 1231 check("....$", flags2, t+"\n", t, true); 1232 check(".....$", flags2, t+"\n", t+"\n", true); 1233 check(".....$", flags2, t+"\u0085", t+"\u0085", true); 1234 check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true); 1235 1236 check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true); 1237 check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true); 1238 check("....$"+b, flags3, t+"\n"+b, "!!!!", false); 1239 check(".....$"+b, flags3, t+"\n"+b, "!!!!", false); 1240 1241 report("Dollar at End"); 1242 } 1243 1244 // This test is for 4711773 1245 private static void multilineDollarTest() throws Exception { 1246 Pattern findCR = Pattern.compile("$", Pattern.MULTILINE); 1247 Matcher matcher = findCR.matcher("first bit\nsecond bit"); 1248 matcher.find(); 1249 if (matcher.start(0) != 9) 1250 failCount++; 1251 matcher.find(); 1252 if (matcher.start(0) != 20) 1253 failCount++; 1254 1255 // Supplementary character test 1256 matcher = findCR.matcher(toSupplementaries("first bit\n second bit")); // double BMP chars 1257 matcher.find(); 1258 if (matcher.start(0) != 9*2) 1259 failCount++; 1260 matcher.find(); 1261 if (matcher.start(0) != 20*2) 1262 failCount++; 1263 1264 report("Multiline Dollar"); 1265 } 1266 1267 private static void reluctantRepetitionTest() throws Exception { 1268 Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2"); 1269 check(p, "1 word word word 2", true); 1270 check(p, "1 wor wo w 2", true); 1271 check(p, "1 word word 2", true); 1272 check(p, "1 word 2", true); 1273 check(p, "1 wo w w 2", true); 1274 check(p, "1 wo w 2", true); 1275 check(p, "1 wor w 2", true); 1276 1277 p = Pattern.compile("([a-z])+?c"); 1278 Matcher m = p.matcher("ababcdefdec"); 1279 check(m, "ababc"); 1280 1281 // Supplementary character test 1282 p = Pattern.compile(toSupplementaries("([a-z])+?c")); 1283 m = p.matcher(toSupplementaries("ababcdefdec")); 1284 check(m, toSupplementaries("ababc")); 1285 1286 report("Reluctant Repetition"); 1287 } 1288 1289 private static void serializeTest() throws Exception { 1290 String patternStr = "(b)"; 1291 String matchStr = "b"; 1292 Pattern pattern = Pattern.compile(patternStr); 1293 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1294 ObjectOutputStream oos = new ObjectOutputStream(baos); 1295 oos.writeObject(pattern); 1296 oos.close(); 1297 ObjectInputStream ois = new ObjectInputStream( 1298 new ByteArrayInputStream(baos.toByteArray())); 1299 Pattern serializedPattern = (Pattern)ois.readObject(); 1300 ois.close(); 1301 Matcher matcher = serializedPattern.matcher(matchStr); 1302 if (!matcher.matches()) 1303 failCount++; 1304 if (matcher.groupCount() != 1) 1305 failCount++; 1306 1307 report("Serialization"); 1308 } 1309 1310 private static void gTest() { 1311 Pattern pattern = Pattern.compile("\\G\\w"); 1312 Matcher matcher = pattern.matcher("abc#x#x"); 1313 matcher.find(); 1314 matcher.find(); 1315 matcher.find(); 1316 if (matcher.find()) 1317 failCount++; 1318 1319 pattern = Pattern.compile("\\GA*"); 1320 matcher = pattern.matcher("1A2AA3"); 1321 matcher.find(); 1322 if (matcher.find()) 1323 failCount++; 1324 1325 pattern = Pattern.compile("\\GA*"); 1326 matcher = pattern.matcher("1A2AA3"); 1327 if (!matcher.find(1)) 1328 failCount++; 1329 matcher.find(); 1330 if (matcher.find()) 1331 failCount++; 1332 1333 report("\\G"); 1334 } 1335 1336 private static void zTest() { 1337 Pattern pattern = Pattern.compile("foo\\Z"); 1338 // Positives 1339 check(pattern, "foo\u0085", true); 1340 check(pattern, "foo\u2028", true); 1341 check(pattern, "foo\u2029", true); 1342 check(pattern, "foo\n", true); 1343 check(pattern, "foo\r", true); 1344 check(pattern, "foo\r\n", true); 1345 // Negatives 1346 check(pattern, "fooo", false); 1347 check(pattern, "foo\n\r", false); 1348 1349 pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES); 1350 // Positives 1351 check(pattern, "foo", true); 1352 check(pattern, "foo\n", true); 1353 // Negatives 1354 check(pattern, "foo\r", false); 1355 check(pattern, "foo\u0085", false); 1356 check(pattern, "foo\u2028", false); 1357 check(pattern, "foo\u2029", false); 1358 1359 report("\\Z"); 1360 } 1361 1362 private static void replaceFirstTest() { 1363 Pattern pattern = Pattern.compile("(ab)(c*)"); 1364 Matcher matcher = pattern.matcher("abccczzzabcczzzabccc"); 1365 if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc")) 1366 failCount++; 1367 1368 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1369 if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz")) 1370 failCount++; 1371 1372 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1373 String result = matcher.replaceFirst("$1"); 1374 if (!result.equals("zzzabzzzabcczzzabccczzz")) 1375 failCount++; 1376 1377 matcher.reset("zzzabccczzzabcczzzabccczzz"); 1378 result = matcher.replaceFirst("$2"); 1379 if (!result.equals("zzzccczzzabcczzzabccczzz")) 1380 failCount++; 1381 1382 pattern = Pattern.compile("a*"); 1383 matcher = pattern.matcher("aaaaaaaaaa"); 1384 if (!matcher.replaceFirst("test").equals("test")) 1385 failCount++; 1386 1387 pattern = Pattern.compile("a+"); 1388 matcher = pattern.matcher("zzzaaaaaaaaaa"); 1389 if (!matcher.replaceFirst("test").equals("zzztest")) 1390 failCount++; 1391 1392 // Supplementary character test 1393 pattern = Pattern.compile(toSupplementaries("(ab)(c*)")); 1394 matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc")); 1395 if (!matcher.replaceFirst(toSupplementaries("test")) 1396 .equals(toSupplementaries("testzzzabcczzzabccc"))) 1397 failCount++; 1398 1399 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1400 if (!matcher.replaceFirst(toSupplementaries("test")). 1401 equals(toSupplementaries("zzztestzzzabcczzzabccczzz"))) 1402 failCount++; 1403 1404 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1405 result = matcher.replaceFirst("$1"); 1406 if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz"))) 1407 failCount++; 1408 1409 matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 1410 result = matcher.replaceFirst("$2"); 1411 if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz"))) 1412 failCount++; 1413 1414 pattern = Pattern.compile(toSupplementaries("a*")); 1415 matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa")); 1416 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test"))) 1417 failCount++; 1418 1419 pattern = Pattern.compile(toSupplementaries("a+")); 1420 matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa")); 1421 if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest"))) 1422 failCount++; 1423 1424 report("Replace First"); 1425 } 1426 1427 private static void unixLinesTest() { 1428 Pattern pattern = Pattern.compile(".*"); 1429 Matcher matcher = pattern.matcher("aa\u2028blah"); 1430 matcher.find(); 1431 if (!matcher.group(0).equals("aa")) 1432 failCount++; 1433 1434 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1435 matcher = pattern.matcher("aa\u2028blah"); 1436 matcher.find(); 1437 if (!matcher.group(0).equals("aa\u2028blah")) 1438 failCount++; 1439 1440 pattern = Pattern.compile("[az]$", 1441 Pattern.MULTILINE | Pattern.UNIX_LINES); 1442 matcher = pattern.matcher("aa\u2028zz"); 1443 check(matcher, "a\u2028", false); 1444 1445 // Supplementary character test 1446 pattern = Pattern.compile(".*"); 1447 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1448 matcher.find(); 1449 if (!matcher.group(0).equals(toSupplementaries("aa"))) 1450 failCount++; 1451 1452 pattern = Pattern.compile(".*", Pattern.UNIX_LINES); 1453 matcher = pattern.matcher(toSupplementaries("aa\u2028blah")); 1454 matcher.find(); 1455 if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah"))) 1456 failCount++; 1457 1458 pattern = Pattern.compile(toSupplementaries("[az]$"), 1459 Pattern.MULTILINE | Pattern.UNIX_LINES); 1460 matcher = pattern.matcher(toSupplementaries("aa\u2028zz")); 1461 check(matcher, toSupplementaries("a\u2028"), false); 1462 1463 report("Unix Lines"); 1464 } 1465 1466 private static void commentsTest() { 1467 int flags = Pattern.COMMENTS; 1468 1469 Pattern pattern = Pattern.compile("aa \\# aa", flags); 1470 Matcher matcher = pattern.matcher("aa#aa"); 1471 if (!matcher.matches()) 1472 failCount++; 1473 1474 pattern = Pattern.compile("aa # blah", flags); 1475 matcher = pattern.matcher("aa"); 1476 if (!matcher.matches()) 1477 failCount++; 1478 1479 pattern = Pattern.compile("aa blah", flags); 1480 matcher = pattern.matcher("aablah"); 1481 if (!matcher.matches()) 1482 failCount++; 1483 1484 pattern = Pattern.compile("aa # blah blech ", flags); 1485 matcher = pattern.matcher("aa"); 1486 if (!matcher.matches()) 1487 failCount++; 1488 1489 pattern = Pattern.compile("aa # blah\n ", flags); 1490 matcher = pattern.matcher("aa"); 1491 if (!matcher.matches()) 1492 failCount++; 1493 1494 pattern = Pattern.compile("aa # blah\nbc # blech", flags); 1495 matcher = pattern.matcher("aabc"); 1496 if (!matcher.matches()) 1497 failCount++; 1498 1499 pattern = Pattern.compile("aa # blah\nbc# blech", flags); 1500 matcher = pattern.matcher("aabc"); 1501 if (!matcher.matches()) 1502 failCount++; 1503 1504 pattern = Pattern.compile("aa # blah\nbc\\# blech", flags); 1505 matcher = pattern.matcher("aabc#blech"); 1506 if (!matcher.matches()) 1507 failCount++; 1508 1509 // Supplementary character test 1510 pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags); 1511 matcher = pattern.matcher(toSupplementaries("aa#aa")); 1512 if (!matcher.matches()) 1513 failCount++; 1514 1515 pattern = Pattern.compile(toSupplementaries("aa # blah"), flags); 1516 matcher = pattern.matcher(toSupplementaries("aa")); 1517 if (!matcher.matches()) 1518 failCount++; 1519 1520 pattern = Pattern.compile(toSupplementaries("aa blah"), flags); 1521 matcher = pattern.matcher(toSupplementaries("aablah")); 1522 if (!matcher.matches()) 1523 failCount++; 1524 1525 pattern = Pattern.compile(toSupplementaries("aa # blah blech "), flags); 1526 matcher = pattern.matcher(toSupplementaries("aa")); 1527 if (!matcher.matches()) 1528 failCount++; 1529 1530 pattern = Pattern.compile(toSupplementaries("aa # blah\n "), flags); 1531 matcher = pattern.matcher(toSupplementaries("aa")); 1532 if (!matcher.matches()) 1533 failCount++; 1534 1535 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc # blech"), flags); 1536 matcher = pattern.matcher(toSupplementaries("aabc")); 1537 if (!matcher.matches()) 1538 failCount++; 1539 1540 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc# blech"), flags); 1541 matcher = pattern.matcher(toSupplementaries("aabc")); 1542 if (!matcher.matches()) 1543 failCount++; 1544 1545 pattern = Pattern.compile(toSupplementaries("aa # blah\nbc\\# blech"), flags); 1546 matcher = pattern.matcher(toSupplementaries("aabc#blech")); 1547 if (!matcher.matches()) 1548 failCount++; 1549 1550 report("Comments"); 1551 } 1552 1553 private static void caseFoldingTest() { // bug 4504687 1554 int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1555 Pattern pattern = Pattern.compile("aa", flags); 1556 Matcher matcher = pattern.matcher("ab"); 1557 if (matcher.matches()) 1558 failCount++; 1559 1560 pattern = Pattern.compile("aA", flags); 1561 matcher = pattern.matcher("ab"); 1562 if (matcher.matches()) 1563 failCount++; 1564 1565 pattern = Pattern.compile("aa", flags); 1566 matcher = pattern.matcher("aB"); 1567 if (matcher.matches()) 1568 failCount++; 1569 matcher = pattern.matcher("Ab"); 1570 if (matcher.matches()) 1571 failCount++; 1572 1573 // ASCII "a" 1574 // Latin-1 Supplement "a" + grave 1575 // Cyrillic "a" 1576 String[] patterns = new String[] { 1577 //single 1578 "a", "\u00e0", "\u0430", 1579 //slice 1580 "ab", "\u00e0\u00e1", "\u0430\u0431", 1581 //class single 1582 "[a]", "[\u00e0]", "[\u0430]", 1583 //class range 1584 "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]", 1585 //back reference 1586 "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1" 1587 }; 1588 1589 String[] texts = new String[] { 1590 "A", "\u00c0", "\u0410", 1591 "AB", "\u00c0\u00c1", "\u0410\u0411", 1592 "A", "\u00c0", "\u0410", 1593 "B", "\u00c2", "\u0411", 1594 "aA", "\u00e0\u00c0", "\u0430\u0410" 1595 }; 1596 1597 boolean[] expected = new boolean[] { 1598 true, false, false, 1599 true, false, false, 1600 true, false, false, 1601 true, false, false, 1602 true, false, false 1603 }; 1604 1605 flags = Pattern.CASE_INSENSITIVE; 1606 for (int i = 0; i < patterns.length; i++) { 1607 pattern = Pattern.compile(patterns[i], flags); 1608 matcher = pattern.matcher(texts[i]); 1609 if (matcher.matches() != expected[i]) { 1610 System.out.println("<1> Failed at " + i); 1611 failCount++; 1612 } 1613 } 1614 1615 flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; 1616 for (int i = 0; i < patterns.length; i++) { 1617 pattern = Pattern.compile(patterns[i], flags); 1618 matcher = pattern.matcher(texts[i]); 1619 if (!matcher.matches()) { 1620 System.out.println("<2> Failed at " + i); 1621 failCount++; 1622 } 1623 } 1624 // flag unicode_case alone should do nothing 1625 flags = Pattern.UNICODE_CASE; 1626 for (int i = 0; i < patterns.length; i++) { 1627 pattern = Pattern.compile(patterns[i], flags); 1628 matcher = pattern.matcher(texts[i]); 1629 if (matcher.matches()) { 1630 System.out.println("<3> Failed at " + i); 1631 failCount++; 1632 } 1633 } 1634 1635 // Special cases: i, I, u+0131 and u+0130 1636 flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 1637 pattern = Pattern.compile("[h-j]+", flags); 1638 if (!pattern.matcher("\u0131\u0130").matches()) 1639 failCount++; 1640 report("Case Folding"); 1641 } 1642 1643 private static void appendTest() { 1644 Pattern pattern = Pattern.compile("(ab)(cd)"); 1645 Matcher matcher = pattern.matcher("abcd"); 1646 String result = matcher.replaceAll("$2$1"); 1647 if (!result.equals("cdab")) 1648 failCount++; 1649 1650 String s1 = "Swap all: first = 123, second = 456"; 1651 String s2 = "Swap one: first = 123, second = 456"; 1652 String r = "$3$2$1"; 1653 pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)"); 1654 matcher = pattern.matcher(s1); 1655 1656 result = matcher.replaceAll(r); 1657 if (!result.equals("Swap all: 123 = first, 456 = second")) 1658 failCount++; 1659 1660 matcher = pattern.matcher(s2); 1661 1662 if (matcher.find()) { 1663 StringBuffer sb = new StringBuffer(); 1664 matcher.appendReplacement(sb, r); 1665 matcher.appendTail(sb); 1666 result = sb.toString(); 1667 if (!result.equals("Swap one: 123 = first, second = 456")) 1668 failCount++; 1669 } 1670 1671 // Supplementary character test 1672 pattern = Pattern.compile(toSupplementaries("(ab)(cd)")); 1673 matcher = pattern.matcher(toSupplementaries("abcd")); 1674 result = matcher.replaceAll("$2$1"); 1675 if (!result.equals(toSupplementaries("cdab"))) 1676 failCount++; 1677 1678 s1 = toSupplementaries("Swap all: first = 123, second = 456"); 1679 s2 = toSupplementaries("Swap one: first = 123, second = 456"); 1680 r = toSupplementaries("$3$2$1"); 1681 pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)")); 1682 matcher = pattern.matcher(s1); 1683 1684 result = matcher.replaceAll(r); 1685 if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second"))) 1686 failCount++; 1687 1688 matcher = pattern.matcher(s2); 1689 1690 if (matcher.find()) { 1691 StringBuffer sb = new StringBuffer(); 1692 matcher.appendReplacement(sb, r); 1693 matcher.appendTail(sb); 1694 result = sb.toString(); 1695 if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456"))) 1696 failCount++; 1697 } 1698 report("Append"); 1699 } 1700 1701 private static void splitTest() { 1702 Pattern pattern = Pattern.compile(":"); 1703 String[] result = pattern.split("foo:and:boo", 2); 1704 if (!result[0].equals("foo")) 1705 failCount++; 1706 if (!result[1].equals("and:boo")) 1707 failCount++; 1708 // Supplementary character test 1709 Pattern patternX = Pattern.compile(toSupplementaries("X")); 1710 result = patternX.split(toSupplementaries("fooXandXboo"), 2); 1711 if (!result[0].equals(toSupplementaries("foo"))) 1712 failCount++; 1713 if (!result[1].equals(toSupplementaries("andXboo"))) 1714 failCount++; 1715 1716 CharBuffer cb = CharBuffer.allocate(100); 1717 cb.put("foo:and:boo"); 1718 cb.flip(); 1719 result = pattern.split(cb); 1720 if (!result[0].equals("foo")) 1721 failCount++; 1722 if (!result[1].equals("and")) 1723 failCount++; 1724 if (!result[2].equals("boo")) 1725 failCount++; 1726 1727 // Supplementary character test 1728 CharBuffer cbs = CharBuffer.allocate(100); 1729 cbs.put(toSupplementaries("fooXandXboo")); 1730 cbs.flip(); 1731 result = patternX.split(cbs); 1732 if (!result[0].equals(toSupplementaries("foo"))) 1733 failCount++; 1734 if (!result[1].equals(toSupplementaries("and"))) 1735 failCount++; 1736 if (!result[2].equals(toSupplementaries("boo"))) 1737 failCount++; 1738 1739 String source = "0123456789"; 1740 for (int limit=-2; limit<3; limit++) { 1741 for (int x=0; x<10; x++) { 1742 result = source.split(Integer.toString(x), limit); 1743 int expectedLength = limit < 1 ? 2 : limit; 1744 1745 if ((limit == 0) && (x == 9)) { 1746 // expected dropping of "" 1747 if (result.length != 1) 1748 failCount++; 1749 if (!result[0].equals("012345678")) { 1750 failCount++; 1751 } 1752 } else { 1753 if (result.length != expectedLength) { 1754 failCount++; 1755 } 1756 if (!result[0].equals(source.substring(0,x))) { 1757 if (limit != 1) { 1758 failCount++; 1759 } else { 1760 if (!result[0].equals(source.substring(0,10))) { 1761 failCount++; 1762 } 1763 } 1764 } 1765 if (expectedLength > 1) { // Check segment 2 1766 if (!result[1].equals(source.substring(x+1,10))) 1767 failCount++; 1768 } 1769 } 1770 } 1771 } 1772 // Check the case for no match found 1773 for (int limit=-2; limit<3; limit++) { 1774 result = source.split("e", limit); 1775 if (result.length != 1) 1776 failCount++; 1777 if (!result[0].equals(source)) 1778 failCount++; 1779 } 1780 // Check the case for limit == 0, source = ""; 1781 // split() now returns 0-length for empty source "" see #6559590 1782 source = ""; 1783 result = source.split("e", 0); 1784 if (result.length != 0) 1785 failCount++; 1786 1787 // Check both split() and splitAsStraem(), especially for zero-lenth 1788 // input and zero-lenth match cases 1789 String[][] input = new String[][] { 1790 { " ", "Abc Efg Hij" }, // normal non-zero-match 1791 { " ", " Abc Efg Hij" }, // leading empty str for non-zero-match 1792 { " ", "Abc Efg Hij" }, // non-zero-match in the middle 1793 { "(?=\\p{Lu})", "AbcEfgHij" }, // no leading empty str for zero-match 1794 { "(?=\\p{Lu})", "AbcEfg" }, 1795 { "(?=\\p{Lu})", "Abc" }, 1796 { " ", "" }, // zero-length input 1797 { ".*", "" }, 1798 1799 // some tests from PatternStreamTest.java 1800 { "4", "awgqwefg1fefw4vssv1vvv1" }, 1801 { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" }, 1802 { "1", "awgqwefg1fefw4vssv1vvv1" }, 1803 { "1", "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" }, 1804 { "\u56da", "1\u56da23\u56da456\u56da7890" }, 1805 { "\u56da", "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" }, 1806 { "\u56da", "" }, 1807 { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs 1808 { "o", "boo:and:foo" }, 1809 { "o", "booooo:and:fooooo" }, 1810 { "o", "fooooo:" }, 1811 }; 1812 1813 String[][] expected = new String[][] { 1814 { "Abc", "Efg", "Hij" }, 1815 { "", "Abc", "Efg", "Hij" }, 1816 { "Abc", "", "Efg", "Hij" }, 1817 { "Abc", "Efg", "Hij" }, 1818 { "Abc", "Efg" }, 1819 { "Abc" }, 1820 {}, 1821 {}, 1822 1823 { "awgqwefg1fefw", "vssv1vvv1" }, 1824 { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" }, 1825 { "awgqwefg", "fefw4vssv", "vvv" }, 1826 { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" }, 1827 { "1", "23", "456", "7890" }, 1828 { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" }, 1829 {}, 1830 { "This", "is", "testing", "", "with", "different", "separators" }, 1831 { "b", "", ":and:f" }, 1832 { "b", "", "", "", "", ":and:f" }, 1833 { "f", "", "", "", "", ":" }, 1834 }; 1835 for (int i = 0; i < input.length; i++) { 1836 pattern = Pattern.compile(input[i][0]); 1837 if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) 1838 failCount++; 1839 if (!Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(), 1840 expected[i])) 1841 failCount++; 1842 } 1843 report("Split"); 1844 } 1845 1846 private static void negationTest() { 1847 Pattern pattern = Pattern.compile("[\\[@^]+"); 1848 Matcher matcher = pattern.matcher("@@@@[[[[^^^^"); 1849 if (!matcher.find()) 1850 failCount++; 1851 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1852 failCount++; 1853 pattern = Pattern.compile("[@\\[^]+"); 1854 matcher = pattern.matcher("@@@@[[[[^^^^"); 1855 if (!matcher.find()) 1856 failCount++; 1857 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1858 failCount++; 1859 pattern = Pattern.compile("[@\\[^@]+"); 1860 matcher = pattern.matcher("@@@@[[[[^^^^"); 1861 if (!matcher.find()) 1862 failCount++; 1863 if (!matcher.group(0).equals("@@@@[[[[^^^^")) 1864 failCount++; 1865 1866 pattern = Pattern.compile("\\)"); 1867 matcher = pattern.matcher("xxx)xxx"); 1868 if (!matcher.find()) 1869 failCount++; 1870 1871 report("Negation"); 1872 } 1873 1874 private static void ampersandTest() { 1875 Pattern pattern = Pattern.compile("[&@]+"); 1876 check(pattern, "@@@@&&&&", true); 1877 1878 pattern = Pattern.compile("[@&]+"); 1879 check(pattern, "@@@@&&&&", true); 1880 1881 pattern = Pattern.compile("[@\\&]+"); 1882 check(pattern, "@@@@&&&&", true); 1883 1884 report("Ampersand"); 1885 } 1886 1887 private static void octalTest() throws Exception { 1888 Pattern pattern = Pattern.compile("\\u0007"); 1889 Matcher matcher = pattern.matcher("\u0007"); 1890 if (!matcher.matches()) 1891 failCount++; 1892 pattern = Pattern.compile("\\07"); 1893 matcher = pattern.matcher("\u0007"); 1894 if (!matcher.matches()) 1895 failCount++; 1896 pattern = Pattern.compile("\\007"); 1897 matcher = pattern.matcher("\u0007"); 1898 if (!matcher.matches()) 1899 failCount++; 1900 pattern = Pattern.compile("\\0007"); 1901 matcher = pattern.matcher("\u0007"); 1902 if (!matcher.matches()) 1903 failCount++; 1904 pattern = Pattern.compile("\\040"); 1905 matcher = pattern.matcher("\u0020"); 1906 if (!matcher.matches()) 1907 failCount++; 1908 pattern = Pattern.compile("\\0403"); 1909 matcher = pattern.matcher("\u00203"); 1910 if (!matcher.matches()) 1911 failCount++; 1912 pattern = Pattern.compile("\\0103"); 1913 matcher = pattern.matcher("\u0043"); 1914 if (!matcher.matches()) 1915 failCount++; 1916 1917 report("Octal"); 1918 } 1919 1920 private static void longPatternTest() throws Exception { 1921 try { 1922 Pattern pattern = Pattern.compile( 1923 "a 32-character-long pattern xxxx"); 1924 pattern = Pattern.compile("a 33-character-long pattern xxxxx"); 1925 pattern = Pattern.compile("a thirty four character long regex"); 1926 StringBuffer patternToBe = new StringBuffer(101); 1927 for (int i=0; i<100; i++) 1928 patternToBe.append((char)(97 + i%26)); 1929 pattern = Pattern.compile(patternToBe.toString()); 1930 } catch (PatternSyntaxException e) { 1931 failCount++; 1932 } 1933 1934 // Supplementary character test 1935 try { 1936 Pattern pattern = Pattern.compile( 1937 toSupplementaries("a 32-character-long pattern xxxx")); 1938 pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx")); 1939 pattern = Pattern.compile(toSupplementaries("a thirty four character long regex")); 1940 StringBuffer patternToBe = new StringBuffer(101*2); 1941 for (int i=0; i<100; i++) 1942 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT 1943 + 97 + i%26)); 1944 pattern = Pattern.compile(patternToBe.toString()); 1945 } catch (PatternSyntaxException e) { 1946 failCount++; 1947 } 1948 report("LongPattern"); 1949 } 1950 1951 private static void group0Test() throws Exception { 1952 Pattern pattern = Pattern.compile("(tes)ting"); 1953 Matcher matcher = pattern.matcher("testing"); 1954 check(matcher, "testing"); 1955 1956 matcher.reset("testing"); 1957 if (matcher.lookingAt()) { 1958 if (!matcher.group(0).equals("testing")) 1959 failCount++; 1960 } else { 1961 failCount++; 1962 } 1963 1964 matcher.reset("testing"); 1965 if (matcher.matches()) { 1966 if (!matcher.group(0).equals("testing")) 1967 failCount++; 1968 } else { 1969 failCount++; 1970 } 1971 1972 pattern = Pattern.compile("(tes)ting"); 1973 matcher = pattern.matcher("testing"); 1974 if (matcher.lookingAt()) { 1975 if (!matcher.group(0).equals("testing")) 1976 failCount++; 1977 } else { 1978 failCount++; 1979 } 1980 1981 pattern = Pattern.compile("^(tes)ting"); 1982 matcher = pattern.matcher("testing"); 1983 if (matcher.matches()) { 1984 if (!matcher.group(0).equals("testing")) 1985 failCount++; 1986 } else { 1987 failCount++; 1988 } 1989 1990 // Supplementary character test 1991 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 1992 matcher = pattern.matcher(toSupplementaries("testing")); 1993 check(matcher, toSupplementaries("testing")); 1994 1995 matcher.reset(toSupplementaries("testing")); 1996 if (matcher.lookingAt()) { 1997 if (!matcher.group(0).equals(toSupplementaries("testing"))) 1998 failCount++; 1999 } else { 2000 failCount++; 2001 } 2002 2003 matcher.reset(toSupplementaries("testing")); 2004 if (matcher.matches()) { 2005 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2006 failCount++; 2007 } else { 2008 failCount++; 2009 } 2010 2011 pattern = Pattern.compile(toSupplementaries("(tes)ting")); 2012 matcher = pattern.matcher(toSupplementaries("testing")); 2013 if (matcher.lookingAt()) { 2014 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2015 failCount++; 2016 } else { 2017 failCount++; 2018 } 2019 2020 pattern = Pattern.compile(toSupplementaries("^(tes)ting")); 2021 matcher = pattern.matcher(toSupplementaries("testing")); 2022 if (matcher.matches()) { 2023 if (!matcher.group(0).equals(toSupplementaries("testing"))) 2024 failCount++; 2025 } else { 2026 failCount++; 2027 } 2028 2029 report("Group0"); 2030 } 2031 2032 private static void findIntTest() throws Exception { 2033 Pattern p = Pattern.compile("blah"); 2034 Matcher m = p.matcher("zzzzblahzzzzzblah"); 2035 boolean result = m.find(2); 2036 if (!result) 2037 failCount++; 2038 2039 p = Pattern.compile("$"); 2040 m = p.matcher("1234567890"); 2041 result = m.find(10); 2042 if (!result) 2043 failCount++; 2044 try { 2045 result = m.find(11); 2046 failCount++; 2047 } catch (IndexOutOfBoundsException e) { 2048 // correct result 2049 } 2050 2051 // Supplementary character test 2052 p = Pattern.compile(toSupplementaries("blah")); 2053 m = p.matcher(toSupplementaries("zzzzblahzzzzzblah")); 2054 result = m.find(2); 2055 if (!result) 2056 failCount++; 2057 2058 report("FindInt"); 2059 } 2060 2061 private static void emptyPatternTest() throws Exception { 2062 Pattern p = Pattern.compile(""); 2063 Matcher m = p.matcher("foo"); 2064 2065 // Should find empty pattern at beginning of input 2066 boolean result = m.find(); 2067 if (result != true) 2068 failCount++; 2069 if (m.start() != 0) 2070 failCount++; 2071 2072 // Should not match entire input if input is not empty 2073 m.reset(); 2074 result = m.matches(); 2075 if (result == true) 2076 failCount++; 2077 2078 try { 2079 m.start(0); 2080 failCount++; 2081 } catch (IllegalStateException e) { 2082 // Correct result 2083 } 2084 2085 // Should match entire input if input is empty 2086 m.reset(""); 2087 result = m.matches(); 2088 if (result != true) 2089 failCount++; 2090 2091 result = Pattern.matches("", ""); 2092 if (result != true) 2093 failCount++; 2094 2095 result = Pattern.matches("", "foo"); 2096 if (result == true) 2097 failCount++; 2098 report("EmptyPattern"); 2099 } 2100 2101 private static void charClassTest() throws Exception { 2102 Pattern pattern = Pattern.compile("blah[ab]]blech"); 2103 check(pattern, "blahb]blech", true); 2104 2105 pattern = Pattern.compile("[abc[def]]"); 2106 check(pattern, "b", true); 2107 2108 // Supplementary character tests 2109 pattern = Pattern.compile(toSupplementaries("blah[ab]]blech")); 2110 check(pattern, toSupplementaries("blahb]blech"), true); 2111 2112 pattern = Pattern.compile(toSupplementaries("[abc[def]]")); 2113 check(pattern, toSupplementaries("b"), true); 2114 2115 try { 2116 // u00ff when UNICODE_CASE 2117 pattern = Pattern.compile("[ab\u00ffcd]", 2118 Pattern.CASE_INSENSITIVE| 2119 Pattern.UNICODE_CASE); 2120 check(pattern, "ab\u00ffcd", true); 2121 check(pattern, "Ab\u0178Cd", true); 2122 2123 // u00b5 when UNICODE_CASE 2124 pattern = Pattern.compile("[ab\u00b5cd]", 2125 Pattern.CASE_INSENSITIVE| 2126 Pattern.UNICODE_CASE); 2127 check(pattern, "ab\u00b5cd", true); 2128 check(pattern, "Ab\u039cCd", true); 2129 } catch (Exception e) { failCount++; } 2130 2131 /* Special cases 2132 (1)LatinSmallLetterLongS u+017f 2133 (2)LatinSmallLetterDotlessI u+0131 2134 (3)LatineCapitalLetterIWithDotAbove u+0130 2135 (4)KelvinSign u+212a 2136 (5)AngstromSign u+212b 2137 */ 2138 int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; 2139 pattern = Pattern.compile("[sik\u00c5]+", flags); 2140 if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches()) 2141 failCount++; 2142 2143 report("CharClass"); 2144 } 2145 2146 private static void caretTest() throws Exception { 2147 Pattern pattern = Pattern.compile("\\w*"); 2148 Matcher matcher = pattern.matcher("a#bc#def##g"); 2149 check(matcher, "a"); 2150 check(matcher, ""); 2151 check(matcher, "bc"); 2152 check(matcher, ""); 2153 check(matcher, "def"); 2154 check(matcher, ""); 2155 check(matcher, ""); 2156 check(matcher, "g"); 2157 check(matcher, ""); 2158 if (matcher.find()) 2159 failCount++; 2160 2161 pattern = Pattern.compile("^\\w*"); 2162 matcher = pattern.matcher("a#bc#def##g"); 2163 check(matcher, "a"); 2164 if (matcher.find()) 2165 failCount++; 2166 2167 pattern = Pattern.compile("\\w"); 2168 matcher = pattern.matcher("abc##x"); 2169 check(matcher, "a"); 2170 check(matcher, "b"); 2171 check(matcher, "c"); 2172 check(matcher, "x"); 2173 if (matcher.find()) 2174 failCount++; 2175 2176 pattern = Pattern.compile("^\\w"); 2177 matcher = pattern.matcher("abc##x"); 2178 check(matcher, "a"); 2179 if (matcher.find()) 2180 failCount++; 2181 2182 pattern = Pattern.compile("\\A\\p{Alpha}{3}"); 2183 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2184 check(matcher, "abc"); 2185 if (matcher.find()) 2186 failCount++; 2187 2188 pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE); 2189 matcher = pattern.matcher("abcdef-ghi\njklmno"); 2190 check(matcher, "abc"); 2191 check(matcher, "jkl"); 2192 if (matcher.find()) 2193 failCount++; 2194 2195 pattern = Pattern.compile("^", Pattern.MULTILINE); 2196 matcher = pattern.matcher("this is some text"); 2197 String result = matcher.replaceAll("X"); 2198 if (!result.equals("Xthis is some text")) 2199 failCount++; 2200 2201 pattern = Pattern.compile("^"); 2202 matcher = pattern.matcher("this is some text"); 2203 result = matcher.replaceAll("X"); 2204 if (!result.equals("Xthis is some text")) 2205 failCount++; 2206 2207 pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES); 2208 matcher = pattern.matcher("this is some text\n"); 2209 result = matcher.replaceAll("X"); 2210 if (!result.equals("Xthis is some text\n")) 2211 failCount++; 2212 2213 report("Caret"); 2214 } 2215 2216 private static void groupCaptureTest() throws Exception { 2217 // Independent group 2218 Pattern pattern = Pattern.compile("x+(?>y+)z+"); 2219 Matcher matcher = pattern.matcher("xxxyyyzzz"); 2220 matcher.find(); 2221 try { 2222 String blah = matcher.group(1); 2223 failCount++; 2224 } catch (IndexOutOfBoundsException ioobe) { 2225 // Good result 2226 } 2227 // Pure group 2228 pattern = Pattern.compile("x+(?:y+)z+"); 2229 matcher = pattern.matcher("xxxyyyzzz"); 2230 matcher.find(); 2231 try { 2232 String blah = matcher.group(1); 2233 failCount++; 2234 } catch (IndexOutOfBoundsException ioobe) { 2235 // Good result 2236 } 2237 2238 // Supplementary character tests 2239 // Independent group 2240 pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+")); 2241 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2242 matcher.find(); 2243 try { 2244 String blah = matcher.group(1); 2245 failCount++; 2246 } catch (IndexOutOfBoundsException ioobe) { 2247 // Good result 2248 } 2249 // Pure group 2250 pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+")); 2251 matcher = pattern.matcher(toSupplementaries("xxxyyyzzz")); 2252 matcher.find(); 2253 try { 2254 String blah = matcher.group(1); 2255 failCount++; 2256 } catch (IndexOutOfBoundsException ioobe) { 2257 // Good result 2258 } 2259 2260 report("GroupCapture"); 2261 } 2262 2263 private static void backRefTest() throws Exception { 2264 Pattern pattern = Pattern.compile("(a*)bc\\1"); 2265 check(pattern, "zzzaabcazzz", true); 2266 2267 pattern = Pattern.compile("(a*)bc\\1"); 2268 check(pattern, "zzzaabcaazzz", true); 2269 2270 pattern = Pattern.compile("(abc)(def)\\1"); 2271 check(pattern, "abcdefabc", true); 2272 2273 pattern = Pattern.compile("(abc)(def)\\3"); 2274 check(pattern, "abcdefabc", false); 2275 2276 try { 2277 for (int i = 1; i < 10; i++) { 2278 // Make sure backref 1-9 are always accepted 2279 pattern = Pattern.compile("abcdef\\" + i); 2280 // and fail to match if the target group does not exit 2281 check(pattern, "abcdef", false); 2282 } 2283 } catch(PatternSyntaxException e) { 2284 failCount++; 2285 } 2286 2287 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"); 2288 check(pattern, "abcdefghija", false); 2289 check(pattern, "abcdefghija1", true); 2290 2291 pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"); 2292 check(pattern, "abcdefghijkk", true); 2293 2294 pattern = Pattern.compile("(a)bcdefghij\\11"); 2295 check(pattern, "abcdefghija1", true); 2296 2297 // Supplementary character tests 2298 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2299 check(pattern, toSupplementaries("zzzaabcazzz"), true); 2300 2301 pattern = Pattern.compile(toSupplementaries("(a*)bc\\1")); 2302 check(pattern, toSupplementaries("zzzaabcaazzz"), true); 2303 2304 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1")); 2305 check(pattern, toSupplementaries("abcdefabc"), true); 2306 2307 pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3")); 2308 check(pattern, toSupplementaries("abcdefabc"), false); 2309 2310 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11")); 2311 check(pattern, toSupplementaries("abcdefghija"), false); 2312 check(pattern, toSupplementaries("abcdefghija1"), true); 2313 2314 pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")); 2315 check(pattern, toSupplementaries("abcdefghijkk"), true); 2316 2317 report("BackRef"); 2318 } 2319 2320 /** 2321 * Unicode Technical Report #18, section 2.6 End of Line 2322 * There is no empty line to be matched in the sequence \u000D\u000A 2323 * but there is an empty line in the sequence \u000A\u000D. 2324 */ 2325 private static void anchorTest() throws Exception { 2326 Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE); 2327 Matcher m = p.matcher("blah1\r\nblah2"); 2328 m.find(); 2329 m.find(); 2330 if (!m.group().equals("blah2")) 2331 failCount++; 2332 2333 m.reset("blah1\n\rblah2"); 2334 m.find(); 2335 m.find(); 2336 m.find(); 2337 if (!m.group().equals("blah2")) 2338 failCount++; 2339 2340 // Test behavior of $ with \r\n at end of input 2341 p = Pattern.compile(".+$"); 2342 m = p.matcher("blah1\r\n"); 2343 if (!m.find()) 2344 failCount++; 2345 if (!m.group().equals("blah1")) 2346 failCount++; 2347 if (m.find()) 2348 failCount++; 2349 2350 // Test behavior of $ with \r\n at end of input in multiline 2351 p = Pattern.compile(".+$", Pattern.MULTILINE); 2352 m = p.matcher("blah1\r\n"); 2353 if (!m.find()) 2354 failCount++; 2355 if (m.find()) 2356 failCount++; 2357 2358 // Test for $ recognition of \u0085 for bug 4527731 2359 p = Pattern.compile(".+$", Pattern.MULTILINE); 2360 m = p.matcher("blah1\u0085"); 2361 if (!m.find()) 2362 failCount++; 2363 2364 // Supplementary character test 2365 p = Pattern.compile("^.*$", Pattern.MULTILINE); 2366 m = p.matcher(toSupplementaries("blah1\r\nblah2")); 2367 m.find(); 2368 m.find(); 2369 if (!m.group().equals(toSupplementaries("blah2"))) 2370 failCount++; 2371 2372 m.reset(toSupplementaries("blah1\n\rblah2")); 2373 m.find(); 2374 m.find(); 2375 m.find(); 2376 if (!m.group().equals(toSupplementaries("blah2"))) 2377 failCount++; 2378 2379 // Test behavior of $ with \r\n at end of input 2380 p = Pattern.compile(".+$"); 2381 m = p.matcher(toSupplementaries("blah1\r\n")); 2382 if (!m.find()) 2383 failCount++; 2384 if (!m.group().equals(toSupplementaries("blah1"))) 2385 failCount++; 2386 if (m.find()) 2387 failCount++; 2388 2389 // Test behavior of $ with \r\n at end of input in multiline 2390 p = Pattern.compile(".+$", Pattern.MULTILINE); 2391 m = p.matcher(toSupplementaries("blah1\r\n")); 2392 if (!m.find()) 2393 failCount++; 2394 if (m.find()) 2395 failCount++; 2396 2397 // Test for $ recognition of \u0085 for bug 4527731 2398 p = Pattern.compile(".+$", Pattern.MULTILINE); 2399 m = p.matcher(toSupplementaries("blah1\u0085")); 2400 if (!m.find()) 2401 failCount++; 2402 2403 report("Anchors"); 2404 } 2405 2406 /** 2407 * A basic sanity test of Matcher.lookingAt(). 2408 */ 2409 private static void lookingAtTest() throws Exception { 2410 Pattern p = Pattern.compile("(ab)(c*)"); 2411 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2412 2413 if (!m.lookingAt()) 2414 failCount++; 2415 2416 if (!m.group().equals(m.group(0))) 2417 failCount++; 2418 2419 m = p.matcher("zzzabccczzzabcczzzabccczzz"); 2420 if (m.lookingAt()) 2421 failCount++; 2422 2423 // Supplementary character test 2424 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2425 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2426 2427 if (!m.lookingAt()) 2428 failCount++; 2429 2430 if (!m.group().equals(m.group(0))) 2431 failCount++; 2432 2433 m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2434 if (m.lookingAt()) 2435 failCount++; 2436 2437 report("Looking At"); 2438 } 2439 2440 /** 2441 * A basic sanity test of Matcher.matches(). 2442 */ 2443 private static void matchesTest() throws Exception { 2444 // matches() 2445 Pattern p = Pattern.compile("ulb(c*)"); 2446 Matcher m = p.matcher("ulbcccccc"); 2447 if (!m.matches()) 2448 failCount++; 2449 2450 // find() but not matches() 2451 m.reset("zzzulbcccccc"); 2452 if (m.matches()) 2453 failCount++; 2454 2455 // lookingAt() but not matches() 2456 m.reset("ulbccccccdef"); 2457 if (m.matches()) 2458 failCount++; 2459 2460 // matches() 2461 p = Pattern.compile("a|ad"); 2462 m = p.matcher("ad"); 2463 if (!m.matches()) 2464 failCount++; 2465 2466 // Supplementary character test 2467 // matches() 2468 p = Pattern.compile(toSupplementaries("ulb(c*)")); 2469 m = p.matcher(toSupplementaries("ulbcccccc")); 2470 if (!m.matches()) 2471 failCount++; 2472 2473 // find() but not matches() 2474 m.reset(toSupplementaries("zzzulbcccccc")); 2475 if (m.matches()) 2476 failCount++; 2477 2478 // lookingAt() but not matches() 2479 m.reset(toSupplementaries("ulbccccccdef")); 2480 if (m.matches()) 2481 failCount++; 2482 2483 // matches() 2484 p = Pattern.compile(toSupplementaries("a|ad")); 2485 m = p.matcher(toSupplementaries("ad")); 2486 if (!m.matches()) 2487 failCount++; 2488 2489 report("Matches"); 2490 } 2491 2492 /** 2493 * A basic sanity test of Pattern.matches(). 2494 */ 2495 private static void patternMatchesTest() throws Exception { 2496 // matches() 2497 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2498 toSupplementaries("ulbcccccc"))) 2499 failCount++; 2500 2501 // find() but not matches() 2502 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2503 toSupplementaries("zzzulbcccccc"))) 2504 failCount++; 2505 2506 // lookingAt() but not matches() 2507 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2508 toSupplementaries("ulbccccccdef"))) 2509 failCount++; 2510 2511 // Supplementary character test 2512 // matches() 2513 if (!Pattern.matches(toSupplementaries("ulb(c*)"), 2514 toSupplementaries("ulbcccccc"))) 2515 failCount++; 2516 2517 // find() but not matches() 2518 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2519 toSupplementaries("zzzulbcccccc"))) 2520 failCount++; 2521 2522 // lookingAt() but not matches() 2523 if (Pattern.matches(toSupplementaries("ulb(c*)"), 2524 toSupplementaries("ulbccccccdef"))) 2525 failCount++; 2526 2527 report("Pattern Matches"); 2528 } 2529 2530 /** 2531 * Canonical equivalence testing. Tests the ability of the engine 2532 * to match sequences that are not explicitly specified in the 2533 * pattern when they are considered equivalent by the Unicode Standard. 2534 */ 2535 private static void ceTest() throws Exception { 2536 // Decomposed char outside char classes 2537 Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ); 2538 Matcher m = p.matcher("test\u00e5"); 2539 if (!m.matches()) 2540 failCount++; 2541 2542 m.reset("testa\u030a"); 2543 if (!m.matches()) 2544 failCount++; 2545 2546 // Composed char outside char classes 2547 p = Pattern.compile("test\u00e5", Pattern.CANON_EQ); 2548 m = p.matcher("test\u00e5"); 2549 if (!m.matches()) 2550 failCount++; 2551 2552 m.reset("testa\u030a"); 2553 if (!m.find()) 2554 failCount++; 2555 2556 // Decomposed char inside a char class 2557 p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ); 2558 m = p.matcher("test\u00e5"); 2559 if (!m.find()) 2560 failCount++; 2561 2562 m.reset("testa\u030a"); 2563 if (!m.find()) 2564 failCount++; 2565 2566 // Composed char inside a char class 2567 p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ); 2568 m = p.matcher("test\u00e5"); 2569 if (!m.find()) 2570 failCount++; 2571 2572 m.reset("testa\u0300"); 2573 if (!m.find()) 2574 failCount++; 2575 2576 m.reset("testa\u030a"); 2577 if (!m.find()) 2578 failCount++; 2579 2580 // Marks that cannot legally change order and be equivalent 2581 p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ); 2582 check(p, "testa\u0308\u0300", true); 2583 check(p, "testa\u0300\u0308", false); 2584 2585 // Marks that can legally change order and be equivalent 2586 p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ); 2587 check(p, "testa\u0308\u0323", true); 2588 check(p, "testa\u0323\u0308", true); 2589 2590 // Test all equivalences of the sequence a\u0308\u0323\u0300 2591 p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ); 2592 check(p, "testa\u0308\u0323\u0300", true); 2593 check(p, "testa\u0323\u0308\u0300", true); 2594 check(p, "testa\u0308\u0300\u0323", true); 2595 check(p, "test\u00e4\u0323\u0300", true); 2596 check(p, "test\u00e4\u0300\u0323", true); 2597 2598 /* 2599 * The following canonical equivalence tests don't work. Bug id: 4916384. 2600 * 2601 // Decomposed hangul (jamos) 2602 p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ); 2603 m = p.matcher("\u1100\u1161"); 2604 if (!m.matches()) 2605 failCount++; 2606 2607 m.reset("\uac00"); 2608 if (!m.matches()) 2609 failCount++; 2610 2611 // Composed hangul 2612 p = Pattern.compile("\uac00", Pattern.CANON_EQ); 2613 m = p.matcher("\u1100\u1161"); 2614 if (!m.matches()) 2615 failCount++; 2616 2617 m.reset("\uac00"); 2618 if (!m.matches()) 2619 failCount++; 2620 2621 // Decomposed supplementary outside char classes 2622 p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ); 2623 m = p.matcher("test\ud834\uddc0"); 2624 if (!m.matches()) 2625 failCount++; 2626 2627 m.reset("test\ud834\uddbc\ud834\udd6f"); 2628 if (!m.matches()) 2629 failCount++; 2630 2631 // Composed supplementary outside char classes 2632 p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ); 2633 m.reset("test\ud834\uddbc\ud834\udd6f"); 2634 if (!m.matches()) 2635 failCount++; 2636 2637 m = p.matcher("test\ud834\uddc0"); 2638 if (!m.matches()) 2639 failCount++; 2640 2641 */ 2642 2643 report("Canonical Equivalence"); 2644 } 2645 2646 /** 2647 * A basic sanity test of Matcher.replaceAll(). 2648 */ 2649 private static void globalSubstitute() throws Exception { 2650 // Global substitution with a literal 2651 Pattern p = Pattern.compile("(ab)(c*)"); 2652 Matcher m = p.matcher("abccczzzabcczzzabccc"); 2653 if (!m.replaceAll("test").equals("testzzztestzzztest")) 2654 failCount++; 2655 2656 m.reset("zzzabccczzzabcczzzabccczzz"); 2657 if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz")) 2658 failCount++; 2659 2660 // Global substitution with groups 2661 m.reset("zzzabccczzzabcczzzabccczzz"); 2662 String result = m.replaceAll("$1"); 2663 if (!result.equals("zzzabzzzabzzzabzzz")) 2664 failCount++; 2665 2666 // Supplementary character test 2667 // Global substitution with a literal 2668 p = Pattern.compile(toSupplementaries("(ab)(c*)")); 2669 m = p.matcher(toSupplementaries("abccczzzabcczzzabccc")); 2670 if (!m.replaceAll(toSupplementaries("test")). 2671 equals(toSupplementaries("testzzztestzzztest"))) 2672 failCount++; 2673 2674 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2675 if (!m.replaceAll(toSupplementaries("test")). 2676 equals(toSupplementaries("zzztestzzztestzzztestzzz"))) 2677 failCount++; 2678 2679 // Global substitution with groups 2680 m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz")); 2681 result = m.replaceAll("$1"); 2682 if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) 2683 failCount++; 2684 2685 report("Global Substitution"); 2686 } 2687 2688 /** 2689 * Tests the usage of Matcher.appendReplacement() with literal 2690 * and group substitutions. 2691 */ 2692 private static void stringbufferSubstitute() throws Exception { 2693 // SB substitution with literal 2694 String blah = "zzzblahzzz"; 2695 Pattern p = Pattern.compile("blah"); 2696 Matcher m = p.matcher(blah); 2697 StringBuffer result = new StringBuffer(); 2698 try { 2699 m.appendReplacement(result, "blech"); 2700 failCount++; 2701 } catch (IllegalStateException e) { 2702 } 2703 m.find(); 2704 m.appendReplacement(result, "blech"); 2705 if (!result.toString().equals("zzzblech")) 2706 failCount++; 2707 2708 m.appendTail(result); 2709 if (!result.toString().equals("zzzblechzzz")) 2710 failCount++; 2711 2712 // SB substitution with groups 2713 blah = "zzzabcdzzz"; 2714 p = Pattern.compile("(ab)(cd)*"); 2715 m = p.matcher(blah); 2716 result = new StringBuffer(); 2717 try { 2718 m.appendReplacement(result, "$1"); 2719 failCount++; 2720 } catch (IllegalStateException e) { 2721 } 2722 m.find(); 2723 m.appendReplacement(result, "$1"); 2724 if (!result.toString().equals("zzzab")) 2725 failCount++; 2726 2727 m.appendTail(result); 2728 if (!result.toString().equals("zzzabzzz")) 2729 failCount++; 2730 2731 // SB substitution with 3 groups 2732 blah = "zzzabcdcdefzzz"; 2733 p = Pattern.compile("(ab)(cd)*(ef)"); 2734 m = p.matcher(blah); 2735 result = new StringBuffer(); 2736 try { 2737 m.appendReplacement(result, "$1w$2w$3"); 2738 failCount++; 2739 } catch (IllegalStateException e) { 2740 } 2741 m.find(); 2742 m.appendReplacement(result, "$1w$2w$3"); 2743 if (!result.toString().equals("zzzabwcdwef")) 2744 failCount++; 2745 2746 m.appendTail(result); 2747 if (!result.toString().equals("zzzabwcdwefzzz")) 2748 failCount++; 2749 2750 // SB substitution with groups and three matches 2751 // skipping middle match 2752 blah = "zzzabcdzzzabcddzzzabcdzzz"; 2753 p = Pattern.compile("(ab)(cd*)"); 2754 m = p.matcher(blah); 2755 result = new StringBuffer(); 2756 try { 2757 m.appendReplacement(result, "$1"); 2758 failCount++; 2759 } catch (IllegalStateException e) { 2760 } 2761 m.find(); 2762 m.appendReplacement(result, "$1"); 2763 if (!result.toString().equals("zzzab")) 2764 failCount++; 2765 2766 m.find(); 2767 m.find(); 2768 m.appendReplacement(result, "$2"); 2769 if (!result.toString().equals("zzzabzzzabcddzzzcd")) 2770 failCount++; 2771 2772 m.appendTail(result); 2773 if (!result.toString().equals("zzzabzzzabcddzzzcdzzz")) 2774 failCount++; 2775 2776 // Check to make sure escaped $ is ignored 2777 blah = "zzzabcdcdefzzz"; 2778 p = Pattern.compile("(ab)(cd)*(ef)"); 2779 m = p.matcher(blah); 2780 result = new StringBuffer(); 2781 m.find(); 2782 m.appendReplacement(result, "$1w\\$2w$3"); 2783 if (!result.toString().equals("zzzabw$2wef")) 2784 failCount++; 2785 2786 m.appendTail(result); 2787 if (!result.toString().equals("zzzabw$2wefzzz")) 2788 failCount++; 2789 2790 // Check to make sure a reference to nonexistent group causes error 2791 blah = "zzzabcdcdefzzz"; 2792 p = Pattern.compile("(ab)(cd)*(ef)"); 2793 m = p.matcher(blah); 2794 result = new StringBuffer(); 2795 m.find(); 2796 try { 2797 m.appendReplacement(result, "$1w$5w$3"); 2798 failCount++; 2799 } catch (IndexOutOfBoundsException ioobe) { 2800 // Correct result 2801 } 2802 2803 // Check double digit group references 2804 blah = "zzz123456789101112zzz"; 2805 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2806 m = p.matcher(blah); 2807 result = new StringBuffer(); 2808 m.find(); 2809 m.appendReplacement(result, "$1w$11w$3"); 2810 if (!result.toString().equals("zzz1w11w3")) 2811 failCount++; 2812 2813 // Check to make sure it backs off $15 to $1 if only three groups 2814 blah = "zzzabcdcdefzzz"; 2815 p = Pattern.compile("(ab)(cd)*(ef)"); 2816 m = p.matcher(blah); 2817 result = new StringBuffer(); 2818 m.find(); 2819 m.appendReplacement(result, "$1w$15w$3"); 2820 if (!result.toString().equals("zzzabwab5wef")) 2821 failCount++; 2822 2823 2824 // Supplementary character test 2825 // SB substitution with literal 2826 blah = toSupplementaries("zzzblahzzz"); 2827 p = Pattern.compile(toSupplementaries("blah")); 2828 m = p.matcher(blah); 2829 result = new StringBuffer(); 2830 try { 2831 m.appendReplacement(result, toSupplementaries("blech")); 2832 failCount++; 2833 } catch (IllegalStateException e) { 2834 } 2835 m.find(); 2836 m.appendReplacement(result, toSupplementaries("blech")); 2837 if (!result.toString().equals(toSupplementaries("zzzblech"))) 2838 failCount++; 2839 2840 m.appendTail(result); 2841 if (!result.toString().equals(toSupplementaries("zzzblechzzz"))) 2842 failCount++; 2843 2844 // SB substitution with groups 2845 blah = toSupplementaries("zzzabcdzzz"); 2846 p = Pattern.compile(toSupplementaries("(ab)(cd)*")); 2847 m = p.matcher(blah); 2848 result = new StringBuffer(); 2849 try { 2850 m.appendReplacement(result, "$1"); 2851 failCount++; 2852 } catch (IllegalStateException e) { 2853 } 2854 m.find(); 2855 m.appendReplacement(result, "$1"); 2856 if (!result.toString().equals(toSupplementaries("zzzab"))) 2857 failCount++; 2858 2859 m.appendTail(result); 2860 if (!result.toString().equals(toSupplementaries("zzzabzzz"))) 2861 failCount++; 2862 2863 // SB substitution with 3 groups 2864 blah = toSupplementaries("zzzabcdcdefzzz"); 2865 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2866 m = p.matcher(blah); 2867 result = new StringBuffer(); 2868 try { 2869 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2870 failCount++; 2871 } catch (IllegalStateException e) { 2872 } 2873 m.find(); 2874 m.appendReplacement(result, toSupplementaries("$1w$2w$3")); 2875 if (!result.toString().equals(toSupplementaries("zzzabwcdwef"))) 2876 failCount++; 2877 2878 m.appendTail(result); 2879 if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz"))) 2880 failCount++; 2881 2882 // SB substitution with groups and three matches 2883 // skipping middle match 2884 blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz"); 2885 p = Pattern.compile(toSupplementaries("(ab)(cd*)")); 2886 m = p.matcher(blah); 2887 result = new StringBuffer(); 2888 try { 2889 m.appendReplacement(result, "$1"); 2890 failCount++; 2891 } catch (IllegalStateException e) { 2892 } 2893 m.find(); 2894 m.appendReplacement(result, "$1"); 2895 if (!result.toString().equals(toSupplementaries("zzzab"))) 2896 failCount++; 2897 2898 m.find(); 2899 m.find(); 2900 m.appendReplacement(result, "$2"); 2901 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd"))) 2902 failCount++; 2903 2904 m.appendTail(result); 2905 if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz"))) 2906 failCount++; 2907 2908 // Check to make sure escaped $ is ignored 2909 blah = toSupplementaries("zzzabcdcdefzzz"); 2910 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2911 m = p.matcher(blah); 2912 result = new StringBuffer(); 2913 m.find(); 2914 m.appendReplacement(result, toSupplementaries("$1w\\$2w$3")); 2915 if (!result.toString().equals(toSupplementaries("zzzabw$2wef"))) 2916 failCount++; 2917 2918 m.appendTail(result); 2919 if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz"))) 2920 failCount++; 2921 2922 // Check to make sure a reference to nonexistent group causes error 2923 blah = toSupplementaries("zzzabcdcdefzzz"); 2924 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2925 m = p.matcher(blah); 2926 result = new StringBuffer(); 2927 m.find(); 2928 try { 2929 m.appendReplacement(result, toSupplementaries("$1w$5w$3")); 2930 failCount++; 2931 } catch (IndexOutOfBoundsException ioobe) { 2932 // Correct result 2933 } 2934 2935 // Check double digit group references 2936 blah = toSupplementaries("zzz123456789101112zzz"); 2937 p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)"); 2938 m = p.matcher(blah); 2939 result = new StringBuffer(); 2940 m.find(); 2941 m.appendReplacement(result, toSupplementaries("$1w$11w$3")); 2942 if (!result.toString().equals(toSupplementaries("zzz1w11w3"))) 2943 failCount++; 2944 2945 // Check to make sure it backs off $15 to $1 if only three groups 2946 blah = toSupplementaries("zzzabcdcdefzzz"); 2947 p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)")); 2948 m = p.matcher(blah); 2949 result = new StringBuffer(); 2950 m.find(); 2951 m.appendReplacement(result, toSupplementaries("$1w$15w$3")); 2952 if (!result.toString().equals(toSupplementaries("zzzabwab5wef"))) 2953 failCount++; 2954 2955 // Check nothing has been appended into the output buffer if 2956 // the replacement string triggers IllegalArgumentException. 2957 p = Pattern.compile("(abc)"); 2958 m = p.matcher("abcd"); 2959 result = new StringBuffer(); 2960 m.find(); 2961 try { 2962 m.appendReplacement(result, ("xyz$g")); 2963 failCount++; 2964 } catch (IllegalArgumentException iae) { 2965 if (result.length() != 0) 2966 failCount++; 2967 } 2968 2969 report("SB Substitution"); 2970 } 2971 2972 /* 2973 * 5 groups of characters are created to make a substitution string. 2974 * A base string will be created including random lead chars, the 2975 * substitution string, and random trailing chars. 2976 * A pattern containing the 5 groups is searched for and replaced with: 2977 * random group + random string + random group. 2978 * The results are checked for correctness. 2979 */ 2980 private static void substitutionBasher() { 2981 for (int runs = 0; runs<1000; runs++) { 2982 // Create a base string to work in 2983 int leadingChars = generator.nextInt(10); 2984 StringBuffer baseBuffer = new StringBuffer(100); 2985 String leadingString = getRandomAlphaString(leadingChars); 2986 baseBuffer.append(leadingString); 2987 2988 // Create 5 groups of random number of random chars 2989 // Create the string to substitute 2990 // Create the pattern string to search for 2991 StringBuffer bufferToSub = new StringBuffer(25); 2992 StringBuffer bufferToPat = new StringBuffer(50); 2993 String[] groups = new String[5]; 2994 for(int i=0; i<5; i++) { 2995 int aGroupSize = generator.nextInt(5)+1; 2996 groups[i] = getRandomAlphaString(aGroupSize); 2997 bufferToSub.append(groups[i]); 2998 bufferToPat.append('('); 2999 bufferToPat.append(groups[i]); 3000 bufferToPat.append(')'); 3001 } 3002 String stringToSub = bufferToSub.toString(); 3003 String pattern = bufferToPat.toString(); 3004 3005 // Place sub string into working string at random index 3006 baseBuffer.append(stringToSub); 3007 3008 // Append random chars to end 3009 int trailingChars = generator.nextInt(10); 3010 String trailingString = getRandomAlphaString(trailingChars); 3011 baseBuffer.append(trailingString); 3012 String baseString = baseBuffer.toString(); 3013 3014 // Create test pattern and matcher 3015 Pattern p = Pattern.compile(pattern); 3016 Matcher m = p.matcher(baseString); 3017 3018 // Reject candidate if pattern happens to start early 3019 m.find(); 3020 if (m.start() < leadingChars) 3021 continue; 3022 3023 // Reject candidate if more than one match 3024 if (m.find()) 3025 continue; 3026 3027 // Construct a replacement string with : 3028 // random group + random string + random group 3029 StringBuffer bufferToRep = new StringBuffer(); 3030 int groupIndex1 = generator.nextInt(5); 3031 bufferToRep.append("$" + (groupIndex1 + 1)); 3032 String randomMidString = getRandomAlphaString(5); 3033 bufferToRep.append(randomMidString); 3034 int groupIndex2 = generator.nextInt(5); 3035 bufferToRep.append("$" + (groupIndex2 + 1)); 3036 String replacement = bufferToRep.toString(); 3037 3038 // Do the replacement 3039 String result = m.replaceAll(replacement); 3040 3041 // Construct expected result 3042 StringBuffer bufferToRes = new StringBuffer(); 3043 bufferToRes.append(leadingString); 3044 bufferToRes.append(groups[groupIndex1]); 3045 bufferToRes.append(randomMidString); 3046 bufferToRes.append(groups[groupIndex2]); 3047 bufferToRes.append(trailingString); 3048 String expectedResult = bufferToRes.toString(); 3049 3050 // Check results 3051 if (!result.equals(expectedResult)) 3052 failCount++; 3053 } 3054 3055 report("Substitution Basher"); 3056 } 3057 3058 /** 3059 * Checks the handling of some escape sequences that the Pattern 3060 * class should process instead of the java compiler. These are 3061 * not in the file because the escapes should be be processed 3062 * by the Pattern class when the regex is compiled. 3063 */ 3064 private static void escapes() throws Exception { 3065 Pattern p = Pattern.compile("\\043"); 3066 Matcher m = p.matcher("#"); 3067 if (!m.find()) 3068 failCount++; 3069 3070 p = Pattern.compile("\\x23"); 3071 m = p.matcher("#"); 3072 if (!m.find()) 3073 failCount++; 3074 3075 p = Pattern.compile("\\u0023"); 3076 m = p.matcher("#"); 3077 if (!m.find()) 3078 failCount++; 3079 3080 report("Escape sequences"); 3081 } 3082 3083 /** 3084 * Checks the handling of blank input situations. These 3085 * tests are incompatible with my test file format. 3086 */ 3087 private static void blankInput() throws Exception { 3088 Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE); 3089 Matcher m = p.matcher(""); 3090 if (m.find()) 3091 failCount++; 3092 3093 p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE); 3094 m = p.matcher(""); 3095 if (!m.find()) 3096 failCount++; 3097 3098 p = Pattern.compile("abc"); 3099 m = p.matcher(""); 3100 if (m.find()) 3101 failCount++; 3102 3103 p = Pattern.compile("a*"); 3104 m = p.matcher(""); 3105 if (!m.find()) 3106 failCount++; 3107 3108 report("Blank input"); 3109 } 3110 3111 /** 3112 * Tests the Boyer-Moore pattern matching of a character sequence 3113 * on randomly generated patterns. 3114 */ 3115 private static void bm() throws Exception { 3116 doBnM('a'); 3117 report("Boyer Moore (ASCII)"); 3118 3119 doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10); 3120 report("Boyer Moore (Supplementary)"); 3121 } 3122 3123 private static void doBnM(int baseCharacter) throws Exception { 3124 int achar=0; 3125 3126 for (int i=0; i<100; i++) { 3127 // Create a short pattern to search for 3128 int patternLength = generator.nextInt(7) + 4; 3129 StringBuffer patternBuffer = new StringBuffer(patternLength); 3130 for (int x=0; x<patternLength; x++) { 3131 int ch = baseCharacter + generator.nextInt(26); 3132 if (Character.isSupplementaryCodePoint(ch)) { 3133 patternBuffer.append(Character.toChars(ch)); 3134 } else { 3135 patternBuffer.append((char)ch); 3136 } 3137 } 3138 String pattern = patternBuffer.toString(); 3139 Pattern p = Pattern.compile(pattern); 3140 3141 // Create a buffer with random ASCII chars that does 3142 // not match the sample 3143 String toSearch = null; 3144 StringBuffer s = null; 3145 Matcher m = p.matcher(""); 3146 do { 3147 s = new StringBuffer(100); 3148 for (int x=0; x<100; x++) { 3149 int ch = baseCharacter + generator.nextInt(26); 3150 if (Character.isSupplementaryCodePoint(ch)) { 3151 s.append(Character.toChars(ch)); 3152 } else { 3153 s.append((char)ch); 3154 } 3155 } 3156 toSearch = s.toString(); 3157 m.reset(toSearch); 3158 } while (m.find()); 3159 3160 // Insert the pattern at a random spot 3161 int insertIndex = generator.nextInt(99); 3162 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3163 insertIndex++; 3164 s = s.insert(insertIndex, pattern); 3165 toSearch = s.toString(); 3166 3167 // Make sure that the pattern is found 3168 m.reset(toSearch); 3169 if (!m.find()) 3170 failCount++; 3171 3172 // Make sure that the match text is the pattern 3173 if (!m.group().equals(pattern)) 3174 failCount++; 3175 3176 // Make sure match occured at insertion point 3177 if (m.start() != insertIndex) 3178 failCount++; 3179 } 3180 } 3181 3182 /** 3183 * Tests the matching of slices on randomly generated patterns. 3184 * The Boyer-Moore optimization is not done on these patterns 3185 * because it uses unicode case folding. 3186 */ 3187 private static void slice() throws Exception { 3188 doSlice(Character.MAX_VALUE); 3189 report("Slice"); 3190 3191 doSlice(Character.MAX_CODE_POINT); 3192 report("Slice (Supplementary)"); 3193 } 3194 3195 private static void doSlice(int maxCharacter) throws Exception { 3196 Random generator = new Random(); 3197 int achar=0; 3198 3199 for (int i=0; i<100; i++) { 3200 // Create a short pattern to search for 3201 int patternLength = generator.nextInt(7) + 4; 3202 StringBuffer patternBuffer = new StringBuffer(patternLength); 3203 for (int x=0; x<patternLength; x++) { 3204 int randomChar = 0; 3205 while (!Character.isLetterOrDigit(randomChar)) 3206 randomChar = generator.nextInt(maxCharacter); 3207 if (Character.isSupplementaryCodePoint(randomChar)) { 3208 patternBuffer.append(Character.toChars(randomChar)); 3209 } else { 3210 patternBuffer.append((char) randomChar); 3211 } 3212 } 3213 String pattern = patternBuffer.toString(); 3214 Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE); 3215 3216 // Create a buffer with random chars that does not match the sample 3217 String toSearch = null; 3218 StringBuffer s = null; 3219 Matcher m = p.matcher(""); 3220 do { 3221 s = new StringBuffer(100); 3222 for (int x=0; x<100; x++) { 3223 int randomChar = 0; 3224 while (!Character.isLetterOrDigit(randomChar)) 3225 randomChar = generator.nextInt(maxCharacter); 3226 if (Character.isSupplementaryCodePoint(randomChar)) { 3227 s.append(Character.toChars(randomChar)); 3228 } else { 3229 s.append((char) randomChar); 3230 } 3231 } 3232 toSearch = s.toString(); 3233 m.reset(toSearch); 3234 } while (m.find()); 3235 3236 // Insert the pattern at a random spot 3237 int insertIndex = generator.nextInt(99); 3238 if (Character.isLowSurrogate(s.charAt(insertIndex))) 3239 insertIndex++; 3240 s = s.insert(insertIndex, pattern); 3241 toSearch = s.toString(); 3242 3243 // Make sure that the pattern is found 3244 m.reset(toSearch); 3245 if (!m.find()) 3246 failCount++; 3247 3248 // Make sure that the match text is the pattern 3249 if (!m.group().equals(pattern)) 3250 failCount++; 3251 3252 // Make sure match occured at insertion point 3253 if (m.start() != insertIndex) 3254 failCount++; 3255 } 3256 } 3257 3258 private static void explainFailure(String pattern, String data, 3259 String expected, String actual) { 3260 System.err.println("----------------------------------------"); 3261 System.err.println("Pattern = "+pattern); 3262 System.err.println("Data = "+data); 3263 System.err.println("Expected = " + expected); 3264 System.err.println("Actual = " + actual); 3265 } 3266 3267 private static void explainFailure(String pattern, String data, 3268 Throwable t) { 3269 System.err.println("----------------------------------------"); 3270 System.err.println("Pattern = "+pattern); 3271 System.err.println("Data = "+data); 3272 t.printStackTrace(System.err); 3273 } 3274 3275 // Testing examples from a file 3276 3277 /** 3278 * Goes through the file "TestCases.txt" and creates many patterns 3279 * described in the file, matching the patterns against input lines in 3280 * the file, and comparing the results against the correct results 3281 * also found in the file. The file format is described in comments 3282 * at the head of the file. 3283 */ 3284 private static void processFile(String fileName) throws Exception { 3285 File testCases = new File(System.getProperty("test.src", "."), 3286 fileName); 3287 FileInputStream in = new FileInputStream(testCases); 3288 BufferedReader r = new BufferedReader(new InputStreamReader(in)); 3289 3290 // Process next test case. 3291 String aLine; 3292 while((aLine = r.readLine()) != null) { 3293 // Read a line for pattern 3294 String patternString = grabLine(r); 3295 Pattern p = null; 3296 try { 3297 p = compileTestPattern(patternString); 3298 } catch (PatternSyntaxException e) { 3299 String dataString = grabLine(r); 3300 String expectedResult = grabLine(r); 3301 if (expectedResult.startsWith("error")) 3302 continue; 3303 explainFailure(patternString, dataString, e); 3304 failCount++; 3305 continue; 3306 } 3307 3308 // Read a line for input string 3309 String dataString = grabLine(r); 3310 Matcher m = p.matcher(dataString); 3311 StringBuffer result = new StringBuffer(); 3312 3313 // Check for IllegalStateExceptions before a match 3314 failCount += preMatchInvariants(m); 3315 3316 boolean found = m.find(); 3317 3318 if (found) 3319 failCount += postTrueMatchInvariants(m); 3320 else 3321 failCount += postFalseMatchInvariants(m); 3322 3323 if (found) { 3324 result.append("true "); 3325 result.append(m.group(0) + " "); 3326 } else { 3327 result.append("false "); 3328 } 3329 3330 result.append(m.groupCount()); 3331 3332 if (found) { 3333 for (int i=1; i<m.groupCount()+1; i++) 3334 if (m.group(i) != null) 3335 result.append(" " +m.group(i)); 3336 } 3337 3338 // Read a line for the expected result 3339 String expectedResult = grabLine(r); 3340 3341 if (!result.toString().equals(expectedResult)) { 3342 explainFailure(patternString, dataString, expectedResult, result.toString()); 3343 failCount++; 3344 } 3345 } 3346 3347 report(fileName); 3348 } 3349 3350 private static int preMatchInvariants(Matcher m) { 3351 int failCount = 0; 3352 try { 3353 m.start(); 3354 failCount++; 3355 } catch (IllegalStateException ise) {} 3356 try { 3357 m.end(); 3358 failCount++; 3359 } catch (IllegalStateException ise) {} 3360 try { 3361 m.group(); 3362 failCount++; 3363 } catch (IllegalStateException ise) {} 3364 return failCount; 3365 } 3366 3367 private static int postFalseMatchInvariants(Matcher m) { 3368 int failCount = 0; 3369 try { 3370 m.group(); 3371 failCount++; 3372 } catch (IllegalStateException ise) {} 3373 try { 3374 m.start(); 3375 failCount++; 3376 } catch (IllegalStateException ise) {} 3377 try { 3378 m.end(); 3379 failCount++; 3380 } catch (IllegalStateException ise) {} 3381 return failCount; 3382 } 3383 3384 private static int postTrueMatchInvariants(Matcher m) { 3385 int failCount = 0; 3386 //assert(m.start() = m.start(0); 3387 if (m.start() != m.start(0)) 3388 failCount++; 3389 //assert(m.end() = m.end(0); 3390 if (m.start() != m.start(0)) 3391 failCount++; 3392 //assert(m.group() = m.group(0); 3393 if (!m.group().equals(m.group(0))) 3394 failCount++; 3395 try { 3396 m.group(50); 3397 failCount++; 3398 } catch (IndexOutOfBoundsException ise) {} 3399 3400 return failCount; 3401 } 3402 3403 private static Pattern compileTestPattern(String patternString) { 3404 if (!patternString.startsWith("'")) { 3405 return Pattern.compile(patternString); 3406 } 3407 3408 int break1 = patternString.lastIndexOf("'"); 3409 String flagString = patternString.substring( 3410 break1+1, patternString.length()); 3411 patternString = patternString.substring(1, break1); 3412 3413 if (flagString.equals("i")) 3414 return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); 3415 3416 if (flagString.equals("m")) 3417 return Pattern.compile(patternString, Pattern.MULTILINE); 3418 3419 return Pattern.compile(patternString); 3420 } 3421 3422 /** 3423 * Reads a line from the input file. Keeps reading lines until a non 3424 * empty non comment line is read. If the line contains a \n then 3425 * these two characters are replaced by a newline char. If a \\uxxxx 3426 * sequence is read then the sequence is replaced by the unicode char. 3427 */ 3428 private static String grabLine(BufferedReader r) throws Exception { 3429 int index = 0; 3430 String line = r.readLine(); 3431 while (line.startsWith("//") || line.length() < 1) 3432 line = r.readLine(); 3433 while ((index = line.indexOf("\\n")) != -1) { 3434 StringBuffer temp = new StringBuffer(line); 3435 temp.replace(index, index+2, "\n"); 3436 line = temp.toString(); 3437 } 3438 while ((index = line.indexOf("\\u")) != -1) { 3439 StringBuffer temp = new StringBuffer(line); 3440 String value = temp.substring(index+2, index+6); 3441 char aChar = (char)Integer.parseInt(value, 16); 3442 String unicodeChar = "" + aChar; 3443 temp.replace(index, index+6, unicodeChar); 3444 line = temp.toString(); 3445 } 3446 3447 return line; 3448 } 3449 3450 private static void check(Pattern p, String s, String g, String expected) { 3451 Matcher m = p.matcher(s); 3452 m.find(); 3453 if (!m.group(g).equals(expected) || 3454 s.charAt(m.start(g)) != expected.charAt(0) || 3455 s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1)) 3456 failCount++; 3457 } 3458 3459 private static void checkReplaceFirst(String p, String s, String r, String expected) 3460 { 3461 if (!expected.equals(Pattern.compile(p) 3462 .matcher(s) 3463 .replaceFirst(r))) 3464 failCount++; 3465 } 3466 3467 private static void checkReplaceAll(String p, String s, String r, String expected) 3468 { 3469 if (!expected.equals(Pattern.compile(p) 3470 .matcher(s) 3471 .replaceAll(r))) 3472 failCount++; 3473 } 3474 3475 private static void checkExpectedFail(String p) { 3476 try { 3477 Pattern.compile(p); 3478 } catch (PatternSyntaxException pse) { 3479 //pse.printStackTrace(); 3480 return; 3481 } 3482 failCount++; 3483 } 3484 3485 private static void checkExpectedIAE(Matcher m, String g) { 3486 m.find(); 3487 try { 3488 m.group(g); 3489 } catch (IllegalArgumentException x) { 3490 //iae.printStackTrace(); 3491 try { 3492 m.start(g); 3493 } catch (IllegalArgumentException xx) { 3494 try { 3495 m.start(g); 3496 } catch (IllegalArgumentException xxx) { 3497 return; 3498 } 3499 } 3500 } 3501 failCount++; 3502 } 3503 3504 private static void checkExpectedNPE(Matcher m) { 3505 m.find(); 3506 try { 3507 m.group(null); 3508 } catch (NullPointerException x) { 3509 try { 3510 m.start(null); 3511 } catch (NullPointerException xx) { 3512 try { 3513 m.end(null); 3514 } catch (NullPointerException xxx) { 3515 return; 3516 } 3517 } 3518 } 3519 failCount++; 3520 } 3521 3522 private static void namedGroupCaptureTest() throws Exception { 3523 check(Pattern.compile("x+(?<gname>y+)z+"), 3524 "xxxyyyzzz", 3525 "gname", 3526 "yyy"); 3527 3528 check(Pattern.compile("x+(?<gname8>y+)z+"), 3529 "xxxyyyzzz", 3530 "gname8", 3531 "yyy"); 3532 3533 //backref 3534 Pattern pattern = Pattern.compile("(a*)bc\\1"); 3535 check(pattern, "zzzaabcazzz", true); // found "abca" 3536 3537 check(Pattern.compile("(?<gname>a*)bc\\k<gname>"), 3538 "zzzaabcaazzz", true); 3539 3540 check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"), 3541 "abcdefabc", true); 3542 3543 check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"), 3544 "abcdefghijkk", true); 3545 3546 // Supplementary character tests 3547 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3548 toSupplementaries("zzzaabcazzz"), true); 3549 3550 check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"), 3551 toSupplementaries("zzzaabcaazzz"), true); 3552 3553 check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"), 3554 toSupplementaries("abcdefabc"), true); 3555 3556 check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") + 3557 "(?<gname>" + 3558 toSupplementaries("k)") + "\\k<gname>"), 3559 toSupplementaries("abcdefghijkk"), true); 3560 3561 check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"), 3562 "xxxyyyzzzyyy", 3563 "gname", 3564 "yyy"); 3565 3566 //replaceFirst/All 3567 checkReplaceFirst("(?<gn>ab)(c*)", 3568 "abccczzzabcczzzabccc", 3569 "${gn}", 3570 "abzzzabcczzzabccc"); 3571 3572 checkReplaceAll("(?<gn>ab)(c*)", 3573 "abccczzzabcczzzabccc", 3574 "${gn}", 3575 "abzzzabzzzab"); 3576 3577 3578 checkReplaceFirst("(?<gn>ab)(c*)", 3579 "zzzabccczzzabcczzzabccczzz", 3580 "${gn}", 3581 "zzzabzzzabcczzzabccczzz"); 3582 3583 checkReplaceAll("(?<gn>ab)(c*)", 3584 "zzzabccczzzabcczzzabccczzz", 3585 "${gn}", 3586 "zzzabzzzabzzzabzzz"); 3587 3588 checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)", 3589 "zzzabccczzzabcczzzabccczzz", 3590 "${gn2}", 3591 "zzzccczzzabcczzzabccczzz"); 3592 3593 checkReplaceAll("(?<gn1>ab)(?<gn2>c*)", 3594 "zzzabccczzzabcczzzabccczzz", 3595 "${gn2}", 3596 "zzzccczzzcczzzccczzz"); 3597 3598 //toSupplementaries("(ab)(c*)")); 3599 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 3600 ")(?<gn2>" + toSupplementaries("c") + "*)", 3601 toSupplementaries("abccczzzabcczzzabccc"), 3602 "${gn1}", 3603 toSupplementaries("abzzzabcczzzabccc")); 3604 3605 3606 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 3607 ")(?<gn2>" + toSupplementaries("c") + "*)", 3608 toSupplementaries("abccczzzabcczzzabccc"), 3609 "${gn1}", 3610 toSupplementaries("abzzzabzzzab")); 3611 3612 checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") + 3613 ")(?<gn2>" + toSupplementaries("c") + "*)", 3614 toSupplementaries("abccczzzabcczzzabccc"), 3615 "${gn2}", 3616 toSupplementaries("ccczzzabcczzzabccc")); 3617 3618 3619 checkReplaceAll("(?<gn1>" + toSupplementaries("ab") + 3620 ")(?<gn2>" + toSupplementaries("c") + "*)", 3621 toSupplementaries("abccczzzabcczzzabccc"), 3622 "${gn2}", 3623 toSupplementaries("ccczzzcczzzccc")); 3624 3625 checkReplaceFirst("(?<dog>Dog)AndCat", 3626 "zzzDogAndCatzzzDogAndCatzzz", 3627 "${dog}", 3628 "zzzDogzzzDogAndCatzzz"); 3629 3630 3631 checkReplaceAll("(?<dog>Dog)AndCat", 3632 "zzzDogAndCatzzzDogAndCatzzz", 3633 "${dog}", 3634 "zzzDogzzzDogzzz"); 3635 3636 // backref in Matcher & String 3637 if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") || 3638 !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh")) 3639 failCount++; 3640 3641 // negative 3642 checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)"); 3643 checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)"); 3644 checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)"); 3645 checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>"); 3646 checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>"); 3647 checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"), 3648 "gnameX"); 3649 checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef")); 3650 report("NamedGroupCapture"); 3651 } 3652 3653 // This is for bug 6969132 3654 private static void nonBmpClassComplementTest() throws Exception { 3655 Pattern p = Pattern.compile("\\P{Lu}"); 3656 Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 3657 if (m.find() && m.start() == 1) 3658 failCount++; 3659 3660 // from a unicode category 3661 p = Pattern.compile("\\P{Lu}"); 3662 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 3663 if (m.find()) 3664 failCount++; 3665 if (!m.hitEnd()) 3666 failCount++; 3667 3668 // block 3669 p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}"); 3670 m = p.matcher(new String(new int[] {0x1d400}, 0, 1)); 3671 if (m.find() && m.start() == 1) 3672 failCount++; 3673 3674 report("NonBmpClassComplement"); 3675 } 3676 3677 private static void unicodePropertiesTest() throws Exception { 3678 // different forms 3679 if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() || 3680 !Pattern.compile("\\p{Lu}").matcher("A").matches() || 3681 !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() || 3682 !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() || 3683 !Pattern.compile("\\p{IsLatin}").matcher("B").matches() || 3684 !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() || 3685 !Pattern.compile("\\p{script=Latin}").matcher("B").matches() || 3686 !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() || 3687 !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() || 3688 !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches()) 3689 failCount++; 3690 3691 Matcher common = Pattern.compile("\\p{script=Common}").matcher(""); 3692 Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher(""); 3693 Matcher lastSM = common; 3694 Character.UnicodeScript lastScript = Character.UnicodeScript.of(0); 3695 3696 Matcher latin = Pattern.compile("\\p{block=basic_latin}").matcher(""); 3697 Matcher greek = Pattern.compile("\\p{InGreek}").matcher(""); 3698 Matcher lastBM = latin; 3699 Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0); 3700 3701 for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) { 3702 if (cp >= 0x30000 && (cp & 0x70) == 0){ 3703 continue; // only pick couple code points, they are the same 3704 } 3705 3706 // Unicode Script 3707 Character.UnicodeScript script = Character.UnicodeScript.of(cp); 3708 Matcher m; 3709 String str = new String(Character.toChars(cp)); 3710 if (script == lastScript) { 3711 m = lastSM; 3712 m.reset(str); 3713 } else { 3714 m = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str); 3715 } 3716 if (!m.matches()) { 3717 failCount++; 3718 } 3719 Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common; 3720 other.reset(str); 3721 if (other.matches()) { 3722 failCount++; 3723 } 3724 lastSM = m; 3725 lastScript = script; 3726 3727 // Unicode Block 3728 Character.UnicodeBlock block = Character.UnicodeBlock.of(cp); 3729 if (block == null) { 3730 //System.out.printf("Not a Block: cp=%x%n", cp); 3731 continue; 3732 } 3733 if (block == lastBlock) { 3734 m = lastBM; 3735 m.reset(str); 3736 } else { 3737 m = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str); 3738 } 3739 if (!m.matches()) { 3740 failCount++; 3741 } 3742 other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin; 3743 other.reset(str); 3744 if (other.matches()) { 3745 failCount++; 3746 } 3747 lastBM = m; 3748 lastBlock = block; 3749 } 3750 report("unicodeProperties"); 3751 } 3752 3753 private static void unicodeHexNotationTest() throws Exception { 3754 3755 // negative 3756 checkExpectedFail("\\x{-23}"); 3757 checkExpectedFail("\\x{110000}"); 3758 checkExpectedFail("\\x{}"); 3759 checkExpectedFail("\\x{AB[ef]"); 3760 3761 // codepoint 3762 check("^\\x{1033c}$", "\uD800\uDF3C", true); 3763 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 3764 check("^\\x{D800}\\x{DF3c}+$", "\uD800\uDF3C", false); 3765 check("^\\xF0\\x90\\x8C\\xBC$", "\uD800\uDF3C", false); 3766 3767 // in class 3768 check("^[\\x{D800}\\x{DF3c}]+$", "\uD800\uDF3C", false); 3769 check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false); 3770 check("^[\\x{D800}\\x{DF3C}]+$", "\uD800\uDF3C", false); 3771 check("^[\\x{DF3C}\\x{D800}]+$", "\uD800\uDF3C", false); 3772 check("^[\\x{D800}\\x{DF3C}]+$", "\uDF3C\uD800", true); 3773 check("^[\\x{DF3C}\\x{D800}]+$", "\uDF3C\uD800", true); 3774 3775 for (int cp = 0; cp <= 0x10FFFF; cp++) { 3776 String s = "A" + new String(Character.toChars(cp)) + "B"; 3777 String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp) 3778 : String.format("\\u%04x\\u%04x", 3779 (int) Character.toChars(cp)[0], 3780 (int) Character.toChars(cp)[1]); 3781 String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}"; 3782 if (!Pattern.matches("A" + hexUTF16 + "B", s)) 3783 failCount++; 3784 if (!Pattern.matches("A[" + hexUTF16 + "]B", s)) 3785 failCount++; 3786 if (!Pattern.matches("A" + hexCodePoint + "B", s)) 3787 failCount++; 3788 if (!Pattern.matches("A[" + hexCodePoint + "]B", s)) 3789 failCount++; 3790 } 3791 report("unicodeHexNotation"); 3792 } 3793 3794 private static void unicodeClassesTest() throws Exception { 3795 3796 Matcher lower = Pattern.compile("\\p{Lower}").matcher(""); 3797 Matcher upper = Pattern.compile("\\p{Upper}").matcher(""); 3798 Matcher ASCII = Pattern.compile("\\p{ASCII}").matcher(""); 3799 Matcher alpha = Pattern.compile("\\p{Alpha}").matcher(""); 3800 Matcher digit = Pattern.compile("\\p{Digit}").matcher(""); 3801 Matcher alnum = Pattern.compile("\\p{Alnum}").matcher(""); 3802 Matcher punct = Pattern.compile("\\p{Punct}").matcher(""); 3803 Matcher graph = Pattern.compile("\\p{Graph}").matcher(""); 3804 Matcher print = Pattern.compile("\\p{Print}").matcher(""); 3805 Matcher blank = Pattern.compile("\\p{Blank}").matcher(""); 3806 Matcher cntrl = Pattern.compile("\\p{Cntrl}").matcher(""); 3807 Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher(""); 3808 Matcher space = Pattern.compile("\\p{Space}").matcher(""); 3809 Matcher bound = Pattern.compile("\\b").matcher(""); 3810 Matcher word = Pattern.compile("\\w++").matcher(""); 3811 // UNICODE_CHARACTER_CLASS 3812 Matcher lowerU = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3813 Matcher upperU = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3814 Matcher ASCIIU = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3815 Matcher alphaU = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3816 Matcher digitU = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3817 Matcher alnumU = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3818 Matcher punctU = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3819 Matcher graphU = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3820 Matcher printU = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3821 Matcher blankU = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3822 Matcher cntrlU = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3823 Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3824 Matcher spaceU = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3825 Matcher boundU = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3826 Matcher wordU = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3827 // embedded flag (?U) 3828 Matcher lowerEU = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3829 Matcher graphEU = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3830 Matcher wordEU = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3831 3832 Matcher bwb = Pattern.compile("\\b\\w\\b").matcher(""); 3833 Matcher bwbU = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3834 Matcher bwbEU = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher(""); 3835 // properties 3836 Matcher lowerP = Pattern.compile("\\p{IsLowerCase}").matcher(""); 3837 Matcher upperP = Pattern.compile("\\p{IsUpperCase}").matcher(""); 3838 Matcher titleP = Pattern.compile("\\p{IsTitleCase}").matcher(""); 3839 Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher(""); 3840 Matcher alphaP = Pattern.compile("\\p{IsAlphabetic}").matcher(""); 3841 Matcher ideogP = Pattern.compile("\\p{IsIdeographic}").matcher(""); 3842 Matcher cntrlP = Pattern.compile("\\p{IsControl}").matcher(""); 3843 Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher(""); 3844 Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher(""); 3845 Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher(""); 3846 Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher(""); 3847 3848 // javaMethod 3849 Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher(""); 3850 Matcher upperJ = Pattern.compile("\\p{javaUpperCase}").matcher(""); 3851 Matcher alphaJ = Pattern.compile("\\p{javaAlphabetic}").matcher(""); 3852 Matcher ideogJ = Pattern.compile("\\p{javaIdeographic}").matcher(""); 3853 3854 for (int cp = 1; cp < 0x30000; cp++) { 3855 String str = new String(Character.toChars(cp)); 3856 int type = Character.getType(cp); 3857 if (// lower 3858 POSIX_ASCII.isLower(cp) != lower.reset(str).matches() || 3859 Character.isLowerCase(cp) != lowerU.reset(str).matches() || 3860 Character.isLowerCase(cp) != lowerP.reset(str).matches() || 3861 Character.isLowerCase(cp) != lowerEU.reset(str).matches()|| 3862 Character.isLowerCase(cp) != lowerJ.reset(str).matches()|| 3863 // upper 3864 POSIX_ASCII.isUpper(cp) != upper.reset(str).matches() || 3865 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() || 3866 Character.isUpperCase(cp) != upperP.reset(str).matches() || 3867 Character.isUpperCase(cp) != upperJ.reset(str).matches() || 3868 // alpha 3869 POSIX_ASCII.isAlpha(cp) != alpha.reset(str).matches() || 3870 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() || 3871 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() || 3872 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() || 3873 // digit 3874 POSIX_ASCII.isDigit(cp) != digit.reset(str).matches() || 3875 Character.isDigit(cp) != digitU.reset(str).matches() || 3876 // alnum 3877 POSIX_ASCII.isAlnum(cp) != alnum.reset(str).matches() || 3878 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() || 3879 // punct 3880 POSIX_ASCII.isPunct(cp) != punct.reset(str).matches() || 3881 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() || 3882 // graph 3883 POSIX_ASCII.isGraph(cp) != graph.reset(str).matches() || 3884 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() || 3885 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()|| 3886 // blank 3887 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK) 3888 != blank.reset(str).matches() || 3889 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() || 3890 // print 3891 POSIX_ASCII.isPrint(cp) != print.reset(str).matches() || 3892 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() || 3893 // cntrl 3894 POSIX_ASCII.isCntrl(cp) != cntrl.reset(str).matches() || 3895 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() || 3896 (Character.CONTROL == type) != cntrlP.reset(str).matches() || 3897 // hexdigit 3898 POSIX_ASCII.isHexDigit(cp) != xdigit.reset(str).matches() || 3899 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() || 3900 // space 3901 POSIX_ASCII.isSpace(cp) != space.reset(str).matches() || 3902 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() || 3903 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() || 3904 // word 3905 POSIX_ASCII.isWord(cp) != word.reset(str).matches() || 3906 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() || 3907 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()|| 3908 // bwordb 3909 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() || 3910 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() || 3911 // properties 3912 Character.isTitleCase(cp) != titleP.reset(str).matches() || 3913 Character.isLetter(cp) != letterP.reset(str).matches()|| 3914 Character.isIdeographic(cp) != ideogP.reset(str).matches() || 3915 Character.isIdeographic(cp) != ideogJ.reset(str).matches() || 3916 (Character.UNASSIGNED == type) == definedP.reset(str).matches() || 3917 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() || 3918 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches()) 3919 failCount++; 3920 } 3921 3922 // bounds/word align 3923 twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10); 3924 if (!bwbU.reset("\u0180sherman\u0400").matches()) 3925 failCount++; 3926 twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11); 3927 if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches()) 3928 failCount++; 3929 twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4); 3930 if (!bwbU.reset("\u0724\u0739\u0724").matches()) 3931 failCount++; 3932 if (!bwbEU.reset("\u0724\u0739\u0724").matches()) 3933 failCount++; 3934 report("unicodePredefinedClasses"); 3935 } 3936 3937 private static void horizontalAndVerticalWSTest() throws Exception { 3938 String hws = new String (new char[] { 3939 0x09, 0x20, 0xa0, 0x1680, 0x180e, 3940 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 3941 0x2006, 0x2007, 0x2008, 0x2009, 0x200a, 3942 0x202f, 0x205f, 0x3000 }); 3943 String vws = new String (new char[] { 3944 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 }); 3945 if (!Pattern.compile("\\h+").matcher(hws).matches() || 3946 !Pattern.compile("[\\h]+").matcher(hws).matches()) 3947 failCount++; 3948 if (Pattern.compile("\\H").matcher(hws).find() || 3949 Pattern.compile("[\\H]").matcher(hws).find()) 3950 failCount++; 3951 if (!Pattern.compile("\\v+").matcher(vws).matches() || 3952 !Pattern.compile("[\\v]+").matcher(vws).matches()) 3953 failCount++; 3954 if (Pattern.compile("\\V").matcher(vws).find() || 3955 Pattern.compile("[\\V]").matcher(vws).find()) 3956 failCount++; 3957 String prefix = "abcd"; 3958 String suffix = "efgh"; 3959 String ng = "A"; 3960 for (int i = 0; i < hws.length(); i++) { 3961 String c = String.valueOf(hws.charAt(i)); 3962 Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix); 3963 if (!m.find() || !c.equals(m.group())) 3964 failCount++; 3965 m = Pattern.compile("[\\h]").matcher(prefix + c + suffix); 3966 if (!m.find() || !c.equals(m.group())) 3967 failCount++; 3968 3969 m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i)); 3970 if (!m.find() || !ng.equals(m.group())) 3971 failCount++; 3972 m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i)); 3973 if (!m.find() || !ng.equals(m.group())) 3974 failCount++; 3975 } 3976 for (int i = 0; i < vws.length(); i++) { 3977 String c = String.valueOf(vws.charAt(i)); 3978 Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix); 3979 if (!m.find() || !c.equals(m.group())) 3980 failCount++; 3981 m = Pattern.compile("[\\v]").matcher(prefix + c + suffix); 3982 if (!m.find() || !c.equals(m.group())) 3983 failCount++; 3984 3985 m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i)); 3986 if (!m.find() || !ng.equals(m.group())) 3987 failCount++; 3988 m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i)); 3989 if (!m.find() || !ng.equals(m.group())) 3990 failCount++; 3991 } 3992 // \v in range is interpreted as 0x0B. This is the undocumented behavior 3993 if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches()) 3994 failCount++; 3995 report("horizontalAndVerticalWSTest"); 3996 } 3997 3998 private static void linebreakTest() throws Exception { 3999 String linebreaks = new String (new char[] { 4000 0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 }); 4001 String crnl = "\r\n"; 4002 if (!Pattern.compile("\\R+").matcher(linebreaks).matches() || 4003 !Pattern.compile("\\R").matcher(crnl).matches() || 4004 Pattern.compile("\\R\\R").matcher(crnl).matches()) 4005 failCount++; 4006 report("linebreakTest"); 4007 } 4008 4009 // #7189363 4010 private static void branchTest() throws Exception { 4011 if (!Pattern.compile("(a)?bc|d").matcher("d").find() || // greedy 4012 !Pattern.compile("(a)+bc|d").matcher("d").find() || 4013 !Pattern.compile("(a)*bc|d").matcher("d").find() || 4014 !Pattern.compile("(a)??bc|d").matcher("d").find() || // reluctant 4015 !Pattern.compile("(a)+?bc|d").matcher("d").find() || 4016 !Pattern.compile("(a)*?bc|d").matcher("d").find() || 4017 !Pattern.compile("(a)?+bc|d").matcher("d").find() || // possessive 4018 !Pattern.compile("(a)++bc|d").matcher("d").find() || 4019 !Pattern.compile("(a)*+bc|d").matcher("d").find() || 4020 !Pattern.compile("(a)?bc|d").matcher("d").matches() || // greedy 4021 !Pattern.compile("(a)+bc|d").matcher("d").matches() || 4022 !Pattern.compile("(a)*bc|d").matcher("d").matches() || 4023 !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant 4024 !Pattern.compile("(a)+?bc|d").matcher("d").matches() || 4025 !Pattern.compile("(a)*?bc|d").matcher("d").matches() || 4026 !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive 4027 !Pattern.compile("(a)++bc|d").matcher("d").matches() || 4028 !Pattern.compile("(a)*+bc|d").matcher("d").matches() || 4029 !Pattern.compile("(a)?bc|de").matcher("de").find() || // others 4030 !Pattern.compile("(a)??bc|de").matcher("de").find() || 4031 !Pattern.compile("(a)?bc|de").matcher("de").matches() || 4032 !Pattern.compile("(a)??bc|de").matcher("de").matches()) 4033 failCount++; 4034 report("branchTest"); 4035 } 4036 4037 // This test is for 8007395 4038 private static void groupCurlyNotFoundSuppTest() throws Exception { 4039 String input = "test this as \ud83d\ude0d"; 4040 for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)", 4041 "test(.)*(@[a-zA-Z.]+)", 4042 "test([^B])+(@[a-zA-Z.]+)", 4043 "test([^B])*(@[a-zA-Z.]+)", 4044 "test(\\P{IsControl})+(@[a-zA-Z.]+)", 4045 "test(\\P{IsControl})*(@[a-zA-Z.]+)", 4046 }) { 4047 Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE) 4048 .matcher(input); 4049 try { 4050 if (m.find()) { 4051 failCount++; 4052 } 4053 } catch (Exception x) { 4054 failCount++; 4055 } 4056 } 4057 report("GroupCurly NotFoundSupp"); 4058 } 4059 4060 // This test is for 8023647 4061 private static void groupCurlyBackoffTest() throws Exception { 4062 if (!"abc1c".matches("(\\w)+1\\1") || 4063 "abc11".matches("(\\w)+1\\1")) { 4064 failCount++; 4065 } 4066 report("GroupCurly backoff"); 4067 } 4068 4069 // This test is for 8012646 4070 private static void patternAsPredicate() throws Exception { 4071 Predicate<String> p = Pattern.compile("[a-z]+").asPredicate(); 4072 4073 if (p.test("")) { 4074 failCount++; 4075 } 4076 if (!p.test("word")) { 4077 failCount++; 4078 } 4079 if (p.test("1234")) { 4080 failCount++; 4081 } 4082 report("Pattern.asPredicate"); 4083 } 4084 }