1 /*
   2  * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /**
  25  * @test
  26  * @summary tests RegExp framework (use -Dseed=X to set PRNG seed)
  27  * @author Mike McCloskey
  28  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
  29  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
  30  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  31  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  32  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  33  * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
  34  * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
  35  * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
  36  * 8151481 4867170 7080302 6728861 6995635 6736245 4916384
  37  * @library /lib/testlibrary
  38  * @build jdk.testlibrary.*
  39  * @run main RegExTest
  40  * @key randomness
  41  */
  42 
  43 import java.util.function.Function;
  44 import java.util.regex.*;
  45 import java.util.Random;
  46 import java.util.Scanner;
  47 import java.io.*;
  48 import java.nio.file.*;
  49 import java.util.*;
  50 import java.nio.CharBuffer;
  51 import java.util.function.Predicate;
  52 import jdk.testlibrary.RandomFactory;
  53 
  54 /**
  55  * This is a test class created to check the operation of
  56  * the Pattern and Matcher classes.
  57  */
  58 public class RegExTest {
  59 
    // Source of randomness, obtained from the test library's RandomFactory;
    // used by getRandomAlphaString() to draw random letters.
    private static Random generator = RandomFactory.getRandom();
    // Set to true by report() as soon as any test has recorded a failure;
    // main() checks it after all tests have run.
    private static boolean failure = false;
    // Number of failures recorded by the test currently running;
    // report() prints it and then resets it to 0.
    private static int failCount = 0;
    // Name of the first test that report() saw with failures; null until then.
    private static String firstFailure = null;
  64 
    /**
     * Runs the file-driven test cases and then every programmatic test of
     * this class, in a fixed order.
     *
     * After all tests have run, the {@code failure} flag (set by
     * {@code report(String)}) decides the outcome: if it is set, a
     * {@code RuntimeException} naming {@code firstFailure} is thrown;
     * otherwise a success line is printed to {@code System.err}.
     *
     * @param args command-line arguments; not read by this method
     * @throws Exception if any of the invoked tests throws
     */
    public static void main(String[] args) throws Exception {
        // Most of the tests are in a file
        processFile("TestCases.txt");
        //processFile("PerlCases.txt");
        processFile("BMPTestCases.txt");
        processFile("SupplementaryTestCases.txt");

        // These test many randomly generated char patterns
        bm();
        slice();

        // These are hard to put into the file
        escapes();
        blankInput();

        // Substitution tests on randomly generated sequences
        globalSubstitute();
        stringbufferSubstitute();
        stringbuilderSubstitute();

        substitutionBasher();
        substitutionBasher2();

        // Canonical Equivalence
        ceTest();

        // Anchors
        anchorTest();

        // boolean match calls
        matchesTest();
        lookingAtTest();

        // Pattern API
        patternMatchesTest();

        // Misc
        lookbehindTest();
        nullArgumentTest();
        backRefTest();
        groupCaptureTest();
        caretTest();
        charClassTest();
        emptyPatternTest();
        findIntTest();
        group0Test();
        longPatternTest();
        octalTest();
        ampersandTest();
        negationTest();
        splitTest();
        appendTest();
        caseFoldingTest();
        commentsTest();
        unixLinesTest();
        replaceFirstTest();
        gTest();
        zTest();
        serializeTest();
        reluctantRepetitionTest();
        multilineDollarTest();
        dollarAtEndTest();
        caretBetweenTerminatorsTest();
        // This RFE rejected in Tiger numOccurrencesTest();
        javaCharClassTest();
        nonCaptureRepetitionTest();
        notCapturedGroupCurlyMatchTest();
        escapedSegmentTest();
        literalPatternTest();
        literalReplacementTest();
        regionTest();
        toStringTest();
        negatedCharClassTest();
        findFromTest();
        boundsTest();
        unicodeWordBoundsTest();
        caretAtEndTest();
        wordSearchTest();
        hitEndTest();
        toMatchResultTest();
        toMatchResultTest2();
        surrogatesInClassTest();
        removeQEQuotingTest();
        namedGroupCaptureTest();
        nonBmpClassComplementTest();
        unicodePropertiesTest();
        unicodeHexNotationTest();
        unicodeClassesTest();
        unicodeCharacterNameTest();
        horizontalAndVerticalWSTest();
        linebreakTest();
        branchTest();
        groupCurlyNotFoundSuppTest();
        groupCurlyBackoffTest();
        patternAsPredicate();
        invalidFlags();
        grapheme();

        // Fail the whole run if any individual test reported a failure
        if (failure) {
            throw new
                RuntimeException("RegExTest failed, 1st failure: " +
                                 firstFailure);
        } else {
            System.err.println("OKAY: All tests passed.");
        }
    }
 175 
 176     // Utility functions
 177 
 178     private static String getRandomAlphaString(int length) {
 179         StringBuffer buf = new StringBuffer(length);
 180         for (int i=0; i<length; i++) {
 181             char randChar = (char)(97 + generator.nextInt(26));
 182             buf.append(randChar);
 183         }
 184         return buf.toString();
 185     }
 186 
 187     private static void check(Matcher m, String expected) {
 188         m.find();
 189         if (!m.group().equals(expected))
 190             failCount++;
 191     }
 192 
 193     private static void check(Matcher m, String result, boolean expected) {
 194         m.find();
 195         if (m.group().equals(result) != expected)
 196             failCount++;
 197     }
 198 
 199     private static void check(Pattern p, String s, boolean expected) {
 200         if (p.matcher(s).find() != expected)
 201             failCount++;
 202     }
 203 
 204     private static void check(String p, String s, boolean expected) {
 205         Matcher matcher = Pattern.compile(p).matcher(s);
 206         if (matcher.find() != expected)
 207             failCount++;
 208     }
 209 
 210     private static void check(String p, char c, boolean expected) {
 211         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 212         Pattern pattern = Pattern.compile(propertyPattern);
 213         char[] ca = new char[1]; ca[0] = c;
 214         Matcher matcher = pattern.matcher(new String(ca));
 215         if (!matcher.find())
 216             failCount++;
 217     }
 218 
 219     private static void check(String p, int codePoint, boolean expected) {
 220         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 221         Pattern pattern = Pattern.compile(propertyPattern);
 222         char[] ca = Character.toChars(codePoint);
 223         Matcher matcher = pattern.matcher(new String(ca));
 224         if (!matcher.find())
 225             failCount++;
 226     }
 227 
 228     private static void check(String p, int flag, String input, String s,
 229                               boolean expected)
 230     {
 231         Pattern pattern = Pattern.compile(p, flag);
 232         Matcher matcher = pattern.matcher(input);
 233         if (expected)
 234             check(matcher, s, expected);
 235         else
 236             check(pattern, input, false);
 237     }
 238 
 239     private static void report(String testName) {
 240         int spacesToAdd = 30 - testName.length();
 241         StringBuffer paddedNameBuffer = new StringBuffer(testName);
 242         for (int i=0; i<spacesToAdd; i++)
 243             paddedNameBuffer.append(" ");
 244         String paddedName = paddedNameBuffer.toString();
 245         System.err.println(paddedName + ": " +
 246                            (failCount==0 ? "Passed":"Failed("+failCount+")"));
 247         if (failCount > 0) {
 248             failure = true;
 249 
 250             if (firstFailure == null) {
 251                 firstFailure = testName;
 252             }
 253         }
 254 
 255         failCount = 0;
 256     }
 257 
 258     /**
 259      * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
 260      * supplementary characters. This method does NOT fully take care
 261      * of the regex syntax.
 262      */
 263     private static String toSupplementaries(String s) {
 264         int length = s.length();
 265         StringBuffer sb = new StringBuffer(length * 2);
 266 
 267         for (int i = 0; i < length; ) {
 268             char c = s.charAt(i++);
 269             if (c == '\\') {
 270                 sb.append(c);
 271                 if (i < length) {
 272                     c = s.charAt(i++);
 273                     sb.append(c);
 274                     if (c == 'u') {
 275                         // assume no syntax error
 276                         sb.append(s.charAt(i++));
 277                         sb.append(s.charAt(i++));
 278                         sb.append(s.charAt(i++));
 279                         sb.append(s.charAt(i++));
 280                     }
 281                 }
 282             } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
 283                 sb.append('\ud800').append((char)('\udc00'+c));
 284             } else {
 285                 sb.append(c);
 286             }
 287         }
 288         return sb.toString();
 289     }
 290 
 291     // Regular expression tests
 292 
 293     // This is for bug 6178785
 294     // Test if an expected NPE gets thrown when passing in a null argument
 295     private static boolean check(Runnable test) {
 296         try {
 297             test.run();
 298             failCount++;
 299             return false;
 300         } catch (NullPointerException npe) {
 301             return true;
 302         }
 303     }
 304 
 305     private static void nullArgumentTest() {
 306         check(() -> Pattern.compile(null));
 307         check(() -> Pattern.matches(null, null));
 308         check(() -> Pattern.matches("xyz", null));
 309         check(() -> Pattern.quote(null));
 310         check(() -> Pattern.compile("xyz").split(null));
 311         check(() -> Pattern.compile("xyz").matcher(null));
 312 
 313         final Matcher m = Pattern.compile("xyz").matcher("xyz");
 314         m.matches();
 315         check(() -> m.appendTail((StringBuffer) null));
 316         check(() -> m.appendTail((StringBuilder)null));
 317         check(() -> m.replaceAll((String) null));
 318         check(() -> m.replaceAll((Function<MatchResult, String>)null));
 319         check(() -> m.replaceFirst((String)null));
 320         check(() -> m.replaceFirst((Function<MatchResult, String>) null));
 321         check(() -> m.appendReplacement((StringBuffer)null, null));
 322         check(() -> m.appendReplacement((StringBuilder)null, null));
 323         check(() -> m.reset(null));
 324         check(() -> Matcher.quoteReplacement(null));
 325         //check(() -> m.usePattern(null));
 326 
 327         report("Null Argument");
 328     }
 329 
 330     // This is for bug6635133
 331     // Test if surrogate pair in Unicode escapes can be handled correctly.
 332     private static void surrogatesInClassTest() throws Exception {
 333         Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
 334         Matcher matcher = pattern.matcher("\ud834\udd22");
 335         if (!matcher.find())
 336             failCount++;
 337 
 338         report("Surrogate pair in Unicode escape");
 339     }
 340 
 341     // This is for bug6990617
 342     // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
 343     // char encoding is only 2 or 3 digits instead of 4 and the first quoted
 344     // char is an octal digit.
 345     private static void removeQEQuotingTest() throws Exception {
 346         Pattern pattern =
 347             Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
 348         Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
 349         if (!matcher.find())
 350             failCount++;
 351 
 352         report("Remove Q/E Quoting");
 353     }
 354 
 355     // This is for bug 4988891
 356     // Test toMatchResult to see that it is a copy of the Matcher
 357     // that is not affected by subsequent operations on the original
 358     private static void toMatchResultTest() throws Exception {
 359         Pattern pattern = Pattern.compile("squid");
 360         Matcher matcher = pattern.matcher(
 361             "agiantsquidofdestinyasmallsquidoffate");
 362         matcher.find();
 363         int matcherStart1 = matcher.start();
 364         MatchResult mr = matcher.toMatchResult();
 365         if (mr == matcher)
 366             failCount++;
 367         int resultStart1 = mr.start();
 368         if (matcherStart1 != resultStart1)
 369             failCount++;
 370         matcher.find();
 371         int matcherStart2 = matcher.start();
 372         int resultStart2 = mr.start();
 373         if (matcherStart2 == resultStart2)
 374             failCount++;
 375         if (resultStart1 != resultStart2)
 376             failCount++;
 377         MatchResult mr2 = matcher.toMatchResult();
 378         if (mr == mr2)
 379             failCount++;
 380         if (mr2.start() != matcherStart2)
 381             failCount++;
 382         report("toMatchResult is a copy");
 383     }
 384 
 385     private static void checkExpectedISE(Runnable test) {
 386         try {
 387             test.run();
 388             failCount++;
 389         } catch (IllegalStateException x) {
 390         } catch (IndexOutOfBoundsException xx) {
 391             failCount++;
 392         }
 393     }
 394 
 395     private static void checkExpectedIOOE(Runnable test) {
 396         try {
 397             test.run();
 398             failCount++;
 399         } catch (IndexOutOfBoundsException x) {}
 400     }
 401 
 402     // This is for bug 8074678
 403     // Test the result of toMatchResult throws ISE if no match is availble
 404     private static void toMatchResultTest2() throws Exception {
 405         Matcher matcher = Pattern.compile("nomatch").matcher("hello world");
 406         matcher.find();
 407         MatchResult mr = matcher.toMatchResult();
 408 
 409         checkExpectedISE(() -> mr.start());
 410         checkExpectedISE(() -> mr.start(2));
 411         checkExpectedISE(() -> mr.end());
 412         checkExpectedISE(() -> mr.end(2));
 413         checkExpectedISE(() -> mr.group());
 414         checkExpectedISE(() -> mr.group(2));
 415 
 416         matcher = Pattern.compile("(match)").matcher("there is a match");
 417         matcher.find();
 418         MatchResult mr2 = matcher.toMatchResult();
 419         checkExpectedIOOE(() -> mr2.start(2));
 420         checkExpectedIOOE(() -> mr2.end(2));
 421         checkExpectedIOOE(() -> mr2.group(2));
 422 
 423         report("toMatchResult2 appropriate exceptions");
 424     }
 425 
 426     // This is for bug 5013885
 427     // Must test a slice to see if it reports hitEnd correctly
 428     private static void hitEndTest() throws Exception {
 429         // Basic test of Slice node
 430         Pattern p = Pattern.compile("^squidattack");
 431         Matcher m = p.matcher("squack");
 432         m.find();
 433         if (m.hitEnd())
 434             failCount++;
 435         m.reset("squid");
 436         m.find();
 437         if (!m.hitEnd())
 438             failCount++;
 439 
 440         // Test Slice, SliceA and SliceU nodes
 441         for (int i=0; i<3; i++) {
 442             int flags = 0;
 443             if (i==1) flags = Pattern.CASE_INSENSITIVE;
 444             if (i==2) flags = Pattern.UNICODE_CASE;
 445             p = Pattern.compile("^abc", flags);
 446             m = p.matcher("ad");
 447             m.find();
 448             if (m.hitEnd())
 449                 failCount++;
 450             m.reset("ab");
 451             m.find();
 452             if (!m.hitEnd())
 453                 failCount++;
 454         }
 455 
 456         // Test Boyer-Moore node
 457         p = Pattern.compile("catattack");
 458         m = p.matcher("attack");
 459         m.find();
 460         if (!m.hitEnd())
 461             failCount++;
 462 
 463         p = Pattern.compile("catattack");
 464         m = p.matcher("attackattackattackcatatta");
 465         m.find();
 466         if (!m.hitEnd())
 467             failCount++;
 468         report("hitEnd from a Slice");
 469     }
 470 
    // This is for bug 4997476
    // It is weird code submitted by customer demonstrating a regression.
    // It walks the word boundaries ("\\b") of "word1 word2 word3" using
    // find(int), and records a failure when the text between two
    // consecutive boundaries is neither a single space nor (minus its last
    // char) something starting with "word".
    private static void wordSearchTest() throws Exception {
        String testString = new String("word1 word2 word3");
        Pattern p = Pattern.compile("\\b");
        Matcher m = p.matcher(testString);
        int position = 0;
        int start = 0;
        // Each iteration looks for the next boundary at or after 'position'
        while (m.find(position)) {
            start = m.start();
            // A boundary at the very end of the input ends the walk
            if (start == testString.length())
                break;
            // Locate the boundary that follows 'start'; fall back to the
            // end of the input when there is none
            if (m.find(start+1)) {
                position = m.start();
            } else {
                position = testString.length();
            }
            // The gap between two words is a single space: nothing to check
            if (testString.substring(start, position).equals(" "))
                continue;
            if (!testString.substring(start, position-1).startsWith("word"))
                failCount++;
        }
        report("Customer word search");
    }
 495 
 496     // This is for bug 4994840
 497     private static void caretAtEndTest() throws Exception {
 498         // Problem only occurs with multiline patterns
 499         // containing a beginning-of-line caret "^" followed
 500         // by an expression that also matches the empty string.
 501         Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
 502         Matcher matcher = pattern.matcher("\r");
 503         matcher.find();
 504         matcher.find();
 505         report("Caret at end");
 506     }
 507 
 508     // This test is for 4979006
 509     // Check to see if word boundary construct properly handles unicode
 510     // non spacing marks
 511     private static void unicodeWordBoundsTest() throws Exception {
 512         String spaces = "  ";
 513         String wordChar = "a";
 514         String nsm = "\u030a";
 515 
 516         assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
 517 
 518         Pattern pattern = Pattern.compile("\\b");
 519         Matcher matcher = pattern.matcher("");
 520         // S=other B=word character N=non spacing mark .=word boundary
 521         // SS.BB.SS
 522         String input = spaces + wordChar + wordChar + spaces;
 523         twoFindIndexes(input, matcher, 2, 4);
 524         // SS.BBN.SS
 525         input = spaces + wordChar +wordChar + nsm + spaces;
 526         twoFindIndexes(input, matcher, 2, 5);
 527         // SS.BN.SS
 528         input = spaces + wordChar + nsm + spaces;
 529         twoFindIndexes(input, matcher, 2, 4);
 530         // SS.BNN.SS
 531         input = spaces + wordChar + nsm + nsm + spaces;
 532         twoFindIndexes(input, matcher, 2, 5);
 533         // SSN.BB.SS
 534         input = spaces + nsm + wordChar + wordChar + spaces;
 535         twoFindIndexes(input, matcher, 3, 5);
 536         // SS.BNB.SS
 537         input = spaces + wordChar + nsm + wordChar + spaces;
 538         twoFindIndexes(input, matcher, 2, 5);
 539         // SSNNSS
 540         input = spaces + nsm + nsm + spaces;
 541         matcher.reset(input);
 542         if (matcher.find())
 543             failCount++;
 544         // SSN.BBN.SS
 545         input = spaces + nsm + wordChar + wordChar + nsm + spaces;
 546         twoFindIndexes(input, matcher, 3, 6);
 547 
 548         report("Unicode word boundary");
 549     }
 550 
 551     private static void twoFindIndexes(String input, Matcher matcher, int a,
 552                                        int b) throws Exception
 553     {
 554         matcher.reset(input);
 555         matcher.find();
 556         if (matcher.start() != a)
 557             failCount++;
 558         matcher.find();
 559         if (matcher.start() != b)
 560             failCount++;
 561     }
 562 
 563     // This test is for 6284152
 564     static void check(String regex, String input, String[] expected) {
 565         List<String> result = new ArrayList<String>();
 566         Pattern p = Pattern.compile(regex);
 567         Matcher m = p.matcher(input);
 568         while (m.find()) {
 569             result.add(m.group());
 570         }
 571         if (!Arrays.asList(expected).equals(result))
 572             failCount++;
 573     }
 574 
 575     private static void lookbehindTest() throws Exception {
 576         //Positive
 577         check("(?<=%.{0,5})foo\\d",
 578               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 579               new String[]{"foo1", "foo2", "foo3"});
 580 
 581         //boundary at end of the lookbehind sub-regex should work consistently
 582         //with the boundary just after the lookbehind sub-regex
 583         check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
 584         check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
 585         check("(?<!abc )\\bfoo", "abc foo", new String[0]);
 586         check("(?<!abc \\b)foo", "abc foo", new String[0]);
 587 
 588         //Negative
 589         check("(?<!%.{0,5})foo\\d",
 590               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 591               new String[] {"foo4", "foo5"});
 592 
 593         //Positive greedy
 594         check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
 595 
 596         //Positive reluctant
 597         check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
 598 
 599         //supplementary
 600         check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 601               new String[] {"fo\ud800\udc00o"});
 602         check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 603               new String[] {"fo\ud800\udc00o"});
 604         check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
 605               new String[] {"fo\ud800\udc00o"});
 606         check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
 607               new String[] {"fo\ud800\udc00o"});
 608         report("Lookbehind");
 609     }
 610 
 611     // This test is for 4938995
 612     // Check to see if weak region boundaries are transparent to
 613     // lookahead and lookbehind constructs
 614     private static void boundsTest() throws Exception {
 615         String fullMessage = "catdogcat";
 616         Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
 617         Matcher matcher = pattern.matcher("catdogca");
 618         matcher.useTransparentBounds(true);
 619         if (matcher.find())
 620             failCount++;
 621         matcher.reset("atdogcat");
 622         if (matcher.find())
 623             failCount++;
 624         matcher.reset(fullMessage);
 625         if (!matcher.find())
 626             failCount++;
 627         matcher.reset(fullMessage);
 628         matcher.region(0,9);
 629         if (!matcher.find())
 630             failCount++;
 631         matcher.reset(fullMessage);
 632         matcher.region(0,6);
 633         if (!matcher.find())
 634             failCount++;
 635         matcher.reset(fullMessage);
 636         matcher.region(3,6);
 637         if (!matcher.find())
 638             failCount++;
 639         matcher.useTransparentBounds(false);
 640         if (matcher.find())
 641             failCount++;
 642 
 643         // Negative lookahead/lookbehind
 644         pattern = Pattern.compile("(?<!cat)dog(?!cat)");
 645         matcher = pattern.matcher("dogcat");
 646         matcher.useTransparentBounds(true);
 647         matcher.region(0,3);
 648         if (matcher.find())
 649             failCount++;
 650         matcher.reset("catdog");
 651         matcher.region(3,6);
 652         if (matcher.find())
 653             failCount++;
 654         matcher.useTransparentBounds(false);
 655         matcher.reset("dogcat");
 656         matcher.region(0,3);
 657         if (!matcher.find())
 658             failCount++;
 659         matcher.reset("catdog");
 660         matcher.region(3,6);
 661         if (!matcher.find())
 662             failCount++;
 663 
 664         report("Region bounds transparency");
 665     }
 666 
 667     // This test is for 4945394
 668     private static void findFromTest() throws Exception {
 669         String message = "This is 40 $0 message.";
 670         Pattern pat = Pattern.compile("\\$0");
 671         Matcher match = pat.matcher(message);
 672         if (!match.find())
 673             failCount++;
 674         if (match.find())
 675             failCount++;
 676         if (match.find())
 677             failCount++;
 678         report("Check for alternating find");
 679     }
 680 
 681     // This test is for 4872664 and 4892980
 682     private static void negatedCharClassTest() throws Exception {
 683         Pattern pattern = Pattern.compile("[^>]");
 684         Matcher matcher = pattern.matcher("\u203A");
 685         if (!matcher.matches())
 686             failCount++;
 687         pattern = Pattern.compile("[^fr]");
 688         matcher = pattern.matcher("a");
 689         if (!matcher.find())
 690             failCount++;
 691         matcher.reset("\u203A");
 692         if (!matcher.find())
 693             failCount++;
 694         String s = "for";
 695         String result[] = s.split("[^fr]");
 696         if (!result[0].equals("f"))
 697             failCount++;
 698         if (!result[1].equals("r"))
 699             failCount++;
 700         s = "f\u203Ar";
 701         result = s.split("[^fr]");
 702         if (!result[0].equals("f"))
 703             failCount++;
 704         if (!result[1].equals("r"))
 705             failCount++;
 706 
 707         // Test adding to bits, subtracting a node, then adding to bits again
 708         pattern = Pattern.compile("[^f\u203Ar]");
 709         matcher = pattern.matcher("a");
 710         if (!matcher.find())
 711             failCount++;
 712         matcher.reset("f");
 713         if (matcher.find())
 714             failCount++;
 715         matcher.reset("\u203A");
 716         if (matcher.find())
 717             failCount++;
 718         matcher.reset("r");
 719         if (matcher.find())
 720             failCount++;
 721         matcher.reset("\u203B");
 722         if (!matcher.find())
 723             failCount++;
 724 
 725         // Test subtracting a node, adding to bits, subtracting again
 726         pattern = Pattern.compile("[^\u203Ar\u203B]");
 727         matcher = pattern.matcher("a");
 728         if (!matcher.find())
 729             failCount++;
 730         matcher.reset("\u203A");
 731         if (matcher.find())
 732             failCount++;
 733         matcher.reset("r");
 734         if (matcher.find())
 735             failCount++;
 736         matcher.reset("\u203B");
 737         if (matcher.find())
 738             failCount++;
 739         matcher.reset("\u203C");
 740         if (!matcher.find())
 741             failCount++;
 742 
 743         report("Negated Character Class");
 744     }
 745 
 746     // This test is for 4628291
 747     private static void toStringTest() throws Exception {
 748         Pattern pattern = Pattern.compile("b+");
 749         if (pattern.toString() != "b+")
 750             failCount++;
 751         Matcher matcher = pattern.matcher("aaabbbccc");
 752         String matcherString = matcher.toString(); // unspecified
 753         matcher.find();
 754         matcherString = matcher.toString(); // unspecified
 755         matcher.region(0,3);
 756         matcherString = matcher.toString(); // unspecified
 757         matcher.reset();
 758         matcherString = matcher.toString(); // unspecified
 759         report("toString");
 760     }
 761 
 762     // This test is for 4808962
 763     private static void literalPatternTest() throws Exception {
 764         int flags = Pattern.LITERAL;
 765 
 766         Pattern pattern = Pattern.compile("abc\\t$^", flags);
 767         check(pattern, "abc\\t$^", true);
 768 
 769         pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
 770         check(pattern, "abc\\t$^", true);
 771 
 772         pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
 773         check(pattern, "\\Qa^$bcabc\\E", true);
 774         check(pattern, "a^$bcabc", false);
 775 
 776         pattern = Pattern.compile("\\\\Q\\\\E");
 777         check(pattern, "\\Q\\E", true);
 778 
 779         pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
 780         check(pattern, "abcefg\\Q\\Ehij", true);
 781 
 782         pattern = Pattern.compile("\\\\\\Q\\\\E");
 783         check(pattern, "\\\\\\\\", true);
 784 
 785         pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
 786         check(pattern, "\\Qa^$bcabc\\E", true);
 787         check(pattern, "a^$bcabc", false);
 788 
 789         pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
 790         check(pattern, "\\Qabc\\Edef", true);
 791         check(pattern, "abcdef", false);
 792 
 793         pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
 794         check(pattern, "abc\\Edef", true);
 795         check(pattern, "abcdef", false);
 796 
 797         pattern = Pattern.compile(Pattern.quote("\\E"));
 798         check(pattern, "\\E", true);
 799 
 800         pattern = Pattern.compile("((((abc.+?:)", flags);
 801         check(pattern, "((((abc.+?:)", true);
 802 
 803         flags |= Pattern.MULTILINE;
 804 
 805         pattern = Pattern.compile("^cat$", flags);
 806         check(pattern, "abc^cat$def", true);
 807         check(pattern, "cat", false);
 808 
 809         flags |= Pattern.CASE_INSENSITIVE;
 810 
 811         pattern = Pattern.compile("abcdef", flags);
 812         check(pattern, "ABCDEF", true);
 813         check(pattern, "AbCdEf", true);
 814 
 815         flags |= Pattern.DOTALL;
 816 
 817         pattern = Pattern.compile("a...b", flags);
 818         check(pattern, "A...b", true);
 819         check(pattern, "Axxxb", false);
 820 
 821         flags |= Pattern.CANON_EQ;
 822 
 823         Pattern p = Pattern.compile("testa\u030a", flags);
 824         check(pattern, "testa\u030a", false);
 825         check(pattern, "test\u00e5", false);
 826 
 827         // Supplementary character test
 828         flags = Pattern.LITERAL;
 829 
 830         pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
 831         check(pattern, toSupplementaries("abc\\t$^"), true);
 832 
 833         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
 834         check(pattern, toSupplementaries("abc\\t$^"), true);
 835 
 836         pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
 837         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 838         check(pattern, toSupplementaries("a^$bcabc"), false);
 839 
 840         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
 841         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 842         check(pattern, toSupplementaries("a^$bcabc"), false);
 843 
 844         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
 845         check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
 846         check(pattern, toSupplementaries("abcdef"), false);
 847 
 848         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
 849         check(pattern, toSupplementaries("abc\\Edef"), true);
 850         check(pattern, toSupplementaries("abcdef"), false);
 851 
 852         pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
 853         check(pattern, toSupplementaries("((((abc.+?:)"), true);
 854 
 855         flags |= Pattern.MULTILINE;
 856 
 857         pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
 858         check(pattern, toSupplementaries("abc^cat$def"), true);
 859         check(pattern, toSupplementaries("cat"), false);
 860 
 861         flags |= Pattern.DOTALL;
 862 
 863         // note: this is case-sensitive.
 864         pattern = Pattern.compile(toSupplementaries("a...b"), flags);
 865         check(pattern, toSupplementaries("a...b"), true);
 866         check(pattern, toSupplementaries("axxxb"), false);
 867 
 868         flags |= Pattern.CANON_EQ;
 869 
 870         String t = toSupplementaries("test");
 871         p = Pattern.compile(t + "a\u030a", flags);
 872         check(pattern, t + "a\u030a", false);
 873         check(pattern, t + "\u00e5", false);
 874 
 875         report("Literal pattern");
 876     }
 877 
 878     // This test is for 4803179
 879     // This test is also for 4808962, replacement parts
 880     private static void literalReplacementTest() throws Exception {
 881         int flags = Pattern.LITERAL;
 882 
 883         Pattern pattern = Pattern.compile("abc", flags);
 884         Matcher matcher = pattern.matcher("zzzabczzz");
 885         String replaceTest = "$0";
 886         String result = matcher.replaceAll(replaceTest);
 887         if (!result.equals("zzzabczzz"))
 888             failCount++;
 889 
 890         matcher.reset();
 891         String literalReplacement = matcher.quoteReplacement(replaceTest);
 892         result = matcher.replaceAll(literalReplacement);
 893         if (!result.equals("zzz$0zzz"))
 894             failCount++;
 895 
 896         matcher.reset();
 897         replaceTest = "\\t$\\$";
 898         literalReplacement = matcher.quoteReplacement(replaceTest);
 899         result = matcher.replaceAll(literalReplacement);
 900         if (!result.equals("zzz\\t$\\$zzz"))
 901             failCount++;
 902 
 903         // Supplementary character test
 904         pattern = Pattern.compile(toSupplementaries("abc"), flags);
 905         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
 906         replaceTest = "$0";
 907         result = matcher.replaceAll(replaceTest);
 908         if (!result.equals(toSupplementaries("zzzabczzz")))
 909             failCount++;
 910 
 911         matcher.reset();
 912         literalReplacement = matcher.quoteReplacement(replaceTest);
 913         result = matcher.replaceAll(literalReplacement);
 914         if (!result.equals(toSupplementaries("zzz$0zzz")))
 915             failCount++;
 916 
 917         matcher.reset();
 918         replaceTest = "\\t$\\$";
 919         literalReplacement = matcher.quoteReplacement(replaceTest);
 920         result = matcher.replaceAll(literalReplacement);
 921         if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
 922             failCount++;
 923 
 924         // IAE should be thrown if backslash or '$' is the last character
 925         // in replacement string
 926         try {
 927             "\uac00".replaceAll("\uac00", "$");
 928             failCount++;
 929         } catch (IllegalArgumentException iie) {
 930         } catch (Exception e) {
 931             failCount++;
 932         }
 933         try {
 934             "\uac00".replaceAll("\uac00", "\\");
 935             failCount++;
 936         } catch (IllegalArgumentException iie) {
 937         } catch (Exception e) {
 938             failCount++;
 939         }
 940         report("Literal replacement");
 941     }
 942 
 943     // This test is for 4757029
 944     private static void regionTest() throws Exception {
 945         Pattern pattern = Pattern.compile("abc");
 946         Matcher matcher = pattern.matcher("abcdefabc");
 947 
 948         matcher.region(0,9);
 949         if (!matcher.find())
 950             failCount++;
 951         if (!matcher.find())
 952             failCount++;
 953         matcher.region(0,3);
 954         if (!matcher.find())
 955            failCount++;
 956         matcher.region(3,6);
 957         if (matcher.find())
 958            failCount++;
 959         matcher.region(0,2);
 960         if (matcher.find())
 961            failCount++;
 962 
 963         expectRegionFail(matcher, 1, -1);
 964         expectRegionFail(matcher, -1, -1);
 965         expectRegionFail(matcher, -1, 1);
 966         expectRegionFail(matcher, 5, 3);
 967         expectRegionFail(matcher, 5, 12);
 968         expectRegionFail(matcher, 12, 12);
 969 
 970         pattern = Pattern.compile("^abc$");
 971         matcher = pattern.matcher("zzzabczzz");
 972         matcher.region(0,9);
 973         if (matcher.find())
 974             failCount++;
 975         matcher.region(3,6);
 976         if (!matcher.find())
 977            failCount++;
 978         matcher.region(3,6);
 979         matcher.useAnchoringBounds(false);
 980         if (matcher.find())
 981            failCount++;
 982 
 983         // Supplementary character test
 984         pattern = Pattern.compile(toSupplementaries("abc"));
 985         matcher = pattern.matcher(toSupplementaries("abcdefabc"));
 986         matcher.region(0,9*2);
 987         if (!matcher.find())
 988             failCount++;
 989         if (!matcher.find())
 990             failCount++;
 991         matcher.region(0,3*2);
 992         if (!matcher.find())
 993            failCount++;
 994         matcher.region(1,3*2);
 995         if (matcher.find())
 996            failCount++;
 997         matcher.region(3*2,6*2);
 998         if (matcher.find())
 999            failCount++;
1000         matcher.region(0,2*2);
1001         if (matcher.find())
1002            failCount++;
1003         matcher.region(0,2*2+1);
1004         if (matcher.find())
1005            failCount++;
1006 
1007         expectRegionFail(matcher, 1*2, -1);
1008         expectRegionFail(matcher, -1, -1);
1009         expectRegionFail(matcher, -1, 1*2);
1010         expectRegionFail(matcher, 5*2, 3*2);
1011         expectRegionFail(matcher, 5*2, 12*2);
1012         expectRegionFail(matcher, 12*2, 12*2);
1013 
1014         pattern = Pattern.compile(toSupplementaries("^abc$"));
1015         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
1016         matcher.region(0,9*2);
1017         if (matcher.find())
1018             failCount++;
1019         matcher.region(3*2,6*2);
1020         if (!matcher.find())
1021            failCount++;
1022         matcher.region(3*2+1,6*2);
1023         if (matcher.find())
1024            failCount++;
1025         matcher.region(3*2,6*2-1);
1026         if (matcher.find())
1027            failCount++;
1028         matcher.region(3*2,6*2);
1029         matcher.useAnchoringBounds(false);
1030         if (matcher.find())
1031            failCount++;
1032         report("Regions");
1033     }
1034 
1035     private static void expectRegionFail(Matcher matcher, int index1,
1036                                          int index2)
1037     {
1038         try {
1039             matcher.region(index1, index2);
1040             failCount++;
1041         } catch (IndexOutOfBoundsException ioobe) {
1042             // Correct result
1043         } catch (IllegalStateException ise) {
1044             // Correct result
1045         }
1046     }
1047 
1048     // This test is for 4803197
1049     private static void escapedSegmentTest() throws Exception {
1050 
1051         Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
1052         check(pattern, "dir1\\dir2", true);
1053 
1054         pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
1055         check(pattern, "dir1\\dir2\\", true);
1056 
1057         pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
1058         check(pattern, "dir1\\dir2\\", true);
1059 
1060         // Supplementary character test
1061         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
1062         check(pattern, toSupplementaries("dir1\\dir2"), true);
1063 
1064         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
1065         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1066 
1067         pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1068         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1069 
1070         report("Escaped segment");
1071     }
1072 
1073     // This test is for 4792284
1074     private static void nonCaptureRepetitionTest() throws Exception {
1075         String input = "abcdefgh;";
1076 
1077         String[] patterns = new String[] {
1078             "(?:\\w{4})+;",
1079             "(?:\\w{8})*;",
1080             "(?:\\w{2}){2,4};",
1081             "(?:\\w{4}){2,};",   // only matches the
1082             ".*?(?:\\w{5})+;",   //     specified minimum
1083             ".*?(?:\\w{9})*;",   //     number of reps - OK
1084             "(?:\\w{4})+?;",     // lazy repetition - OK
1085             "(?:\\w{4})++;",     // possessive repetition - OK
1086             "(?:\\w{2,}?)+;",    // non-deterministic - OK
1087             "(\\w{4})+;",        // capturing group - OK
1088         };
1089 
1090         for (int i = 0; i < patterns.length; i++) {
1091             // Check find()
1092             check(patterns[i], 0, input, input, true);
1093             // Check matches()
1094             Pattern p = Pattern.compile(patterns[i]);
1095             Matcher m = p.matcher(input);
1096 
1097             if (m.matches()) {
1098                 if (!m.group(0).equals(input))
1099                     failCount++;
1100             } else {
1101                 failCount++;
1102             }
1103         }
1104 
1105         report("Non capturing repetition");
1106     }
1107 
1108     // This test is for 6358731
1109     private static void notCapturedGroupCurlyMatchTest() throws Exception {
1110         Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1111         Matcher matcher = pattern.matcher("abcd");
1112         if (!matcher.matches() ||
1113              matcher.group(1) != null ||
1114              !matcher.group(2).equals("abcd")) {
1115             failCount++;
1116         }
1117         report("Not captured GroupCurly");
1118     }
1119 
1120     // This test is for 4706545
1121     private static void javaCharClassTest() throws Exception {
1122         for (int i=0; i<1000; i++) {
1123             char c = (char)generator.nextInt();
1124             check("{javaLowerCase}", c, Character.isLowerCase(c));
1125             check("{javaUpperCase}", c, Character.isUpperCase(c));
1126             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1127             check("{javaTitleCase}", c, Character.isTitleCase(c));
1128             check("{javaDigit}", c, Character.isDigit(c));
1129             check("{javaDefined}", c, Character.isDefined(c));
1130             check("{javaLetter}", c, Character.isLetter(c));
1131             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1132             check("{javaJavaIdentifierStart}", c,
1133                   Character.isJavaIdentifierStart(c));
1134             check("{javaJavaIdentifierPart}", c,
1135                   Character.isJavaIdentifierPart(c));
1136             check("{javaUnicodeIdentifierStart}", c,
1137                   Character.isUnicodeIdentifierStart(c));
1138             check("{javaUnicodeIdentifierPart}", c,
1139                   Character.isUnicodeIdentifierPart(c));
1140             check("{javaIdentifierIgnorable}", c,
1141                   Character.isIdentifierIgnorable(c));
1142             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1143             check("{javaWhitespace}", c, Character.isWhitespace(c));
1144             check("{javaISOControl}", c, Character.isISOControl(c));
1145             check("{javaMirrored}", c, Character.isMirrored(c));
1146 
1147         }
1148 
1149         // Supplementary character test
1150         for (int i=0; i<1000; i++) {
1151             int c = generator.nextInt(Character.MAX_CODE_POINT
1152                                       - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1153                         + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1154             check("{javaLowerCase}", c, Character.isLowerCase(c));
1155             check("{javaUpperCase}", c, Character.isUpperCase(c));
1156             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1157             check("{javaTitleCase}", c, Character.isTitleCase(c));
1158             check("{javaDigit}", c, Character.isDigit(c));
1159             check("{javaDefined}", c, Character.isDefined(c));
1160             check("{javaLetter}", c, Character.isLetter(c));
1161             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1162             check("{javaJavaIdentifierStart}", c,
1163                   Character.isJavaIdentifierStart(c));
1164             check("{javaJavaIdentifierPart}", c,
1165                   Character.isJavaIdentifierPart(c));
1166             check("{javaUnicodeIdentifierStart}", c,
1167                   Character.isUnicodeIdentifierStart(c));
1168             check("{javaUnicodeIdentifierPart}", c,
1169                   Character.isUnicodeIdentifierPart(c));
1170             check("{javaIdentifierIgnorable}", c,
1171                   Character.isIdentifierIgnorable(c));
1172             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1173             check("{javaWhitespace}", c, Character.isWhitespace(c));
1174             check("{javaISOControl}", c, Character.isISOControl(c));
1175             check("{javaMirrored}", c, Character.isMirrored(c));
1176         }
1177 
1178         report("Java character classes");
1179     }
1180 
1181     // This test is for 4523620
1182     /*
1183     private static void numOccurrencesTest() throws Exception {
1184         Pattern pattern = Pattern.compile("aaa");
1185 
1186         if (pattern.numOccurrences("aaaaaa", false) != 2)
1187             failCount++;
1188         if (pattern.numOccurrences("aaaaaa", true) != 4)
1189             failCount++;
1190 
1191         pattern = Pattern.compile("^");
1192         if (pattern.numOccurrences("aaaaaa", false) != 1)
1193             failCount++;
1194         if (pattern.numOccurrences("aaaaaa", true) != 1)
1195             failCount++;
1196 
1197         report("Number of Occurrences");
1198     }
1199     */
1200 
1201     // This test is for 4776374
1202     private static void caretBetweenTerminatorsTest() throws Exception {
1203         int flags1 = Pattern.DOTALL;
1204         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1205         int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1206         int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1207 
1208         check("^....", flags1, "test\ntest", "test", true);
1209         check(".....^", flags1, "test\ntest", "test", false);
1210         check(".....^", flags1, "test\n", "test", false);
1211         check("....^", flags1, "test\r\n", "test", false);
1212 
1213         check("^....", flags2, "test\ntest", "test", true);
1214         check("....^", flags2, "test\ntest", "test", false);
1215         check(".....^", flags2, "test\n", "test", false);
1216         check("....^", flags2, "test\r\n", "test", false);
1217 
1218         check("^....", flags3, "test\ntest", "test", true);
1219         check(".....^", flags3, "test\ntest", "test\n", true);
1220         check(".....^", flags3, "test\u0085test", "test\u0085", false);
1221         check(".....^", flags3, "test\n", "test", false);
1222         check(".....^", flags3, "test\r\n", "test", false);
1223         check("......^", flags3, "test\r\ntest", "test\r\n", true);
1224 
1225         check("^....", flags4, "test\ntest", "test", true);
1226         check(".....^", flags3, "test\ntest", "test\n", true);
1227         check(".....^", flags4, "test\u0085test", "test\u0085", true);
1228         check(".....^", flags4, "test\n", "test\n", false);
1229         check(".....^", flags4, "test\r\n", "test\r", false);
1230 
1231         // Supplementary character test
1232         String t = toSupplementaries("test");
1233         check("^....", flags1, t+"\n"+t, t, true);
1234         check(".....^", flags1, t+"\n"+t, t, false);
1235         check(".....^", flags1, t+"\n", t, false);
1236         check("....^", flags1, t+"\r\n", t, false);
1237 
1238         check("^....", flags2, t+"\n"+t, t, true);
1239         check("....^", flags2, t+"\n"+t, t, false);
1240         check(".....^", flags2, t+"\n", t, false);
1241         check("....^", flags2, t+"\r\n", t, false);
1242 
1243         check("^....", flags3, t+"\n"+t, t, true);
1244         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1245         check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1246         check(".....^", flags3, t+"\n", t, false);
1247         check(".....^", flags3, t+"\r\n", t, false);
1248         check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1249 
1250         check("^....", flags4, t+"\n"+t, t, true);
1251         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1252         check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1253         check(".....^", flags4, t+"\n", t+"\n", false);
1254         check(".....^", flags4, t+"\r\n", t+"\r", false);
1255 
1256         report("Caret between terminators");
1257     }
1258 
1259     // This test is for 4727935
1260     private static void dollarAtEndTest() throws Exception {
1261         int flags1 = Pattern.DOTALL;
1262         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1263         int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1264 
1265         check("....$", flags1, "test\n", "test", true);
1266         check("....$", flags1, "test\r\n", "test", true);
1267         check(".....$", flags1, "test\n", "test\n", true);
1268         check(".....$", flags1, "test\u0085", "test\u0085", true);
1269         check("....$", flags1, "test\u0085", "test", true);
1270 
1271         check("....$", flags2, "test\n", "test", true);
1272         check(".....$", flags2, "test\n", "test\n", true);
1273         check(".....$", flags2, "test\u0085", "test\u0085", true);
1274         check("....$", flags2, "test\u0085", "est\u0085", true);
1275 
1276         check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1277         check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1278         check("....$blah", flags3, "test\nblah", "!!!!", false);
1279         check(".....$blah", flags3, "test\nblah", "!!!!", false);
1280 
1281         // Supplementary character test
1282         String t = toSupplementaries("test");
1283         String b = toSupplementaries("blah");
1284         check("....$", flags1, t+"\n", t, true);
1285         check("....$", flags1, t+"\r\n", t, true);
1286         check(".....$", flags1, t+"\n", t+"\n", true);
1287         check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1288         check("....$", flags1, t+"\u0085", t, true);
1289 
1290         check("....$", flags2, t+"\n", t, true);
1291         check(".....$", flags2, t+"\n", t+"\n", true);
1292         check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1293         check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1294 
1295         check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1296         check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1297         check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1298         check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1299 
1300         report("Dollar at End");
1301     }
1302 
1303     // This test is for 4711773
1304     private static void multilineDollarTest() throws Exception {
1305         Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1306         Matcher matcher = findCR.matcher("first bit\nsecond bit");
1307         matcher.find();
1308         if (matcher.start(0) != 9)
1309             failCount++;
1310         matcher.find();
1311         if (matcher.start(0) != 20)
1312             failCount++;
1313 
1314         // Supplementary character test
1315         matcher = findCR.matcher(toSupplementaries("first  bit\n second  bit")); // double BMP chars
1316         matcher.find();
1317         if (matcher.start(0) != 9*2)
1318             failCount++;
1319         matcher.find();
1320         if (matcher.start(0) != 20*2)
1321             failCount++;
1322 
1323         report("Multiline Dollar");
1324     }
1325 
1326     private static void reluctantRepetitionTest() throws Exception {
1327         Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1328         check(p, "1 word word word 2", true);
1329         check(p, "1 wor wo w 2", true);
1330         check(p, "1 word word 2", true);
1331         check(p, "1 word 2", true);
1332         check(p, "1 wo w w 2", true);
1333         check(p, "1 wo w 2", true);
1334         check(p, "1 wor w 2", true);
1335 
1336         p = Pattern.compile("([a-z])+?c");
1337         Matcher m = p.matcher("ababcdefdec");
1338         check(m, "ababc");
1339 
1340         // Supplementary character test
1341         p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1342         m = p.matcher(toSupplementaries("ababcdefdec"));
1343         check(m, toSupplementaries("ababc"));
1344 
1345         report("Reluctant Repetition");
1346     }
1347 
1348     private static void serializeTest() throws Exception {
1349         String patternStr = "(b)";
1350         String matchStr = "b";
1351         Pattern pattern = Pattern.compile(patternStr);
1352         ByteArrayOutputStream baos = new ByteArrayOutputStream();
1353         ObjectOutputStream oos = new ObjectOutputStream(baos);
1354         oos.writeObject(pattern);
1355         oos.close();
1356         ObjectInputStream ois = new ObjectInputStream(
1357             new ByteArrayInputStream(baos.toByteArray()));
1358         Pattern serializedPattern = (Pattern)ois.readObject();
1359         ois.close();
1360         Matcher matcher = serializedPattern.matcher(matchStr);
1361         if (!matcher.matches())
1362             failCount++;
1363         if (matcher.groupCount() != 1)
1364             failCount++;
1365 
1366         report("Serialization");
1367     }
1368 
1369     private static void gTest() {
1370         Pattern pattern = Pattern.compile("\\G\\w");
1371         Matcher matcher = pattern.matcher("abc#x#x");
1372         matcher.find();
1373         matcher.find();
1374         matcher.find();
1375         if (matcher.find())
1376             failCount++;
1377 
1378         pattern = Pattern.compile("\\GA*");
1379         matcher = pattern.matcher("1A2AA3");
1380         matcher.find();
1381         if (matcher.find())
1382             failCount++;
1383 
1384         pattern = Pattern.compile("\\GA*");
1385         matcher = pattern.matcher("1A2AA3");
1386         if (!matcher.find(1))
1387             failCount++;
1388         matcher.find();
1389         if (matcher.find())
1390             failCount++;
1391 
1392         report("\\G");
1393     }
1394 
1395     private static void zTest() {
1396         Pattern pattern = Pattern.compile("foo\\Z");
1397         // Positives
1398         check(pattern, "foo\u0085", true);
1399         check(pattern, "foo\u2028", true);
1400         check(pattern, "foo\u2029", true);
1401         check(pattern, "foo\n", true);
1402         check(pattern, "foo\r", true);
1403         check(pattern, "foo\r\n", true);
1404         // Negatives
1405         check(pattern, "fooo", false);
1406         check(pattern, "foo\n\r", false);
1407 
1408         pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1409         // Positives
1410         check(pattern, "foo", true);
1411         check(pattern, "foo\n", true);
1412         // Negatives
1413         check(pattern, "foo\r", false);
1414         check(pattern, "foo\u0085", false);
1415         check(pattern, "foo\u2028", false);
1416         check(pattern, "foo\u2029", false);
1417 
1418         report("\\Z");
1419     }
1420 
1421     private static void replaceFirstTest() {
1422         Pattern pattern = Pattern.compile("(ab)(c*)");
1423         Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1424         if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1425             failCount++;
1426 
1427         matcher.reset("zzzabccczzzabcczzzabccczzz");
1428         if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1429             failCount++;
1430 
1431         matcher.reset("zzzabccczzzabcczzzabccczzz");
1432         String result = matcher.replaceFirst("$1");
1433         if (!result.equals("zzzabzzzabcczzzabccczzz"))
1434             failCount++;
1435 
1436         matcher.reset("zzzabccczzzabcczzzabccczzz");
1437         result = matcher.replaceFirst("$2");
1438         if (!result.equals("zzzccczzzabcczzzabccczzz"))
1439             failCount++;
1440 
1441         pattern = Pattern.compile("a*");
1442         matcher = pattern.matcher("aaaaaaaaaa");
1443         if (!matcher.replaceFirst("test").equals("test"))
1444             failCount++;
1445 
1446         pattern = Pattern.compile("a+");
1447         matcher = pattern.matcher("zzzaaaaaaaaaa");
1448         if (!matcher.replaceFirst("test").equals("zzztest"))
1449             failCount++;
1450 
1451         // Supplementary character test
1452         pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1453         matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1454         if (!matcher.replaceFirst(toSupplementaries("test"))
1455                 .equals(toSupplementaries("testzzzabcczzzabccc")))
1456             failCount++;
1457 
1458         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1459         if (!matcher.replaceFirst(toSupplementaries("test")).
1460             equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1461             failCount++;
1462 
1463         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1464         result = matcher.replaceFirst("$1");
1465         if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1466             failCount++;
1467 
1468         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1469         result = matcher.replaceFirst("$2");
1470         if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1471             failCount++;
1472 
1473         pattern = Pattern.compile(toSupplementaries("a*"));
1474         matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1475         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1476             failCount++;
1477 
1478         pattern = Pattern.compile(toSupplementaries("a+"));
1479         matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1480         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1481             failCount++;
1482 
1483         report("Replace First");
1484     }
1485 
1486     private static void unixLinesTest() {
1487         Pattern pattern = Pattern.compile(".*");
1488         Matcher matcher = pattern.matcher("aa\u2028blah");
1489         matcher.find();
1490         if (!matcher.group(0).equals("aa"))
1491             failCount++;
1492 
1493         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1494         matcher = pattern.matcher("aa\u2028blah");
1495         matcher.find();
1496         if (!matcher.group(0).equals("aa\u2028blah"))
1497             failCount++;
1498 
1499         pattern = Pattern.compile("[az]$",
1500                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1501         matcher = pattern.matcher("aa\u2028zz");
1502         check(matcher, "a\u2028", false);
1503 
1504         // Supplementary character test
1505         pattern = Pattern.compile(".*");
1506         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1507         matcher.find();
1508         if (!matcher.group(0).equals(toSupplementaries("aa")))
1509             failCount++;
1510 
1511         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1512         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1513         matcher.find();
1514         if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1515             failCount++;
1516 
1517         pattern = Pattern.compile(toSupplementaries("[az]$"),
1518                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1519         matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1520         check(matcher, toSupplementaries("a\u2028"), false);
1521 
1522         report("Unix Lines");
1523     }
1524 
1525     private static void commentsTest() {
1526         int flags = Pattern.COMMENTS;
1527 
1528         Pattern pattern = Pattern.compile("aa \\# aa", flags);
1529         Matcher matcher = pattern.matcher("aa#aa");
1530         if (!matcher.matches())
1531             failCount++;
1532 
1533         pattern = Pattern.compile("aa  # blah", flags);
1534         matcher = pattern.matcher("aa");
1535         if (!matcher.matches())
1536             failCount++;
1537 
1538         pattern = Pattern.compile("aa blah", flags);
1539         matcher = pattern.matcher("aablah");
1540         if (!matcher.matches())
1541              failCount++;
1542 
1543         pattern = Pattern.compile("aa  # blah blech  ", flags);
1544         matcher = pattern.matcher("aa");
1545         if (!matcher.matches())
1546             failCount++;
1547 
1548         pattern = Pattern.compile("aa  # blah\n  ", flags);
1549         matcher = pattern.matcher("aa");
1550         if (!matcher.matches())
1551             failCount++;
1552 
1553         pattern = Pattern.compile("aa  # blah\nbc # blech", flags);
1554         matcher = pattern.matcher("aabc");
1555         if (!matcher.matches())
1556              failCount++;
1557 
1558         pattern = Pattern.compile("aa  # blah\nbc# blech", flags);
1559         matcher = pattern.matcher("aabc");
1560         if (!matcher.matches())
1561              failCount++;
1562 
1563         pattern = Pattern.compile("aa  # blah\nbc\\# blech", flags);
1564         matcher = pattern.matcher("aabc#blech");
1565         if (!matcher.matches())
1566              failCount++;
1567 
1568         // Supplementary character test
1569         pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1570         matcher = pattern.matcher(toSupplementaries("aa#aa"));
1571         if (!matcher.matches())
1572             failCount++;
1573 
1574         pattern = Pattern.compile(toSupplementaries("aa  # blah"), flags);
1575         matcher = pattern.matcher(toSupplementaries("aa"));
1576         if (!matcher.matches())
1577             failCount++;
1578 
1579         pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1580         matcher = pattern.matcher(toSupplementaries("aablah"));
1581         if (!matcher.matches())
1582              failCount++;
1583 
1584         pattern = Pattern.compile(toSupplementaries("aa  # blah blech  "), flags);
1585         matcher = pattern.matcher(toSupplementaries("aa"));
1586         if (!matcher.matches())
1587             failCount++;
1588 
1589         pattern = Pattern.compile(toSupplementaries("aa  # blah\n  "), flags);
1590         matcher = pattern.matcher(toSupplementaries("aa"));
1591         if (!matcher.matches())
1592             failCount++;
1593 
1594         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc # blech"), flags);
1595         matcher = pattern.matcher(toSupplementaries("aabc"));
1596         if (!matcher.matches())
1597              failCount++;
1598 
1599         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc# blech"), flags);
1600         matcher = pattern.matcher(toSupplementaries("aabc"));
1601         if (!matcher.matches())
1602              failCount++;
1603 
1604         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc\\# blech"), flags);
1605         matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1606         if (!matcher.matches())
1607              failCount++;
1608 
1609         report("Comments");
1610     }
1611 
1612     private static void caseFoldingTest() { // bug 4504687
1613         int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1614         Pattern pattern = Pattern.compile("aa", flags);
1615         Matcher matcher = pattern.matcher("ab");
1616         if (matcher.matches())
1617             failCount++;
1618 
1619         pattern = Pattern.compile("aA", flags);
1620         matcher = pattern.matcher("ab");
1621         if (matcher.matches())
1622             failCount++;
1623 
1624         pattern = Pattern.compile("aa", flags);
1625         matcher = pattern.matcher("aB");
1626         if (matcher.matches())
1627             failCount++;
1628         matcher = pattern.matcher("Ab");
1629         if (matcher.matches())
1630             failCount++;
1631 
1632         // ASCII               "a"
1633         // Latin-1 Supplement  "a" + grave
1634         // Cyrillic            "a"
1635         String[] patterns = new String[] {
1636             //single
1637             "a", "\u00e0", "\u0430",
1638             //slice
1639             "ab", "\u00e0\u00e1", "\u0430\u0431",
1640             //class single
1641             "[a]", "[\u00e0]", "[\u0430]",
1642             //class range
1643             "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1644             //back reference
1645             "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1646         };
1647 
1648         String[] texts = new String[] {
1649             "A", "\u00c0", "\u0410",
1650             "AB", "\u00c0\u00c1", "\u0410\u0411",
1651             "A", "\u00c0", "\u0410",
1652             "B", "\u00c2", "\u0411",
1653             "aA", "\u00e0\u00c0", "\u0430\u0410"
1654         };
1655 
1656         boolean[] expected = new boolean[] {
1657             true, false, false,
1658             true, false, false,
1659             true, false, false,
1660             true, false, false,
1661             true, false, false
1662         };
1663 
1664         flags = Pattern.CASE_INSENSITIVE;
1665         for (int i = 0; i < patterns.length; i++) {
1666             pattern = Pattern.compile(patterns[i], flags);
1667             matcher = pattern.matcher(texts[i]);
1668             if (matcher.matches() != expected[i]) {
1669                 System.out.println("<1> Failed at " + i);
1670                 failCount++;
1671             }
1672         }
1673 
1674         flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1675         for (int i = 0; i < patterns.length; i++) {
1676             pattern = Pattern.compile(patterns[i], flags);
1677             matcher = pattern.matcher(texts[i]);
1678             if (!matcher.matches()) {
1679                 System.out.println("<2> Failed at " + i);
1680                 failCount++;
1681             }
1682         }
1683         // flag unicode_case alone should do nothing
1684         flags = Pattern.UNICODE_CASE;
1685         for (int i = 0; i < patterns.length; i++) {
1686             pattern = Pattern.compile(patterns[i], flags);
1687             matcher = pattern.matcher(texts[i]);
1688             if (matcher.matches()) {
1689                 System.out.println("<3> Failed at " + i);
1690                 failCount++;
1691             }
1692         }
1693 
1694         // Special cases: i, I, u+0131 and u+0130
1695         flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1696         pattern = Pattern.compile("[h-j]+", flags);
1697         if (!pattern.matcher("\u0131\u0130").matches())
1698             failCount++;
1699         report("Case Folding");
1700     }
1701 
1702     private static void appendTest() {
1703         Pattern pattern = Pattern.compile("(ab)(cd)");
1704         Matcher matcher = pattern.matcher("abcd");
1705         String result = matcher.replaceAll("$2$1");
1706         if (!result.equals("cdab"))
1707             failCount++;
1708 
1709         String  s1 = "Swap all: first = 123, second = 456";
1710         String  s2 = "Swap one: first = 123, second = 456";
1711         String  r  = "$3$2$1";
1712         pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1713         matcher = pattern.matcher(s1);
1714 
1715         result = matcher.replaceAll(r);
1716         if (!result.equals("Swap all: 123 = first, 456 = second"))
1717             failCount++;
1718 
1719         matcher = pattern.matcher(s2);
1720 
1721         if (matcher.find()) {
1722             StringBuffer sb = new StringBuffer();
1723             matcher.appendReplacement(sb, r);
1724             matcher.appendTail(sb);
1725             result = sb.toString();
1726             if (!result.equals("Swap one: 123 = first, second = 456"))
1727                 failCount++;
1728         }
1729 
1730         // Supplementary character test
1731         pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1732         matcher = pattern.matcher(toSupplementaries("abcd"));
1733         result = matcher.replaceAll("$2$1");
1734         if (!result.equals(toSupplementaries("cdab")))
1735             failCount++;
1736 
1737         s1 = toSupplementaries("Swap all: first = 123, second = 456");
1738         s2 = toSupplementaries("Swap one: first = 123, second = 456");
1739         r  = toSupplementaries("$3$2$1");
1740         pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1741         matcher = pattern.matcher(s1);
1742 
1743         result = matcher.replaceAll(r);
1744         if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1745             failCount++;
1746 
1747         matcher = pattern.matcher(s2);
1748 
1749         if (matcher.find()) {
1750             StringBuffer sb = new StringBuffer();
1751             matcher.appendReplacement(sb, r);
1752             matcher.appendTail(sb);
1753             result = sb.toString();
1754             if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1755                 failCount++;
1756         }
1757         report("Append");
1758     }
1759 
1760     private static void splitTest() {
1761         Pattern pattern = Pattern.compile(":");
1762         String[] result = pattern.split("foo:and:boo", 2);
1763         if (!result[0].equals("foo"))
1764             failCount++;
1765         if (!result[1].equals("and:boo"))
1766             failCount++;
1767         // Supplementary character test
1768         Pattern patternX = Pattern.compile(toSupplementaries("X"));
1769         result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1770         if (!result[0].equals(toSupplementaries("foo")))
1771             failCount++;
1772         if (!result[1].equals(toSupplementaries("andXboo")))
1773             failCount++;
1774 
1775         CharBuffer cb = CharBuffer.allocate(100);
1776         cb.put("foo:and:boo");
1777         cb.flip();
1778         result = pattern.split(cb);
1779         if (!result[0].equals("foo"))
1780             failCount++;
1781         if (!result[1].equals("and"))
1782             failCount++;
1783         if (!result[2].equals("boo"))
1784             failCount++;
1785 
1786         // Supplementary character test
1787         CharBuffer cbs = CharBuffer.allocate(100);
1788         cbs.put(toSupplementaries("fooXandXboo"));
1789         cbs.flip();
1790         result = patternX.split(cbs);
1791         if (!result[0].equals(toSupplementaries("foo")))
1792             failCount++;
1793         if (!result[1].equals(toSupplementaries("and")))
1794             failCount++;
1795         if (!result[2].equals(toSupplementaries("boo")))
1796             failCount++;
1797 
1798         String source = "0123456789";
1799         for (int limit=-2; limit<3; limit++) {
1800             for (int x=0; x<10; x++) {
1801                 result = source.split(Integer.toString(x), limit);
1802                 int expectedLength = limit < 1 ? 2 : limit;
1803 
1804                 if ((limit == 0) && (x == 9)) {
1805                     // expected dropping of ""
1806                     if (result.length != 1)
1807                         failCount++;
1808                     if (!result[0].equals("012345678")) {
1809                         failCount++;
1810                     }
1811                 } else {
1812                     if (result.length != expectedLength) {
1813                         failCount++;
1814                     }
1815                     if (!result[0].equals(source.substring(0,x))) {
1816                         if (limit != 1) {
1817                             failCount++;
1818                         } else {
1819                             if (!result[0].equals(source.substring(0,10))) {
1820                                 failCount++;
1821                             }
1822                         }
1823                     }
1824                     if (expectedLength > 1) { // Check segment 2
1825                         if (!result[1].equals(source.substring(x+1,10)))
1826                             failCount++;
1827                     }
1828                 }
1829             }
1830         }
1831         // Check the case for no match found
1832         for (int limit=-2; limit<3; limit++) {
1833             result = source.split("e", limit);
1834             if (result.length != 1)
1835                 failCount++;
1836             if (!result[0].equals(source))
1837                 failCount++;
1838         }
1839         // Check the case for limit == 0, source = "";
1840         // split() now returns 0-length for empty source "" see #6559590
1841         source = "";
1842         result = source.split("e", 0);
1843         if (result.length != 1)
1844             failCount++;
1845         if (!result[0].equals(source))
1846             failCount++;
1847 
1848         // Check both split() and splitAsStraem(), especially for zero-lenth
1849         // input and zero-lenth match cases
1850         String[][] input = new String[][] {
1851             { " ",           "Abc Efg Hij" },   // normal non-zero-match
1852             { " ",           " Abc Efg Hij" },  // leading empty str for non-zero-match
1853             { " ",           "Abc  Efg Hij" },  // non-zero-match in the middle
1854             { "(?=\\p{Lu})", "AbcEfgHij" },     // no leading empty str for zero-match
1855             { "(?=\\p{Lu})", "AbcEfg" },
1856             { "(?=\\p{Lu})", "Abc" },
1857             { " ",           "" },              // zero-length input
1858             { ".*",          "" },
1859 
1860             // some tests from PatternStreamTest.java
1861             { "4",       "awgqwefg1fefw4vssv1vvv1" },
1862             { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" },
1863             { "1",       "awgqwefg1fefw4vssv1vvv1" },
1864             { "1",       "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" },
1865             { "\u56da",  "1\u56da23\u56da456\u56da7890" },
1866             { "\u56da",  "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" },
1867             { "\u56da",  "" },
1868             { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs
1869             { "o",       "boo:and:foo" },
1870             { "o",       "booooo:and:fooooo" },
1871             { "o",       "fooooo:" },
1872         };
1873 
1874         String[][] expected = new String[][] {
1875             { "Abc", "Efg", "Hij" },
1876             { "", "Abc", "Efg", "Hij" },
1877             { "Abc", "", "Efg", "Hij" },
1878             { "Abc", "Efg", "Hij" },
1879             { "Abc", "Efg" },
1880             { "Abc" },
1881             { "" },
1882             { "" },
1883 
1884             { "awgqwefg1fefw", "vssv1vvv1" },
1885             { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" },
1886             { "awgqwefg", "fefw4vssv", "vvv" },
1887             { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" },
1888             { "1", "23", "456", "7890" },
1889             { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" },
1890             { "" },
1891             { "This", "is", "testing", "", "with", "different", "separators" },
1892             { "b", "", ":and:f" },
1893             { "b", "", "", "", "", ":and:f" },
1894             { "f", "", "", "", "", ":" },
1895         };
1896         for (int i = 0; i < input.length; i++) {
1897             pattern = Pattern.compile(input[i][0]);
1898             if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) {
1899                 failCount++;
1900             }
1901             if (input[i][1].length() > 0 &&  // splitAsStream() return empty resulting
1902                                              // array for zero-length input for now
1903                 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(),
1904                                expected[i])) {
1905                 failCount++;
1906             }
1907         }
1908         report("Split");
1909     }
1910 
1911     private static void negationTest() {
1912         Pattern pattern = Pattern.compile("[\\[@^]+");
1913         Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1914         if (!matcher.find())
1915             failCount++;
1916         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1917             failCount++;
1918         pattern = Pattern.compile("[@\\[^]+");
1919         matcher = pattern.matcher("@@@@[[[[^^^^");
1920         if (!matcher.find())
1921             failCount++;
1922         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1923             failCount++;
1924         pattern = Pattern.compile("[@\\[^@]+");
1925         matcher = pattern.matcher("@@@@[[[[^^^^");
1926         if (!matcher.find())
1927             failCount++;
1928         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1929             failCount++;
1930 
1931         pattern = Pattern.compile("\\)");
1932         matcher = pattern.matcher("xxx)xxx");
1933         if (!matcher.find())
1934             failCount++;
1935 
1936         report("Negation");
1937     }
1938 
1939     private static void ampersandTest() {
1940         Pattern pattern = Pattern.compile("[&@]+");
1941         check(pattern, "@@@@&&&&", true);
1942 
1943         pattern = Pattern.compile("[@&]+");
1944         check(pattern, "@@@@&&&&", true);
1945 
1946         pattern = Pattern.compile("[@\\&]+");
1947         check(pattern, "@@@@&&&&", true);
1948 
1949         report("Ampersand");
1950     }
1951 
1952     private static void octalTest() throws Exception {
1953         Pattern pattern = Pattern.compile("\\u0007");
1954         Matcher matcher = pattern.matcher("\u0007");
1955         if (!matcher.matches())
1956             failCount++;
1957         pattern = Pattern.compile("\\07");
1958         matcher = pattern.matcher("\u0007");
1959         if (!matcher.matches())
1960             failCount++;
1961         pattern = Pattern.compile("\\007");
1962         matcher = pattern.matcher("\u0007");
1963         if (!matcher.matches())
1964             failCount++;
1965         pattern = Pattern.compile("\\0007");
1966         matcher = pattern.matcher("\u0007");
1967         if (!matcher.matches())
1968             failCount++;
1969         pattern = Pattern.compile("\\040");
1970         matcher = pattern.matcher("\u0020");
1971         if (!matcher.matches())
1972             failCount++;
1973         pattern = Pattern.compile("\\0403");
1974         matcher = pattern.matcher("\u00203");
1975         if (!matcher.matches())
1976             failCount++;
1977         pattern = Pattern.compile("\\0103");
1978         matcher = pattern.matcher("\u0043");
1979         if (!matcher.matches())
1980             failCount++;
1981 
1982         report("Octal");
1983     }
1984 
1985     private static void longPatternTest() throws Exception {
1986         try {
1987             Pattern pattern = Pattern.compile(
1988                 "a 32-character-long pattern xxxx");
1989             pattern = Pattern.compile("a 33-character-long pattern xxxxx");
1990             pattern = Pattern.compile("a thirty four character long regex");
1991             StringBuffer patternToBe = new StringBuffer(101);
1992             for (int i=0; i<100; i++)
1993                 patternToBe.append((char)(97 + i%26));
1994             pattern = Pattern.compile(patternToBe.toString());
1995         } catch (PatternSyntaxException e) {
1996             failCount++;
1997         }
1998 
1999         // Supplementary character test
2000         try {
2001             Pattern pattern = Pattern.compile(
2002                 toSupplementaries("a 32-character-long pattern xxxx"));
2003             pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
2004             pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
2005             StringBuffer patternToBe = new StringBuffer(101*2);
2006             for (int i=0; i<100; i++)
2007                 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
2008                                                      + 97 + i%26));
2009             pattern = Pattern.compile(patternToBe.toString());
2010         } catch (PatternSyntaxException e) {
2011             failCount++;
2012         }
2013         report("LongPattern");
2014     }
2015 
2016     private static void group0Test() throws Exception {
2017         Pattern pattern = Pattern.compile("(tes)ting");
2018         Matcher matcher = pattern.matcher("testing");
2019         check(matcher, "testing");
2020 
2021         matcher.reset("testing");
2022         if (matcher.lookingAt()) {
2023             if (!matcher.group(0).equals("testing"))
2024                 failCount++;
2025         } else {
2026             failCount++;
2027         }
2028 
2029         matcher.reset("testing");
2030         if (matcher.matches()) {
2031             if (!matcher.group(0).equals("testing"))
2032                 failCount++;
2033         } else {
2034             failCount++;
2035         }
2036 
2037         pattern = Pattern.compile("(tes)ting");
2038         matcher = pattern.matcher("testing");
2039         if (matcher.lookingAt()) {
2040             if (!matcher.group(0).equals("testing"))
2041                 failCount++;
2042         } else {
2043             failCount++;
2044         }
2045 
2046         pattern = Pattern.compile("^(tes)ting");
2047         matcher = pattern.matcher("testing");
2048         if (matcher.matches()) {
2049             if (!matcher.group(0).equals("testing"))
2050                 failCount++;
2051         } else {
2052             failCount++;
2053         }
2054 
2055         // Supplementary character test
2056         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2057         matcher = pattern.matcher(toSupplementaries("testing"));
2058         check(matcher, toSupplementaries("testing"));
2059 
2060         matcher.reset(toSupplementaries("testing"));
2061         if (matcher.lookingAt()) {
2062             if (!matcher.group(0).equals(toSupplementaries("testing")))
2063                 failCount++;
2064         } else {
2065             failCount++;
2066         }
2067 
2068         matcher.reset(toSupplementaries("testing"));
2069         if (matcher.matches()) {
2070             if (!matcher.group(0).equals(toSupplementaries("testing")))
2071                 failCount++;
2072         } else {
2073             failCount++;
2074         }
2075 
2076         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2077         matcher = pattern.matcher(toSupplementaries("testing"));
2078         if (matcher.lookingAt()) {
2079             if (!matcher.group(0).equals(toSupplementaries("testing")))
2080                 failCount++;
2081         } else {
2082             failCount++;
2083         }
2084 
2085         pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
2086         matcher = pattern.matcher(toSupplementaries("testing"));
2087         if (matcher.matches()) {
2088             if (!matcher.group(0).equals(toSupplementaries("testing")))
2089                 failCount++;
2090         } else {
2091             failCount++;
2092         }
2093 
2094         report("Group0");
2095     }
2096 
2097     private static void findIntTest() throws Exception {
2098         Pattern p = Pattern.compile("blah");
2099         Matcher m = p.matcher("zzzzblahzzzzzblah");
2100         boolean result = m.find(2);
2101         if (!result)
2102             failCount++;
2103 
2104         p = Pattern.compile("$");
2105         m = p.matcher("1234567890");
2106         result = m.find(10);
2107         if (!result)
2108             failCount++;
2109         try {
2110             result = m.find(11);
2111             failCount++;
2112         } catch (IndexOutOfBoundsException e) {
2113             // correct result
2114         }
2115 
2116         // Supplementary character test
2117         p = Pattern.compile(toSupplementaries("blah"));
2118         m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
2119         result = m.find(2);
2120         if (!result)
2121             failCount++;
2122 
2123         report("FindInt");
2124     }
2125 
2126     private static void emptyPatternTest() throws Exception {
2127         Pattern p = Pattern.compile("");
2128         Matcher m = p.matcher("foo");
2129 
2130         // Should find empty pattern at beginning of input
2131         boolean result = m.find();
2132         if (result != true)
2133             failCount++;
2134         if (m.start() != 0)
2135             failCount++;
2136 
2137         // Should not match entire input if input is not empty
2138         m.reset();
2139         result = m.matches();
2140         if (result == true)
2141             failCount++;
2142 
2143         try {
2144             m.start(0);
2145             failCount++;
2146         } catch (IllegalStateException e) {
2147             // Correct result
2148         }
2149 
2150         // Should match entire input if input is empty
2151         m.reset("");
2152         result = m.matches();
2153         if (result != true)
2154             failCount++;
2155 
2156         result = Pattern.matches("", "");
2157         if (result != true)
2158             failCount++;
2159 
2160         result = Pattern.matches("", "foo");
2161         if (result == true)
2162             failCount++;
2163         report("EmptyPattern");
2164     }
2165 
2166     private static void charClassTest() throws Exception {
2167         Pattern pattern = Pattern.compile("blah[ab]]blech");
2168         check(pattern, "blahb]blech", true);
2169 
2170         pattern = Pattern.compile("[abc[def]]");
2171         check(pattern, "b", true);
2172 
2173         // Supplementary character tests
2174         pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2175         check(pattern, toSupplementaries("blahb]blech"), true);
2176 
2177         pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2178         check(pattern, toSupplementaries("b"), true);
2179 
2180         try {
2181             // u00ff when UNICODE_CASE
2182             pattern = Pattern.compile("[ab\u00ffcd]",
2183                                       Pattern.CASE_INSENSITIVE|
2184                                       Pattern.UNICODE_CASE);
2185             check(pattern, "ab\u00ffcd", true);
2186             check(pattern, "Ab\u0178Cd", true);
2187 
2188             // u00b5 when UNICODE_CASE
2189             pattern = Pattern.compile("[ab\u00b5cd]",
2190                                       Pattern.CASE_INSENSITIVE|
2191                                       Pattern.UNICODE_CASE);
2192             check(pattern, "ab\u00b5cd", true);
2193             check(pattern, "Ab\u039cCd", true);
2194         } catch (Exception e) { failCount++; }
2195 
2196         /* Special cases
2197            (1)LatinSmallLetterLongS u+017f
2198            (2)LatinSmallLetterDotlessI u+0131
2199            (3)LatineCapitalLetterIWithDotAbove u+0130
2200            (4)KelvinSign u+212a
2201            (5)AngstromSign u+212b
2202         */
2203         int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2204         pattern = Pattern.compile("[sik\u00c5]+", flags);
2205         if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2206             failCount++;
2207 
2208         report("CharClass");
2209     }
2210 
2211     private static void caretTest() throws Exception {
2212         Pattern pattern = Pattern.compile("\\w*");
2213         Matcher matcher = pattern.matcher("a#bc#def##g");
2214         check(matcher, "a");
2215         check(matcher, "");
2216         check(matcher, "bc");
2217         check(matcher, "");
2218         check(matcher, "def");
2219         check(matcher, "");
2220         check(matcher, "");
2221         check(matcher, "g");
2222         check(matcher, "");
2223         if (matcher.find())
2224             failCount++;
2225 
2226         pattern = Pattern.compile("^\\w*");
2227         matcher = pattern.matcher("a#bc#def##g");
2228         check(matcher, "a");
2229         if (matcher.find())
2230             failCount++;
2231 
2232         pattern = Pattern.compile("\\w");
2233         matcher = pattern.matcher("abc##x");
2234         check(matcher, "a");
2235         check(matcher, "b");
2236         check(matcher, "c");
2237         check(matcher, "x");
2238         if (matcher.find())
2239             failCount++;
2240 
2241         pattern = Pattern.compile("^\\w");
2242         matcher = pattern.matcher("abc##x");
2243         check(matcher, "a");
2244         if (matcher.find())
2245             failCount++;
2246 
2247         pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2248         matcher = pattern.matcher("abcdef-ghi\njklmno");
2249         check(matcher, "abc");
2250         if (matcher.find())
2251             failCount++;
2252 
2253         pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2254         matcher = pattern.matcher("abcdef-ghi\njklmno");
2255         check(matcher, "abc");
2256         check(matcher, "jkl");
2257         if (matcher.find())
2258             failCount++;
2259 
2260         pattern = Pattern.compile("^", Pattern.MULTILINE);
2261         matcher = pattern.matcher("this is some text");
2262         String result = matcher.replaceAll("X");
2263         if (!result.equals("Xthis is some text"))
2264             failCount++;
2265 
2266         pattern = Pattern.compile("^");
2267         matcher = pattern.matcher("this is some text");
2268         result = matcher.replaceAll("X");
2269         if (!result.equals("Xthis is some text"))
2270             failCount++;
2271 
2272         pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2273         matcher = pattern.matcher("this is some text\n");
2274         result = matcher.replaceAll("X");
2275         if (!result.equals("Xthis is some text\n"))
2276             failCount++;
2277 
2278         report("Caret");
2279     }
2280 
2281     private static void groupCaptureTest() throws Exception {
2282         // Independent group
2283         Pattern pattern = Pattern.compile("x+(?>y+)z+");
2284         Matcher matcher = pattern.matcher("xxxyyyzzz");
2285         matcher.find();
2286         try {
2287             String blah = matcher.group(1);
2288             failCount++;
2289         } catch (IndexOutOfBoundsException ioobe) {
2290             // Good result
2291         }
2292         // Pure group
2293         pattern = Pattern.compile("x+(?:y+)z+");
2294         matcher = pattern.matcher("xxxyyyzzz");
2295         matcher.find();
2296         try {
2297             String blah = matcher.group(1);
2298             failCount++;
2299         } catch (IndexOutOfBoundsException ioobe) {
2300             // Good result
2301         }
2302 
2303         // Supplementary character tests
2304         // Independent group
2305         pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2306         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2307         matcher.find();
2308         try {
2309             String blah = matcher.group(1);
2310             failCount++;
2311         } catch (IndexOutOfBoundsException ioobe) {
2312             // Good result
2313         }
2314         // Pure group
2315         pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2316         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2317         matcher.find();
2318         try {
2319             String blah = matcher.group(1);
2320             failCount++;
2321         } catch (IndexOutOfBoundsException ioobe) {
2322             // Good result
2323         }
2324 
2325         report("GroupCapture");
2326     }
2327 
2328     private static void backRefTest() throws Exception {
2329         Pattern pattern = Pattern.compile("(a*)bc\\1");
2330         check(pattern, "zzzaabcazzz", true);
2331 
2332         pattern = Pattern.compile("(a*)bc\\1");
2333         check(pattern, "zzzaabcaazzz", true);
2334 
2335         pattern = Pattern.compile("(abc)(def)\\1");
2336         check(pattern, "abcdefabc", true);
2337 
2338         pattern = Pattern.compile("(abc)(def)\\3");
2339         check(pattern, "abcdefabc", false);
2340 
2341         try {
2342             for (int i = 1; i < 10; i++) {
2343                 // Make sure backref 1-9 are always accepted
2344                 pattern = Pattern.compile("abcdef\\" + i);
2345                 // and fail to match if the target group does not exit
2346                 check(pattern, "abcdef", false);
2347             }
2348         } catch(PatternSyntaxException e) {
2349             failCount++;
2350         }
2351 
2352         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2353         check(pattern, "abcdefghija", false);
2354         check(pattern, "abcdefghija1", true);
2355 
2356         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2357         check(pattern, "abcdefghijkk", true);
2358 
2359         pattern = Pattern.compile("(a)bcdefghij\\11");
2360         check(pattern, "abcdefghija1", true);
2361 
2362         // Supplementary character tests
2363         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2364         check(pattern, toSupplementaries("zzzaabcazzz"), true);
2365 
2366         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2367         check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2368 
2369         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2370         check(pattern, toSupplementaries("abcdefabc"), true);
2371 
2372         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2373         check(pattern, toSupplementaries("abcdefabc"), false);
2374 
2375         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2376         check(pattern, toSupplementaries("abcdefghija"), false);
2377         check(pattern, toSupplementaries("abcdefghija1"), true);
2378 
2379         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2380         check(pattern, toSupplementaries("abcdefghijkk"), true);
2381 
2382         report("BackRef");
2383     }
2384 
2385     /**
2386      * Unicode Technical Report #18, section 2.6 End of Line
2387      * There is no empty line to be matched in the sequence \u000D\u000A
2388      * but there is an empty line in the sequence \u000A\u000D.
2389      */
2390     private static void anchorTest() throws Exception {
2391         Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2392         Matcher m = p.matcher("blah1\r\nblah2");
2393         m.find();
2394         m.find();
2395         if (!m.group().equals("blah2"))
2396             failCount++;
2397 
2398         m.reset("blah1\n\rblah2");
2399         m.find();
2400         m.find();
2401         m.find();
2402         if (!m.group().equals("blah2"))
2403             failCount++;
2404 
2405         // Test behavior of $ with \r\n at end of input
2406         p = Pattern.compile(".+$");
2407         m = p.matcher("blah1\r\n");
2408         if (!m.find())
2409             failCount++;
2410        if (!m.group().equals("blah1"))
2411             failCount++;
2412         if (m.find())
2413             failCount++;
2414 
2415         // Test behavior of $ with \r\n at end of input in multiline
2416         p = Pattern.compile(".+$", Pattern.MULTILINE);
2417         m = p.matcher("blah1\r\n");
2418         if (!m.find())
2419             failCount++;
2420         if (m.find())
2421             failCount++;
2422 
2423         // Test for $ recognition of \u0085 for bug 4527731
2424         p = Pattern.compile(".+$", Pattern.MULTILINE);
2425         m = p.matcher("blah1\u0085");
2426         if (!m.find())
2427             failCount++;
2428 
2429         // Supplementary character test
2430         p = Pattern.compile("^.*$", Pattern.MULTILINE);
2431         m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2432         m.find();
2433         m.find();
2434         if (!m.group().equals(toSupplementaries("blah2")))
2435             failCount++;
2436 
2437         m.reset(toSupplementaries("blah1\n\rblah2"));
2438         m.find();
2439         m.find();
2440         m.find();
2441         if (!m.group().equals(toSupplementaries("blah2")))
2442             failCount++;
2443 
2444         // Test behavior of $ with \r\n at end of input
2445         p = Pattern.compile(".+$");
2446         m = p.matcher(toSupplementaries("blah1\r\n"));
2447         if (!m.find())
2448             failCount++;
2449         if (!m.group().equals(toSupplementaries("blah1")))
2450             failCount++;
2451         if (m.find())
2452             failCount++;
2453 
2454         // Test behavior of $ with \r\n at end of input in multiline
2455         p = Pattern.compile(".+$", Pattern.MULTILINE);
2456         m = p.matcher(toSupplementaries("blah1\r\n"));
2457         if (!m.find())
2458             failCount++;
2459         if (m.find())
2460             failCount++;
2461 
2462         // Test for $ recognition of \u0085 for bug 4527731
2463         p = Pattern.compile(".+$", Pattern.MULTILINE);
2464         m = p.matcher(toSupplementaries("blah1\u0085"));
2465         if (!m.find())
2466             failCount++;
2467 
2468         report("Anchors");
2469     }
2470 
2471     /**
2472      * A basic sanity test of Matcher.lookingAt().
2473      */
2474     private static void lookingAtTest() throws Exception {
2475         Pattern p = Pattern.compile("(ab)(c*)");
2476         Matcher m = p.matcher("abccczzzabcczzzabccc");
2477 
2478         if (!m.lookingAt())
2479             failCount++;
2480 
2481         if (!m.group().equals(m.group(0)))
2482             failCount++;
2483 
2484         m = p.matcher("zzzabccczzzabcczzzabccczzz");
2485         if (m.lookingAt())
2486             failCount++;
2487 
2488         // Supplementary character test
2489         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2490         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2491 
2492         if (!m.lookingAt())
2493             failCount++;
2494 
2495         if (!m.group().equals(m.group(0)))
2496             failCount++;
2497 
2498         m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2499         if (m.lookingAt())
2500             failCount++;
2501 
2502         report("Looking At");
2503     }
2504 
2505     /**
2506      * A basic sanity test of Matcher.matches().
2507      */
2508     private static void matchesTest() throws Exception {
2509         // matches()
2510         Pattern p = Pattern.compile("ulb(c*)");
2511         Matcher m = p.matcher("ulbcccccc");
2512         if (!m.matches())
2513             failCount++;
2514 
2515         // find() but not matches()
2516         m.reset("zzzulbcccccc");
2517         if (m.matches())
2518             failCount++;
2519 
2520         // lookingAt() but not matches()
2521         m.reset("ulbccccccdef");
2522         if (m.matches())
2523             failCount++;
2524 
2525         // matches()
2526         p = Pattern.compile("a|ad");
2527         m = p.matcher("ad");
2528         if (!m.matches())
2529             failCount++;
2530 
2531         // Supplementary character test
2532         // matches()
2533         p = Pattern.compile(toSupplementaries("ulb(c*)"));
2534         m = p.matcher(toSupplementaries("ulbcccccc"));
2535         if (!m.matches())
2536             failCount++;
2537 
2538         // find() but not matches()
2539         m.reset(toSupplementaries("zzzulbcccccc"));
2540         if (m.matches())
2541             failCount++;
2542 
2543         // lookingAt() but not matches()
2544         m.reset(toSupplementaries("ulbccccccdef"));
2545         if (m.matches())
2546             failCount++;
2547 
2548         // matches()
2549         p = Pattern.compile(toSupplementaries("a|ad"));
2550         m = p.matcher(toSupplementaries("ad"));
2551         if (!m.matches())
2552             failCount++;
2553 
2554         report("Matches");
2555     }
2556 
2557     /**
2558      * A basic sanity test of Pattern.matches().
2559      */
2560     private static void patternMatchesTest() throws Exception {
2561         // matches()
2562         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2563                              toSupplementaries("ulbcccccc")))
2564             failCount++;
2565 
2566         // find() but not matches()
2567         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2568                             toSupplementaries("zzzulbcccccc")))
2569             failCount++;
2570 
2571         // lookingAt() but not matches()
2572         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2573                             toSupplementaries("ulbccccccdef")))
2574             failCount++;
2575 
2576         // Supplementary character test
2577         // matches()
2578         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2579                              toSupplementaries("ulbcccccc")))
2580             failCount++;
2581 
2582         // find() but not matches()
2583         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2584                             toSupplementaries("zzzulbcccccc")))
2585             failCount++;
2586 
2587         // lookingAt() but not matches()
2588         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2589                             toSupplementaries("ulbccccccdef")))
2590             failCount++;
2591 
2592         report("Pattern Matches");
2593     }
2594 
2595     /**
2596      * Canonical equivalence testing. Tests the ability of the engine
2597      * to match sequences that are not explicitly specified in the
2598      * pattern when they are considered equivalent by the Unicode Standard.
2599      */
2600     private static void ceTest() throws Exception {
2601         // Decomposed char outside char classes
2602         Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2603         Matcher m = p.matcher("test\u00e5");
2604         if (!m.matches())
2605             failCount++;
2606 
2607         m.reset("testa\u030a");
2608         if (!m.matches())
2609             failCount++;
2610 
2611         // Composed char outside char classes
2612         p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2613         m = p.matcher("test\u00e5");
2614         if (!m.matches())
2615             failCount++;
2616 
2617         m.reset("testa\u030a");
2618         if (!m.find())
2619             failCount++;
2620 
2621         // Decomposed char inside a char class
2622         p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2623         m = p.matcher("test\u00e5");
2624         if (!m.find())
2625             failCount++;
2626 
2627         m.reset("testa\u030a");
2628         if (!m.find())
2629             failCount++;
2630 
2631         // Composed char inside a char class
2632         p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2633         m = p.matcher("test\u00e5");
2634         if (!m.find())
2635             failCount++;
2636 
2637         m.reset("testa\u0300");
2638         if (!m.find())
2639             failCount++;
2640 
2641         m.reset("testa\u030a");
2642         if (!m.find())
2643             failCount++;
2644 
2645         // Marks that cannot legally change order and be equivalent
2646         p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2647         check(p, "testa\u0308\u0300", true);
2648         check(p, "testa\u0300\u0308", false);
2649 
2650         // Marks that can legally change order and be equivalent
2651         p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2652         check(p, "testa\u0308\u0323", true);
2653         check(p, "testa\u0323\u0308", true);
2654 
2655         // Test all equivalences of the sequence a\u0308\u0323\u0300
2656         p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2657         check(p, "testa\u0308\u0323\u0300", true);
2658         check(p, "testa\u0323\u0308\u0300", true);
2659         check(p, "testa\u0308\u0300\u0323", true);
2660         check(p, "test\u00e4\u0323\u0300", true);
2661         check(p, "test\u00e4\u0300\u0323", true);
2662 
2663         Object[][] data = new Object[][] {
2664 
2665         // JDK-4867170
2666         { "[\u1f80-\u1f82]", "ab\u1f80cd",             "f", true },
2667         { "[\u1f80-\u1f82]", "ab\u1f81cd",             "f", true },
2668         { "[\u1f80-\u1f82]", "ab\u1f82cd",             "f", true },
2669         { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true },
2670         { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true },
2671         { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd",       "f", true },
2672         { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd",       "f", true },
2673 
2674         { "\\p{IsGreek}",    "ab\u1f80cd",             "f", true },
2675         { "\\p{IsGreek}",    "ab\u1f81cd",             "f", true },
2676         { "\\p{IsGreek}",    "ab\u1f82cd",             "f", true },
2677         { "\\p{IsGreek}",    "ab\u03b1\u0314\u0345cd", "f", true },
2678         { "\\p{IsGreek}",    "ab\u1f01\u0345cd",       "f", true },
2679 
2680         // backtracking, force to match "\u1f80", instead of \u1f82"
2681         { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true },
2682 
2683         { "[\\p{IsGreek}]",  "\u03b1\u0314\u0345",     "m", true },
2684         { "\\p{IsGreek}",    "\u03b1\u0314\u0345",     "m", true },
2685  
2686         { "[^\u1f80-\u1f82]","\u1f81",                 "m", false },
2687         { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345",     "m", false },
2688         { "[^\u1f01\u0345]", "\u1f81",                 "f", false },
2689  
2690         { "[^\u1f81]+",      "\u1f80\u1f82",           "f", true },
2691         { "[\u1f80]",        "ab\u1f80cd",             "f", true },
2692         { "\u1f80",          "ab\u1f80cd",             "f", true },
2693         { "\u1f00\u0345\u0300",  "\u1f82", "m", true },
2694         { "\u1f80",          "-\u1f00\u0345\u0300-",   "f", true },
2695         { "\u1f82",          "\u1f00\u0345\u0300",     "m", true },
2696         { "\u1f82",          "\u1f80\u0300",           "m", true },
2697  
2698         // JDK-7080302       # compile failed
2699         { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true},
2700 
2701         // JDK-6728861, same cause as above one
2702         // Pattern pt = Pattern.compile("één", Pattern.CANON_EQ);
2703         { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true},
2704 
2705         // JDK-6995635
2706         // Pattern patternThatIsGonnaBug=Pattern.compile("(ë)",Pattern.CANON_EQ);
2707         { "(\u00e9)", "e\u0301", "m", true },
2708 
2709         // JDK-6736245
2710         // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc
2711         { "\u2ADC", "\u2ADC", "m", true},          // NFC
2712         { "\u2ADC", "\u2ADD\u0338", "m", true},    // NFD 
2713 
2714         //  4916384.
2715         // Decomposed hangul (jamos) works inside clazz
2716         { "[\u1100\u1161]", "\u1100\u1161", "m", true},
2717         { "[\u1100\u1161]", "\uac00", "m", true},
2718 
2719         { "[\uac00]", "\u1100\u1161", "m", true},
2720         { "[\uac00]", "\uac00", "m", true},
2721 
2722         // Decomposed hangul (jamos)
2723         { "\u1100\u1161", "\u1100\u1161", "m", true},
2724         { "\u1100\u1161", "\uac00", "m", true},
2725 
2726         // Composed hangul
2727         { "\uac00",  "\u1100\u1161", "m", true },
2728         { "\uac00",  "\uac00", "m", true },
2729 
2730         /* Need a NFDSlice to nfd the source to solve this issue
2731            u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f>  -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2732            u+1d1bc -> nfd: <u+1d1ba><u+1d165>           -> nfc: <u+1d1ba><u+1d165>
2733            <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2734 
2735         // Decomposed supplementary outside char classes
2736         // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true },
2737         // Composed supplementary outside char classes
2738         // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true },
2739         */
2740         { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true },
2741         { "test\ud834\uddc0",             "test\ud834\uddbc\ud834\udd6f", "m", true },
2742 
2743         { "test\ud834\uddc0",             "test\ud834\uddc0",             "m", true },
2744         { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0",             "m", true },
2745         };
2746 
2747         int failCount = 0;
2748         for (Object[] d : data) {
2749             String pn = (String)d[0];
2750             String tt = (String)d[1];
2751             boolean isFind = "f".equals(((String)d[2]));
2752             boolean expected = (boolean)d[3];
2753             boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find()
2754                                  : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches();
2755             if (ret != expected) {
2756                 failCount++;
2757                 continue;
2758             }
2759         }
2760         report("Canonical Equivalence");
2761     }
2762 
2763     /**
2764      * A basic sanity test of Matcher.replaceAll().
2765      */
2766     private static void globalSubstitute() throws Exception {
2767         // Global substitution with a literal
2768         Pattern p = Pattern.compile("(ab)(c*)");
2769         Matcher m = p.matcher("abccczzzabcczzzabccc");
2770         if (!m.replaceAll("test").equals("testzzztestzzztest"))
2771             failCount++;
2772 
2773         m.reset("zzzabccczzzabcczzzabccczzz");
2774         if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2775             failCount++;
2776 
2777         // Global substitution with groups
2778         m.reset("zzzabccczzzabcczzzabccczzz");
2779         String result = m.replaceAll("$1");
2780         if (!result.equals("zzzabzzzabzzzabzzz"))
2781             failCount++;
2782 
2783         // Supplementary character test
2784         // Global substitution with a literal
2785         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2786         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2787         if (!m.replaceAll(toSupplementaries("test")).
2788             equals(toSupplementaries("testzzztestzzztest")))
2789             failCount++;
2790 
2791         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2792         if (!m.replaceAll(toSupplementaries("test")).
2793             equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2794             failCount++;
2795 
2796         // Global substitution with groups
2797         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2798         result = m.replaceAll("$1");
2799         if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2800             failCount++;
2801 
2802         report("Global Substitution");
2803     }
2804 
    /**
     * Tests the usage of Matcher.appendReplacement() and
     * Matcher.appendTail() with a StringBuffer as the output buffer,
     * using literal and group substitutions.
     */
2809     private static void stringbufferSubstitute() throws Exception {
2810         // SB substitution with literal
2811         String blah = "zzzblahzzz";
2812         Pattern p = Pattern.compile("blah");
2813         Matcher m = p.matcher(blah);
2814         StringBuffer result = new StringBuffer();
2815         try {
2816             m.appendReplacement(result, "blech");
2817             failCount++;
2818         } catch (IllegalStateException e) {
2819         }
2820         m.find();
2821         m.appendReplacement(result, "blech");
2822         if (!result.toString().equals("zzzblech"))
2823             failCount++;
2824 
2825         m.appendTail(result);
2826         if (!result.toString().equals("zzzblechzzz"))
2827             failCount++;
2828 
2829         // SB substitution with groups
2830         blah = "zzzabcdzzz";
2831         p = Pattern.compile("(ab)(cd)*");
2832         m = p.matcher(blah);
2833         result = new StringBuffer();
2834         try {
2835             m.appendReplacement(result, "$1");
2836             failCount++;
2837         } catch (IllegalStateException e) {
2838         }
2839         m.find();
2840         m.appendReplacement(result, "$1");
2841         if (!result.toString().equals("zzzab"))
2842             failCount++;
2843 
2844         m.appendTail(result);
2845         if (!result.toString().equals("zzzabzzz"))
2846             failCount++;
2847 
2848         // SB substitution with 3 groups
2849         blah = "zzzabcdcdefzzz";
2850         p = Pattern.compile("(ab)(cd)*(ef)");
2851         m = p.matcher(blah);
2852         result = new StringBuffer();
2853         try {
2854             m.appendReplacement(result, "$1w$2w$3");
2855             failCount++;
2856         } catch (IllegalStateException e) {
2857         }
2858         m.find();
2859         m.appendReplacement(result, "$1w$2w$3");
2860         if (!result.toString().equals("zzzabwcdwef"))
2861             failCount++;
2862 
2863         m.appendTail(result);
2864         if (!result.toString().equals("zzzabwcdwefzzz"))
2865             failCount++;
2866 
2867         // SB substitution with groups and three matches
2868         // skipping middle match
2869         blah = "zzzabcdzzzabcddzzzabcdzzz";
2870         p = Pattern.compile("(ab)(cd*)");
2871         m = p.matcher(blah);
2872         result = new StringBuffer();
2873         try {
2874             m.appendReplacement(result, "$1");
2875             failCount++;
2876         } catch (IllegalStateException e) {
2877         }
2878         m.find();
2879         m.appendReplacement(result, "$1");
2880         if (!result.toString().equals("zzzab"))
2881             failCount++;
2882 
2883         m.find();
2884         m.find();
2885         m.appendReplacement(result, "$2");
2886         if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2887             failCount++;
2888 
2889         m.appendTail(result);
2890         if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2891             failCount++;
2892 
2893         // Check to make sure escaped $ is ignored
2894         blah = "zzzabcdcdefzzz";
2895         p = Pattern.compile("(ab)(cd)*(ef)");
2896         m = p.matcher(blah);
2897         result = new StringBuffer();
2898         m.find();
2899         m.appendReplacement(result, "$1w\\$2w$3");
2900         if (!result.toString().equals("zzzabw$2wef"))
2901             failCount++;
2902 
2903         m.appendTail(result);
2904         if (!result.toString().equals("zzzabw$2wefzzz"))
2905             failCount++;
2906 
2907         // Check to make sure a reference to nonexistent group causes error
2908         blah = "zzzabcdcdefzzz";
2909         p = Pattern.compile("(ab)(cd)*(ef)");
2910         m = p.matcher(blah);
2911         result = new StringBuffer();
2912         m.find();
2913         try {
2914             m.appendReplacement(result, "$1w$5w$3");
2915             failCount++;
2916         } catch (IndexOutOfBoundsException ioobe) {
2917             // Correct result
2918         }
2919 
2920         // Check double digit group references
2921         blah = "zzz123456789101112zzz";
2922         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2923         m = p.matcher(blah);
2924         result = new StringBuffer();
2925         m.find();
2926         m.appendReplacement(result, "$1w$11w$3");
2927         if (!result.toString().equals("zzz1w11w3"))
2928             failCount++;
2929 
2930         // Check to make sure it backs off $15 to $1 if only three groups
2931         blah = "zzzabcdcdefzzz";
2932         p = Pattern.compile("(ab)(cd)*(ef)");
2933         m = p.matcher(blah);
2934         result = new StringBuffer();
2935         m.find();
2936         m.appendReplacement(result, "$1w$15w$3");
2937         if (!result.toString().equals("zzzabwab5wef"))
2938             failCount++;
2939 
2940 
2941         // Supplementary character test
2942         // SB substitution with literal
2943         blah = toSupplementaries("zzzblahzzz");
2944         p = Pattern.compile(toSupplementaries("blah"));
2945         m = p.matcher(blah);
2946         result = new StringBuffer();
2947         try {
2948             m.appendReplacement(result, toSupplementaries("blech"));
2949             failCount++;
2950         } catch (IllegalStateException e) {
2951         }
2952         m.find();
2953         m.appendReplacement(result, toSupplementaries("blech"));
2954         if (!result.toString().equals(toSupplementaries("zzzblech")))
2955             failCount++;
2956 
2957         m.appendTail(result);
2958         if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2959             failCount++;
2960 
2961         // SB substitution with groups
2962         blah = toSupplementaries("zzzabcdzzz");
2963         p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2964         m = p.matcher(blah);
2965         result = new StringBuffer();
2966         try {
2967             m.appendReplacement(result, "$1");
2968             failCount++;
2969         } catch (IllegalStateException e) {
2970         }
2971         m.find();
2972         m.appendReplacement(result, "$1");
2973         if (!result.toString().equals(toSupplementaries("zzzab")))
2974             failCount++;
2975 
2976         m.appendTail(result);
2977         if (!result.toString().equals(toSupplementaries("zzzabzzz")))
2978             failCount++;
2979 
2980         // SB substitution with 3 groups
2981         blah = toSupplementaries("zzzabcdcdefzzz");
2982         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2983         m = p.matcher(blah);
2984         result = new StringBuffer();
2985         try {
2986             m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2987             failCount++;
2988         } catch (IllegalStateException e) {
2989         }
2990         m.find();
2991         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2992         if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
2993             failCount++;
2994 
2995         m.appendTail(result);
2996         if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
2997             failCount++;
2998 
2999         // SB substitution with groups and three matches
3000         // skipping middle match
3001         blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
3002         p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
3003         m = p.matcher(blah);
3004         result = new StringBuffer();
3005         try {
3006             m.appendReplacement(result, "$1");
3007             failCount++;
3008         } catch (IllegalStateException e) {
3009         }
3010         m.find();
3011         m.appendReplacement(result, "$1");
3012         if (!result.toString().equals(toSupplementaries("zzzab")))
3013             failCount++;
3014 
3015         m.find();
3016         m.find();
3017         m.appendReplacement(result, "$2");
3018         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
3019             failCount++;
3020 
3021         m.appendTail(result);
3022         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
3023             failCount++;
3024 
3025         // Check to make sure escaped $ is ignored
3026         blah = toSupplementaries("zzzabcdcdefzzz");
3027         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3028         m = p.matcher(blah);
3029         result = new StringBuffer();
3030         m.find();
3031         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
3032         if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
3033             failCount++;
3034 
3035         m.appendTail(result);
3036         if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
3037             failCount++;
3038 
3039         // Check to make sure a reference to nonexistent group causes error
3040         blah = toSupplementaries("zzzabcdcdefzzz");
3041         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3042         m = p.matcher(blah);
3043         result = new StringBuffer();
3044         m.find();
3045         try {
3046             m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
3047             failCount++;
3048         } catch (IndexOutOfBoundsException ioobe) {
3049             // Correct result
3050         }
3051 
3052         // Check double digit group references
3053         blah = toSupplementaries("zzz123456789101112zzz");
3054         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3055         m = p.matcher(blah);
3056         result = new StringBuffer();
3057         m.find();
3058         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
3059         if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
3060             failCount++;
3061 
3062         // Check to make sure it backs off $15 to $1 if only three groups
3063         blah = toSupplementaries("zzzabcdcdefzzz");
3064         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3065         m = p.matcher(blah);
3066         result = new StringBuffer();
3067         m.find();
3068         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3069         if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
3070             failCount++;
3071 
3072         // Check nothing has been appended into the output buffer if
3073         // the replacement string triggers IllegalArgumentException.
3074         p = Pattern.compile("(abc)");
3075         m = p.matcher("abcd");
3076         result = new StringBuffer();
3077         m.find();
3078         try {
3079             m.appendReplacement(result, ("xyz$g"));
3080             failCount++;
3081         } catch (IllegalArgumentException iae) {
3082             if (result.length() != 0)
3083                 failCount++;
3084         }
3085 
3086         report("SB Substitution");
3087     }
3088 
    /**
     * Tests the usage of Matcher.appendReplacement() and
     * Matcher.appendTail() with a StringBuilder as the output buffer,
     * using literal and group substitutions.
     */
3093     private static void stringbuilderSubstitute() throws Exception {
3094         // SB substitution with literal
3095         String blah = "zzzblahzzz";
3096         Pattern p = Pattern.compile("blah");
3097         Matcher m = p.matcher(blah);
3098         StringBuilder result = new StringBuilder();
3099         try {
3100             m.appendReplacement(result, "blech");
3101             failCount++;
3102         } catch (IllegalStateException e) {
3103         }
3104         m.find();
3105         m.appendReplacement(result, "blech");
3106         if (!result.toString().equals("zzzblech"))
3107             failCount++;
3108 
3109         m.appendTail(result);
3110         if (!result.toString().equals("zzzblechzzz"))
3111             failCount++;
3112 
3113         // SB substitution with groups
3114         blah = "zzzabcdzzz";
3115         p = Pattern.compile("(ab)(cd)*");
3116         m = p.matcher(blah);
3117         result = new StringBuilder();
3118         try {
3119             m.appendReplacement(result, "$1");
3120             failCount++;
3121         } catch (IllegalStateException e) {
3122         }
3123         m.find();
3124         m.appendReplacement(result, "$1");
3125         if (!result.toString().equals("zzzab"))
3126             failCount++;
3127 
3128         m.appendTail(result);
3129         if (!result.toString().equals("zzzabzzz"))
3130             failCount++;
3131 
3132         // SB substitution with 3 groups
3133         blah = "zzzabcdcdefzzz";
3134         p = Pattern.compile("(ab)(cd)*(ef)");
3135         m = p.matcher(blah);
3136         result = new StringBuilder();
3137         try {
3138             m.appendReplacement(result, "$1w$2w$3");
3139             failCount++;
3140         } catch (IllegalStateException e) {
3141         }
3142         m.find();
3143         m.appendReplacement(result, "$1w$2w$3");
3144         if (!result.toString().equals("zzzabwcdwef"))
3145             failCount++;
3146 
3147         m.appendTail(result);
3148         if (!result.toString().equals("zzzabwcdwefzzz"))
3149             failCount++;
3150 
3151         // SB substitution with groups and three matches
3152         // skipping middle match
3153         blah = "zzzabcdzzzabcddzzzabcdzzz";
3154         p = Pattern.compile("(ab)(cd*)");
3155         m = p.matcher(blah);
3156         result = new StringBuilder();
3157         try {
3158             m.appendReplacement(result, "$1");
3159             failCount++;
3160         } catch (IllegalStateException e) {
3161         }
3162         m.find();
3163         m.appendReplacement(result, "$1");
3164         if (!result.toString().equals("zzzab"))
3165             failCount++;
3166 
3167         m.find();
3168         m.find();
3169         m.appendReplacement(result, "$2");
3170         if (!result.toString().equals("zzzabzzzabcddzzzcd"))
3171             failCount++;
3172 
3173         m.appendTail(result);
3174         if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
3175             failCount++;
3176 
3177         // Check to make sure escaped $ is ignored
3178         blah = "zzzabcdcdefzzz";
3179         p = Pattern.compile("(ab)(cd)*(ef)");
3180         m = p.matcher(blah);
3181         result = new StringBuilder();
3182         m.find();
3183         m.appendReplacement(result, "$1w\\$2w$3");
3184         if (!result.toString().equals("zzzabw$2wef"))
3185             failCount++;
3186 
3187         m.appendTail(result);
3188         if (!result.toString().equals("zzzabw$2wefzzz"))
3189             failCount++;
3190 
3191         // Check to make sure a reference to nonexistent group causes error
3192         blah = "zzzabcdcdefzzz";
3193         p = Pattern.compile("(ab)(cd)*(ef)");
3194         m = p.matcher(blah);
3195         result = new StringBuilder();
3196         m.find();
3197         try {
3198             m.appendReplacement(result, "$1w$5w$3");
3199             failCount++;
3200         } catch (IndexOutOfBoundsException ioobe) {
3201             // Correct result
3202         }
3203 
3204         // Check double digit group references
3205         blah = "zzz123456789101112zzz";
3206         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3207         m = p.matcher(blah);
3208         result = new StringBuilder();
3209         m.find();
3210         m.appendReplacement(result, "$1w$11w$3");
3211         if (!result.toString().equals("zzz1w11w3"))
3212             failCount++;
3213 
3214         // Check to make sure it backs off $15 to $1 if only three groups
3215         blah = "zzzabcdcdefzzz";
3216         p = Pattern.compile("(ab)(cd)*(ef)");
3217         m = p.matcher(blah);
3218         result = new StringBuilder();
3219         m.find();
3220         m.appendReplacement(result, "$1w$15w$3");
3221         if (!result.toString().equals("zzzabwab5wef"))
3222             failCount++;
3223 
3224 
3225         // Supplementary character test
3226         // SB substitution with literal
3227         blah = toSupplementaries("zzzblahzzz");
3228         p = Pattern.compile(toSupplementaries("blah"));
3229         m = p.matcher(blah);
3230         result = new StringBuilder();
3231         try {
3232             m.appendReplacement(result, toSupplementaries("blech"));
3233             failCount++;
3234         } catch (IllegalStateException e) {
3235         }
3236         m.find();
3237         m.appendReplacement(result, toSupplementaries("blech"));
3238         if (!result.toString().equals(toSupplementaries("zzzblech")))
3239             failCount++;
3240         m.appendTail(result);
3241         if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
3242             failCount++;
3243 
3244         // SB substitution with groups
3245         blah = toSupplementaries("zzzabcdzzz");
3246         p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
3247         m = p.matcher(blah);
3248         result = new StringBuilder();
3249         try {
3250             m.appendReplacement(result, "$1");
3251             failCount++;
3252         } catch (IllegalStateException e) {
3253         }
3254         m.find();
3255         m.appendReplacement(result, "$1");
3256         if (!result.toString().equals(toSupplementaries("zzzab")))
3257             failCount++;
3258 
3259         m.appendTail(result);
3260         if (!result.toString().equals(toSupplementaries("zzzabzzz")))
3261             failCount++;
3262 
3263         // SB substitution with 3 groups
3264         blah = toSupplementaries("zzzabcdcdefzzz");
3265         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3266         m = p.matcher(blah);
3267         result = new StringBuilder();
3268         try {
3269             m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3270             failCount++;
3271         } catch (IllegalStateException e) {
3272         }
3273         m.find();
3274         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3275         if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
3276             failCount++;
3277 
3278         m.appendTail(result);
3279         if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
3280             failCount++;
3281 
3282         // SB substitution with groups and three matches
3283         // skipping middle match
3284         blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
3285         p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
3286         m = p.matcher(blah);
3287         result = new StringBuilder();
3288         try {
3289             m.appendReplacement(result, "$1");
3290             failCount++;
3291         } catch (IllegalStateException e) {
3292         }
3293         m.find();
3294         m.appendReplacement(result, "$1");
3295         if (!result.toString().equals(toSupplementaries("zzzab")))
3296             failCount++;
3297 
3298         m.find();
3299         m.find();
3300         m.appendReplacement(result, "$2");
3301         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
3302             failCount++;
3303 
3304         m.appendTail(result);
3305         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
3306             failCount++;
3307 
3308         // Check to make sure escaped $ is ignored
3309         blah = toSupplementaries("zzzabcdcdefzzz");
3310         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3311         m = p.matcher(blah);
3312         result = new StringBuilder();
3313         m.find();
3314         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
3315         if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
3316             failCount++;
3317 
3318         m.appendTail(result);
3319         if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
3320             failCount++;
3321 
3322         // Check to make sure a reference to nonexistent group causes error
3323         blah = toSupplementaries("zzzabcdcdefzzz");
3324         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3325         m = p.matcher(blah);
3326         result = new StringBuilder();
3327         m.find();
3328         try {
3329             m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
3330             failCount++;
3331         } catch (IndexOutOfBoundsException ioobe) {
3332             // Correct result
3333         }
3334         // Check double digit group references
3335         blah = toSupplementaries("zzz123456789101112zzz");
3336         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3337         m = p.matcher(blah);
3338         result = new StringBuilder();
3339         m.find();
3340         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
3341         if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
3342             failCount++;
3343 
3344         // Check to make sure it backs off $15 to $1 if only three groups
3345         blah = toSupplementaries("zzzabcdcdefzzz");
3346         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3347         m = p.matcher(blah);
3348         result = new StringBuilder();
3349         m.find();
3350         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3351         if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
3352             failCount++;
3353         // Check nothing has been appended into the output buffer if
3354         // the replacement string triggers IllegalArgumentException.
3355         p = Pattern.compile("(abc)");
3356         m = p.matcher("abcd");
3357         result = new StringBuilder();
3358         m.find();
3359         try {
3360             m.appendReplacement(result, ("xyz$g"));
3361             failCount++;
3362         } catch (IllegalArgumentException iae) {
3363             if (result.length() != 0)
3364                 failCount++;
3365         }
3366         report("SB Substitution 2");
3367     }
3368 
3369     /*
3370      * 5 groups of characters are created to make a substitution string.
3371      * A base string will be created including random lead chars, the
3372      * substitution string, and random trailing chars.
3373      * A pattern containing the 5 groups is searched for and replaced with:
3374      * random group + random string + random group.
3375      * The results are checked for correctness.
3376      */
3377     private static void substitutionBasher() {
3378         for (int runs = 0; runs<1000; runs++) {
3379             // Create a base string to work in
3380             int leadingChars = generator.nextInt(10);
3381             StringBuffer baseBuffer = new StringBuffer(100);
3382             String leadingString = getRandomAlphaString(leadingChars);
3383             baseBuffer.append(leadingString);
3384 
3385             // Create 5 groups of random number of random chars
3386             // Create the string to substitute
3387             // Create the pattern string to search for
3388             StringBuffer bufferToSub = new StringBuffer(25);
3389             StringBuffer bufferToPat = new StringBuffer(50);
3390             String[] groups = new String[5];
3391             for(int i=0; i<5; i++) {
3392                 int aGroupSize = generator.nextInt(5)+1;
3393                 groups[i] = getRandomAlphaString(aGroupSize);
3394                 bufferToSub.append(groups[i]);
3395                 bufferToPat.append('(');
3396                 bufferToPat.append(groups[i]);
3397                 bufferToPat.append(')');
3398             }
3399             String stringToSub = bufferToSub.toString();
3400             String pattern = bufferToPat.toString();
3401 
3402             // Place sub string into working string at random index
3403             baseBuffer.append(stringToSub);
3404 
3405             // Append random chars to end
3406             int trailingChars = generator.nextInt(10);
3407             String trailingString = getRandomAlphaString(trailingChars);
3408             baseBuffer.append(trailingString);
3409             String baseString = baseBuffer.toString();
3410 
3411             // Create test pattern and matcher
3412             Pattern p = Pattern.compile(pattern);
3413             Matcher m = p.matcher(baseString);
3414 
3415             // Reject candidate if pattern happens to start early
3416             m.find();
3417             if (m.start() < leadingChars)
3418                 continue;
3419 
3420             // Reject candidate if more than one match
3421             if (m.find())
3422                 continue;
3423 
3424             // Construct a replacement string with :
3425             // random group + random string + random group
3426             StringBuffer bufferToRep = new StringBuffer();
3427             int groupIndex1 = generator.nextInt(5);
3428             bufferToRep.append("$" + (groupIndex1 + 1));
3429             String randomMidString = getRandomAlphaString(5);
3430             bufferToRep.append(randomMidString);
3431             int groupIndex2 = generator.nextInt(5);
3432             bufferToRep.append("$" + (groupIndex2 + 1));
3433             String replacement = bufferToRep.toString();
3434 
3435             // Do the replacement
3436             String result = m.replaceAll(replacement);
3437 
3438             // Construct expected result
3439             StringBuffer bufferToRes = new StringBuffer();
3440             bufferToRes.append(leadingString);
3441             bufferToRes.append(groups[groupIndex1]);
3442             bufferToRes.append(randomMidString);
3443             bufferToRes.append(groups[groupIndex2]);
3444             bufferToRes.append(trailingString);
3445             String expectedResult = bufferToRes.toString();
3446 
3447             // Check results
3448             if (!result.equals(expectedResult))
3449                 failCount++;
3450         }
3451 
3452         report("Substitution Basher");
3453     }
3454 
3455     /*
3456      * 5 groups of characters are created to make a substitution string.
3457      * A base string will be created including random lead chars, the
3458      * substitution string, and random trailing chars.
3459      * A pattern containing the 5 groups is searched for and replaced with:
3460      * random group + random string + random group.
3461      * The results are checked for correctness.
3462      */
3463     private static void substitutionBasher2() {
3464         for (int runs = 0; runs<1000; runs++) {
3465             // Create a base string to work in
3466             int leadingChars = generator.nextInt(10);
3467             StringBuilder baseBuffer = new StringBuilder(100);
3468             String leadingString = getRandomAlphaString(leadingChars);
3469             baseBuffer.append(leadingString);
3470 
3471             // Create 5 groups of random number of random chars
3472             // Create the string to substitute
3473             // Create the pattern string to search for
3474             StringBuilder bufferToSub = new StringBuilder(25);
3475             StringBuilder bufferToPat = new StringBuilder(50);
3476             String[] groups = new String[5];
3477             for(int i=0; i<5; i++) {
3478                 int aGroupSize = generator.nextInt(5)+1;
3479                 groups[i] = getRandomAlphaString(aGroupSize);
3480                 bufferToSub.append(groups[i]);
3481                 bufferToPat.append('(');
3482                 bufferToPat.append(groups[i]);
3483                 bufferToPat.append(')');
3484             }
3485             String stringToSub = bufferToSub.toString();
3486             String pattern = bufferToPat.toString();
3487 
3488             // Place sub string into working string at random index
3489             baseBuffer.append(stringToSub);
3490 
3491             // Append random chars to end
3492             int trailingChars = generator.nextInt(10);
3493             String trailingString = getRandomAlphaString(trailingChars);
3494             baseBuffer.append(trailingString);
3495             String baseString = baseBuffer.toString();
3496 
3497             // Create test pattern and matcher
3498             Pattern p = Pattern.compile(pattern);
3499             Matcher m = p.matcher(baseString);
3500 
3501             // Reject candidate if pattern happens to start early
3502             m.find();
3503             if (m.start() < leadingChars)
3504                 continue;
3505 
3506             // Reject candidate if more than one match
3507             if (m.find())
3508                 continue;
3509 
3510             // Construct a replacement string with :
3511             // random group + random string + random group
3512             StringBuilder bufferToRep = new StringBuilder();
3513             int groupIndex1 = generator.nextInt(5);
3514             bufferToRep.append("$" + (groupIndex1 + 1));
3515             String randomMidString = getRandomAlphaString(5);
3516             bufferToRep.append(randomMidString);
3517             int groupIndex2 = generator.nextInt(5);
3518             bufferToRep.append("$" + (groupIndex2 + 1));
3519             String replacement = bufferToRep.toString();
3520 
3521             // Do the replacement
3522             String result = m.replaceAll(replacement);
3523 
3524             // Construct expected result
3525             StringBuilder bufferToRes = new StringBuilder();
3526             bufferToRes.append(leadingString);
3527             bufferToRes.append(groups[groupIndex1]);
3528             bufferToRes.append(randomMidString);
3529             bufferToRes.append(groups[groupIndex2]);
3530             bufferToRes.append(trailingString);
3531             String expectedResult = bufferToRes.toString();
3532 
3533             // Check results
3534             if (!result.equals(expectedResult)) {
3535                 failCount++;
3536             }
3537         }
3538 
3539         report("Substitution Basher 2");
3540     }
3541 
3542     /**
3543      * Checks the handling of some escape sequences that the Pattern
     * class should process instead of the java compiler. These are
     * not in the file because the escapes should be processed
     * by the Pattern class when the regex is compiled.
3547      */
3548     private static void escapes() throws Exception {
3549         Pattern p = Pattern.compile("\\043");
3550         Matcher m = p.matcher("#");
3551         if (!m.find())
3552             failCount++;
3553 
3554         p = Pattern.compile("\\x23");
3555         m = p.matcher("#");
3556         if (!m.find())
3557             failCount++;
3558 
3559         p = Pattern.compile("\\u0023");
3560         m = p.matcher("#");
3561         if (!m.find())
3562             failCount++;
3563 
3564         report("Escape sequences");
3565     }
3566 
3567     /**
3568      * Checks the handling of blank input situations. These
3569      * tests are incompatible with my test file format.
3570      */
3571     private static void blankInput() throws Exception {
3572         Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3573         Matcher m = p.matcher("");
3574         if (m.find())
3575             failCount++;
3576 
3577         p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3578         m = p.matcher("");
3579         if (!m.find())
3580             failCount++;
3581 
3582         p = Pattern.compile("abc");
3583         m = p.matcher("");
3584         if (m.find())
3585             failCount++;
3586 
3587         p = Pattern.compile("a*");
3588         m = p.matcher("");
3589         if (!m.find())
3590             failCount++;
3591 
3592         report("Blank input");
3593     }
3594 
3595     /**
3596      * Tests the Boyer-Moore pattern matching of a character sequence
3597      * on randomly generated patterns.
3598      */
3599     private static void bm() throws Exception {
3600         doBnM('a');
3601         report("Boyer Moore (ASCII)");
3602 
3603         doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3604         report("Boyer Moore (Supplementary)");
3605     }
3606 
3607     private static void doBnM(int baseCharacter) throws Exception {
3608         int achar=0;
3609 
3610         for (int i=0; i<100; i++) {
3611             // Create a short pattern to search for
3612             int patternLength = generator.nextInt(7) + 4;
3613             StringBuffer patternBuffer = new StringBuffer(patternLength);
3614             String pattern;
3615             retry: for (;;) {
3616                 for (int x=0; x<patternLength; x++) {
3617                     int ch = baseCharacter + generator.nextInt(26);
3618                     if (Character.isSupplementaryCodePoint(ch)) {
3619                         patternBuffer.append(Character.toChars(ch));
3620                     } else {
3621                         patternBuffer.append((char)ch);
3622                     }
3623                 }
3624                 pattern = patternBuffer.toString();
3625 
3626                 // Avoid patterns that start and end with the same substring
3627                 // See JDK-6854417
3628                 for (int x=1; x < pattern.length(); x++) {
3629                     if (pattern.startsWith(pattern.substring(x)))
3630                         continue retry;
3631                 }
3632                 break;
3633             }
3634             Pattern p = Pattern.compile(pattern);
3635 
3636             // Create a buffer with random ASCII chars that does
3637             // not match the sample
3638             String toSearch = null;
3639             StringBuffer s = null;
3640             Matcher m = p.matcher("");
3641             do {
3642                 s = new StringBuffer(100);
3643                 for (int x=0; x<100; x++) {
3644                     int ch = baseCharacter + generator.nextInt(26);
3645                     if (Character.isSupplementaryCodePoint(ch)) {
3646                         s.append(Character.toChars(ch));
3647                     } else {
3648                         s.append((char)ch);
3649                     }
3650                 }
3651                 toSearch = s.toString();
3652                 m.reset(toSearch);
3653             } while (m.find());
3654 
3655             // Insert the pattern at a random spot
3656             int insertIndex = generator.nextInt(99);
3657             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3658                 insertIndex++;
3659             s = s.insert(insertIndex, pattern);
3660             toSearch = s.toString();
3661 
3662             // Make sure that the pattern is found
3663             m.reset(toSearch);
3664             if (!m.find())
3665                 failCount++;
3666 
3667             // Make sure that the match text is the pattern
3668             if (!m.group().equals(pattern))
3669                 failCount++;
3670 
3671             // Make sure match occured at insertion point
3672             if (m.start() != insertIndex)
3673                 failCount++;
3674         }
3675     }
3676 
3677     /**
3678      * Tests the matching of slices on randomly generated patterns.
3679      * The Boyer-Moore optimization is not done on these patterns
3680      * because it uses unicode case folding.
3681      */
3682     private static void slice() throws Exception {
3683         doSlice(Character.MAX_VALUE);
3684         report("Slice");
3685 
3686         doSlice(Character.MAX_CODE_POINT);
3687         report("Slice (Supplementary)");
3688     }
3689 
3690     private static void doSlice(int maxCharacter) throws Exception {
3691         Random generator = new Random();
3692         int achar=0;
3693 
3694         for (int i=0; i<100; i++) {
3695             // Create a short pattern to search for
3696             int patternLength = generator.nextInt(7) + 4;
3697             StringBuffer patternBuffer = new StringBuffer(patternLength);
3698             for (int x=0; x<patternLength; x++) {
3699                 int randomChar = 0;
3700                 while (!Character.isLetterOrDigit(randomChar))
3701                     randomChar = generator.nextInt(maxCharacter);
3702                 if (Character.isSupplementaryCodePoint(randomChar)) {
3703                     patternBuffer.append(Character.toChars(randomChar));
3704                 } else {
3705                     patternBuffer.append((char) randomChar);
3706                 }
3707             }
3708             String pattern =  patternBuffer.toString();
3709             Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3710 
3711             // Create a buffer with random chars that does not match the sample
3712             String toSearch = null;
3713             StringBuffer s = null;
3714             Matcher m = p.matcher("");
3715             do {
3716                 s = new StringBuffer(100);
3717                 for (int x=0; x<100; x++) {
3718                     int randomChar = 0;
3719                     while (!Character.isLetterOrDigit(randomChar))
3720                         randomChar = generator.nextInt(maxCharacter);
3721                     if (Character.isSupplementaryCodePoint(randomChar)) {
3722                         s.append(Character.toChars(randomChar));
3723                     } else {
3724                         s.append((char) randomChar);
3725                     }
3726                 }
3727                 toSearch = s.toString();
3728                 m.reset(toSearch);
3729             } while (m.find());
3730 
3731             // Insert the pattern at a random spot
3732             int insertIndex = generator.nextInt(99);
3733             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3734                 insertIndex++;
3735             s = s.insert(insertIndex, pattern);
3736             toSearch = s.toString();
3737 
3738             // Make sure that the pattern is found
3739             m.reset(toSearch);
3740             if (!m.find())
3741                 failCount++;
3742 
3743             // Make sure that the match text is the pattern
3744             if (!m.group().equals(pattern))
3745                 failCount++;
3746 
3747             // Make sure match occured at insertion point
3748             if (m.start() != insertIndex)
3749                 failCount++;
3750         }
3751     }
3752 
3753     private static void explainFailure(String pattern, String data,
3754                                        String expected, String actual) {
3755         System.err.println("----------------------------------------");
3756         System.err.println("Pattern = "+pattern);
3757         System.err.println("Data = "+data);
3758         System.err.println("Expected = " + expected);
3759         System.err.println("Actual   = " + actual);
3760     }
3761 
3762     private static void explainFailure(String pattern, String data,
3763                                        Throwable t) {
3764         System.err.println("----------------------------------------");
3765         System.err.println("Pattern = "+pattern);
3766         System.err.println("Data = "+data);
3767         t.printStackTrace(System.err);
3768     }
3769 
3770     // Testing examples from a file
3771 
3772     /**
3773      * Goes through the file "TestCases.txt" and creates many patterns
3774      * described in the file, matching the patterns against input lines in
3775      * the file, and comparing the results against the correct results
3776      * also found in the file. The file format is described in comments
3777      * at the head of the file.
3778      */
3779     private static void processFile(String fileName) throws Exception {
3780         File testCases = new File(System.getProperty("test.src", "."),
3781                                   fileName);
3782         FileInputStream in = new FileInputStream(testCases);
3783         BufferedReader r = new BufferedReader(new InputStreamReader(in));
3784 
3785         // Process next test case.
3786         String aLine;
3787         while((aLine = r.readLine()) != null) {
3788             // Read a line for pattern
3789             String patternString = grabLine(r);
3790             Pattern p = null;
3791             try {
3792                 p = compileTestPattern(patternString);
3793             } catch (PatternSyntaxException e) {
3794                 String dataString = grabLine(r);
3795                 String expectedResult = grabLine(r);
3796                 if (expectedResult.startsWith("error"))
3797                     continue;
3798                 explainFailure(patternString, dataString, e);
3799                 failCount++;
3800                 continue;
3801             }
3802 
3803             // Read a line for input string
3804             String dataString = grabLine(r);
3805             Matcher m = p.matcher(dataString);
3806             StringBuffer result = new StringBuffer();
3807 
3808             // Check for IllegalStateExceptions before a match
3809             failCount += preMatchInvariants(m);
3810 
3811             boolean found = m.find();
3812 
3813             if (found)
3814                 failCount += postTrueMatchInvariants(m);
3815             else
3816                 failCount += postFalseMatchInvariants(m);
3817 
3818             if (found) {
3819                 result.append("true ");
3820                 result.append(m.group(0) + " ");
3821             } else {
3822                 result.append("false ");
3823             }
3824 
3825             result.append(m.groupCount());
3826 
3827             if (found) {
3828                 for (int i=1; i<m.groupCount()+1; i++)
3829                     if (m.group(i) != null)
3830                         result.append(" " +m.group(i));
3831             }
3832 
3833             // Read a line for the expected result
3834             String expectedResult = grabLine(r);
3835 
3836             if (!result.toString().equals(expectedResult)) {
3837                 explainFailure(patternString, dataString, expectedResult, result.toString());
3838                 failCount++;
3839             }
3840         }
3841 
3842         report(fileName);
3843     }
3844 
3845     private static int preMatchInvariants(Matcher m) {
3846         int failCount = 0;
3847         try {
3848             m.start();
3849             failCount++;
3850         } catch (IllegalStateException ise) {}
3851         try {
3852             m.end();
3853             failCount++;
3854         } catch (IllegalStateException ise) {}
3855         try {
3856             m.group();
3857             failCount++;
3858         } catch (IllegalStateException ise) {}
3859         return failCount;
3860     }
3861 
3862     private static int postFalseMatchInvariants(Matcher m) {
3863         int failCount = 0;
3864         try {
3865             m.group();
3866             failCount++;
3867         } catch (IllegalStateException ise) {}
3868         try {
3869             m.start();
3870             failCount++;
3871         } catch (IllegalStateException ise) {}
3872         try {
3873             m.end();
3874             failCount++;
3875         } catch (IllegalStateException ise) {}
3876         return failCount;
3877     }
3878 
3879     private static int postTrueMatchInvariants(Matcher m) {
3880         int failCount = 0;
3881         //assert(m.start() = m.start(0);
3882         if (m.start() != m.start(0))
3883             failCount++;
3884         //assert(m.end() = m.end(0);
3885         if (m.start() != m.start(0))
3886             failCount++;
3887         //assert(m.group() = m.group(0);
3888         if (!m.group().equals(m.group(0)))
3889             failCount++;
3890         try {
3891             m.group(50);
3892             failCount++;
3893         } catch (IndexOutOfBoundsException ise) {}
3894 
3895         return failCount;
3896     }
3897 
3898     private static Pattern compileTestPattern(String patternString) {
3899         if (!patternString.startsWith("'")) {
3900             return Pattern.compile(patternString);
3901         }
3902         int break1 = patternString.lastIndexOf("'");
3903         String flagString = patternString.substring(
3904                                           break1+1, patternString.length());
3905         patternString = patternString.substring(1, break1);
3906 
3907         if (flagString.equals("i"))
3908             return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3909 
3910         if (flagString.equals("m"))
3911             return Pattern.compile(patternString, Pattern.MULTILINE);
3912 
3913         return Pattern.compile(patternString);
3914     }
3915 
3916     /**
3917      * Reads a line from the input file. Keeps reading lines until a non
3918      * empty non comment line is read. If the line contains a \n then
3919      * these two characters are replaced by a newline char. If a \\uxxxx
3920      * sequence is read then the sequence is replaced by the unicode char.
3921      */
3922     private static String grabLine(BufferedReader r) throws Exception {
3923         int index = 0;
3924         String line = r.readLine();
3925         while (line.startsWith("//") || line.length() < 1)
3926             line = r.readLine();
3927         while ((index = line.indexOf("\\n")) != -1) {
3928             StringBuffer temp = new StringBuffer(line);
3929             temp.replace(index, index+2, "\n");
3930             line = temp.toString();
3931         }
3932         while ((index = line.indexOf("\\u")) != -1) {
3933             StringBuffer temp = new StringBuffer(line);
3934             String value = temp.substring(index+2, index+6);
3935             char aChar = (char)Integer.parseInt(value, 16);
3936             String unicodeChar = "" + aChar;
3937             temp.replace(index, index+6, unicodeChar);
3938             line = temp.toString();
3939         }
3940 
3941         return line;
3942     }
3943 
3944     private static void check(Pattern p, String s, String g, String expected) {
3945         Matcher m = p.matcher(s);
3946         m.find();
3947         if (!m.group(g).equals(expected) ||
3948             s.charAt(m.start(g)) != expected.charAt(0) ||
3949             s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1))
3950             failCount++;
3951     }
3952 
3953     private static void checkReplaceFirst(String p, String s, String r, String expected)
3954     {
3955         if (!expected.equals(Pattern.compile(p)
3956                                     .matcher(s)
3957                                     .replaceFirst(r)))
3958             failCount++;
3959     }
3960 
3961     private static void checkReplaceAll(String p, String s, String r, String expected)
3962     {
3963         if (!expected.equals(Pattern.compile(p)
3964                                     .matcher(s)
3965                                     .replaceAll(r)))
3966             failCount++;
3967     }
3968 
3969     private static void checkExpectedFail(String p) {
3970         try {
3971             Pattern.compile(p);
3972         } catch (PatternSyntaxException pse) {
3973             //pse.printStackTrace();
3974             return;
3975         }
3976         failCount++;
3977     }
3978 
3979     private static void checkExpectedIAE(Matcher m, String g) {
3980         m.find();
3981         try {
3982             m.group(g);
3983         } catch (IllegalArgumentException x) {
3984             //iae.printStackTrace();
3985             try {
3986                 m.start(g);
3987             } catch (IllegalArgumentException xx) {
3988                 try {
3989                     m.start(g);
3990                 } catch (IllegalArgumentException xxx) {
3991                     return;
3992                 }
3993             }
3994         }
3995         failCount++;
3996     }
3997 
3998     private static void checkExpectedNPE(Matcher m) {
3999         m.find();
4000         try {
4001             m.group(null);
4002         } catch (NullPointerException x) {
4003             try {
4004                 m.start(null);
4005             } catch (NullPointerException xx) {
4006                 try {
4007                     m.end(null);
4008                 } catch (NullPointerException xxx) {
4009                     return;
4010                 }
4011             }
4012         }
4013         failCount++;
4014     }
4015 
4016     private static void namedGroupCaptureTest() throws Exception {
4017         check(Pattern.compile("x+(?<gname>y+)z+"),
4018               "xxxyyyzzz",
4019               "gname",
4020               "yyy");
4021 
4022         check(Pattern.compile("x+(?<gname8>y+)z+"),
4023               "xxxyyyzzz",
4024               "gname8",
4025               "yyy");
4026 
4027         //backref
4028         Pattern pattern = Pattern.compile("(a*)bc\\1");
4029         check(pattern, "zzzaabcazzz", true);  // found "abca"
4030 
4031         check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
4032               "zzzaabcaazzz", true);
4033 
4034         check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
4035               "abcdefabc", true);
4036 
4037         check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
4038               "abcdefghijkk", true);
4039 
4040         // Supplementary character tests
4041         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
4042               toSupplementaries("zzzaabcazzz"), true);
4043 
4044         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
4045               toSupplementaries("zzzaabcaazzz"), true);
4046 
4047         check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
4048               toSupplementaries("abcdefabc"), true);
4049 
4050         check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
4051                               "(?<gname>" +
4052                               toSupplementaries("k)") + "\\k<gname>"),
4053               toSupplementaries("abcdefghijkk"), true);
4054 
4055         check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
4056               "xxxyyyzzzyyy",
4057               "gname",
4058               "yyy");
4059 
4060         //replaceFirst/All
4061         checkReplaceFirst("(?<gn>ab)(c*)",
4062                           "abccczzzabcczzzabccc",
4063                           "${gn}",
4064                           "abzzzabcczzzabccc");
4065 
4066         checkReplaceAll("(?<gn>ab)(c*)",
4067                         "abccczzzabcczzzabccc",
4068                         "${gn}",
4069                         "abzzzabzzzab");
4070 
4071 
4072         checkReplaceFirst("(?<gn>ab)(c*)",
4073                           "zzzabccczzzabcczzzabccczzz",
4074                           "${gn}",
4075                           "zzzabzzzabcczzzabccczzz");
4076 
4077         checkReplaceAll("(?<gn>ab)(c*)",
4078                         "zzzabccczzzabcczzzabccczzz",
4079                         "${gn}",
4080                         "zzzabzzzabzzzabzzz");
4081 
4082         checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
4083                           "zzzabccczzzabcczzzabccczzz",
4084                           "${gn2}",
4085                           "zzzccczzzabcczzzabccczzz");
4086 
4087         checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
4088                         "zzzabccczzzabcczzzabccczzz",
4089                         "${gn2}",
4090                         "zzzccczzzcczzzccczzz");
4091 
4092         //toSupplementaries("(ab)(c*)"));
4093         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4094                            ")(?<gn2>" + toSupplementaries("c") + "*)",
4095                           toSupplementaries("abccczzzabcczzzabccc"),
4096                           "${gn1}",
4097                           toSupplementaries("abzzzabcczzzabccc"));
4098 
4099 
4100         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4101                         ")(?<gn2>" + toSupplementaries("c") + "*)",
4102                         toSupplementaries("abccczzzabcczzzabccc"),
4103                         "${gn1}",
4104                         toSupplementaries("abzzzabzzzab"));
4105 
4106         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4107                            ")(?<gn2>" + toSupplementaries("c") + "*)",
4108                           toSupplementaries("abccczzzabcczzzabccc"),
4109                           "${gn2}",
4110                           toSupplementaries("ccczzzabcczzzabccc"));
4111 
4112 
4113         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4114                         ")(?<gn2>" + toSupplementaries("c") + "*)",
4115                         toSupplementaries("abccczzzabcczzzabccc"),
4116                         "${gn2}",
4117                         toSupplementaries("ccczzzcczzzccc"));
4118 
4119         checkReplaceFirst("(?<dog>Dog)AndCat",
4120                           "zzzDogAndCatzzzDogAndCatzzz",
4121                           "${dog}",
4122                           "zzzDogzzzDogAndCatzzz");
4123 
4124 
4125         checkReplaceAll("(?<dog>Dog)AndCat",
4126                           "zzzDogAndCatzzzDogAndCatzzz",
4127                           "${dog}",
4128                           "zzzDogzzzDogzzz");
4129 
4130         // backref in Matcher & String
4131         if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
4132             !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
4133             failCount++;
4134 
4135         // negative
4136         checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
4137         checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
4138         checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
4139         checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
4140         checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
4141         checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
4142                          "gnameX");
4143         checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
4144         report("NamedGroupCapture");
4145     }
4146 
4147     // This is for bug 6919132
4148     private static void nonBmpClassComplementTest() throws Exception {
4149         Pattern p = Pattern.compile("\\P{Lu}");
4150         Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4151 
4152         if (m.find() && m.start() == 1)
4153             failCount++;
4154 
4155         // from a unicode category
4156         p = Pattern.compile("\\P{Lu}");
4157         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4158         if (m.find())
4159             failCount++;
4160         if (!m.hitEnd())
4161             failCount++;
4162 
4163         // block
4164         p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
4165         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4166         if (m.find() && m.start() == 1)
4167             failCount++;
4168 
4169         p = Pattern.compile("\\P{sc=GRANTHA}");
4170         m = p.matcher(new String(new int[] {0x11350}, 0, 1));
4171         if (m.find() && m.start() == 1)
4172             failCount++;
4173 
4174         report("NonBmpClassComplement");
4175     }
4176 
4177     private static void unicodePropertiesTest() throws Exception {
4178         // different forms
4179         if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
4180             !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
4181             !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
4182             !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
4183             !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
4184             !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
4185             !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
4186             !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
4187             !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
4188             !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
4189             failCount++;
4190 
4191         Matcher common  = Pattern.compile("\\p{script=Common}").matcher("");
4192         Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
4193         Matcher lastSM  = common;
4194         Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
4195 
4196         Matcher latin  = Pattern.compile("\\p{block=basic_latin}").matcher("");
4197         Matcher greek  = Pattern.compile("\\p{InGreek}").matcher("");
4198         Matcher lastBM = latin;
4199         Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
4200 
4201         for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
4202             if (cp >= 0x30000 && (cp & 0x70) == 0){
4203                 continue;  // only pick couple code points, they are the same
4204             }
4205 
4206             // Unicode Script
4207             Character.UnicodeScript script = Character.UnicodeScript.of(cp);
4208             Matcher m;
4209             String str = new String(Character.toChars(cp));
4210             if (script == lastScript) {
4211                  m = lastSM;
4212                  m.reset(str);
4213             } else {
4214                  m  = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
4215             }
4216             if (!m.matches()) {
4217                 failCount++;
4218             }
4219             Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
4220             other.reset(str);
4221             if (other.matches()) {
4222                 failCount++;
4223             }
4224             lastSM = m;
4225             lastScript = script;
4226 
4227             // Unicode Block
4228             Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
4229             if (block == null) {
4230                 //System.out.printf("Not a Block: cp=%x%n", cp);
4231                 continue;
4232             }
4233             if (block == lastBlock) {
4234                  m = lastBM;
4235                  m.reset(str);
4236             } else {
4237                  m  = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
4238             }
4239             if (!m.matches()) {
4240                 failCount++;
4241             }
4242             other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
4243             other.reset(str);
4244             if (other.matches()) {
4245                 failCount++;
4246             }
4247             lastBM = m;
4248             lastBlock = block;
4249         }
4250         report("unicodeProperties");
4251     }
4252 
4253     private static void unicodeHexNotationTest() throws Exception {
4254 
4255         // negative
4256         checkExpectedFail("\\x{-23}");
4257         checkExpectedFail("\\x{110000}");
4258         checkExpectedFail("\\x{}");
4259         checkExpectedFail("\\x{AB[ef]");
4260 
4261         // codepoint
4262         check("^\\x{1033c}$",              "\uD800\uDF3C", true);
4263         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
4264         check("^\\x{D800}\\x{DF3c}+$",     "\uD800\uDF3C", false);
4265         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
4266 
4267         // in class
4268         check("^[\\x{D800}\\x{DF3c}]+$",   "\uD800\uDF3C", false);
4269         check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
4270         check("^[\\x{D800}\\x{DF3C}]+$",   "\uD800\uDF3C", false);
4271         check("^[\\x{DF3C}\\x{D800}]+$",   "\uD800\uDF3C", false);
4272         check("^[\\x{D800}\\x{DF3C}]+$",   "\uDF3C\uD800", true);
4273         check("^[\\x{DF3C}\\x{D800}]+$",   "\uDF3C\uD800", true);
4274 
4275         for (int cp = 0; cp <= 0x10FFFF; cp++) {
4276              String s = "A" + new String(Character.toChars(cp)) + "B";
4277              String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
4278                                              : String.format("\\u%04x\\u%04x",
4279                                                (int) Character.toChars(cp)[0],
4280                                                (int) Character.toChars(cp)[1]);
4281              String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
4282              if (!Pattern.matches("A" + hexUTF16 + "B", s))
4283                  failCount++;
4284              if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
4285                  failCount++;
4286              if (!Pattern.matches("A" + hexCodePoint + "B", s))
4287                  failCount++;
4288              if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
4289                  failCount++;
4290          }
4291          report("unicodeHexNotation");
4292     }
4293 
4294     private static void unicodeClassesTest() throws Exception {
4295 
4296         Matcher lower  = Pattern.compile("\\p{Lower}").matcher("");
4297         Matcher upper  = Pattern.compile("\\p{Upper}").matcher("");
4298         Matcher ASCII  = Pattern.compile("\\p{ASCII}").matcher("");
4299         Matcher alpha  = Pattern.compile("\\p{Alpha}").matcher("");
4300         Matcher digit  = Pattern.compile("\\p{Digit}").matcher("");
4301         Matcher alnum  = Pattern.compile("\\p{Alnum}").matcher("");
4302         Matcher punct  = Pattern.compile("\\p{Punct}").matcher("");
4303         Matcher graph  = Pattern.compile("\\p{Graph}").matcher("");
4304         Matcher print  = Pattern.compile("\\p{Print}").matcher("");
4305         Matcher blank  = Pattern.compile("\\p{Blank}").matcher("");
4306         Matcher cntrl  = Pattern.compile("\\p{Cntrl}").matcher("");
4307         Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
4308         Matcher space  = Pattern.compile("\\p{Space}").matcher("");
4309         Matcher bound  = Pattern.compile("\\b").matcher("");
4310         Matcher word   = Pattern.compile("\\w++").matcher("");
4311         // UNICODE_CHARACTER_CLASS
4312         Matcher lowerU  = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4313         Matcher upperU  = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4314         Matcher ASCIIU  = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4315         Matcher alphaU  = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4316         Matcher digitU  = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4317         Matcher alnumU  = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4318         Matcher punctU  = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4319         Matcher graphU  = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4320         Matcher printU  = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4321         Matcher blankU  = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4322         Matcher cntrlU  = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4323         Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4324         Matcher spaceU  = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4325         Matcher boundU  = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4326         Matcher wordU   = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4327         // embedded flag (?U)
4328         Matcher lowerEU  = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4329         Matcher graphEU  = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4330         Matcher wordEU   = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4331 
4332         Matcher bwb    = Pattern.compile("\\b\\w\\b").matcher("");
4333         Matcher bwbU   = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4334         Matcher bwbEU  = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4335         // properties
4336         Matcher lowerP  = Pattern.compile("\\p{IsLowerCase}").matcher("");
4337         Matcher upperP  = Pattern.compile("\\p{IsUpperCase}").matcher("");
4338         Matcher titleP  = Pattern.compile("\\p{IsTitleCase}").matcher("");
4339         Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
4340         Matcher alphaP  = Pattern.compile("\\p{IsAlphabetic}").matcher("");
4341         Matcher ideogP  = Pattern.compile("\\p{IsIdeographic}").matcher("");
4342         Matcher cntrlP  = Pattern.compile("\\p{IsControl}").matcher("");
4343         Matcher spaceP  = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
4344         Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
4345         Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
4346         Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
4347 
4348         // javaMethod
4349         Matcher lowerJ  = Pattern.compile("\\p{javaLowerCase}").matcher("");
4350         Matcher upperJ  = Pattern.compile("\\p{javaUpperCase}").matcher("");
4351         Matcher alphaJ  = Pattern.compile("\\p{javaAlphabetic}").matcher("");
4352         Matcher ideogJ  = Pattern.compile("\\p{javaIdeographic}").matcher("");
4353 
4354         for (int cp = 1; cp < 0x30000; cp++) {
4355             String str = new String(Character.toChars(cp));
4356             int type = Character.getType(cp);
4357             if (// lower
4358                 POSIX_ASCII.isLower(cp)   != lower.reset(str).matches()  ||
4359                 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
4360                 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
4361                 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
4362                 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
4363                 // upper
4364                 POSIX_ASCII.isUpper(cp)   != upper.reset(str).matches()  ||
4365                 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
4366                 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
4367                 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
4368                 // alpha
4369                 POSIX_ASCII.isAlpha(cp)   != alpha.reset(str).matches()  ||
4370                 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
4371                 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
4372                 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
4373                 // digit
4374                 POSIX_ASCII.isDigit(cp)   != digit.reset(str).matches()  ||
4375                 Character.isDigit(cp)     != digitU.reset(str).matches() ||
4376                 // alnum
4377                 POSIX_ASCII.isAlnum(cp)   != alnum.reset(str).matches()  ||
4378                 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
4379                 // punct
4380                 POSIX_ASCII.isPunct(cp)   != punct.reset(str).matches()  ||
4381                 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
4382                 // graph
4383                 POSIX_ASCII.isGraph(cp)   != graph.reset(str).matches()  ||
4384                 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
4385                 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
4386                 // blank
4387                 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
4388                                           != blank.reset(str).matches()  ||
4389                 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
4390                 // print
4391                 POSIX_ASCII.isPrint(cp)   != print.reset(str).matches()  ||
4392                 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
4393                 // cntrl
4394                 POSIX_ASCII.isCntrl(cp)   != cntrl.reset(str).matches()  ||
4395                 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
4396                 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
4397                 // hexdigit
4398                 POSIX_ASCII.isHexDigit(cp)   != xdigit.reset(str).matches()  ||
4399                 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
4400                 // space
4401                 POSIX_ASCII.isSpace(cp)   != space.reset(str).matches()  ||
4402                 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
4403                 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
4404                 // word
4405                 POSIX_ASCII.isWord(cp)   != word.reset(str).matches()  ||
4406                 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
4407                 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
4408                 // bwordb
4409                 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
4410                 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
4411                 // properties
4412                 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
4413                 Character.isLetter(cp)    != letterP.reset(str).matches()||
4414                 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
4415                 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
4416                 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
4417                 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
4418                 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches())
4419                 failCount++;
4420         }
4421 
4422         // bounds/word align
4423         twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
4424         if (!bwbU.reset("\u0180sherman\u0400").matches())
4425             failCount++;
4426         twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
4427         if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
4428             failCount++;
4429         twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
4430         if (!bwbU.reset("\u0724\u0739\u0724").matches())
4431             failCount++;
4432         if (!bwbEU.reset("\u0724\u0739\u0724").matches())
4433             failCount++;
4434         report("unicodePredefinedClasses");
4435     }
4436 
4437     private static void unicodeCharacterNameTest() throws Exception {
4438 
4439         for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) {
4440             if (!Character.isValidCodePoint(cp) ||
4441                 Character.getType(cp) == Character.UNASSIGNED)
4442                 continue;
4443             String str = new String(Character.toChars(cp));
4444             // single
4445             String p = "\\N{" + Character.getName(cp) + "}";
4446             if (!Pattern.compile(p).matcher(str).matches()) {
4447                 failCount++;
4448             }
4449             // class[c]
4450             p = "[\\N{" + Character.getName(cp) + "}]";
4451             if (!Pattern.compile(p).matcher(str).matches()) {
4452                 failCount++;
4453             }
4454         }
4455 
4456         // range
4457         for (int i = 0; i < 10; i++) {
4458             int start = generator.nextInt(20);
4459             int end = start + generator.nextInt(200);
4460             String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]";
4461             String str;
4462             for (int cp = start; cp < end; cp++) {
4463                 str = new String(Character.toChars(cp));
4464                 if (!Pattern.compile(p).matcher(str).matches()) {
4465                     failCount++;
4466                 }
4467             }
4468             str = new String(Character.toChars(end + 10));
4469             if (Pattern.compile(p).matcher(str).matches()) {
4470                 failCount++;
4471             }
4472         }
4473 
4474         // slice
4475         for (int i = 0; i < 10; i++) {
4476             int n = generator.nextInt(256);
4477             int[] buf = new int[n];
4478             StringBuffer sb = new StringBuffer(1024);
4479             for (int j = 0; j < n; j++) {
4480                 int cp = generator.nextInt(1000);
4481                 if (!Character.isValidCodePoint(cp) ||
4482                     Character.getType(cp) == Character.UNASSIGNED)
4483                     cp = 0x4e00;    // just use 4e00
4484                 sb.append("\\N{" + Character.getName(cp) + "}");
4485                 buf[j] = cp;
4486             }
4487             String p = sb.toString();
4488             String str = new String(buf, 0, buf.length);
4489             if (!Pattern.compile(p).matcher(str).matches()) {
4490                 failCount++;
4491             }
4492         }
4493         report("unicodeCharacterName");
4494     }
4495 
4496     private static void horizontalAndVerticalWSTest() throws Exception {
4497         String hws = new String (new char[] {
4498                                      0x09, 0x20, 0xa0, 0x1680, 0x180e,
4499                                      0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
4500                                      0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
4501                                      0x202f, 0x205f, 0x3000 });
4502         String vws = new String (new char[] {
4503                                      0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
4504         if (!Pattern.compile("\\h+").matcher(hws).matches() ||
4505             !Pattern.compile("[\\h]+").matcher(hws).matches())
4506             failCount++;
4507         if (Pattern.compile("\\H").matcher(hws).find() ||
4508             Pattern.compile("[\\H]").matcher(hws).find())
4509             failCount++;
4510         if (!Pattern.compile("\\v+").matcher(vws).matches() ||
4511             !Pattern.compile("[\\v]+").matcher(vws).matches())
4512             failCount++;
4513         if (Pattern.compile("\\V").matcher(vws).find() ||
4514             Pattern.compile("[\\V]").matcher(vws).find())
4515             failCount++;
4516         String prefix = "abcd";
4517         String suffix = "efgh";
4518         String ng = "A";
4519         for (int i = 0; i < hws.length(); i++) {
4520             String c = String.valueOf(hws.charAt(i));
4521             Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
4522             if (!m.find() || !c.equals(m.group()))
4523                 failCount++;
4524             m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
4525             if (!m.find() || !c.equals(m.group()))
4526                 failCount++;
4527 
4528             m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
4529             if (!m.find() || !ng.equals(m.group()))
4530                 failCount++;
4531             m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
4532             if (!m.find() || !ng.equals(m.group()))
4533                 failCount++;
4534         }
4535         for (int i = 0; i < vws.length(); i++) {
4536             String c = String.valueOf(vws.charAt(i));
4537             Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
4538             if (!m.find() || !c.equals(m.group()))
4539                 failCount++;
4540             m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
4541             if (!m.find() || !c.equals(m.group()))
4542                 failCount++;
4543 
4544             m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
4545             if (!m.find() || !ng.equals(m.group()))
4546                 failCount++;
4547             m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
4548             if (!m.find() || !ng.equals(m.group()))
4549                 failCount++;
4550         }
4551         // \v in range is interpreted as 0x0B. This is the undocumented behavior
4552         if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
4553             failCount++;
4554         report("horizontalAndVerticalWSTest");
4555     }
4556 
4557     private static void linebreakTest() throws Exception {
4558         String linebreaks = new String (new char[] {
4559             0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
4560         String crnl = "\r\n";
4561         if (!Pattern.compile("\\R+").matcher(linebreaks).matches() ||
4562             !Pattern.compile("\\R").matcher(crnl).matches() ||
4563             Pattern.compile("\\R\\R").matcher(crnl).matches())
4564             failCount++;
4565         report("linebreakTest");
4566     }
4567 
4568     // #7189363
4569     private static void branchTest() throws Exception {
4570         if (!Pattern.compile("(a)?bc|d").matcher("d").find() ||     // greedy
4571             !Pattern.compile("(a)+bc|d").matcher("d").find() ||
4572             !Pattern.compile("(a)*bc|d").matcher("d").find() ||
4573             !Pattern.compile("(a)??bc|d").matcher("d").find() ||    // reluctant
4574             !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
4575             !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
4576             !Pattern.compile("(a)?+bc|d").matcher("d").find() ||    // possessive
4577             !Pattern.compile("(a)++bc|d").matcher("d").find() ||
4578             !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
4579             !Pattern.compile("(a)?bc|d").matcher("d").matches() ||  // greedy
4580             !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
4581             !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
4582             !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
4583             !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
4584             !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
4585             !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
4586             !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
4587             !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
4588             !Pattern.compile("(a)?bc|de").matcher("de").find() ||   // others
4589             !Pattern.compile("(a)??bc|de").matcher("de").find() ||
4590             !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
4591             !Pattern.compile("(a)??bc|de").matcher("de").matches())
4592             failCount++;
4593         report("branchTest");
4594     }
4595 
4596     // This test is for 8007395
4597     private static void groupCurlyNotFoundSuppTest() throws Exception {
4598         String input = "test this as \ud83d\ude0d";
4599         for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
4600                                           "test(.)*(@[a-zA-Z.]+)",
4601                                           "test([^B])+(@[a-zA-Z.]+)",
4602                                           "test([^B])*(@[a-zA-Z.]+)",
4603                                           "test(\\P{IsControl})+(@[a-zA-Z.]+)",
4604                                           "test(\\P{IsControl})*(@[a-zA-Z.]+)",
4605                                         }) {
4606             Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
4607                                .matcher(input);
4608             try {
4609                 if (m.find()) {
4610                     failCount++;
4611                 }
4612             } catch (Exception x) {
4613                 failCount++;
4614             }
4615         }
4616         report("GroupCurly NotFoundSupp");
4617     }
4618 
4619     // This test is for 8023647
4620     private static void groupCurlyBackoffTest() throws Exception {
4621         if (!"abc1c".matches("(\\w)+1\\1") ||
4622             "abc11".matches("(\\w)+1\\1")) {
4623             failCount++;
4624         }
4625         report("GroupCurly backoff");
4626     }
4627 
4628     // This test is for 8012646
4629     private static void patternAsPredicate() throws Exception {
4630         Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
4631 
4632         if (p.test("")) {
4633             failCount++;
4634         }
4635         if (!p.test("word")) {
4636             failCount++;
4637         }
4638         if (p.test("1234")) {
4639             failCount++;
4640         }
4641         report("Pattern.asPredicate");
4642     }
4643 
4644     // This test is for 8035975
4645     private static void invalidFlags() throws Exception {
4646         for (int flag = 1; flag != 0; flag <<= 1) {
4647             switch (flag) {
4648             case Pattern.CASE_INSENSITIVE:
4649             case Pattern.MULTILINE:
4650             case Pattern.DOTALL:
4651             case Pattern.UNICODE_CASE:
4652             case Pattern.CANON_EQ:
4653             case Pattern.UNIX_LINES:
4654             case Pattern.LITERAL:
4655             case Pattern.UNICODE_CHARACTER_CLASS:
4656             case Pattern.COMMENTS:
4657                 // valid flag, continue
4658                 break;
4659             default:
4660                 try {
4661                     Pattern.compile(".", flag);
4662                     failCount++;
4663                 } catch (IllegalArgumentException expected) {
4664                 }
4665             }
4666         }
4667         report("Invalid compile flags");
4668     }
4669 
4670     private static void grapheme() throws Exception {
4671         Files.lines(Paths.get(System.getProperty("test.src", "."),
4672                               "GraphemeBreakTest.txt"))
4673             .filter( ln -> ln.length() != 0 && !ln.startsWith("#") )
4674             .forEach( ln -> {
4675                     ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", "");
4676                     // System.out.println(str);
4677                     String[] strs = ln.split("\u00f7|\u00d7");
4678                     StringBuilder src = new StringBuilder();
4679                     ArrayList<String> graphemes = new ArrayList<>();
4680                     StringBuilder buf = new StringBuilder();
4681                     int offBk = 0;
4682                     for (String str : strs) {
4683                         if (str.length() == 0)  // first empty str
4684                             continue;
4685                         int cp = Integer.parseInt(str, 16);
4686                         src.appendCodePoint(cp);
4687                         buf.appendCodePoint(cp);
4688                         offBk += (str.length() + 1);
4689                         if (ln.charAt(offBk) == '\u00f7') {    // DIV
4690                             graphemes.add(buf.toString());
4691                             buf = new StringBuilder();
4692                         }
4693                     }
4694                     Pattern p = Pattern.compile("\\X");
4695                     Matcher m = p.matcher(src.toString());
4696                     Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}");
4697                     for (String g : graphemes) {
4698                         // System.out.printf("     grapheme:=[%s]%n", g);
4699                         // (1) test \\X directly
4700                         if (!m.find() || !m.group().equals(g)) {
4701                             System.out.println("Failed \\X [" + ln + "] : " + g);
4702                             failCount++;
4703                         }
4704                         // (2) test \\b{g} + \\X  via Scanner
4705                         boolean hasNext = s.hasNext(p);
4706                         // if (!s.hasNext() || !s.next().equals(next)) {
4707                         if (!s.hasNext(p) || !s.next(p).equals(g)) {
4708                             System.out.println("Failed b{g} [" + ln + "] : " + g);
4709                             failCount++;
4710                         }
4711                     }
4712                 });
4713         // some sanity checks
4714         if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() ||
4715             !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() ||
4716             !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches())
4717             failCount++;
4718         // make sure "\b{n}" still works
4719         if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches())
4720             failCount++;
4721         report("Unicode extended grapheme cluster");
4722     }
4723 }