1 /*
   2  * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /**
  25  * @test
  26  * @summary tests RegExp framework (use -Dseed=X to set PRNG seed)
  27  * @author Mike McCloskey
  28  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
  29  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
  30  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  31  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  32  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  33  * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
  34  * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
  35  * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
  36  * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
  37  * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
  38  * 8194667 8197462 8184692 8221431 8224789 8228352 8230365
  39  *
  40  * @library /test/lib
  41  * @library /lib/testlibrary/java/lang
  42  * @build jdk.test.lib.RandomFactory
  43  * @run main RegExTest
  44  * @key randomness
  45  */
  46 
  47 import java.io.BufferedReader;
  48 import java.io.ByteArrayInputStream;
  49 import java.io.ByteArrayOutputStream;
  50 import java.io.File;
  51 import java.io.FileInputStream;
  52 import java.io.InputStreamReader;
  53 import java.io.ObjectInputStream;
  54 import java.io.ObjectOutputStream;
  55 import java.math.BigInteger;
  56 import java.nio.CharBuffer;
  57 import java.nio.file.Files;
  58 import java.util.ArrayList;
  59 import java.util.Arrays;
  60 import java.util.HashMap;
  61 import java.util.List;
  62 import java.util.Random;
  63 import java.util.Scanner;
  64 import java.util.function.Function;
  65 import java.util.function.Predicate;
  66 import java.util.regex.Matcher;
  67 import java.util.regex.MatchResult;
  68 import java.util.regex.Pattern;
  69 import java.util.regex.PatternSyntaxException;
  70 import jdk.test.lib.RandomFactory;
  71 
  72 /**
  73  * This is a test class created to check the operation of
  74  * the Pattern and Matcher classes.
  75  */
  76 public class RegExTest {
  77 
    // Source of randomness for the randomized tests, obtained from the
    // jdk.test.lib RandomFactory helper.
    private static Random generator = RandomFactory.getRandom();
    // Set by report() as soon as any test group has recorded a failure.
    private static boolean failure = false;
    // Failed checks in the current test group; report() resets it to zero.
    private static int failCount = 0;
    // Name of the first test group that failed; used in main()'s exception.
    private static String firstFailure = null;
  82 
  83     /**
  84      * Main to interpret arguments and run several tests.
  85      *
  86      */
  87     public static void main(String[] args) throws Exception {
  88         // Most of the tests are in a file
  89         processFile("TestCases.txt");
  90         //processFile("PerlCases.txt");
  91         processFile("BMPTestCases.txt");
  92         processFile("SupplementaryTestCases.txt");
  93 
  94         // These test many randomly generated char patterns
  95         bm();
  96         slice();
  97 
  98         // These are hard to put into the file
  99         escapes();
 100         blankInput();
 101 
 102         // Substitition tests on randomly generated sequences
 103         globalSubstitute();
 104         stringbufferSubstitute();
 105         stringbuilderSubstitute();
 106 
 107         substitutionBasher();
 108         substitutionBasher2();
 109 
 110         // Canonical Equivalence
 111         ceTest();
 112 
 113         // Anchors
 114         anchorTest();
 115 
 116         // boolean match calls
 117         matchesTest();
 118         lookingAtTest();
 119 
 120         // Pattern API
 121         patternMatchesTest();
 122 
 123         // Misc
 124         lookbehindTest();
 125         nullArgumentTest();
 126         backRefTest();
 127         groupCaptureTest();
 128         caretTest();
 129         charClassTest();
 130         emptyPatternTest();
 131         findIntTest();
 132         group0Test();
 133         longPatternTest();
 134         octalTest();
 135         ampersandTest();
 136         negationTest();
 137         splitTest();
 138         appendTest();
 139         caseFoldingTest();
 140         commentsTest();
 141         unixLinesTest();
 142         replaceFirstTest();
 143         gTest();
 144         zTest();
 145         serializeTest();
 146         reluctantRepetitionTest();
 147         multilineDollarTest();
 148         dollarAtEndTest();
 149         caretBetweenTerminatorsTest();
 150         // This RFE rejected in Tiger numOccurrencesTest();
 151         javaCharClassTest();
 152         nonCaptureRepetitionTest();
 153         notCapturedGroupCurlyMatchTest();
 154         escapedSegmentTest();
 155         literalPatternTest();
 156         literalReplacementTest();
 157         regionTest();
 158         toStringTest();
 159         negatedCharClassTest();
 160         findFromTest();
 161         boundsTest();
 162         unicodeWordBoundsTest();
 163         caretAtEndTest();
 164         wordSearchTest();
 165         hitEndTest();
 166         toMatchResultTest();
 167         toMatchResultTest2();
 168         surrogatesInClassTest();
 169         removeQEQuotingTest();
 170         namedGroupCaptureTest();
 171         nonBmpClassComplementTest();
 172         unicodePropertiesTest();
 173         unicodeHexNotationTest();
 174         unicodeClassesTest();
 175         unicodeCharacterNameTest();
 176         horizontalAndVerticalWSTest();
 177         linebreakTest();
 178         branchTest();
 179         groupCurlyNotFoundSuppTest();
 180         groupCurlyBackoffTest();
 181         patternAsPredicate();
 182         patternAsMatchPredicate();
 183         invalidFlags();
 184         embeddedFlags();
 185         grapheme();
 186         expoBacktracking();
 187         invalidGroupName();
 188         illegalRepetitionRange();
 189         surrogatePairWithCanonEq();
 190         controlCharacters();
 191 
 192         if (failure) {
 193             throw new
 194                 RuntimeException("RegExTest failed, 1st failure: " +
 195                                  firstFailure);
 196         } else {
 197             System.err.println("OKAY: All tests passed.");
 198         }
 199     }
 200 
 201     // Utility functions
 202 
 203     private static String getRandomAlphaString(int length) {
 204         StringBuffer buf = new StringBuffer(length);
 205         for (int i=0; i<length; i++) {
 206             char randChar = (char)(97 + generator.nextInt(26));
 207             buf.append(randChar);
 208         }
 209         return buf.toString();
 210     }
 211 
 212     private static void check(Matcher m, String expected) {
 213         m.find();
 214         if (!m.group().equals(expected))
 215             failCount++;
 216     }
 217 
 218     private static void check(Matcher m, String result, boolean expected) {
 219         m.find();
 220         if (m.group().equals(result) != expected)
 221             failCount++;
 222     }
 223 
 224     private static void check(Pattern p, String s, boolean expected) {
 225         if (p.matcher(s).find() != expected)
 226             failCount++;
 227     }
 228 
 229     private static void check(String p, String s, boolean expected) {
 230         Matcher matcher = Pattern.compile(p).matcher(s);
 231         if (matcher.find() != expected)
 232             failCount++;
 233     }
 234 
 235     private static void check(String p, char c, boolean expected) {
 236         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 237         Pattern pattern = Pattern.compile(propertyPattern);
 238         char[] ca = new char[1]; ca[0] = c;
 239         Matcher matcher = pattern.matcher(new String(ca));
 240         if (!matcher.find())
 241             failCount++;
 242     }
 243 
 244     private static void check(String p, int codePoint, boolean expected) {
 245         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 246         Pattern pattern = Pattern.compile(propertyPattern);
 247         char[] ca = Character.toChars(codePoint);
 248         Matcher matcher = pattern.matcher(new String(ca));
 249         if (!matcher.find())
 250             failCount++;
 251     }
 252 
 253     private static void check(String p, int flag, String input, String s,
 254                               boolean expected)
 255     {
 256         Pattern pattern = Pattern.compile(p, flag);
 257         Matcher matcher = pattern.matcher(input);
 258         if (expected)
 259             check(matcher, s, expected);
 260         else
 261             check(pattern, input, false);
 262     }
 263 
 264     private static void report(String testName) {
 265         int spacesToAdd = 30 - testName.length();
 266         StringBuffer paddedNameBuffer = new StringBuffer(testName);
 267         for (int i=0; i<spacesToAdd; i++)
 268             paddedNameBuffer.append(" ");
 269         String paddedName = paddedNameBuffer.toString();
 270         System.err.println(paddedName + ": " +
 271                            (failCount==0 ? "Passed":"Failed("+failCount+")"));
 272         if (failCount > 0) {
 273             failure = true;
 274 
 275             if (firstFailure == null) {
 276                 firstFailure = testName;
 277             }
 278         }
 279 
 280         failCount = 0;
 281     }
 282 
 283     /**
 284      * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
 285      * supplementary characters. This method does NOT fully take care
 286      * of the regex syntax.
 287      */
 288     private static String toSupplementaries(String s) {
 289         int length = s.length();
 290         StringBuffer sb = new StringBuffer(length * 2);
 291 
 292         for (int i = 0; i < length; ) {
 293             char c = s.charAt(i++);
 294             if (c == '\\') {
 295                 sb.append(c);
 296                 if (i < length) {
 297                     c = s.charAt(i++);
 298                     sb.append(c);
 299                     if (c == 'u') {
 300                         // assume no syntax error
 301                         sb.append(s.charAt(i++));
 302                         sb.append(s.charAt(i++));
 303                         sb.append(s.charAt(i++));
 304                         sb.append(s.charAt(i++));
 305                     }
 306                 }
 307             } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
 308                 sb.append('\ud800').append((char)('\udc00'+c));
 309             } else {
 310                 sb.append(c);
 311             }
 312         }
 313         return sb.toString();
 314     }
 315 
 316     // Regular expression tests
 317 
 318     // This is for bug 6178785
 319     // Test if an expected NPE gets thrown when passing in a null argument
 320     private static boolean check(Runnable test) {
 321         try {
 322             test.run();
 323             failCount++;
 324             return false;
 325         } catch (NullPointerException npe) {
 326             return true;
 327         }
 328     }
 329 
 330     private static void nullArgumentTest() {
 331         check(() -> Pattern.compile(null));
 332         check(() -> Pattern.matches(null, null));
 333         check(() -> Pattern.matches("xyz", null));
 334         check(() -> Pattern.quote(null));
 335         check(() -> Pattern.compile("xyz").split(null));
 336         check(() -> Pattern.compile("xyz").matcher(null));
 337 
 338         final Matcher m = Pattern.compile("xyz").matcher("xyz");
 339         m.matches();
 340         check(() -> m.appendTail((StringBuffer) null));
 341         check(() -> m.appendTail((StringBuilder)null));
 342         check(() -> m.replaceAll((String) null));
 343         check(() -> m.replaceAll((Function<MatchResult, String>)null));
 344         check(() -> m.replaceFirst((String)null));
 345         check(() -> m.replaceFirst((Function<MatchResult, String>) null));
 346         check(() -> m.appendReplacement((StringBuffer)null, null));
 347         check(() -> m.appendReplacement((StringBuilder)null, null));
 348         check(() -> m.reset(null));
 349         check(() -> Matcher.quoteReplacement(null));
 350         //check(() -> m.usePattern(null));
 351 
 352         report("Null Argument");
 353     }
 354 
 355     // This is for bug6635133
 356     // Test if surrogate pair in Unicode escapes can be handled correctly.
 357     private static void surrogatesInClassTest() throws Exception {
 358         Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
 359         Matcher matcher = pattern.matcher("\ud834\udd22");
 360         if (!matcher.find())
 361             failCount++;
 362 
 363         report("Surrogate pair in Unicode escape");
 364     }
 365 
 366     // This is for bug6990617
 367     // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
 368     // char encoding is only 2 or 3 digits instead of 4 and the first quoted
 369     // char is an octal digit.
 370     private static void removeQEQuotingTest() throws Exception {
 371         Pattern pattern =
 372             Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
 373         Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
 374         if (!matcher.find())
 375             failCount++;
 376 
 377         report("Remove Q/E Quoting");
 378     }
 379 
 380     // This is for bug 4988891
 381     // Test toMatchResult to see that it is a copy of the Matcher
 382     // that is not affected by subsequent operations on the original
 383     private static void toMatchResultTest() throws Exception {
 384         Pattern pattern = Pattern.compile("squid");
 385         Matcher matcher = pattern.matcher(
 386             "agiantsquidofdestinyasmallsquidoffate");
 387         matcher.find();
 388         int matcherStart1 = matcher.start();
 389         MatchResult mr = matcher.toMatchResult();
 390         if (mr == matcher)
 391             failCount++;
 392         int resultStart1 = mr.start();
 393         if (matcherStart1 != resultStart1)
 394             failCount++;
 395         matcher.find();
 396         int matcherStart2 = matcher.start();
 397         int resultStart2 = mr.start();
 398         if (matcherStart2 == resultStart2)
 399             failCount++;
 400         if (resultStart1 != resultStart2)
 401             failCount++;
 402         MatchResult mr2 = matcher.toMatchResult();
 403         if (mr == mr2)
 404             failCount++;
 405         if (mr2.start() != matcherStart2)
 406             failCount++;
 407         report("toMatchResult is a copy");
 408     }
 409 
 410     private static void checkExpectedISE(Runnable test) {
 411         try {
 412             test.run();
 413             failCount++;
 414         } catch (IllegalStateException x) {
 415         } catch (IndexOutOfBoundsException xx) {
 416             failCount++;
 417         }
 418     }
 419 
 420     private static void checkExpectedIOOE(Runnable test) {
 421         try {
 422             test.run();
 423             failCount++;
 424         } catch (IndexOutOfBoundsException x) {}
 425     }
 426 
 427     // This is for bug 8074678
 428     // Test the result of toMatchResult throws ISE if no match is availble
 429     private static void toMatchResultTest2() throws Exception {
 430         Matcher matcher = Pattern.compile("nomatch").matcher("hello world");
 431         matcher.find();
 432         MatchResult mr = matcher.toMatchResult();
 433 
 434         checkExpectedISE(() -> mr.start());
 435         checkExpectedISE(() -> mr.start(2));
 436         checkExpectedISE(() -> mr.end());
 437         checkExpectedISE(() -> mr.end(2));
 438         checkExpectedISE(() -> mr.group());
 439         checkExpectedISE(() -> mr.group(2));
 440 
 441         matcher = Pattern.compile("(match)").matcher("there is a match");
 442         matcher.find();
 443         MatchResult mr2 = matcher.toMatchResult();
 444         checkExpectedIOOE(() -> mr2.start(2));
 445         checkExpectedIOOE(() -> mr2.end(2));
 446         checkExpectedIOOE(() -> mr2.group(2));
 447 
 448         report("toMatchResult2 appropriate exceptions");
 449     }
 450 
 451     // This is for bug 5013885
 452     // Must test a slice to see if it reports hitEnd correctly
 453     private static void hitEndTest() throws Exception {
 454         // Basic test of Slice node
 455         Pattern p = Pattern.compile("^squidattack");
 456         Matcher m = p.matcher("squack");
 457         m.find();
 458         if (m.hitEnd())
 459             failCount++;
 460         m.reset("squid");
 461         m.find();
 462         if (!m.hitEnd())
 463             failCount++;
 464 
 465         // Test Slice, SliceA and SliceU nodes
 466         for (int i=0; i<3; i++) {
 467             int flags = 0;
 468             if (i==1) flags = Pattern.CASE_INSENSITIVE;
 469             if (i==2) flags = Pattern.UNICODE_CASE;
 470             p = Pattern.compile("^abc", flags);
 471             m = p.matcher("ad");
 472             m.find();
 473             if (m.hitEnd())
 474                 failCount++;
 475             m.reset("ab");
 476             m.find();
 477             if (!m.hitEnd())
 478                 failCount++;
 479         }
 480 
 481         // Test Boyer-Moore node
 482         p = Pattern.compile("catattack");
 483         m = p.matcher("attack");
 484         m.find();
 485         if (!m.hitEnd())
 486             failCount++;
 487 
 488         p = Pattern.compile("catattack");
 489         m = p.matcher("attackattackattackcatatta");
 490         m.find();
 491         if (!m.hitEnd())
 492             failCount++;
 493 
 494         // 8184706: Matching u+0d at EOL against \R should hit-end
 495         p = Pattern.compile("...\\R");
 496         m = p.matcher("cat" + (char)0x0a);
 497         m.find();
 498         if (m.hitEnd())
 499             failCount++;
 500 
 501         m = p.matcher("cat" + (char)0x0d);
 502         m.find();
 503         if (!m.hitEnd())
 504             failCount++;
 505 
 506         m = p.matcher("cat" + (char)0x0d + (char)0x0a);
 507         m.find();
 508         if (m.hitEnd())
 509             failCount++;
 510 
 511         report("hitEnd");
 512     }
 513 
 514     // This is for bug 4997476
 515     // It is weird code submitted by customer demonstrating a regression
 516     private static void wordSearchTest() throws Exception {
 517         String testString = new String("word1 word2 word3");
 518         Pattern p = Pattern.compile("\\b");
 519         Matcher m = p.matcher(testString);
 520         int position = 0;
 521         int start = 0;
 522         while (m.find(position)) {
 523             start = m.start();
 524             if (start == testString.length())
 525                 break;
 526             if (m.find(start+1)) {
 527                 position = m.start();
 528             } else {
 529                 position = testString.length();
 530             }
 531             if (testString.substring(start, position).equals(" "))
 532                 continue;
 533             if (!testString.substring(start, position-1).startsWith("word"))
 534                 failCount++;
 535         }
 536         report("Customer word search");
 537     }
 538 
 539     // This is for bug 4994840
 540     private static void caretAtEndTest() throws Exception {
 541         // Problem only occurs with multiline patterns
 542         // containing a beginning-of-line caret "^" followed
 543         // by an expression that also matches the empty string.
 544         Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
 545         Matcher matcher = pattern.matcher("\r");
 546         matcher.find();
 547         matcher.find();
 548         report("Caret at end");
 549     }
 550 
 551     // This test is for 4979006
 552     // Check to see if word boundary construct properly handles unicode
 553     // non spacing marks
 554     private static void unicodeWordBoundsTest() throws Exception {
 555         String spaces = "  ";
 556         String wordChar = "a";
 557         String nsm = "\u030a";
 558 
 559         assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
 560 
 561         Pattern pattern = Pattern.compile("\\b");
 562         Matcher matcher = pattern.matcher("");
 563         // S=other B=word character N=non spacing mark .=word boundary
 564         // SS.BB.SS
 565         String input = spaces + wordChar + wordChar + spaces;
 566         twoFindIndexes(input, matcher, 2, 4);
 567         // SS.BBN.SS
 568         input = spaces + wordChar +wordChar + nsm + spaces;
 569         twoFindIndexes(input, matcher, 2, 5);
 570         // SS.BN.SS
 571         input = spaces + wordChar + nsm + spaces;
 572         twoFindIndexes(input, matcher, 2, 4);
 573         // SS.BNN.SS
 574         input = spaces + wordChar + nsm + nsm + spaces;
 575         twoFindIndexes(input, matcher, 2, 5);
 576         // SSN.BB.SS
 577         input = spaces + nsm + wordChar + wordChar + spaces;
 578         twoFindIndexes(input, matcher, 3, 5);
 579         // SS.BNB.SS
 580         input = spaces + wordChar + nsm + wordChar + spaces;
 581         twoFindIndexes(input, matcher, 2, 5);
 582         // SSNNSS
 583         input = spaces + nsm + nsm + spaces;
 584         matcher.reset(input);
 585         if (matcher.find())
 586             failCount++;
 587         // SSN.BBN.SS
 588         input = spaces + nsm + wordChar + wordChar + nsm + spaces;
 589         twoFindIndexes(input, matcher, 3, 6);
 590 
 591         report("Unicode word boundary");
 592     }
 593 
 594     private static void twoFindIndexes(String input, Matcher matcher, int a,
 595                                        int b) throws Exception
 596     {
 597         matcher.reset(input);
 598         matcher.find();
 599         if (matcher.start() != a)
 600             failCount++;
 601         matcher.find();
 602         if (matcher.start() != b)
 603             failCount++;
 604     }
 605 
 606     // This test is for 6284152
 607     static void check(String regex, String input, String[] expected) {
 608         List<String> result = new ArrayList<String>();
 609         Pattern p = Pattern.compile(regex);
 610         Matcher m = p.matcher(input);
 611         while (m.find()) {
 612             result.add(m.group());
 613         }
 614         if (!Arrays.asList(expected).equals(result))
 615             failCount++;
 616     }
 617 
 618     private static void lookbehindTest() throws Exception {
 619         //Positive
 620         check("(?<=%.{0,5})foo\\d",
 621               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 622               new String[]{"foo1", "foo2", "foo3"});
 623 
 624         //boundary at end of the lookbehind sub-regex should work consistently
 625         //with the boundary just after the lookbehind sub-regex
 626         check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
 627         check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
 628         check("(?<!abc )\\bfoo", "abc foo", new String[0]);
 629         check("(?<!abc \\b)foo", "abc foo", new String[0]);
 630 
 631         //Negative
 632         check("(?<!%.{0,5})foo\\d",
 633               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 634               new String[] {"foo4", "foo5"});
 635 
 636         //Positive greedy
 637         check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
 638 
 639         //Positive reluctant
 640         check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
 641 
 642         //supplementary
 643         check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 644               new String[] {"fo\ud800\udc00o"});
 645         check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 646               new String[] {"fo\ud800\udc00o"});
 647         check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
 648               new String[] {"fo\ud800\udc00o"});
 649         check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
 650               new String[] {"fo\ud800\udc00o"});
 651         report("Lookbehind");
 652     }
 653 
 654     // This test is for 4938995
 655     // Check to see if weak region boundaries are transparent to
 656     // lookahead and lookbehind constructs
 657     private static void boundsTest() throws Exception {
 658         String fullMessage = "catdogcat";
 659         Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
 660         Matcher matcher = pattern.matcher("catdogca");
 661         matcher.useTransparentBounds(true);
 662         if (matcher.find())
 663             failCount++;
 664         matcher.reset("atdogcat");
 665         if (matcher.find())
 666             failCount++;
 667         matcher.reset(fullMessage);
 668         if (!matcher.find())
 669             failCount++;
 670         matcher.reset(fullMessage);
 671         matcher.region(0,9);
 672         if (!matcher.find())
 673             failCount++;
 674         matcher.reset(fullMessage);
 675         matcher.region(0,6);
 676         if (!matcher.find())
 677             failCount++;
 678         matcher.reset(fullMessage);
 679         matcher.region(3,6);
 680         if (!matcher.find())
 681             failCount++;
 682         matcher.useTransparentBounds(false);
 683         if (matcher.find())
 684             failCount++;
 685 
 686         // Negative lookahead/lookbehind
 687         pattern = Pattern.compile("(?<!cat)dog(?!cat)");
 688         matcher = pattern.matcher("dogcat");
 689         matcher.useTransparentBounds(true);
 690         matcher.region(0,3);
 691         if (matcher.find())
 692             failCount++;
 693         matcher.reset("catdog");
 694         matcher.region(3,6);
 695         if (matcher.find())
 696             failCount++;
 697         matcher.useTransparentBounds(false);
 698         matcher.reset("dogcat");
 699         matcher.region(0,3);
 700         if (!matcher.find())
 701             failCount++;
 702         matcher.reset("catdog");
 703         matcher.region(3,6);
 704         if (!matcher.find())
 705             failCount++;
 706 
 707         report("Region bounds transparency");
 708     }
 709 
 710     // This test is for 4945394
 711     private static void findFromTest() throws Exception {
 712         String message = "This is 40 $0 message.";
 713         Pattern pat = Pattern.compile("\\$0");
 714         Matcher match = pat.matcher(message);
 715         if (!match.find())
 716             failCount++;
 717         if (match.find())
 718             failCount++;
 719         if (match.find())
 720             failCount++;
 721         report("Check for alternating find");
 722     }
 723 
 724     // This test is for 4872664 and 4892980
 725     private static void negatedCharClassTest() throws Exception {
 726         Pattern pattern = Pattern.compile("[^>]");
 727         Matcher matcher = pattern.matcher("\u203A");
 728         if (!matcher.matches())
 729             failCount++;
 730         pattern = Pattern.compile("[^fr]");
 731         matcher = pattern.matcher("a");
 732         if (!matcher.find())
 733             failCount++;
 734         matcher.reset("\u203A");
 735         if (!matcher.find())
 736             failCount++;
 737         String s = "for";
 738         String result[] = s.split("[^fr]");
 739         if (!result[0].equals("f"))
 740             failCount++;
 741         if (!result[1].equals("r"))
 742             failCount++;
 743         s = "f\u203Ar";
 744         result = s.split("[^fr]");
 745         if (!result[0].equals("f"))
 746             failCount++;
 747         if (!result[1].equals("r"))
 748             failCount++;
 749 
 750         // Test adding to bits, subtracting a node, then adding to bits again
 751         pattern = Pattern.compile("[^f\u203Ar]");
 752         matcher = pattern.matcher("a");
 753         if (!matcher.find())
 754             failCount++;
 755         matcher.reset("f");
 756         if (matcher.find())
 757             failCount++;
 758         matcher.reset("\u203A");
 759         if (matcher.find())
 760             failCount++;
 761         matcher.reset("r");
 762         if (matcher.find())
 763             failCount++;
 764         matcher.reset("\u203B");
 765         if (!matcher.find())
 766             failCount++;
 767 
 768         // Test subtracting a node, adding to bits, subtracting again
 769         pattern = Pattern.compile("[^\u203Ar\u203B]");
 770         matcher = pattern.matcher("a");
 771         if (!matcher.find())
 772             failCount++;
 773         matcher.reset("\u203A");
 774         if (matcher.find())
 775             failCount++;
 776         matcher.reset("r");
 777         if (matcher.find())
 778             failCount++;
 779         matcher.reset("\u203B");
 780         if (matcher.find())
 781             failCount++;
 782         matcher.reset("\u203C");
 783         if (!matcher.find())
 784             failCount++;
 785 
 786         report("Negated Character Class");
 787     }
 788 
 789     // This test is for 4628291
 790     private static void toStringTest() throws Exception {
 791         Pattern pattern = Pattern.compile("b+");
 792         if (pattern.toString() != "b+")
 793             failCount++;
 794         Matcher matcher = pattern.matcher("aaabbbccc");
 795         String matcherString = matcher.toString(); // unspecified
 796         matcher.find();
 797         matcherString = matcher.toString(); // unspecified
 798         matcher.region(0,3);
 799         matcherString = matcher.toString(); // unspecified
 800         matcher.reset();
 801         matcherString = matcher.toString(); // unspecified
 802         report("toString");
 803     }
 804 
    // This test is for 4808962
    //
    // Exercises Pattern.LITERAL and Pattern.quote(): regex metacharacters and
    // \Q...\E markers in the pattern text are expected to be matched as plain
    // characters. Each check(pattern, input, expected) call delegates to a
    // helper defined elsewhere in this file (presumably comparing a find()
    // result against 'expected' -- confirm against the helper).
    private static void literalPatternTest() throws Exception {
        int flags = Pattern.LITERAL;

        // Metacharacters ($, ^) and a backslash sequence taken literally.
        Pattern pattern = Pattern.compile("abc\\t$^", flags);
        check(pattern, "abc\\t$^", true);

        // Same text quoted with Pattern.quote() instead of the LITERAL flag.
        pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
        check(pattern, "abc\\t$^", true);

        // Under LITERAL the \Q and \E markers are themselves literal text.
        pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
        check(pattern, "\\Qa^$bcabc\\E", true);
        check(pattern, "a^$bcabc", false);

        // Without LITERAL: escaped backslashes followed by Q / E are not
        // quoting markers.
        pattern = Pattern.compile("\\\\Q\\\\E");
        check(pattern, "\\Q\\E", true);

        // A real \Q...\E section followed by escaped-backslash text.
        pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
        check(pattern, "abcefg\\Q\\Ehij", true);

        pattern = Pattern.compile("\\\\\\Q\\\\E");
        check(pattern, "\\\\\\\\", true);

        // Pattern.quote() must cope with input that already contains \Q / \E.
        pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
        check(pattern, "\\Qa^$bcabc\\E", true);
        check(pattern, "a^$bcabc", false);

        pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
        check(pattern, "\\Qabc\\Edef", true);
        check(pattern, "abcdef", false);

        pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
        check(pattern, "abc\\Edef", true);
        check(pattern, "abcdef", false);

        pattern = Pattern.compile(Pattern.quote("\\E"));
        check(pattern, "\\E", true);

        // Unbalanced parentheses would be a syntax error in a normal regex.
        pattern = Pattern.compile("((((abc.+?:)", flags);
        check(pattern, "((((abc.+?:)", true);

        // From here on, further flags are accumulated on top of LITERAL.
        flags |= Pattern.MULTILINE;

        // ^ and $ stay literal even with MULTILINE set.
        pattern = Pattern.compile("^cat$", flags);
        check(pattern, "abc^cat$def", true);
        check(pattern, "cat", false);

        flags |= Pattern.CASE_INSENSITIVE;

        // Case-insensitivity is still honoured for literal patterns.
        pattern = Pattern.compile("abcdef", flags);
        check(pattern, "ABCDEF", true);
        check(pattern, "AbCdEf", true);

        flags |= Pattern.DOTALL;

        // Dots stay literal even with DOTALL set.
        pattern = Pattern.compile("a...b", flags);
        check(pattern, "A...b", true);
        check(pattern, "Axxxb", false);

        flags |= Pattern.CANON_EQ;

        // NOTE(review): 'p' is compiled here but never used; the two checks
        // below still run against 'pattern' (the "a...b" literal from above).
        // This looks like the checks were meant to use 'p' -- confirm the
        // intended expectations before changing.
        Pattern p = Pattern.compile("testa\u030a", flags);
        check(pattern, "testa\u030a", false);
        check(pattern, "test\u00e5", false);

        // Supplementary character test
        // Repeats the cases above with toSupplementaries() applied to the
        // pattern and input text; the flag accumulation restarts from LITERAL.
        flags = Pattern.LITERAL;

        pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
        check(pattern, toSupplementaries("abc\\t$^"), true);

        pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
        check(pattern, toSupplementaries("abc\\t$^"), true);

        pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
        check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
        check(pattern, toSupplementaries("a^$bcabc"), false);

        pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
        check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
        check(pattern, toSupplementaries("a^$bcabc"), false);

        pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
        check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
        check(pattern, toSupplementaries("abcdef"), false);

        pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
        check(pattern, toSupplementaries("abc\\Edef"), true);
        check(pattern, toSupplementaries("abcdef"), false);

        pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
        check(pattern, toSupplementaries("((((abc.+?:)"), true);

        flags |= Pattern.MULTILINE;

        pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
        check(pattern, toSupplementaries("abc^cat$def"), true);
        check(pattern, toSupplementaries("cat"), false);

        // Unlike the BMP section, CASE_INSENSITIVE is not added here.
        flags |= Pattern.DOTALL;

        // note: this is case-sensitive.
        pattern = Pattern.compile(toSupplementaries("a...b"), flags);
        check(pattern, toSupplementaries("a...b"), true);
        check(pattern, toSupplementaries("axxxb"), false);

        flags |= Pattern.CANON_EQ;

        // NOTE(review): as in the BMP section, 'p' is assigned but the checks
        // below use 'pattern' -- confirm whether 'p' was intended.
        String t = toSupplementaries("test");
        p = Pattern.compile(t + "a\u030a", flags);
        check(pattern, t + "a\u030a", false);
        check(pattern, t + "\u00e5", false);

        report("Literal pattern");
    }
 920 
 921     // This test is for 4803179
 922     // This test is also for 4808962, replacement parts
 923     private static void literalReplacementTest() throws Exception {
 924         int flags = Pattern.LITERAL;
 925 
 926         Pattern pattern = Pattern.compile("abc", flags);
 927         Matcher matcher = pattern.matcher("zzzabczzz");
 928         String replaceTest = "$0";
 929         String result = matcher.replaceAll(replaceTest);
 930         if (!result.equals("zzzabczzz"))
 931             failCount++;
 932 
 933         matcher.reset();
 934         String literalReplacement = matcher.quoteReplacement(replaceTest);
 935         result = matcher.replaceAll(literalReplacement);
 936         if (!result.equals("zzz$0zzz"))
 937             failCount++;
 938 
 939         matcher.reset();
 940         replaceTest = "\\t$\\$";
 941         literalReplacement = matcher.quoteReplacement(replaceTest);
 942         result = matcher.replaceAll(literalReplacement);
 943         if (!result.equals("zzz\\t$\\$zzz"))
 944             failCount++;
 945 
 946         // Supplementary character test
 947         pattern = Pattern.compile(toSupplementaries("abc"), flags);
 948         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
 949         replaceTest = "$0";
 950         result = matcher.replaceAll(replaceTest);
 951         if (!result.equals(toSupplementaries("zzzabczzz")))
 952             failCount++;
 953 
 954         matcher.reset();
 955         literalReplacement = matcher.quoteReplacement(replaceTest);
 956         result = matcher.replaceAll(literalReplacement);
 957         if (!result.equals(toSupplementaries("zzz$0zzz")))
 958             failCount++;
 959 
 960         matcher.reset();
 961         replaceTest = "\\t$\\$";
 962         literalReplacement = matcher.quoteReplacement(replaceTest);
 963         result = matcher.replaceAll(literalReplacement);
 964         if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
 965             failCount++;
 966 
 967         // IAE should be thrown if backslash or '$' is the last character
 968         // in replacement string
 969         try {
 970             "\uac00".replaceAll("\uac00", "$");
 971             failCount++;
 972         } catch (IllegalArgumentException iie) {
 973         } catch (Exception e) {
 974             failCount++;
 975         }
 976         try {
 977             "\uac00".replaceAll("\uac00", "\\");
 978             failCount++;
 979         } catch (IllegalArgumentException iie) {
 980         } catch (Exception e) {
 981             failCount++;
 982         }
 983         report("Literal replacement");
 984     }
 985 
    // This test is for 4757029
    //
    // Exercises Matcher.region(): matches must be confined to the region,
    // invalid bounds must be rejected (see expectRegionFail), and ^ / $ must
    // respect or ignore the region edges depending on useAnchoringBounds().
    // The statements are order-sensitive because each region() call resets
    // the matcher and each find() advances it.
    private static void regionTest() throws Exception {
        Pattern pattern = Pattern.compile("abc");
        Matcher matcher = pattern.matcher("abcdefabc");

        // Whole input as region: both occurrences of "abc" are found.
        matcher.region(0,9);
        if (!matcher.find())
            failCount++;
        if (!matcher.find())
            failCount++;
        // Region exactly covering the first "abc".
        matcher.region(0,3);
        if (!matcher.find())
           failCount++;
        // Region covering "def": nothing to find.
        matcher.region(3,6);
        if (matcher.find())
           failCount++;
        // Region "ab" is too short to contain a match.
        matcher.region(0,2);
        if (matcher.find())
           failCount++;

        // Negative, reversed, or past-the-end bounds (input length is 9).
        expectRegionFail(matcher, 1, -1);
        expectRegionFail(matcher, -1, -1);
        expectRegionFail(matcher, -1, 1);
        expectRegionFail(matcher, 5, 3);
        expectRegionFail(matcher, 5, 12);
        expectRegionFail(matcher, 12, 12);

        // Anchors: with the whole input as region, ^abc$ cannot match
        // "zzzabczzz"; with the region narrowed to "abc" it is expected to.
        pattern = Pattern.compile("^abc$");
        matcher = pattern.matcher("zzzabczzz");
        matcher.region(0,9);
        if (matcher.find())
            failCount++;
        matcher.region(3,6);
        if (!matcher.find())
           failCount++;
        // With anchoring bounds off, the region edges no longer count as
        // input boundaries, so the anchored pattern must fail again.
        matcher.region(3,6);
        matcher.useAnchoringBounds(false);
        if (matcher.find())
           failCount++;

        // Supplementary character test
        // Indexes are doubled here; presumably toSupplementaries() turns each
        // char into a surrogate pair (two chars) -- confirm against the helper.
        pattern = Pattern.compile(toSupplementaries("abc"));
        matcher = pattern.matcher(toSupplementaries("abcdefabc"));
        matcher.region(0,9*2);
        if (!matcher.find())
            failCount++;
        if (!matcher.find())
            failCount++;
        matcher.region(0,3*2);
        if (!matcher.find())
           failCount++;
        // Odd start index: the region begins one char into the first
        // character, so no match is expected.
        matcher.region(1,3*2);
        if (matcher.find())
           failCount++;
        matcher.region(3*2,6*2);
        if (matcher.find())
           failCount++;
        matcher.region(0,2*2);
        if (matcher.find())
           failCount++;
        // Odd end index: the region stops one char into the third character.
        matcher.region(0,2*2+1);
        if (matcher.find())
           failCount++;

        expectRegionFail(matcher, 1*2, -1);
        expectRegionFail(matcher, -1, -1);
        expectRegionFail(matcher, -1, 1*2);
        expectRegionFail(matcher, 5*2, 3*2);
        expectRegionFail(matcher, 5*2, 12*2);
        expectRegionFail(matcher, 12*2, 12*2);

        pattern = Pattern.compile(toSupplementaries("^abc$"));
        matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
        matcher.region(0,9*2);
        if (matcher.find())
            failCount++;
        matcher.region(3*2,6*2);
        if (!matcher.find())
           failCount++;
        // Region start shifted by one char: must not match.
        matcher.region(3*2+1,6*2);
        if (matcher.find())
           failCount++;
        // Region end pulled back by one char: must not match.
        matcher.region(3*2,6*2-1);
        if (matcher.find())
           failCount++;
        matcher.region(3*2,6*2);
        matcher.useAnchoringBounds(false);
        if (matcher.find())
           failCount++;
        report("Regions");
    }
1077 
1078     private static void expectRegionFail(Matcher matcher, int index1,
1079                                          int index2)
1080     {
1081         try {
1082             matcher.region(index1, index2);
1083             failCount++;
1084         } catch (IndexOutOfBoundsException ioobe) {
1085             // Correct result
1086         } catch (IllegalStateException ise) {
1087             // Correct result
1088         }
1089     }
1090 
1091     // This test is for 4803197
1092     private static void escapedSegmentTest() throws Exception {
1093 
1094         Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
1095         check(pattern, "dir1\\dir2", true);
1096 
1097         pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
1098         check(pattern, "dir1\\dir2\\", true);
1099 
1100         pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
1101         check(pattern, "dir1\\dir2\\", true);
1102 
1103         // Supplementary character test
1104         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
1105         check(pattern, toSupplementaries("dir1\\dir2"), true);
1106 
1107         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
1108         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1109 
1110         pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1111         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1112 
1113         report("Escaped segment");
1114     }
1115 
1116     // This test is for 4792284
1117     private static void nonCaptureRepetitionTest() throws Exception {
1118         String input = "abcdefgh;";
1119 
1120         String[] patterns = new String[] {
1121             "(?:\\w{4})+;",
1122             "(?:\\w{8})*;",
1123             "(?:\\w{2}){2,4};",
1124             "(?:\\w{4}){2,};",   // only matches the
1125             ".*?(?:\\w{5})+;",   //     specified minimum
1126             ".*?(?:\\w{9})*;",   //     number of reps - OK
1127             "(?:\\w{4})+?;",     // lazy repetition - OK
1128             "(?:\\w{4})++;",     // possessive repetition - OK
1129             "(?:\\w{2,}?)+;",    // non-deterministic - OK
1130             "(\\w{4})+;",        // capturing group - OK
1131         };
1132 
1133         for (int i = 0; i < patterns.length; i++) {
1134             // Check find()
1135             check(patterns[i], 0, input, input, true);
1136             // Check matches()
1137             Pattern p = Pattern.compile(patterns[i]);
1138             Matcher m = p.matcher(input);
1139 
1140             if (m.matches()) {
1141                 if (!m.group(0).equals(input))
1142                     failCount++;
1143             } else {
1144                 failCount++;
1145             }
1146         }
1147 
1148         report("Non capturing repetition");
1149     }
1150 
1151     // This test is for 6358731
1152     private static void notCapturedGroupCurlyMatchTest() throws Exception {
1153         Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1154         Matcher matcher = pattern.matcher("abcd");
1155         if (!matcher.matches() ||
1156              matcher.group(1) != null ||
1157              !matcher.group(2).equals("abcd")) {
1158             failCount++;
1159         }
1160         report("Not captured GroupCurly");
1161     }
1162 
1163     // This test is for 4706545
1164     private static void javaCharClassTest() throws Exception {
1165         for (int i=0; i<1000; i++) {
1166             char c = (char)generator.nextInt();
1167             check("{javaLowerCase}", c, Character.isLowerCase(c));
1168             check("{javaUpperCase}", c, Character.isUpperCase(c));
1169             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1170             check("{javaTitleCase}", c, Character.isTitleCase(c));
1171             check("{javaDigit}", c, Character.isDigit(c));
1172             check("{javaDefined}", c, Character.isDefined(c));
1173             check("{javaLetter}", c, Character.isLetter(c));
1174             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1175             check("{javaJavaIdentifierStart}", c,
1176                   Character.isJavaIdentifierStart(c));
1177             check("{javaJavaIdentifierPart}", c,
1178                   Character.isJavaIdentifierPart(c));
1179             check("{javaUnicodeIdentifierStart}", c,
1180                   Character.isUnicodeIdentifierStart(c));
1181             check("{javaUnicodeIdentifierPart}", c,
1182                   Character.isUnicodeIdentifierPart(c));
1183             check("{javaIdentifierIgnorable}", c,
1184                   Character.isIdentifierIgnorable(c));
1185             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1186             check("{javaWhitespace}", c, Character.isWhitespace(c));
1187             check("{javaISOControl}", c, Character.isISOControl(c));
1188             check("{javaMirrored}", c, Character.isMirrored(c));
1189 
1190         }
1191 
1192         // Supplementary character test
1193         for (int i=0; i<1000; i++) {
1194             int c = generator.nextInt(Character.MAX_CODE_POINT
1195                                       - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1196                         + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1197             check("{javaLowerCase}", c, Character.isLowerCase(c));
1198             check("{javaUpperCase}", c, Character.isUpperCase(c));
1199             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1200             check("{javaTitleCase}", c, Character.isTitleCase(c));
1201             check("{javaDigit}", c, Character.isDigit(c));
1202             check("{javaDefined}", c, Character.isDefined(c));
1203             check("{javaLetter}", c, Character.isLetter(c));
1204             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1205             check("{javaJavaIdentifierStart}", c,
1206                   Character.isJavaIdentifierStart(c));
1207             check("{javaJavaIdentifierPart}", c,
1208                   Character.isJavaIdentifierPart(c));
1209             check("{javaUnicodeIdentifierStart}", c,
1210                   Character.isUnicodeIdentifierStart(c));
1211             check("{javaUnicodeIdentifierPart}", c,
1212                   Character.isUnicodeIdentifierPart(c));
1213             check("{javaIdentifierIgnorable}", c,
1214                   Character.isIdentifierIgnorable(c));
1215             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1216             check("{javaWhitespace}", c, Character.isWhitespace(c));
1217             check("{javaISOControl}", c, Character.isISOControl(c));
1218             check("{javaMirrored}", c, Character.isMirrored(c));
1219         }
1220 
1221         report("Java character classes");
1222     }
1223 
1224     // This test is for 4523620
1225     /*
1226     private static void numOccurrencesTest() throws Exception {
1227         Pattern pattern = Pattern.compile("aaa");
1228 
1229         if (pattern.numOccurrences("aaaaaa", false) != 2)
1230             failCount++;
1231         if (pattern.numOccurrences("aaaaaa", true) != 4)
1232             failCount++;
1233 
1234         pattern = Pattern.compile("^");
1235         if (pattern.numOccurrences("aaaaaa", false) != 1)
1236             failCount++;
1237         if (pattern.numOccurrences("aaaaaa", true) != 1)
1238             failCount++;
1239 
1240         report("Number of Occurrences");
1241     }
1242     */
1243 
1244     // This test is for 4776374
1245     private static void caretBetweenTerminatorsTest() throws Exception {
1246         int flags1 = Pattern.DOTALL;
1247         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1248         int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1249         int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1250 
1251         check("^....", flags1, "test\ntest", "test", true);
1252         check(".....^", flags1, "test\ntest", "test", false);
1253         check(".....^", flags1, "test\n", "test", false);
1254         check("....^", flags1, "test\r\n", "test", false);
1255 
1256         check("^....", flags2, "test\ntest", "test", true);
1257         check("....^", flags2, "test\ntest", "test", false);
1258         check(".....^", flags2, "test\n", "test", false);
1259         check("....^", flags2, "test\r\n", "test", false);
1260 
1261         check("^....", flags3, "test\ntest", "test", true);
1262         check(".....^", flags3, "test\ntest", "test\n", true);
1263         check(".....^", flags3, "test\u0085test", "test\u0085", false);
1264         check(".....^", flags3, "test\n", "test", false);
1265         check(".....^", flags3, "test\r\n", "test", false);
1266         check("......^", flags3, "test\r\ntest", "test\r\n", true);
1267 
1268         check("^....", flags4, "test\ntest", "test", true);
1269         check(".....^", flags3, "test\ntest", "test\n", true);
1270         check(".....^", flags4, "test\u0085test", "test\u0085", true);
1271         check(".....^", flags4, "test\n", "test\n", false);
1272         check(".....^", flags4, "test\r\n", "test\r", false);
1273 
1274         // Supplementary character test
1275         String t = toSupplementaries("test");
1276         check("^....", flags1, t+"\n"+t, t, true);
1277         check(".....^", flags1, t+"\n"+t, t, false);
1278         check(".....^", flags1, t+"\n", t, false);
1279         check("....^", flags1, t+"\r\n", t, false);
1280 
1281         check("^....", flags2, t+"\n"+t, t, true);
1282         check("....^", flags2, t+"\n"+t, t, false);
1283         check(".....^", flags2, t+"\n", t, false);
1284         check("....^", flags2, t+"\r\n", t, false);
1285 
1286         check("^....", flags3, t+"\n"+t, t, true);
1287         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1288         check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1289         check(".....^", flags3, t+"\n", t, false);
1290         check(".....^", flags3, t+"\r\n", t, false);
1291         check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1292 
1293         check("^....", flags4, t+"\n"+t, t, true);
1294         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1295         check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1296         check(".....^", flags4, t+"\n", t+"\n", false);
1297         check(".....^", flags4, t+"\r\n", t+"\r", false);
1298 
1299         report("Caret between terminators");
1300     }
1301 
1302     // This test is for 4727935
1303     private static void dollarAtEndTest() throws Exception {
1304         int flags1 = Pattern.DOTALL;
1305         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1306         int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1307 
1308         check("....$", flags1, "test\n", "test", true);
1309         check("....$", flags1, "test\r\n", "test", true);
1310         check(".....$", flags1, "test\n", "test\n", true);
1311         check(".....$", flags1, "test\u0085", "test\u0085", true);
1312         check("....$", flags1, "test\u0085", "test", true);
1313 
1314         check("....$", flags2, "test\n", "test", true);
1315         check(".....$", flags2, "test\n", "test\n", true);
1316         check(".....$", flags2, "test\u0085", "test\u0085", true);
1317         check("....$", flags2, "test\u0085", "est\u0085", true);
1318 
1319         check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1320         check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1321         check("....$blah", flags3, "test\nblah", "!!!!", false);
1322         check(".....$blah", flags3, "test\nblah", "!!!!", false);
1323 
1324         // Supplementary character test
1325         String t = toSupplementaries("test");
1326         String b = toSupplementaries("blah");
1327         check("....$", flags1, t+"\n", t, true);
1328         check("....$", flags1, t+"\r\n", t, true);
1329         check(".....$", flags1, t+"\n", t+"\n", true);
1330         check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1331         check("....$", flags1, t+"\u0085", t, true);
1332 
1333         check("....$", flags2, t+"\n", t, true);
1334         check(".....$", flags2, t+"\n", t+"\n", true);
1335         check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1336         check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1337 
1338         check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1339         check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1340         check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1341         check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1342 
1343         report("Dollar at End");
1344     }
1345 
1346     // This test is for 4711773
1347     private static void multilineDollarTest() throws Exception {
1348         Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1349         Matcher matcher = findCR.matcher("first bit\nsecond bit");
1350         matcher.find();
1351         if (matcher.start(0) != 9)
1352             failCount++;
1353         matcher.find();
1354         if (matcher.start(0) != 20)
1355             failCount++;
1356 
1357         // Supplementary character test
1358         matcher = findCR.matcher(toSupplementaries("first  bit\n second  bit")); // double BMP chars
1359         matcher.find();
1360         if (matcher.start(0) != 9*2)
1361             failCount++;
1362         matcher.find();
1363         if (matcher.start(0) != 20*2)
1364             failCount++;
1365 
1366         report("Multiline Dollar");
1367     }
1368 
1369     private static void reluctantRepetitionTest() throws Exception {
1370         Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1371         check(p, "1 word word word 2", true);
1372         check(p, "1 wor wo w 2", true);
1373         check(p, "1 word word 2", true);
1374         check(p, "1 word 2", true);
1375         check(p, "1 wo w w 2", true);
1376         check(p, "1 wo w 2", true);
1377         check(p, "1 wor w 2", true);
1378 
1379         p = Pattern.compile("([a-z])+?c");
1380         Matcher m = p.matcher("ababcdefdec");
1381         check(m, "ababc");
1382 
1383         // Supplementary character test
1384         p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1385         m = p.matcher(toSupplementaries("ababcdefdec"));
1386         check(m, toSupplementaries("ababc"));
1387 
1388         report("Reluctant Repetition");
1389     }
1390 
1391     private static Pattern serializedPattern(Pattern p) throws Exception {
1392         ByteArrayOutputStream baos = new ByteArrayOutputStream();
1393         ObjectOutputStream oos = new ObjectOutputStream(baos);
1394         oos.writeObject(p);
1395         oos.close();
1396         try (ObjectInputStream ois = new ObjectInputStream(
1397                 new ByteArrayInputStream(baos.toByteArray()))) {
1398             return (Pattern)ois.readObject();
1399         }
1400     }
1401 
1402     private static void serializeTest() throws Exception {
1403         String patternStr = "(b)";
1404         String matchStr = "b";
1405         Pattern pattern = Pattern.compile(patternStr);
1406         Pattern serializedPattern = serializedPattern(pattern);
1407         Matcher matcher = serializedPattern.matcher(matchStr);
1408         if (!matcher.matches())
1409             failCount++;
1410         if (matcher.groupCount() != 1)
1411             failCount++;
1412 
1413         pattern = Pattern.compile("a(?-i)b", Pattern.CASE_INSENSITIVE);
1414         serializedPattern = serializedPattern(pattern);
1415         if (!serializedPattern.matcher("Ab").matches())
1416             failCount++;
1417         if (serializedPattern.matcher("AB").matches())
1418             failCount++;
1419 
1420         report("Serialization");
1421     }
1422 
    // Tests the \G boundary matcher across successive find() calls. The
    // results of the leading find() calls are deliberately ignored; only the
    // final call in each group is asserted. The calls are order-sensitive
    // because every find() advances the matcher.
    private static void gTest() {
        Pattern pattern = Pattern.compile("\\G\\w");
        Matcher matcher = pattern.matcher("abc#x#x");
        // Three find() calls, one per leading word character ('a', 'b', 'c');
        // the fourth must fail.
        matcher.find();
        matcher.find();
        matcher.find();
        if (matcher.find())
            failCount++;

        // A pattern that can match the empty string: after the first find()
        // the second one is expected to fail.
        pattern = Pattern.compile("\\GA*");
        matcher = pattern.matcher("1A2AA3");
        matcher.find();
        if (matcher.find())
            failCount++;

        // Same pattern, but the first search starts at index 1 via find(int),
        // which must succeed; after one more find() the next must fail.
        pattern = Pattern.compile("\\GA*");
        matcher = pattern.matcher("1A2AA3");
        if (!matcher.find(1))
            failCount++;
        matcher.find();
        if (matcher.find())
            failCount++;

        report("\\G");
    }
1448 
1449     private static void zTest() {
1450         Pattern pattern = Pattern.compile("foo\\Z");
1451         // Positives
1452         check(pattern, "foo\u0085", true);
1453         check(pattern, "foo\u2028", true);
1454         check(pattern, "foo\u2029", true);
1455         check(pattern, "foo\n", true);
1456         check(pattern, "foo\r", true);
1457         check(pattern, "foo\r\n", true);
1458         // Negatives
1459         check(pattern, "fooo", false);
1460         check(pattern, "foo\n\r", false);
1461 
1462         pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1463         // Positives
1464         check(pattern, "foo", true);
1465         check(pattern, "foo\n", true);
1466         // Negatives
1467         check(pattern, "foo\r", false);
1468         check(pattern, "foo\u0085", false);
1469         check(pattern, "foo\u2028", false);
1470         check(pattern, "foo\u2029", false);
1471 
1472         report("\\Z");
1473     }
1474 
1475     private static void replaceFirstTest() {
1476         Pattern pattern = Pattern.compile("(ab)(c*)");
1477         Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1478         if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1479             failCount++;
1480 
1481         matcher.reset("zzzabccczzzabcczzzabccczzz");
1482         if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1483             failCount++;
1484 
1485         matcher.reset("zzzabccczzzabcczzzabccczzz");
1486         String result = matcher.replaceFirst("$1");
1487         if (!result.equals("zzzabzzzabcczzzabccczzz"))
1488             failCount++;
1489 
1490         matcher.reset("zzzabccczzzabcczzzabccczzz");
1491         result = matcher.replaceFirst("$2");
1492         if (!result.equals("zzzccczzzabcczzzabccczzz"))
1493             failCount++;
1494 
1495         pattern = Pattern.compile("a*");
1496         matcher = pattern.matcher("aaaaaaaaaa");
1497         if (!matcher.replaceFirst("test").equals("test"))
1498             failCount++;
1499 
1500         pattern = Pattern.compile("a+");
1501         matcher = pattern.matcher("zzzaaaaaaaaaa");
1502         if (!matcher.replaceFirst("test").equals("zzztest"))
1503             failCount++;
1504 
1505         // Supplementary character test
1506         pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1507         matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1508         if (!matcher.replaceFirst(toSupplementaries("test"))
1509                 .equals(toSupplementaries("testzzzabcczzzabccc")))
1510             failCount++;
1511 
1512         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1513         if (!matcher.replaceFirst(toSupplementaries("test")).
1514             equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1515             failCount++;
1516 
1517         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1518         result = matcher.replaceFirst("$1");
1519         if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1520             failCount++;
1521 
1522         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1523         result = matcher.replaceFirst("$2");
1524         if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1525             failCount++;
1526 
1527         pattern = Pattern.compile(toSupplementaries("a*"));
1528         matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1529         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1530             failCount++;
1531 
1532         pattern = Pattern.compile(toSupplementaries("a+"));
1533         matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1534         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1535             failCount++;
1536 
1537         report("Replace First");
1538     }
1539 
1540     private static void unixLinesTest() {
1541         Pattern pattern = Pattern.compile(".*");
1542         Matcher matcher = pattern.matcher("aa\u2028blah");
1543         matcher.find();
1544         if (!matcher.group(0).equals("aa"))
1545             failCount++;
1546 
1547         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1548         matcher = pattern.matcher("aa\u2028blah");
1549         matcher.find();
1550         if (!matcher.group(0).equals("aa\u2028blah"))
1551             failCount++;
1552 
1553         pattern = Pattern.compile("[az]$",
1554                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1555         matcher = pattern.matcher("aa\u2028zz");
1556         check(matcher, "a\u2028", false);
1557 
1558         // Supplementary character test
1559         pattern = Pattern.compile(".*");
1560         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1561         matcher.find();
1562         if (!matcher.group(0).equals(toSupplementaries("aa")))
1563             failCount++;
1564 
1565         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1566         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1567         matcher.find();
1568         if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1569             failCount++;
1570 
1571         pattern = Pattern.compile(toSupplementaries("[az]$"),
1572                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1573         matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1574         check(matcher, toSupplementaries("a\u2028"), false);
1575 
1576         report("Unix Lines");
1577     }
1578 
1579     private static void commentsTest() {
1580         int flags = Pattern.COMMENTS;
1581 
1582         Pattern pattern = Pattern.compile("aa \\# aa", flags);
1583         Matcher matcher = pattern.matcher("aa#aa");
1584         if (!matcher.matches())
1585             failCount++;
1586 
1587         pattern = Pattern.compile("aa  # blah", flags);
1588         matcher = pattern.matcher("aa");
1589         if (!matcher.matches())
1590             failCount++;
1591 
1592         pattern = Pattern.compile("aa blah", flags);
1593         matcher = pattern.matcher("aablah");
1594         if (!matcher.matches())
1595              failCount++;
1596 
1597         pattern = Pattern.compile("aa  # blah blech  ", flags);
1598         matcher = pattern.matcher("aa");
1599         if (!matcher.matches())
1600             failCount++;
1601 
1602         pattern = Pattern.compile("aa  # blah\n  ", flags);
1603         matcher = pattern.matcher("aa");
1604         if (!matcher.matches())
1605             failCount++;
1606 
1607         pattern = Pattern.compile("aa  # blah\nbc # blech", flags);
1608         matcher = pattern.matcher("aabc");
1609         if (!matcher.matches())
1610              failCount++;
1611 
1612         pattern = Pattern.compile("aa  # blah\nbc# blech", flags);
1613         matcher = pattern.matcher("aabc");
1614         if (!matcher.matches())
1615              failCount++;
1616 
1617         pattern = Pattern.compile("aa  # blah\nbc\\# blech", flags);
1618         matcher = pattern.matcher("aabc#blech");
1619         if (!matcher.matches())
1620              failCount++;
1621 
1622         // Supplementary character test
1623         pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1624         matcher = pattern.matcher(toSupplementaries("aa#aa"));
1625         if (!matcher.matches())
1626             failCount++;
1627 
1628         pattern = Pattern.compile(toSupplementaries("aa  # blah"), flags);
1629         matcher = pattern.matcher(toSupplementaries("aa"));
1630         if (!matcher.matches())
1631             failCount++;
1632 
1633         pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1634         matcher = pattern.matcher(toSupplementaries("aablah"));
1635         if (!matcher.matches())
1636              failCount++;
1637 
1638         pattern = Pattern.compile(toSupplementaries("aa  # blah blech  "), flags);
1639         matcher = pattern.matcher(toSupplementaries("aa"));
1640         if (!matcher.matches())
1641             failCount++;
1642 
1643         pattern = Pattern.compile(toSupplementaries("aa  # blah\n  "), flags);
1644         matcher = pattern.matcher(toSupplementaries("aa"));
1645         if (!matcher.matches())
1646             failCount++;
1647 
1648         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc # blech"), flags);
1649         matcher = pattern.matcher(toSupplementaries("aabc"));
1650         if (!matcher.matches())
1651              failCount++;
1652 
1653         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc# blech"), flags);
1654         matcher = pattern.matcher(toSupplementaries("aabc"));
1655         if (!matcher.matches())
1656              failCount++;
1657 
1658         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc\\# blech"), flags);
1659         matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1660         if (!matcher.matches())
1661              failCount++;
1662 
1663         report("Comments");
1664     }
1665 
1666     private static void caseFoldingTest() { // bug 4504687
1667         int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1668         Pattern pattern = Pattern.compile("aa", flags);
1669         Matcher matcher = pattern.matcher("ab");
1670         if (matcher.matches())
1671             failCount++;
1672 
1673         pattern = Pattern.compile("aA", flags);
1674         matcher = pattern.matcher("ab");
1675         if (matcher.matches())
1676             failCount++;
1677 
1678         pattern = Pattern.compile("aa", flags);
1679         matcher = pattern.matcher("aB");
1680         if (matcher.matches())
1681             failCount++;
1682         matcher = pattern.matcher("Ab");
1683         if (matcher.matches())
1684             failCount++;
1685 
1686         // ASCII               "a"
1687         // Latin-1 Supplement  "a" + grave
1688         // Cyrillic            "a"
1689         String[] patterns = new String[] {
1690             //single
1691             "a", "\u00e0", "\u0430",
1692             //slice
1693             "ab", "\u00e0\u00e1", "\u0430\u0431",
1694             //class single
1695             "[a]", "[\u00e0]", "[\u0430]",
1696             //class range
1697             "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1698             //back reference
1699             "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1700         };
1701 
1702         String[] texts = new String[] {
1703             "A", "\u00c0", "\u0410",
1704             "AB", "\u00c0\u00c1", "\u0410\u0411",
1705             "A", "\u00c0", "\u0410",
1706             "B", "\u00c2", "\u0411",
1707             "aA", "\u00e0\u00c0", "\u0430\u0410"
1708         };
1709 
1710         boolean[] expected = new boolean[] {
1711             true, false, false,
1712             true, false, false,
1713             true, false, false,
1714             true, false, false,
1715             true, false, false
1716         };
1717 
1718         flags = Pattern.CASE_INSENSITIVE;
1719         for (int i = 0; i < patterns.length; i++) {
1720             pattern = Pattern.compile(patterns[i], flags);
1721             matcher = pattern.matcher(texts[i]);
1722             if (matcher.matches() != expected[i]) {
1723                 System.out.println("<1> Failed at " + i);
1724                 failCount++;
1725             }
1726         }
1727 
1728         flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1729         for (int i = 0; i < patterns.length; i++) {
1730             pattern = Pattern.compile(patterns[i], flags);
1731             matcher = pattern.matcher(texts[i]);
1732             if (!matcher.matches()) {
1733                 System.out.println("<2> Failed at " + i);
1734                 failCount++;
1735             }
1736         }
1737         // flag unicode_case alone should do nothing
1738         flags = Pattern.UNICODE_CASE;
1739         for (int i = 0; i < patterns.length; i++) {
1740             pattern = Pattern.compile(patterns[i], flags);
1741             matcher = pattern.matcher(texts[i]);
1742             if (matcher.matches()) {
1743                 System.out.println("<3> Failed at " + i);
1744                 failCount++;
1745             }
1746         }
1747 
1748         // Special cases: i, I, u+0131 and u+0130
1749         flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1750         pattern = Pattern.compile("[h-j]+", flags);
1751         if (!pattern.matcher("\u0131\u0130").matches())
1752             failCount++;
1753         report("Case Folding");
1754     }
1755 
1756     private static void appendTest() {
1757         Pattern pattern = Pattern.compile("(ab)(cd)");
1758         Matcher matcher = pattern.matcher("abcd");
1759         String result = matcher.replaceAll("$2$1");
1760         if (!result.equals("cdab"))
1761             failCount++;
1762 
1763         String  s1 = "Swap all: first = 123, second = 456";
1764         String  s2 = "Swap one: first = 123, second = 456";
1765         String  r  = "$3$2$1";
1766         pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1767         matcher = pattern.matcher(s1);
1768 
1769         result = matcher.replaceAll(r);
1770         if (!result.equals("Swap all: 123 = first, 456 = second"))
1771             failCount++;
1772 
1773         matcher = pattern.matcher(s2);
1774 
1775         if (matcher.find()) {
1776             StringBuffer sb = new StringBuffer();
1777             matcher.appendReplacement(sb, r);
1778             matcher.appendTail(sb);
1779             result = sb.toString();
1780             if (!result.equals("Swap one: 123 = first, second = 456"))
1781                 failCount++;
1782         }
1783 
1784         // Supplementary character test
1785         pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1786         matcher = pattern.matcher(toSupplementaries("abcd"));
1787         result = matcher.replaceAll("$2$1");
1788         if (!result.equals(toSupplementaries("cdab")))
1789             failCount++;
1790 
1791         s1 = toSupplementaries("Swap all: first = 123, second = 456");
1792         s2 = toSupplementaries("Swap one: first = 123, second = 456");
1793         r  = toSupplementaries("$3$2$1");
1794         pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1795         matcher = pattern.matcher(s1);
1796 
1797         result = matcher.replaceAll(r);
1798         if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1799             failCount++;
1800 
1801         matcher = pattern.matcher(s2);
1802 
1803         if (matcher.find()) {
1804             StringBuffer sb = new StringBuffer();
1805             matcher.appendReplacement(sb, r);
1806             matcher.appendTail(sb);
1807             result = sb.toString();
1808             if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1809                 failCount++;
1810         }
1811         report("Append");
1812     }
1813 
1814     private static void splitTest() {
1815         Pattern pattern = Pattern.compile(":");
1816         String[] result = pattern.split("foo:and:boo", 2);
1817         if (!result[0].equals("foo"))
1818             failCount++;
1819         if (!result[1].equals("and:boo"))
1820             failCount++;
1821         // Supplementary character test
1822         Pattern patternX = Pattern.compile(toSupplementaries("X"));
1823         result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1824         if (!result[0].equals(toSupplementaries("foo")))
1825             failCount++;
1826         if (!result[1].equals(toSupplementaries("andXboo")))
1827             failCount++;
1828 
1829         CharBuffer cb = CharBuffer.allocate(100);
1830         cb.put("foo:and:boo");
1831         cb.flip();
1832         result = pattern.split(cb);
1833         if (!result[0].equals("foo"))
1834             failCount++;
1835         if (!result[1].equals("and"))
1836             failCount++;
1837         if (!result[2].equals("boo"))
1838             failCount++;
1839 
1840         // Supplementary character test
1841         CharBuffer cbs = CharBuffer.allocate(100);
1842         cbs.put(toSupplementaries("fooXandXboo"));
1843         cbs.flip();
1844         result = patternX.split(cbs);
1845         if (!result[0].equals(toSupplementaries("foo")))
1846             failCount++;
1847         if (!result[1].equals(toSupplementaries("and")))
1848             failCount++;
1849         if (!result[2].equals(toSupplementaries("boo")))
1850             failCount++;
1851 
1852         String source = "0123456789";
1853         for (int limit=-2; limit<3; limit++) {
1854             for (int x=0; x<10; x++) {
1855                 result = source.split(Integer.toString(x), limit);
1856                 int expectedLength = limit < 1 ? 2 : limit;
1857 
1858                 if ((limit == 0) && (x == 9)) {
1859                     // expected dropping of ""
1860                     if (result.length != 1)
1861                         failCount++;
1862                     if (!result[0].equals("012345678")) {
1863                         failCount++;
1864                     }
1865                 } else {
1866                     if (result.length != expectedLength) {
1867                         failCount++;
1868                     }
1869                     if (!result[0].equals(source.substring(0,x))) {
1870                         if (limit != 1) {
1871                             failCount++;
1872                         } else {
1873                             if (!result[0].equals(source.substring(0,10))) {
1874                                 failCount++;
1875                             }
1876                         }
1877                     }
1878                     if (expectedLength > 1) { // Check segment 2
1879                         if (!result[1].equals(source.substring(x+1,10)))
1880                             failCount++;
1881                     }
1882                 }
1883             }
1884         }
1885         // Check the case for no match found
1886         for (int limit=-2; limit<3; limit++) {
1887             result = source.split("e", limit);
1888             if (result.length != 1)
1889                 failCount++;
1890             if (!result[0].equals(source))
1891                 failCount++;
1892         }
1893         // Check the case for limit == 0, source = "";
1894         // split() now returns 0-length for empty source "" see #6559590
1895         source = "";
1896         result = source.split("e", 0);
1897         if (result.length != 1)
1898             failCount++;
1899         if (!result[0].equals(source))
1900             failCount++;
1901 
1902         // Check both split() and splitAsStraem(), especially for zero-lenth
1903         // input and zero-lenth match cases
1904         String[][] input = new String[][] {
1905             { " ",           "Abc Efg Hij" },   // normal non-zero-match
1906             { " ",           " Abc Efg Hij" },  // leading empty str for non-zero-match
1907             { " ",           "Abc  Efg Hij" },  // non-zero-match in the middle
1908             { "(?=\\p{Lu})", "AbcEfgHij" },     // no leading empty str for zero-match
1909             { "(?=\\p{Lu})", "AbcEfg" },
1910             { "(?=\\p{Lu})", "Abc" },
1911             { " ",           "" },              // zero-length input
1912             { ".*",          "" },
1913 
1914             // some tests from PatternStreamTest.java
1915             { "4",       "awgqwefg1fefw4vssv1vvv1" },
1916             { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" },
1917             { "1",       "awgqwefg1fefw4vssv1vvv1" },
1918             { "1",       "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" },
1919             { "\u56da",  "1\u56da23\u56da456\u56da7890" },
1920             { "\u56da",  "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" },
1921             { "\u56da",  "" },
1922             { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs
1923             { "o",       "boo:and:foo" },
1924             { "o",       "booooo:and:fooooo" },
1925             { "o",       "fooooo:" },
1926         };
1927 
1928         String[][] expected = new String[][] {
1929             { "Abc", "Efg", "Hij" },
1930             { "", "Abc", "Efg", "Hij" },
1931             { "Abc", "", "Efg", "Hij" },
1932             { "Abc", "Efg", "Hij" },
1933             { "Abc", "Efg" },
1934             { "Abc" },
1935             { "" },
1936             { "" },
1937 
1938             { "awgqwefg1fefw", "vssv1vvv1" },
1939             { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" },
1940             { "awgqwefg", "fefw4vssv", "vvv" },
1941             { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" },
1942             { "1", "23", "456", "7890" },
1943             { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" },
1944             { "" },
1945             { "This", "is", "testing", "", "with", "different", "separators" },
1946             { "b", "", ":and:f" },
1947             { "b", "", "", "", "", ":and:f" },
1948             { "f", "", "", "", "", ":" },
1949         };
1950         for (int i = 0; i < input.length; i++) {
1951             pattern = Pattern.compile(input[i][0]);
1952             if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) {
1953                 failCount++;
1954             }
1955             if (input[i][1].length() > 0 &&  // splitAsStream() return empty resulting
1956                                              // array for zero-length input for now
1957                 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(),
1958                                expected[i])) {
1959                 failCount++;
1960             }
1961         }
1962         report("Split");
1963     }
1964 
1965     private static void negationTest() {
1966         Pattern pattern = Pattern.compile("[\\[@^]+");
1967         Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1968         if (!matcher.find())
1969             failCount++;
1970         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1971             failCount++;
1972         pattern = Pattern.compile("[@\\[^]+");
1973         matcher = pattern.matcher("@@@@[[[[^^^^");
1974         if (!matcher.find())
1975             failCount++;
1976         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1977             failCount++;
1978         pattern = Pattern.compile("[@\\[^@]+");
1979         matcher = pattern.matcher("@@@@[[[[^^^^");
1980         if (!matcher.find())
1981             failCount++;
1982         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1983             failCount++;
1984 
1985         pattern = Pattern.compile("\\)");
1986         matcher = pattern.matcher("xxx)xxx");
1987         if (!matcher.find())
1988             failCount++;
1989 
1990         report("Negation");
1991     }
1992 
1993     private static void ampersandTest() {
1994         Pattern pattern = Pattern.compile("[&@]+");
1995         check(pattern, "@@@@&&&&", true);
1996 
1997         pattern = Pattern.compile("[@&]+");
1998         check(pattern, "@@@@&&&&", true);
1999 
2000         pattern = Pattern.compile("[@\\&]+");
2001         check(pattern, "@@@@&&&&", true);
2002 
2003         report("Ampersand");
2004     }
2005 
2006     private static void octalTest() throws Exception {
2007         Pattern pattern = Pattern.compile("\\u0007");
2008         Matcher matcher = pattern.matcher("\u0007");
2009         if (!matcher.matches())
2010             failCount++;
2011         pattern = Pattern.compile("\\07");
2012         matcher = pattern.matcher("\u0007");
2013         if (!matcher.matches())
2014             failCount++;
2015         pattern = Pattern.compile("\\007");
2016         matcher = pattern.matcher("\u0007");
2017         if (!matcher.matches())
2018             failCount++;
2019         pattern = Pattern.compile("\\0007");
2020         matcher = pattern.matcher("\u0007");
2021         if (!matcher.matches())
2022             failCount++;
2023         pattern = Pattern.compile("\\040");
2024         matcher = pattern.matcher("\u0020");
2025         if (!matcher.matches())
2026             failCount++;
2027         pattern = Pattern.compile("\\0403");
2028         matcher = pattern.matcher("\u00203");
2029         if (!matcher.matches())
2030             failCount++;
2031         pattern = Pattern.compile("\\0103");
2032         matcher = pattern.matcher("\u0043");
2033         if (!matcher.matches())
2034             failCount++;
2035 
2036         report("Octal");
2037     }
2038 
2039     private static void longPatternTest() throws Exception {
2040         try {
2041             Pattern pattern = Pattern.compile(
2042                 "a 32-character-long pattern xxxx");
2043             pattern = Pattern.compile("a 33-character-long pattern xxxxx");
2044             pattern = Pattern.compile("a thirty four character long regex");
2045             StringBuffer patternToBe = new StringBuffer(101);
2046             for (int i=0; i<100; i++)
2047                 patternToBe.append((char)(97 + i%26));
2048             pattern = Pattern.compile(patternToBe.toString());
2049         } catch (PatternSyntaxException e) {
2050             failCount++;
2051         }
2052 
2053         // Supplementary character test
2054         try {
2055             Pattern pattern = Pattern.compile(
2056                 toSupplementaries("a 32-character-long pattern xxxx"));
2057             pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
2058             pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
2059             StringBuffer patternToBe = new StringBuffer(101*2);
2060             for (int i=0; i<100; i++)
2061                 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
2062                                                      + 97 + i%26));
2063             pattern = Pattern.compile(patternToBe.toString());
2064         } catch (PatternSyntaxException e) {
2065             failCount++;
2066         }
2067         report("LongPattern");
2068     }
2069 
2070     private static void group0Test() throws Exception {
2071         Pattern pattern = Pattern.compile("(tes)ting");
2072         Matcher matcher = pattern.matcher("testing");
2073         check(matcher, "testing");
2074 
2075         matcher.reset("testing");
2076         if (matcher.lookingAt()) {
2077             if (!matcher.group(0).equals("testing"))
2078                 failCount++;
2079         } else {
2080             failCount++;
2081         }
2082 
2083         matcher.reset("testing");
2084         if (matcher.matches()) {
2085             if (!matcher.group(0).equals("testing"))
2086                 failCount++;
2087         } else {
2088             failCount++;
2089         }
2090 
2091         pattern = Pattern.compile("(tes)ting");
2092         matcher = pattern.matcher("testing");
2093         if (matcher.lookingAt()) {
2094             if (!matcher.group(0).equals("testing"))
2095                 failCount++;
2096         } else {
2097             failCount++;
2098         }
2099 
2100         pattern = Pattern.compile("^(tes)ting");
2101         matcher = pattern.matcher("testing");
2102         if (matcher.matches()) {
2103             if (!matcher.group(0).equals("testing"))
2104                 failCount++;
2105         } else {
2106             failCount++;
2107         }
2108 
2109         // Supplementary character test
2110         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2111         matcher = pattern.matcher(toSupplementaries("testing"));
2112         check(matcher, toSupplementaries("testing"));
2113 
2114         matcher.reset(toSupplementaries("testing"));
2115         if (matcher.lookingAt()) {
2116             if (!matcher.group(0).equals(toSupplementaries("testing")))
2117                 failCount++;
2118         } else {
2119             failCount++;
2120         }
2121 
2122         matcher.reset(toSupplementaries("testing"));
2123         if (matcher.matches()) {
2124             if (!matcher.group(0).equals(toSupplementaries("testing")))
2125                 failCount++;
2126         } else {
2127             failCount++;
2128         }
2129 
2130         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2131         matcher = pattern.matcher(toSupplementaries("testing"));
2132         if (matcher.lookingAt()) {
2133             if (!matcher.group(0).equals(toSupplementaries("testing")))
2134                 failCount++;
2135         } else {
2136             failCount++;
2137         }
2138 
2139         pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
2140         matcher = pattern.matcher(toSupplementaries("testing"));
2141         if (matcher.matches()) {
2142             if (!matcher.group(0).equals(toSupplementaries("testing")))
2143                 failCount++;
2144         } else {
2145             failCount++;
2146         }
2147 
2148         report("Group0");
2149     }
2150 
2151     private static void findIntTest() throws Exception {
2152         Pattern p = Pattern.compile("blah");
2153         Matcher m = p.matcher("zzzzblahzzzzzblah");
2154         boolean result = m.find(2);
2155         if (!result)
2156             failCount++;
2157 
2158         p = Pattern.compile("$");
2159         m = p.matcher("1234567890");
2160         result = m.find(10);
2161         if (!result)
2162             failCount++;
2163         try {
2164             result = m.find(11);
2165             failCount++;
2166         } catch (IndexOutOfBoundsException e) {
2167             // correct result
2168         }
2169 
2170         // Supplementary character test
2171         p = Pattern.compile(toSupplementaries("blah"));
2172         m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
2173         result = m.find(2);
2174         if (!result)
2175             failCount++;
2176 
2177         report("FindInt");
2178     }
2179 
2180     private static void emptyPatternTest() throws Exception {
2181         Pattern p = Pattern.compile("");
2182         Matcher m = p.matcher("foo");
2183 
2184         // Should find empty pattern at beginning of input
2185         boolean result = m.find();
2186         if (result != true)
2187             failCount++;
2188         if (m.start() != 0)
2189             failCount++;
2190 
2191         // Should not match entire input if input is not empty
2192         m.reset();
2193         result = m.matches();
2194         if (result == true)
2195             failCount++;
2196 
2197         try {
2198             m.start(0);
2199             failCount++;
2200         } catch (IllegalStateException e) {
2201             // Correct result
2202         }
2203 
2204         // Should match entire input if input is empty
2205         m.reset("");
2206         result = m.matches();
2207         if (result != true)
2208             failCount++;
2209 
2210         result = Pattern.matches("", "");
2211         if (result != true)
2212             failCount++;
2213 
2214         result = Pattern.matches("", "foo");
2215         if (result == true)
2216             failCount++;
2217         report("EmptyPattern");
2218     }
2219 
2220     private static void charClassTest() throws Exception {
2221         Pattern pattern = Pattern.compile("blah[ab]]blech");
2222         check(pattern, "blahb]blech", true);
2223 
2224         pattern = Pattern.compile("[abc[def]]");
2225         check(pattern, "b", true);
2226 
2227         // Supplementary character tests
2228         pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2229         check(pattern, toSupplementaries("blahb]blech"), true);
2230 
2231         pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2232         check(pattern, toSupplementaries("b"), true);
2233 
2234         try {
2235             // u00ff when UNICODE_CASE
2236             pattern = Pattern.compile("[ab\u00ffcd]",
2237                                       Pattern.CASE_INSENSITIVE|
2238                                       Pattern.UNICODE_CASE);
2239             check(pattern, "ab\u00ffcd", true);
2240             check(pattern, "Ab\u0178Cd", true);
2241 
2242             // u00b5 when UNICODE_CASE
2243             pattern = Pattern.compile("[ab\u00b5cd]",
2244                                       Pattern.CASE_INSENSITIVE|
2245                                       Pattern.UNICODE_CASE);
2246             check(pattern, "ab\u00b5cd", true);
2247             check(pattern, "Ab\u039cCd", true);
2248         } catch (Exception e) { failCount++; }
2249 
2250         /* Special cases
2251            (1)LatinSmallLetterLongS u+017f
2252            (2)LatinSmallLetterDotlessI u+0131
2253            (3)LatineCapitalLetterIWithDotAbove u+0130
2254            (4)KelvinSign u+212a
2255            (5)AngstromSign u+212b
2256         */
2257         int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2258         pattern = Pattern.compile("[sik\u00c5]+", flags);
2259         if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2260             failCount++;
2261 
2262         report("CharClass");
2263     }
2264 
2265     private static void caretTest() throws Exception {
2266         Pattern pattern = Pattern.compile("\\w*");
2267         Matcher matcher = pattern.matcher("a#bc#def##g");
2268         check(matcher, "a");
2269         check(matcher, "");
2270         check(matcher, "bc");
2271         check(matcher, "");
2272         check(matcher, "def");
2273         check(matcher, "");
2274         check(matcher, "");
2275         check(matcher, "g");
2276         check(matcher, "");
2277         if (matcher.find())
2278             failCount++;
2279 
2280         pattern = Pattern.compile("^\\w*");
2281         matcher = pattern.matcher("a#bc#def##g");
2282         check(matcher, "a");
2283         if (matcher.find())
2284             failCount++;
2285 
2286         pattern = Pattern.compile("\\w");
2287         matcher = pattern.matcher("abc##x");
2288         check(matcher, "a");
2289         check(matcher, "b");
2290         check(matcher, "c");
2291         check(matcher, "x");
2292         if (matcher.find())
2293             failCount++;
2294 
2295         pattern = Pattern.compile("^\\w");
2296         matcher = pattern.matcher("abc##x");
2297         check(matcher, "a");
2298         if (matcher.find())
2299             failCount++;
2300 
2301         pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2302         matcher = pattern.matcher("abcdef-ghi\njklmno");
2303         check(matcher, "abc");
2304         if (matcher.find())
2305             failCount++;
2306 
2307         pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2308         matcher = pattern.matcher("abcdef-ghi\njklmno");
2309         check(matcher, "abc");
2310         check(matcher, "jkl");
2311         if (matcher.find())
2312             failCount++;
2313 
2314         pattern = Pattern.compile("^", Pattern.MULTILINE);
2315         matcher = pattern.matcher("this is some text");
2316         String result = matcher.replaceAll("X");
2317         if (!result.equals("Xthis is some text"))
2318             failCount++;
2319 
2320         pattern = Pattern.compile("^");
2321         matcher = pattern.matcher("this is some text");
2322         result = matcher.replaceAll("X");
2323         if (!result.equals("Xthis is some text"))
2324             failCount++;
2325 
2326         pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2327         matcher = pattern.matcher("this is some text\n");
2328         result = matcher.replaceAll("X");
2329         if (!result.equals("Xthis is some text\n"))
2330             failCount++;
2331 
2332         report("Caret");
2333     }
2334 
2335     private static void groupCaptureTest() throws Exception {
2336         // Independent group
2337         Pattern pattern = Pattern.compile("x+(?>y+)z+");
2338         Matcher matcher = pattern.matcher("xxxyyyzzz");
2339         matcher.find();
2340         try {
2341             String blah = matcher.group(1);
2342             failCount++;
2343         } catch (IndexOutOfBoundsException ioobe) {
2344             // Good result
2345         }
2346         // Pure group
2347         pattern = Pattern.compile("x+(?:y+)z+");
2348         matcher = pattern.matcher("xxxyyyzzz");
2349         matcher.find();
2350         try {
2351             String blah = matcher.group(1);
2352             failCount++;
2353         } catch (IndexOutOfBoundsException ioobe) {
2354             // Good result
2355         }
2356 
2357         // Supplementary character tests
2358         // Independent group
2359         pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2360         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2361         matcher.find();
2362         try {
2363             String blah = matcher.group(1);
2364             failCount++;
2365         } catch (IndexOutOfBoundsException ioobe) {
2366             // Good result
2367         }
2368         // Pure group
2369         pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2370         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2371         matcher.find();
2372         try {
2373             String blah = matcher.group(1);
2374             failCount++;
2375         } catch (IndexOutOfBoundsException ioobe) {
2376             // Good result
2377         }
2378 
2379         report("GroupCapture");
2380     }
2381 
2382     private static void backRefTest() throws Exception {
2383         Pattern pattern = Pattern.compile("(a*)bc\\1");
2384         check(pattern, "zzzaabcazzz", true);
2385 
2386         pattern = Pattern.compile("(a*)bc\\1");
2387         check(pattern, "zzzaabcaazzz", true);
2388 
2389         pattern = Pattern.compile("(abc)(def)\\1");
2390         check(pattern, "abcdefabc", true);
2391 
2392         pattern = Pattern.compile("(abc)(def)\\3");
2393         check(pattern, "abcdefabc", false);
2394 
2395         try {
2396             for (int i = 1; i < 10; i++) {
2397                 // Make sure backref 1-9 are always accepted
2398                 pattern = Pattern.compile("abcdef\\" + i);
2399                 // and fail to match if the target group does not exit
2400                 check(pattern, "abcdef", false);
2401             }
2402         } catch(PatternSyntaxException e) {
2403             failCount++;
2404         }
2405 
2406         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2407         check(pattern, "abcdefghija", false);
2408         check(pattern, "abcdefghija1", true);
2409 
2410         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2411         check(pattern, "abcdefghijkk", true);
2412 
2413         pattern = Pattern.compile("(a)bcdefghij\\11");
2414         check(pattern, "abcdefghija1", true);
2415 
2416         // Supplementary character tests
2417         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2418         check(pattern, toSupplementaries("zzzaabcazzz"), true);
2419 
2420         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2421         check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2422 
2423         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2424         check(pattern, toSupplementaries("abcdefabc"), true);
2425 
2426         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2427         check(pattern, toSupplementaries("abcdefabc"), false);
2428 
2429         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2430         check(pattern, toSupplementaries("abcdefghija"), false);
2431         check(pattern, toSupplementaries("abcdefghija1"), true);
2432 
2433         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2434         check(pattern, toSupplementaries("abcdefghijkk"), true);
2435 
2436         report("BackRef");
2437     }
2438 
2439     /**
2440      * Unicode Technical Report #18, section 2.6 End of Line
2441      * There is no empty line to be matched in the sequence \u000D\u000A
2442      * but there is an empty line in the sequence \u000A\u000D.
2443      */
2444     private static void anchorTest() throws Exception {
2445         Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2446         Matcher m = p.matcher("blah1\r\nblah2");
2447         m.find();
2448         m.find();
2449         if (!m.group().equals("blah2"))
2450             failCount++;
2451 
2452         m.reset("blah1\n\rblah2");
2453         m.find();
2454         m.find();
2455         m.find();
2456         if (!m.group().equals("blah2"))
2457             failCount++;
2458 
2459         // Test behavior of $ with \r\n at end of input
2460         p = Pattern.compile(".+$");
2461         m = p.matcher("blah1\r\n");
2462         if (!m.find())
2463             failCount++;
2464        if (!m.group().equals("blah1"))
2465             failCount++;
2466         if (m.find())
2467             failCount++;
2468 
2469         // Test behavior of $ with \r\n at end of input in multiline
2470         p = Pattern.compile(".+$", Pattern.MULTILINE);
2471         m = p.matcher("blah1\r\n");
2472         if (!m.find())
2473             failCount++;
2474         if (m.find())
2475             failCount++;
2476 
2477         // Test for $ recognition of \u0085 for bug 4527731
2478         p = Pattern.compile(".+$", Pattern.MULTILINE);
2479         m = p.matcher("blah1\u0085");
2480         if (!m.find())
2481             failCount++;
2482 
2483         // Supplementary character test
2484         p = Pattern.compile("^.*$", Pattern.MULTILINE);
2485         m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2486         m.find();
2487         m.find();
2488         if (!m.group().equals(toSupplementaries("blah2")))
2489             failCount++;
2490 
2491         m.reset(toSupplementaries("blah1\n\rblah2"));
2492         m.find();
2493         m.find();
2494         m.find();
2495         if (!m.group().equals(toSupplementaries("blah2")))
2496             failCount++;
2497 
2498         // Test behavior of $ with \r\n at end of input
2499         p = Pattern.compile(".+$");
2500         m = p.matcher(toSupplementaries("blah1\r\n"));
2501         if (!m.find())
2502             failCount++;
2503         if (!m.group().equals(toSupplementaries("blah1")))
2504             failCount++;
2505         if (m.find())
2506             failCount++;
2507 
2508         // Test behavior of $ with \r\n at end of input in multiline
2509         p = Pattern.compile(".+$", Pattern.MULTILINE);
2510         m = p.matcher(toSupplementaries("blah1\r\n"));
2511         if (!m.find())
2512             failCount++;
2513         if (m.find())
2514             failCount++;
2515 
2516         // Test for $ recognition of \u0085 for bug 4527731
2517         p = Pattern.compile(".+$", Pattern.MULTILINE);
2518         m = p.matcher(toSupplementaries("blah1\u0085"));
2519         if (!m.find())
2520             failCount++;
2521 
2522         report("Anchors");
2523     }
2524 
2525     /**
2526      * A basic sanity test of Matcher.lookingAt().
2527      */
2528     private static void lookingAtTest() throws Exception {
2529         Pattern p = Pattern.compile("(ab)(c*)");
2530         Matcher m = p.matcher("abccczzzabcczzzabccc");
2531 
2532         if (!m.lookingAt())
2533             failCount++;
2534 
2535         if (!m.group().equals(m.group(0)))
2536             failCount++;
2537 
2538         m = p.matcher("zzzabccczzzabcczzzabccczzz");
2539         if (m.lookingAt())
2540             failCount++;
2541 
2542         // Supplementary character test
2543         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2544         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2545 
2546         if (!m.lookingAt())
2547             failCount++;
2548 
2549         if (!m.group().equals(m.group(0)))
2550             failCount++;
2551 
2552         m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2553         if (m.lookingAt())
2554             failCount++;
2555 
2556         report("Looking At");
2557     }
2558 
2559     /**
2560      * A basic sanity test of Matcher.matches().
2561      */
2562     private static void matchesTest() throws Exception {
2563         // matches()
2564         Pattern p = Pattern.compile("ulb(c*)");
2565         Matcher m = p.matcher("ulbcccccc");
2566         if (!m.matches())
2567             failCount++;
2568 
2569         // find() but not matches()
2570         m.reset("zzzulbcccccc");
2571         if (m.matches())
2572             failCount++;
2573 
2574         // lookingAt() but not matches()
2575         m.reset("ulbccccccdef");
2576         if (m.matches())
2577             failCount++;
2578 
2579         // matches()
2580         p = Pattern.compile("a|ad");
2581         m = p.matcher("ad");
2582         if (!m.matches())
2583             failCount++;
2584 
2585         // Supplementary character test
2586         // matches()
2587         p = Pattern.compile(toSupplementaries("ulb(c*)"));
2588         m = p.matcher(toSupplementaries("ulbcccccc"));
2589         if (!m.matches())
2590             failCount++;
2591 
2592         // find() but not matches()
2593         m.reset(toSupplementaries("zzzulbcccccc"));
2594         if (m.matches())
2595             failCount++;
2596 
2597         // lookingAt() but not matches()
2598         m.reset(toSupplementaries("ulbccccccdef"));
2599         if (m.matches())
2600             failCount++;
2601 
2602         // matches()
2603         p = Pattern.compile(toSupplementaries("a|ad"));
2604         m = p.matcher(toSupplementaries("ad"));
2605         if (!m.matches())
2606             failCount++;
2607 
2608         report("Matches");
2609     }
2610 
2611     /**
2612      * A basic sanity test of Pattern.matches().
2613      */
2614     private static void patternMatchesTest() throws Exception {
2615         // matches()
2616         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2617                              toSupplementaries("ulbcccccc")))
2618             failCount++;
2619 
2620         // find() but not matches()
2621         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2622                             toSupplementaries("zzzulbcccccc")))
2623             failCount++;
2624 
2625         // lookingAt() but not matches()
2626         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2627                             toSupplementaries("ulbccccccdef")))
2628             failCount++;
2629 
2630         // Supplementary character test
2631         // matches()
2632         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2633                              toSupplementaries("ulbcccccc")))
2634             failCount++;
2635 
2636         // find() but not matches()
2637         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2638                             toSupplementaries("zzzulbcccccc")))
2639             failCount++;
2640 
2641         // lookingAt() but not matches()
2642         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2643                             toSupplementaries("ulbccccccdef")))
2644             failCount++;
2645 
2646         report("Pattern Matches");
2647     }
2648 
2649     /**
2650      * Canonical equivalence testing. Tests the ability of the engine
2651      * to match sequences that are not explicitly specified in the
2652      * pattern when they are considered equivalent by the Unicode Standard.
2653      */
2654     private static void ceTest() throws Exception {
2655         // Decomposed char outside char classes
2656         Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2657         Matcher m = p.matcher("test\u00e5");
2658         if (!m.matches())
2659             failCount++;
2660 
2661         m.reset("testa\u030a");
2662         if (!m.matches())
2663             failCount++;
2664 
2665         // Composed char outside char classes
2666         p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2667         m = p.matcher("test\u00e5");
2668         if (!m.matches())
2669             failCount++;
2670 
2671         m.reset("testa\u030a");
2672         if (!m.find())
2673             failCount++;
2674 
2675         // Decomposed char inside a char class
2676         p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2677         m = p.matcher("test\u00e5");
2678         if (!m.find())
2679             failCount++;
2680 
2681         m.reset("testa\u030a");
2682         if (!m.find())
2683             failCount++;
2684 
2685         // Composed char inside a char class
2686         p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2687         m = p.matcher("test\u00e5");
2688         if (!m.find())
2689             failCount++;
2690 
2691         m.reset("testa\u0300");
2692         if (!m.find())
2693             failCount++;
2694 
2695         m.reset("testa\u030a");
2696         if (!m.find())
2697             failCount++;
2698 
2699         // Marks that cannot legally change order and be equivalent
2700         p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2701         check(p, "testa\u0308\u0300", true);
2702         check(p, "testa\u0300\u0308", false);
2703 
2704         // Marks that can legally change order and be equivalent
2705         p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2706         check(p, "testa\u0308\u0323", true);
2707         check(p, "testa\u0323\u0308", true);
2708 
2709         // Test all equivalences of the sequence a\u0308\u0323\u0300
2710         p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2711         check(p, "testa\u0308\u0323\u0300", true);
2712         check(p, "testa\u0323\u0308\u0300", true);
2713         check(p, "testa\u0308\u0300\u0323", true);
2714         check(p, "test\u00e4\u0323\u0300", true);
2715         check(p, "test\u00e4\u0300\u0323", true);
2716 
2717         Object[][] data = new Object[][] {
2718 
2719         // JDK-4867170
2720         { "[\u1f80-\u1f82]", "ab\u1f80cd",             "f", true },
2721         { "[\u1f80-\u1f82]", "ab\u1f81cd",             "f", true },
2722         { "[\u1f80-\u1f82]", "ab\u1f82cd",             "f", true },
2723         { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true },
2724         { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true },
2725         { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd",       "f", true },
2726         { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd",       "f", true },
2727 
2728         { "\\p{IsGreek}",    "ab\u1f80cd",             "f", true },
2729         { "\\p{IsGreek}",    "ab\u1f81cd",             "f", true },
2730         { "\\p{IsGreek}",    "ab\u1f82cd",             "f", true },
2731         { "\\p{IsGreek}",    "ab\u03b1\u0314\u0345cd", "f", true },
2732         { "\\p{IsGreek}",    "ab\u1f01\u0345cd",       "f", true },
2733 
2734         // backtracking, force to match "\u1f80", instead of \u1f82"
2735         { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true },
2736 
2737         { "[\\p{IsGreek}]",  "\u03b1\u0314\u0345",     "m", true },
2738         { "\\p{IsGreek}",    "\u03b1\u0314\u0345",     "m", true },
2739 
2740         { "[^\u1f80-\u1f82]","\u1f81",                 "m", false },
2741         { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345",     "m", false },
2742         { "[^\u1f01\u0345]", "\u1f81",                 "f", false },
2743 
2744         { "[^\u1f81]+",      "\u1f80\u1f82",           "f", true },
2745         { "[\u1f80]",        "ab\u1f80cd",             "f", true },
2746         { "\u1f80",          "ab\u1f80cd",             "f", true },
2747         { "\u1f00\u0345\u0300",  "\u1f82", "m", true },
2748         { "\u1f80",          "-\u1f00\u0345\u0300-",   "f", true },
2749         { "\u1f82",          "\u1f00\u0345\u0300",     "m", true },
2750         { "\u1f82",          "\u1f80\u0300",           "m", true },
2751 
2752         // JDK-7080302       # compile failed
2753         { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true},
2754 
2755         // JDK-6728861, same cause as above one
2756         { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true},
2757 
2758         // JDK-6995635
2759         { "(\u00e9)", "e\u0301", "m", true },
2760 
2761         // JDK-6736245
2762         // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc
2763         { "\u2ADC", "\u2ADC", "m", true},          // NFC
2764         { "\u2ADC", "\u2ADD\u0338", "m", true},    // NFD
2765 
2766         //  4916384.
2767         // Decomposed hangul (jamos) works inside clazz
2768         { "[\u1100\u1161]", "\u1100\u1161", "m", true},
2769         { "[\u1100\u1161]", "\uac00", "m", true},
2770 
2771         { "[\uac00]", "\u1100\u1161", "m", true},
2772         { "[\uac00]", "\uac00", "m", true},
2773 
2774         // Decomposed hangul (jamos)
2775         { "\u1100\u1161", "\u1100\u1161", "m", true},
2776         { "\u1100\u1161", "\uac00", "m", true},
2777 
2778         // Composed hangul
2779         { "\uac00",  "\u1100\u1161", "m", true },
2780         { "\uac00",  "\uac00", "m", true },
2781 
2782         /* Need a NFDSlice to nfd the source to solve this issue
2783            u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f>  -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2784            u+1d1bc -> nfd: <u+1d1ba><u+1d165>           -> nfc: <u+1d1ba><u+1d165>
2785            <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2786 
2787         // Decomposed supplementary outside char classes
2788         // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true },
2789         // Composed supplementary outside char classes
2790         // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true },
2791         */
2792         { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true },
2793         { "test\ud834\uddc0",             "test\ud834\uddbc\ud834\udd6f", "m", true },
2794 
2795         { "test\ud834\uddc0",             "test\ud834\uddc0",             "m", true },
2796         { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0",             "m", true },
2797         };
2798 
2799         int failCount = 0;
2800         for (Object[] d : data) {
2801             String pn = (String)d[0];
2802             String tt = (String)d[1];
2803             boolean isFind = "f".equals(((String)d[2]));
2804             boolean expected = (boolean)d[3];
2805             boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find()
2806                                  : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches();
2807             if (ret != expected) {
2808                 failCount++;
2809                 continue;
2810             }
2811         }
2812         report("Canonical Equivalence");
2813     }
2814 
2815     /**
2816      * A basic sanity test of Matcher.replaceAll().
2817      */
2818     private static void globalSubstitute() throws Exception {
2819         // Global substitution with a literal
2820         Pattern p = Pattern.compile("(ab)(c*)");
2821         Matcher m = p.matcher("abccczzzabcczzzabccc");
2822         if (!m.replaceAll("test").equals("testzzztestzzztest"))
2823             failCount++;
2824 
2825         m.reset("zzzabccczzzabcczzzabccczzz");
2826         if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2827             failCount++;
2828 
2829         // Global substitution with groups
2830         m.reset("zzzabccczzzabcczzzabccczzz");
2831         String result = m.replaceAll("$1");
2832         if (!result.equals("zzzabzzzabzzzabzzz"))
2833             failCount++;
2834 
2835         // Supplementary character test
2836         // Global substitution with a literal
2837         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2838         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2839         if (!m.replaceAll(toSupplementaries("test")).
2840             equals(toSupplementaries("testzzztestzzztest")))
2841             failCount++;
2842 
2843         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2844         if (!m.replaceAll(toSupplementaries("test")).
2845             equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2846             failCount++;
2847 
2848         // Global substitution with groups
2849         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2850         result = m.replaceAll("$1");
2851         if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2852             failCount++;
2853 
2854         report("Global Substitution");
2855     }
2856 
2857     /**
2858      * Tests the usage of Matcher.appendReplacement() with literal
2859      * and group substitutions.
2860      */
2861     private static void stringbufferSubstitute() throws Exception {
2862         // SB substitution with literal
2863         String blah = "zzzblahzzz";
2864         Pattern p = Pattern.compile("blah");
2865         Matcher m = p.matcher(blah);
2866         StringBuffer result = new StringBuffer();
2867         try {
2868             m.appendReplacement(result, "blech");
2869             failCount++;
2870         } catch (IllegalStateException e) {
2871         }
2872         m.find();
2873         m.appendReplacement(result, "blech");
2874         if (!result.toString().equals("zzzblech"))
2875             failCount++;
2876 
2877         m.appendTail(result);
2878         if (!result.toString().equals("zzzblechzzz"))
2879             failCount++;
2880 
2881         // SB substitution with groups
2882         blah = "zzzabcdzzz";
2883         p = Pattern.compile("(ab)(cd)*");
2884         m = p.matcher(blah);
2885         result = new StringBuffer();
2886         try {
2887             m.appendReplacement(result, "$1");
2888             failCount++;
2889         } catch (IllegalStateException e) {
2890         }
2891         m.find();
2892         m.appendReplacement(result, "$1");
2893         if (!result.toString().equals("zzzab"))
2894             failCount++;
2895 
2896         m.appendTail(result);
2897         if (!result.toString().equals("zzzabzzz"))
2898             failCount++;
2899 
2900         // SB substitution with 3 groups
2901         blah = "zzzabcdcdefzzz";
2902         p = Pattern.compile("(ab)(cd)*(ef)");
2903         m = p.matcher(blah);
2904         result = new StringBuffer();
2905         try {
2906             m.appendReplacement(result, "$1w$2w$3");
2907             failCount++;
2908         } catch (IllegalStateException e) {
2909         }
2910         m.find();
2911         m.appendReplacement(result, "$1w$2w$3");
2912         if (!result.toString().equals("zzzabwcdwef"))
2913             failCount++;
2914 
2915         m.appendTail(result);
2916         if (!result.toString().equals("zzzabwcdwefzzz"))
2917             failCount++;
2918 
2919         // SB substitution with groups and three matches
2920         // skipping middle match
2921         blah = "zzzabcdzzzabcddzzzabcdzzz";
2922         p = Pattern.compile("(ab)(cd*)");
2923         m = p.matcher(blah);
2924         result = new StringBuffer();
2925         try {
2926             m.appendReplacement(result, "$1");
2927             failCount++;
2928         } catch (IllegalStateException e) {
2929         }
2930         m.find();
2931         m.appendReplacement(result, "$1");
2932         if (!result.toString().equals("zzzab"))
2933             failCount++;
2934 
2935         m.find();
2936         m.find();
2937         m.appendReplacement(result, "$2");
2938         if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2939             failCount++;
2940 
2941         m.appendTail(result);
2942         if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2943             failCount++;
2944 
2945         // Check to make sure escaped $ is ignored
2946         blah = "zzzabcdcdefzzz";
2947         p = Pattern.compile("(ab)(cd)*(ef)");
2948         m = p.matcher(blah);
2949         result = new StringBuffer();
2950         m.find();
2951         m.appendReplacement(result, "$1w\\$2w$3");
2952         if (!result.toString().equals("zzzabw$2wef"))
2953             failCount++;
2954 
2955         m.appendTail(result);
2956         if (!result.toString().equals("zzzabw$2wefzzz"))
2957             failCount++;
2958 
2959         // Check to make sure a reference to nonexistent group causes error
2960         blah = "zzzabcdcdefzzz";
2961         p = Pattern.compile("(ab)(cd)*(ef)");
2962         m = p.matcher(blah);
2963         result = new StringBuffer();
2964         m.find();
2965         try {
2966             m.appendReplacement(result, "$1w$5w$3");
2967             failCount++;
2968         } catch (IndexOutOfBoundsException ioobe) {
2969             // Correct result
2970         }
2971 
2972         // Check double digit group references
2973         blah = "zzz123456789101112zzz";
2974         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2975         m = p.matcher(blah);
2976         result = new StringBuffer();
2977         m.find();
2978         m.appendReplacement(result, "$1w$11w$3");
2979         if (!result.toString().equals("zzz1w11w3"))
2980             failCount++;
2981 
2982         // Check to make sure it backs off $15 to $1 if only three groups
2983         blah = "zzzabcdcdefzzz";
2984         p = Pattern.compile("(ab)(cd)*(ef)");
2985         m = p.matcher(blah);
2986         result = new StringBuffer();
2987         m.find();
2988         m.appendReplacement(result, "$1w$15w$3");
2989         if (!result.toString().equals("zzzabwab5wef"))
2990             failCount++;
2991 
2992 
2993         // Supplementary character test
2994         // SB substitution with literal
2995         blah = toSupplementaries("zzzblahzzz");
2996         p = Pattern.compile(toSupplementaries("blah"));
2997         m = p.matcher(blah);
2998         result = new StringBuffer();
2999         try {
3000             m.appendReplacement(result, toSupplementaries("blech"));
3001             failCount++;
3002         } catch (IllegalStateException e) {
3003         }
3004         m.find();
3005         m.appendReplacement(result, toSupplementaries("blech"));
3006         if (!result.toString().equals(toSupplementaries("zzzblech")))
3007             failCount++;
3008 
3009         m.appendTail(result);
3010         if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
3011             failCount++;
3012 
3013         // SB substitution with groups
3014         blah = toSupplementaries("zzzabcdzzz");
3015         p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
3016         m = p.matcher(blah);
3017         result = new StringBuffer();
3018         try {
3019             m.appendReplacement(result, "$1");
3020             failCount++;
3021         } catch (IllegalStateException e) {
3022         }
3023         m.find();
3024         m.appendReplacement(result, "$1");
3025         if (!result.toString().equals(toSupplementaries("zzzab")))
3026             failCount++;
3027 
3028         m.appendTail(result);
3029         if (!result.toString().equals(toSupplementaries("zzzabzzz")))
3030             failCount++;
3031 
3032         // SB substitution with 3 groups
3033         blah = toSupplementaries("zzzabcdcdefzzz");
3034         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3035         m = p.matcher(blah);
3036         result = new StringBuffer();
3037         try {
3038             m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3039             failCount++;
3040         } catch (IllegalStateException e) {
3041         }
3042         m.find();
3043         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3044         if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
3045             failCount++;
3046 
3047         m.appendTail(result);
3048         if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
3049             failCount++;
3050 
3051         // SB substitution with groups and three matches
3052         // skipping middle match
3053         blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
3054         p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
3055         m = p.matcher(blah);
3056         result = new StringBuffer();
3057         try {
3058             m.appendReplacement(result, "$1");
3059             failCount++;
3060         } catch (IllegalStateException e) {
3061         }
3062         m.find();
3063         m.appendReplacement(result, "$1");
3064         if (!result.toString().equals(toSupplementaries("zzzab")))
3065             failCount++;
3066 
3067         m.find();
3068         m.find();
3069         m.appendReplacement(result, "$2");
3070         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
3071             failCount++;
3072 
3073         m.appendTail(result);
3074         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
3075             failCount++;
3076 
3077         // Check to make sure escaped $ is ignored
3078         blah = toSupplementaries("zzzabcdcdefzzz");
3079         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3080         m = p.matcher(blah);
3081         result = new StringBuffer();
3082         m.find();
3083         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
3084         if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
3085             failCount++;
3086 
3087         m.appendTail(result);
3088         if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
3089             failCount++;
3090 
3091         // Check to make sure a reference to nonexistent group causes error
3092         blah = toSupplementaries("zzzabcdcdefzzz");
3093         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3094         m = p.matcher(blah);
3095         result = new StringBuffer();
3096         m.find();
3097         try {
3098             m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
3099             failCount++;
3100         } catch (IndexOutOfBoundsException ioobe) {
3101             // Correct result
3102         }
3103 
3104         // Check double digit group references
3105         blah = toSupplementaries("zzz123456789101112zzz");
3106         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3107         m = p.matcher(blah);
3108         result = new StringBuffer();
3109         m.find();
3110         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
3111         if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
3112             failCount++;
3113 
3114         // Check to make sure it backs off $15 to $1 if only three groups
3115         blah = toSupplementaries("zzzabcdcdefzzz");
3116         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3117         m = p.matcher(blah);
3118         result = new StringBuffer();
3119         m.find();
3120         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3121         if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
3122             failCount++;
3123 
3124         // Check nothing has been appended into the output buffer if
3125         // the replacement string triggers IllegalArgumentException.
3126         p = Pattern.compile("(abc)");
3127         m = p.matcher("abcd");
3128         result = new StringBuffer();
3129         m.find();
3130         try {
3131             m.appendReplacement(result, ("xyz$g"));
3132             failCount++;
3133         } catch (IllegalArgumentException iae) {
3134             if (result.length() != 0)
3135                 failCount++;
3136         }
3137 
3138         report("SB Substitution");
3139     }
3140 
3141     /**
3142      * Tests the usage of Matcher.appendReplacement() with literal
3143      * and group substitutions.
3144      */
3145     private static void stringbuilderSubstitute() throws Exception {
3146         // SB substitution with literal
3147         String blah = "zzzblahzzz";
3148         Pattern p = Pattern.compile("blah");
3149         Matcher m = p.matcher(blah);
3150         StringBuilder result = new StringBuilder();
3151         try {
3152             m.appendReplacement(result, "blech");
3153             failCount++;
3154         } catch (IllegalStateException e) {
3155         }
3156         m.find();
3157         m.appendReplacement(result, "blech");
3158         if (!result.toString().equals("zzzblech"))
3159             failCount++;
3160 
3161         m.appendTail(result);
3162         if (!result.toString().equals("zzzblechzzz"))
3163             failCount++;
3164 
3165         // SB substitution with groups
3166         blah = "zzzabcdzzz";
3167         p = Pattern.compile("(ab)(cd)*");
3168         m = p.matcher(blah);
3169         result = new StringBuilder();
3170         try {
3171             m.appendReplacement(result, "$1");
3172             failCount++;
3173         } catch (IllegalStateException e) {
3174         }
3175         m.find();
3176         m.appendReplacement(result, "$1");
3177         if (!result.toString().equals("zzzab"))
3178             failCount++;
3179 
3180         m.appendTail(result);
3181         if (!result.toString().equals("zzzabzzz"))
3182             failCount++;
3183 
3184         // SB substitution with 3 groups
3185         blah = "zzzabcdcdefzzz";
3186         p = Pattern.compile("(ab)(cd)*(ef)");
3187         m = p.matcher(blah);
3188         result = new StringBuilder();
3189         try {
3190             m.appendReplacement(result, "$1w$2w$3");
3191             failCount++;
3192         } catch (IllegalStateException e) {
3193         }
3194         m.find();
3195         m.appendReplacement(result, "$1w$2w$3");
3196         if (!result.toString().equals("zzzabwcdwef"))
3197             failCount++;
3198 
3199         m.appendTail(result);
3200         if (!result.toString().equals("zzzabwcdwefzzz"))
3201             failCount++;
3202 
3203         // SB substitution with groups and three matches
3204         // skipping middle match
3205         blah = "zzzabcdzzzabcddzzzabcdzzz";
3206         p = Pattern.compile("(ab)(cd*)");
3207         m = p.matcher(blah);
3208         result = new StringBuilder();
3209         try {
3210             m.appendReplacement(result, "$1");
3211             failCount++;
3212         } catch (IllegalStateException e) {
3213         }
3214         m.find();
3215         m.appendReplacement(result, "$1");
3216         if (!result.toString().equals("zzzab"))
3217             failCount++;
3218 
3219         m.find();
3220         m.find();
3221         m.appendReplacement(result, "$2");
3222         if (!result.toString().equals("zzzabzzzabcddzzzcd"))
3223             failCount++;
3224 
3225         m.appendTail(result);
3226         if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
3227             failCount++;
3228 
3229         // Check to make sure escaped $ is ignored
3230         blah = "zzzabcdcdefzzz";
3231         p = Pattern.compile("(ab)(cd)*(ef)");
3232         m = p.matcher(blah);
3233         result = new StringBuilder();
3234         m.find();
3235         m.appendReplacement(result, "$1w\\$2w$3");
3236         if (!result.toString().equals("zzzabw$2wef"))
3237             failCount++;
3238 
3239         m.appendTail(result);
3240         if (!result.toString().equals("zzzabw$2wefzzz"))
3241             failCount++;
3242 
3243         // Check to make sure a reference to nonexistent group causes error
3244         blah = "zzzabcdcdefzzz";
3245         p = Pattern.compile("(ab)(cd)*(ef)");
3246         m = p.matcher(blah);
3247         result = new StringBuilder();
3248         m.find();
3249         try {
3250             m.appendReplacement(result, "$1w$5w$3");
3251             failCount++;
3252         } catch (IndexOutOfBoundsException ioobe) {
3253             // Correct result
3254         }
3255 
3256         // Check double digit group references
3257         blah = "zzz123456789101112zzz";
3258         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3259         m = p.matcher(blah);
3260         result = new StringBuilder();
3261         m.find();
3262         m.appendReplacement(result, "$1w$11w$3");
3263         if (!result.toString().equals("zzz1w11w3"))
3264             failCount++;
3265 
3266         // Check to make sure it backs off $15 to $1 if only three groups
3267         blah = "zzzabcdcdefzzz";
3268         p = Pattern.compile("(ab)(cd)*(ef)");
3269         m = p.matcher(blah);
3270         result = new StringBuilder();
3271         m.find();
3272         m.appendReplacement(result, "$1w$15w$3");
3273         if (!result.toString().equals("zzzabwab5wef"))
3274             failCount++;
3275 
3276 
3277         // Supplementary character test
3278         // SB substitution with literal
3279         blah = toSupplementaries("zzzblahzzz");
3280         p = Pattern.compile(toSupplementaries("blah"));
3281         m = p.matcher(blah);
3282         result = new StringBuilder();
3283         try {
3284             m.appendReplacement(result, toSupplementaries("blech"));
3285             failCount++;
3286         } catch (IllegalStateException e) {
3287         }
3288         m.find();
3289         m.appendReplacement(result, toSupplementaries("blech"));
3290         if (!result.toString().equals(toSupplementaries("zzzblech")))
3291             failCount++;
3292         m.appendTail(result);
3293         if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
3294             failCount++;
3295 
3296         // SB substitution with groups
3297         blah = toSupplementaries("zzzabcdzzz");
3298         p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
3299         m = p.matcher(blah);
3300         result = new StringBuilder();
3301         try {
3302             m.appendReplacement(result, "$1");
3303             failCount++;
3304         } catch (IllegalStateException e) {
3305         }
3306         m.find();
3307         m.appendReplacement(result, "$1");
3308         if (!result.toString().equals(toSupplementaries("zzzab")))
3309             failCount++;
3310 
3311         m.appendTail(result);
3312         if (!result.toString().equals(toSupplementaries("zzzabzzz")))
3313             failCount++;
3314 
3315         // SB substitution with 3 groups
3316         blah = toSupplementaries("zzzabcdcdefzzz");
3317         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3318         m = p.matcher(blah);
3319         result = new StringBuilder();
3320         try {
3321             m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3322             failCount++;
3323         } catch (IllegalStateException e) {
3324         }
3325         m.find();
3326         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3327         if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
3328             failCount++;
3329 
3330         m.appendTail(result);
3331         if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
3332             failCount++;
3333 
3334         // SB substitution with groups and three matches
3335         // skipping middle match
3336         blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
3337         p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
3338         m = p.matcher(blah);
3339         result = new StringBuilder();
3340         try {
3341             m.appendReplacement(result, "$1");
3342             failCount++;
3343         } catch (IllegalStateException e) {
3344         }
3345         m.find();
3346         m.appendReplacement(result, "$1");
3347         if (!result.toString().equals(toSupplementaries("zzzab")))
3348             failCount++;
3349 
3350         m.find();
3351         m.find();
3352         m.appendReplacement(result, "$2");
3353         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
3354             failCount++;
3355 
3356         m.appendTail(result);
3357         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
3358             failCount++;
3359 
3360         // Check to make sure escaped $ is ignored
3361         blah = toSupplementaries("zzzabcdcdefzzz");
3362         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3363         m = p.matcher(blah);
3364         result = new StringBuilder();
3365         m.find();
3366         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
3367         if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
3368             failCount++;
3369 
3370         m.appendTail(result);
3371         if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
3372             failCount++;
3373 
3374         // Check to make sure a reference to nonexistent group causes error
3375         blah = toSupplementaries("zzzabcdcdefzzz");
3376         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3377         m = p.matcher(blah);
3378         result = new StringBuilder();
3379         m.find();
3380         try {
3381             m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
3382             failCount++;
3383         } catch (IndexOutOfBoundsException ioobe) {
3384             // Correct result
3385         }
3386         // Check double digit group references
3387         blah = toSupplementaries("zzz123456789101112zzz");
3388         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3389         m = p.matcher(blah);
3390         result = new StringBuilder();
3391         m.find();
3392         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
3393         if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
3394             failCount++;
3395 
3396         // Check to make sure it backs off $15 to $1 if only three groups
3397         blah = toSupplementaries("zzzabcdcdefzzz");
3398         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3399         m = p.matcher(blah);
3400         result = new StringBuilder();
3401         m.find();
3402         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3403         if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
3404             failCount++;
3405         // Check nothing has been appended into the output buffer if
3406         // the replacement string triggers IllegalArgumentException.
3407         p = Pattern.compile("(abc)");
3408         m = p.matcher("abcd");
3409         result = new StringBuilder();
3410         m.find();
3411         try {
3412             m.appendReplacement(result, ("xyz$g"));
3413             failCount++;
3414         } catch (IllegalArgumentException iae) {
3415             if (result.length() != 0)
3416                 failCount++;
3417         }
3418         report("SB Substitution 2");
3419     }
3420 
3421     /*
3422      * 5 groups of characters are created to make a substitution string.
3423      * A base string will be created including random lead chars, the
3424      * substitution string, and random trailing chars.
3425      * A pattern containing the 5 groups is searched for and replaced with:
3426      * random group + random string + random group.
3427      * The results are checked for correctness.
3428      */
3429     private static void substitutionBasher() {
3430         for (int runs = 0; runs<1000; runs++) {
3431             // Create a base string to work in
3432             int leadingChars = generator.nextInt(10);
3433             StringBuffer baseBuffer = new StringBuffer(100);
3434             String leadingString = getRandomAlphaString(leadingChars);
3435             baseBuffer.append(leadingString);
3436 
3437             // Create 5 groups of random number of random chars
3438             // Create the string to substitute
3439             // Create the pattern string to search for
3440             StringBuffer bufferToSub = new StringBuffer(25);
3441             StringBuffer bufferToPat = new StringBuffer(50);
3442             String[] groups = new String[5];
3443             for(int i=0; i<5; i++) {
3444                 int aGroupSize = generator.nextInt(5)+1;
3445                 groups[i] = getRandomAlphaString(aGroupSize);
3446                 bufferToSub.append(groups[i]);
3447                 bufferToPat.append('(');
3448                 bufferToPat.append(groups[i]);
3449                 bufferToPat.append(')');
3450             }
3451             String stringToSub = bufferToSub.toString();
3452             String pattern = bufferToPat.toString();
3453 
3454             // Place sub string into working string at random index
3455             baseBuffer.append(stringToSub);
3456 
3457             // Append random chars to end
3458             int trailingChars = generator.nextInt(10);
3459             String trailingString = getRandomAlphaString(trailingChars);
3460             baseBuffer.append(trailingString);
3461             String baseString = baseBuffer.toString();
3462 
3463             // Create test pattern and matcher
3464             Pattern p = Pattern.compile(pattern);
3465             Matcher m = p.matcher(baseString);
3466 
3467             // Reject candidate if pattern happens to start early
3468             m.find();
3469             if (m.start() < leadingChars)
3470                 continue;
3471 
3472             // Reject candidate if more than one match
3473             if (m.find())
3474                 continue;
3475 
3476             // Construct a replacement string with :
3477             // random group + random string + random group
3478             StringBuffer bufferToRep = new StringBuffer();
3479             int groupIndex1 = generator.nextInt(5);
3480             bufferToRep.append("$" + (groupIndex1 + 1));
3481             String randomMidString = getRandomAlphaString(5);
3482             bufferToRep.append(randomMidString);
3483             int groupIndex2 = generator.nextInt(5);
3484             bufferToRep.append("$" + (groupIndex2 + 1));
3485             String replacement = bufferToRep.toString();
3486 
3487             // Do the replacement
3488             String result = m.replaceAll(replacement);
3489 
3490             // Construct expected result
3491             StringBuffer bufferToRes = new StringBuffer();
3492             bufferToRes.append(leadingString);
3493             bufferToRes.append(groups[groupIndex1]);
3494             bufferToRes.append(randomMidString);
3495             bufferToRes.append(groups[groupIndex2]);
3496             bufferToRes.append(trailingString);
3497             String expectedResult = bufferToRes.toString();
3498 
3499             // Check results
3500             if (!result.equals(expectedResult))
3501                 failCount++;
3502         }
3503 
3504         report("Substitution Basher");
3505     }
3506 
3507     /*
3508      * 5 groups of characters are created to make a substitution string.
3509      * A base string will be created including random lead chars, the
3510      * substitution string, and random trailing chars.
3511      * A pattern containing the 5 groups is searched for and replaced with:
3512      * random group + random string + random group.
3513      * The results are checked for correctness.
3514      */
3515     private static void substitutionBasher2() {
3516         for (int runs = 0; runs<1000; runs++) {
3517             // Create a base string to work in
3518             int leadingChars = generator.nextInt(10);
3519             StringBuilder baseBuffer = new StringBuilder(100);
3520             String leadingString = getRandomAlphaString(leadingChars);
3521             baseBuffer.append(leadingString);
3522 
3523             // Create 5 groups of random number of random chars
3524             // Create the string to substitute
3525             // Create the pattern string to search for
3526             StringBuilder bufferToSub = new StringBuilder(25);
3527             StringBuilder bufferToPat = new StringBuilder(50);
3528             String[] groups = new String[5];
3529             for(int i=0; i<5; i++) {
3530                 int aGroupSize = generator.nextInt(5)+1;
3531                 groups[i] = getRandomAlphaString(aGroupSize);
3532                 bufferToSub.append(groups[i]);
3533                 bufferToPat.append('(');
3534                 bufferToPat.append(groups[i]);
3535                 bufferToPat.append(')');
3536             }
3537             String stringToSub = bufferToSub.toString();
3538             String pattern = bufferToPat.toString();
3539 
3540             // Place sub string into working string at random index
3541             baseBuffer.append(stringToSub);
3542 
3543             // Append random chars to end
3544             int trailingChars = generator.nextInt(10);
3545             String trailingString = getRandomAlphaString(trailingChars);
3546             baseBuffer.append(trailingString);
3547             String baseString = baseBuffer.toString();
3548 
3549             // Create test pattern and matcher
3550             Pattern p = Pattern.compile(pattern);
3551             Matcher m = p.matcher(baseString);
3552 
3553             // Reject candidate if pattern happens to start early
3554             m.find();
3555             if (m.start() < leadingChars)
3556                 continue;
3557 
3558             // Reject candidate if more than one match
3559             if (m.find())
3560                 continue;
3561 
3562             // Construct a replacement string with :
3563             // random group + random string + random group
3564             StringBuilder bufferToRep = new StringBuilder();
3565             int groupIndex1 = generator.nextInt(5);
3566             bufferToRep.append("$" + (groupIndex1 + 1));
3567             String randomMidString = getRandomAlphaString(5);
3568             bufferToRep.append(randomMidString);
3569             int groupIndex2 = generator.nextInt(5);
3570             bufferToRep.append("$" + (groupIndex2 + 1));
3571             String replacement = bufferToRep.toString();
3572 
3573             // Do the replacement
3574             String result = m.replaceAll(replacement);
3575 
3576             // Construct expected result
3577             StringBuilder bufferToRes = new StringBuilder();
3578             bufferToRes.append(leadingString);
3579             bufferToRes.append(groups[groupIndex1]);
3580             bufferToRes.append(randomMidString);
3581             bufferToRes.append(groups[groupIndex2]);
3582             bufferToRes.append(trailingString);
3583             String expectedResult = bufferToRes.toString();
3584 
3585             // Check results
3586             if (!result.equals(expectedResult)) {
3587                 failCount++;
3588             }
3589         }
3590 
3591         report("Substitution Basher 2");
3592     }
3593 
3594     /**
3595      * Checks the handling of some escape sequences that the Pattern
3596      * class should process instead of the java compiler. These are
3597      * not in the file because the escapes should be be processed
3598      * by the Pattern class when the regex is compiled.
3599      */
3600     private static void escapes() throws Exception {
3601         Pattern p = Pattern.compile("\\043");
3602         Matcher m = p.matcher("#");
3603         if (!m.find())
3604             failCount++;
3605 
3606         p = Pattern.compile("\\x23");
3607         m = p.matcher("#");
3608         if (!m.find())
3609             failCount++;
3610 
3611         p = Pattern.compile("\\u0023");
3612         m = p.matcher("#");
3613         if (!m.find())
3614             failCount++;
3615 
3616         report("Escape sequences");
3617     }
3618 
3619     /**
3620      * Checks the handling of blank input situations. These
3621      * tests are incompatible with my test file format.
3622      */
3623     private static void blankInput() throws Exception {
3624         Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3625         Matcher m = p.matcher("");
3626         if (m.find())
3627             failCount++;
3628 
3629         p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3630         m = p.matcher("");
3631         if (!m.find())
3632             failCount++;
3633 
3634         p = Pattern.compile("abc");
3635         m = p.matcher("");
3636         if (m.find())
3637             failCount++;
3638 
3639         p = Pattern.compile("a*");
3640         m = p.matcher("");
3641         if (!m.find())
3642             failCount++;
3643 
3644         report("Blank input");
3645     }
3646 
3647     /**
3648      * Tests the Boyer-Moore pattern matching of a character sequence
3649      * on randomly generated patterns.
3650      */
3651     private static void bm() throws Exception {
3652         doBnM('a');
3653         report("Boyer Moore (ASCII)");
3654 
3655         doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3656         report("Boyer Moore (Supplementary)");
3657     }
3658 
3659     private static void doBnM(int baseCharacter) throws Exception {
3660         int achar=0;
3661 
3662         for (int i=0; i<100; i++) {
3663             // Create a short pattern to search for
3664             int patternLength = generator.nextInt(7) + 4;
3665             StringBuffer patternBuffer = new StringBuffer(patternLength);
3666             String pattern;
3667             retry: for (;;) {
3668                 for (int x=0; x<patternLength; x++) {
3669                     int ch = baseCharacter + generator.nextInt(26);
3670                     if (Character.isSupplementaryCodePoint(ch)) {
3671                         patternBuffer.append(Character.toChars(ch));
3672                     } else {
3673                         patternBuffer.append((char)ch);
3674                     }
3675                 }
3676                 pattern = patternBuffer.toString();
3677 
3678                 // Avoid patterns that start and end with the same substring
3679                 // See JDK-6854417
3680                 for (int x=1; x < pattern.length(); x++) {
3681                     if (pattern.startsWith(pattern.substring(x)))
3682                         continue retry;
3683                 }
3684                 break;
3685             }
3686             Pattern p = Pattern.compile(pattern);
3687 
3688             // Create a buffer with random ASCII chars that does
3689             // not match the sample
3690             String toSearch = null;
3691             StringBuffer s = null;
3692             Matcher m = p.matcher("");
3693             do {
3694                 s = new StringBuffer(100);
3695                 for (int x=0; x<100; x++) {
3696                     int ch = baseCharacter + generator.nextInt(26);
3697                     if (Character.isSupplementaryCodePoint(ch)) {
3698                         s.append(Character.toChars(ch));
3699                     } else {
3700                         s.append((char)ch);
3701                     }
3702                 }
3703                 toSearch = s.toString();
3704                 m.reset(toSearch);
3705             } while (m.find());
3706 
3707             // Insert the pattern at a random spot
3708             int insertIndex = generator.nextInt(99);
3709             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3710                 insertIndex++;
3711             s = s.insert(insertIndex, pattern);
3712             toSearch = s.toString();
3713 
3714             // Make sure that the pattern is found
3715             m.reset(toSearch);
3716             if (!m.find())
3717                 failCount++;
3718 
3719             // Make sure that the match text is the pattern
3720             if (!m.group().equals(pattern))
3721                 failCount++;
3722 
3723             // Make sure match occured at insertion point
3724             if (m.start() != insertIndex)
3725                 failCount++;
3726         }
3727     }
3728 
3729     /**
3730      * Tests the matching of slices on randomly generated patterns.
3731      * The Boyer-Moore optimization is not done on these patterns
3732      * because it uses unicode case folding.
3733      */
3734     private static void slice() throws Exception {
3735         doSlice(Character.MAX_VALUE);
3736         report("Slice");
3737 
3738         doSlice(Character.MAX_CODE_POINT);
3739         report("Slice (Supplementary)");
3740     }
3741 
3742     private static void doSlice(int maxCharacter) throws Exception {
3743         Random generator = new Random();
3744         int achar=0;
3745 
3746         for (int i=0; i<100; i++) {
3747             // Create a short pattern to search for
3748             int patternLength = generator.nextInt(7) + 4;
3749             StringBuffer patternBuffer = new StringBuffer(patternLength);
3750             for (int x=0; x<patternLength; x++) {
3751                 int randomChar = 0;
3752                 while (!Character.isLetterOrDigit(randomChar))
3753                     randomChar = generator.nextInt(maxCharacter);
3754                 if (Character.isSupplementaryCodePoint(randomChar)) {
3755                     patternBuffer.append(Character.toChars(randomChar));
3756                 } else {
3757                     patternBuffer.append((char) randomChar);
3758                 }
3759             }
3760             String pattern =  patternBuffer.toString();
3761             Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3762 
3763             // Create a buffer with random chars that does not match the sample
3764             String toSearch = null;
3765             StringBuffer s = null;
3766             Matcher m = p.matcher("");
3767             do {
3768                 s = new StringBuffer(100);
3769                 for (int x=0; x<100; x++) {
3770                     int randomChar = 0;
3771                     while (!Character.isLetterOrDigit(randomChar))
3772                         randomChar = generator.nextInt(maxCharacter);
3773                     if (Character.isSupplementaryCodePoint(randomChar)) {
3774                         s.append(Character.toChars(randomChar));
3775                     } else {
3776                         s.append((char) randomChar);
3777                     }
3778                 }
3779                 toSearch = s.toString();
3780                 m.reset(toSearch);
3781             } while (m.find());
3782 
3783             // Insert the pattern at a random spot
3784             int insertIndex = generator.nextInt(99);
3785             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3786                 insertIndex++;
3787             s = s.insert(insertIndex, pattern);
3788             toSearch = s.toString();
3789 
3790             // Make sure that the pattern is found
3791             m.reset(toSearch);
3792             if (!m.find())
3793                 failCount++;
3794 
3795             // Make sure that the match text is the pattern
3796             if (!m.group().equals(pattern))
3797                 failCount++;
3798 
3799             // Make sure match occured at insertion point
3800             if (m.start() != insertIndex)
3801                 failCount++;
3802         }
3803     }
3804 
3805     private static void explainFailure(String pattern, String data,
3806                                        String expected, String actual) {
3807         System.err.println("----------------------------------------");
3808         System.err.println("Pattern = "+pattern);
3809         System.err.println("Data = "+data);
3810         System.err.println("Expected = " + expected);
3811         System.err.println("Actual   = " + actual);
3812     }
3813 
3814     private static void explainFailure(String pattern, String data,
3815                                        Throwable t) {
3816         System.err.println("----------------------------------------");
3817         System.err.println("Pattern = "+pattern);
3818         System.err.println("Data = "+data);
3819         t.printStackTrace(System.err);
3820     }
3821 
3822     // Testing examples from a file
3823 
3824     /**
3825      * Goes through the file "TestCases.txt" and creates many patterns
3826      * described in the file, matching the patterns against input lines in
3827      * the file, and comparing the results against the correct results
3828      * also found in the file. The file format is described in comments
3829      * at the head of the file.
3830      */
3831     private static void processFile(String fileName) throws Exception {
3832         File testCases = new File(System.getProperty("test.src", "."),
3833                                   fileName);
3834         FileInputStream in = new FileInputStream(testCases);
3835         BufferedReader r = new BufferedReader(new InputStreamReader(in));
3836 
3837         // Process next test case.
3838         String aLine;
3839         while((aLine = r.readLine()) != null) {
3840             // Read a line for pattern
3841             String patternString = grabLine(r);
3842             Pattern p = null;
3843             try {
3844                 p = compileTestPattern(patternString);
3845             } catch (PatternSyntaxException e) {
3846                 String dataString = grabLine(r);
3847                 String expectedResult = grabLine(r);
3848                 if (expectedResult.startsWith("error"))
3849                     continue;
3850                 explainFailure(patternString, dataString, e);
3851                 failCount++;
3852                 continue;
3853             }
3854 
3855             // Read a line for input string
3856             String dataString = grabLine(r);
3857             Matcher m = p.matcher(dataString);
3858             StringBuffer result = new StringBuffer();
3859 
3860             // Check for IllegalStateExceptions before a match
3861             failCount += preMatchInvariants(m);
3862 
3863             boolean found = m.find();
3864 
3865             if (found)
3866                 failCount += postTrueMatchInvariants(m);
3867             else
3868                 failCount += postFalseMatchInvariants(m);
3869 
3870             if (found) {
3871                 result.append("true ");
3872                 result.append(m.group(0) + " ");
3873             } else {
3874                 result.append("false ");
3875             }
3876 
3877             result.append(m.groupCount());
3878 
3879             if (found) {
3880                 for (int i=1; i<m.groupCount()+1; i++)
3881                     if (m.group(i) != null)
3882                         result.append(" " +m.group(i));
3883             }
3884 
3885             // Read a line for the expected result
3886             String expectedResult = grabLine(r);
3887 
3888             if (!result.toString().equals(expectedResult)) {
3889                 explainFailure(patternString, dataString, expectedResult, result.toString());
3890                 failCount++;
3891             }
3892         }
3893 
3894         report(fileName);
3895     }
3896 
3897     private static int preMatchInvariants(Matcher m) {
3898         int failCount = 0;
3899         try {
3900             m.start();
3901             failCount++;
3902         } catch (IllegalStateException ise) {}
3903         try {
3904             m.end();
3905             failCount++;
3906         } catch (IllegalStateException ise) {}
3907         try {
3908             m.group();
3909             failCount++;
3910         } catch (IllegalStateException ise) {}
3911         return failCount;
3912     }
3913 
3914     private static int postFalseMatchInvariants(Matcher m) {
3915         int failCount = 0;
3916         try {
3917             m.group();
3918             failCount++;
3919         } catch (IllegalStateException ise) {}
3920         try {
3921             m.start();
3922             failCount++;
3923         } catch (IllegalStateException ise) {}
3924         try {
3925             m.end();
3926             failCount++;
3927         } catch (IllegalStateException ise) {}
3928         return failCount;
3929     }
3930 
3931     private static int postTrueMatchInvariants(Matcher m) {
3932         int failCount = 0;
3933         //assert(m.start() = m.start(0);
3934         if (m.start() != m.start(0))
3935             failCount++;
3936         //assert(m.end() = m.end(0);
3937         if (m.start() != m.start(0))
3938             failCount++;
3939         //assert(m.group() = m.group(0);
3940         if (!m.group().equals(m.group(0)))
3941             failCount++;
3942         try {
3943             m.group(50);
3944             failCount++;
3945         } catch (IndexOutOfBoundsException ise) {}
3946 
3947         return failCount;
3948     }
3949 
3950     private static Pattern compileTestPattern(String patternString) {
3951         if (!patternString.startsWith("'")) {
3952             return Pattern.compile(patternString);
3953         }
3954         int break1 = patternString.lastIndexOf("'");
3955         String flagString = patternString.substring(
3956                                           break1+1, patternString.length());
3957         patternString = patternString.substring(1, break1);
3958 
3959         if (flagString.equals("i"))
3960             return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3961 
3962         if (flagString.equals("m"))
3963             return Pattern.compile(patternString, Pattern.MULTILINE);
3964 
3965         return Pattern.compile(patternString);
3966     }
3967 
3968     /**
3969      * Reads a line from the input file. Keeps reading lines until a non
3970      * empty non comment line is read. If the line contains a \n then
3971      * these two characters are replaced by a newline char. If a \\uxxxx
3972      * sequence is read then the sequence is replaced by the unicode char.
3973      */
3974     private static String grabLine(BufferedReader r) throws Exception {
3975         int index = 0;
3976         String line = r.readLine();
3977         while (line.startsWith("//") || line.length() < 1)
3978             line = r.readLine();
3979         while ((index = line.indexOf("\\n")) != -1) {
3980             StringBuffer temp = new StringBuffer(line);
3981             temp.replace(index, index+2, "\n");
3982             line = temp.toString();
3983         }
3984         while ((index = line.indexOf("\\u")) != -1) {
3985             StringBuffer temp = new StringBuffer(line);
3986             String value = temp.substring(index+2, index+6);
3987             char aChar = (char)Integer.parseInt(value, 16);
3988             String unicodeChar = "" + aChar;
3989             temp.replace(index, index+6, unicodeChar);
3990             line = temp.toString();
3991         }
3992 
3993         return line;
3994     }
3995 
3996     private static void check(Pattern p, String s, String g, String expected) {
3997         Matcher m = p.matcher(s);
3998         m.find();
3999         if (!m.group(g).equals(expected) ||
4000             s.charAt(m.start(g)) != expected.charAt(0) ||
4001             s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1))
4002             failCount++;
4003     }
4004 
4005     private static void checkReplaceFirst(String p, String s, String r, String expected)
4006     {
4007         if (!expected.equals(Pattern.compile(p)
4008                                     .matcher(s)
4009                                     .replaceFirst(r)))
4010             failCount++;
4011     }
4012 
4013     private static void checkReplaceAll(String p, String s, String r, String expected)
4014     {
4015         if (!expected.equals(Pattern.compile(p)
4016                                     .matcher(s)
4017                                     .replaceAll(r)))
4018             failCount++;
4019     }
4020 
4021     private static void checkExpectedFail(String p) {
4022         try {
4023             Pattern.compile(p);
4024         } catch (PatternSyntaxException pse) {
4025             //pse.printStackTrace();
4026             return;
4027         }
4028         failCount++;
4029     }
4030 
4031     private static void checkExpectedIAE(Matcher m, String g) {
4032         m.find();
4033         try {
4034             m.group(g);
4035         } catch (IllegalArgumentException x) {
4036             //iae.printStackTrace();
4037             try {
4038                 m.start(g);
4039             } catch (IllegalArgumentException xx) {
4040                 try {
4041                     m.start(g);
4042                 } catch (IllegalArgumentException xxx) {
4043                     return;
4044                 }
4045             }
4046         }
4047         failCount++;
4048     }
4049 
4050     private static void checkExpectedNPE(Matcher m) {
4051         m.find();
4052         try {
4053             m.group(null);
4054         } catch (NullPointerException x) {
4055             try {
4056                 m.start(null);
4057             } catch (NullPointerException xx) {
4058                 try {
4059                     m.end(null);
4060                 } catch (NullPointerException xxx) {
4061                     return;
4062                 }
4063             }
4064         }
4065         failCount++;
4066     }
4067 
4068     private static void namedGroupCaptureTest() throws Exception {
4069         check(Pattern.compile("x+(?<gname>y+)z+"),
4070               "xxxyyyzzz",
4071               "gname",
4072               "yyy");
4073 
4074         check(Pattern.compile("x+(?<gname8>y+)z+"),
4075               "xxxyyyzzz",
4076               "gname8",
4077               "yyy");
4078 
4079         //backref
4080         Pattern pattern = Pattern.compile("(a*)bc\\1");
4081         check(pattern, "zzzaabcazzz", true);  // found "abca"
4082 
4083         check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
4084               "zzzaabcaazzz", true);
4085 
4086         check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
4087               "abcdefabc", true);
4088 
4089         check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
4090               "abcdefghijkk", true);
4091 
4092         // Supplementary character tests
4093         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
4094               toSupplementaries("zzzaabcazzz"), true);
4095 
4096         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
4097               toSupplementaries("zzzaabcaazzz"), true);
4098 
4099         check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
4100               toSupplementaries("abcdefabc"), true);
4101 
4102         check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
4103                               "(?<gname>" +
4104                               toSupplementaries("k)") + "\\k<gname>"),
4105               toSupplementaries("abcdefghijkk"), true);
4106 
4107         check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
4108               "xxxyyyzzzyyy",
4109               "gname",
4110               "yyy");
4111 
4112         //replaceFirst/All
4113         checkReplaceFirst("(?<gn>ab)(c*)",
4114                           "abccczzzabcczzzabccc",
4115                           "${gn}",
4116                           "abzzzabcczzzabccc");
4117 
4118         checkReplaceAll("(?<gn>ab)(c*)",
4119                         "abccczzzabcczzzabccc",
4120                         "${gn}",
4121                         "abzzzabzzzab");
4122 
4123 
4124         checkReplaceFirst("(?<gn>ab)(c*)",
4125                           "zzzabccczzzabcczzzabccczzz",
4126                           "${gn}",
4127                           "zzzabzzzabcczzzabccczzz");
4128 
4129         checkReplaceAll("(?<gn>ab)(c*)",
4130                         "zzzabccczzzabcczzzabccczzz",
4131                         "${gn}",
4132                         "zzzabzzzabzzzabzzz");
4133 
4134         checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
4135                           "zzzabccczzzabcczzzabccczzz",
4136                           "${gn2}",
4137                           "zzzccczzzabcczzzabccczzz");
4138 
4139         checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
4140                         "zzzabccczzzabcczzzabccczzz",
4141                         "${gn2}",
4142                         "zzzccczzzcczzzccczzz");
4143 
4144         //toSupplementaries("(ab)(c*)"));
4145         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4146                            ")(?<gn2>" + toSupplementaries("c") + "*)",
4147                           toSupplementaries("abccczzzabcczzzabccc"),
4148                           "${gn1}",
4149                           toSupplementaries("abzzzabcczzzabccc"));
4150 
4151 
4152         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4153                         ")(?<gn2>" + toSupplementaries("c") + "*)",
4154                         toSupplementaries("abccczzzabcczzzabccc"),
4155                         "${gn1}",
4156                         toSupplementaries("abzzzabzzzab"));
4157 
4158         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4159                            ")(?<gn2>" + toSupplementaries("c") + "*)",
4160                           toSupplementaries("abccczzzabcczzzabccc"),
4161                           "${gn2}",
4162                           toSupplementaries("ccczzzabcczzzabccc"));
4163 
4164 
4165         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4166                         ")(?<gn2>" + toSupplementaries("c") + "*)",
4167                         toSupplementaries("abccczzzabcczzzabccc"),
4168                         "${gn2}",
4169                         toSupplementaries("ccczzzcczzzccc"));
4170 
4171         checkReplaceFirst("(?<dog>Dog)AndCat",
4172                           "zzzDogAndCatzzzDogAndCatzzz",
4173                           "${dog}",
4174                           "zzzDogzzzDogAndCatzzz");
4175 
4176 
4177         checkReplaceAll("(?<dog>Dog)AndCat",
4178                           "zzzDogAndCatzzzDogAndCatzzz",
4179                           "${dog}",
4180                           "zzzDogzzzDogzzz");
4181 
4182         // backref in Matcher & String
4183         if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
4184             !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
4185             failCount++;
4186 
4187         // negative
4188         checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
4189         checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
4190         checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
4191         checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
4192         checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
4193         checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
4194                          "gnameX");
4195         checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
4196         report("NamedGroupCapture");
4197     }
4198 
4199     // This is for bug 6919132
4200     private static void nonBmpClassComplementTest() throws Exception {
4201         Pattern p = Pattern.compile("\\P{Lu}");
4202         Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4203 
4204         if (m.find() && m.start() == 1)
4205             failCount++;
4206 
4207         // from a unicode category
4208         p = Pattern.compile("\\P{Lu}");
4209         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4210         if (m.find())
4211             failCount++;
4212         if (!m.hitEnd())
4213             failCount++;
4214 
4215         // block
4216         p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
4217         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4218         if (m.find() && m.start() == 1)
4219             failCount++;
4220 
4221         p = Pattern.compile("\\P{sc=GRANTHA}");
4222         m = p.matcher(new String(new int[] {0x11350}, 0, 1));
4223         if (m.find() && m.start() == 1)
4224             failCount++;
4225 
4226         report("NonBmpClassComplement");
4227     }
4228 
4229     private static void unicodePropertiesTest() throws Exception {
4230         // different forms
4231         if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
4232             !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
4233             !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
4234             !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
4235             !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
4236             !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
4237             !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
4238             !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
4239             !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
4240             !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
4241             failCount++;
4242 
4243         Matcher common  = Pattern.compile("\\p{script=Common}").matcher("");
4244         Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
4245         Matcher lastSM  = common;
4246         Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
4247 
4248         Matcher latin  = Pattern.compile("\\p{block=basic_latin}").matcher("");
4249         Matcher greek  = Pattern.compile("\\p{InGreek}").matcher("");
4250         Matcher lastBM = latin;
4251         Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
4252 
4253         for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
4254             if (cp >= 0x30000 && (cp & 0x70) == 0){
4255                 continue;  // only pick couple code points, they are the same
4256             }
4257 
4258             // Unicode Script
4259             Character.UnicodeScript script = Character.UnicodeScript.of(cp);
4260             Matcher m;
4261             String str = new String(Character.toChars(cp));
4262             if (script == lastScript) {
4263                  m = lastSM;
4264                  m.reset(str);
4265             } else {
4266                  m  = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
4267             }
4268             if (!m.matches()) {
4269                 failCount++;
4270             }
4271             Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
4272             other.reset(str);
4273             if (other.matches()) {
4274                 failCount++;
4275             }
4276             lastSM = m;
4277             lastScript = script;
4278 
4279             // Unicode Block
4280             Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
4281             if (block == null) {
4282                 //System.out.printf("Not a Block: cp=%x%n", cp);
4283                 continue;
4284             }
4285             if (block == lastBlock) {
4286                  m = lastBM;
4287                  m.reset(str);
4288             } else {
4289                  m  = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
4290             }
4291             if (!m.matches()) {
4292                 failCount++;
4293             }
4294             other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
4295             other.reset(str);
4296             if (other.matches()) {
4297                 failCount++;
4298             }
4299             lastBM = m;
4300             lastBlock = block;
4301         }
4302         report("unicodeProperties");
4303     }
4304 
4305     private static void unicodeHexNotationTest() throws Exception {
4306 
4307         // negative
4308         checkExpectedFail("\\x{-23}");
4309         checkExpectedFail("\\x{110000}");
4310         checkExpectedFail("\\x{}");
4311         checkExpectedFail("\\x{AB[ef]");
4312 
4313         // codepoint
4314         check("^\\x{1033c}$",              "\uD800\uDF3C", true);
4315         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
4316         check("^\\x{D800}\\x{DF3c}+$",     "\uD800\uDF3C", false);
4317         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
4318 
4319         // in class
4320         check("^[\\x{D800}\\x{DF3c}]+$",   "\uD800\uDF3C", false);
4321         check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
4322         check("^[\\x{D800}\\x{DF3C}]+$",   "\uD800\uDF3C", false);
4323         check("^[\\x{DF3C}\\x{D800}]+$",   "\uD800\uDF3C", false);
4324         check("^[\\x{D800}\\x{DF3C}]+$",   "\uDF3C\uD800", true);
4325         check("^[\\x{DF3C}\\x{D800}]+$",   "\uDF3C\uD800", true);
4326 
4327         for (int cp = 0; cp <= 0x10FFFF; cp++) {
4328              String s = "A" + new String(Character.toChars(cp)) + "B";
4329              String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
4330                                              : String.format("\\u%04x\\u%04x",
4331                                                (int) Character.toChars(cp)[0],
4332                                                (int) Character.toChars(cp)[1]);
4333              String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
4334              if (!Pattern.matches("A" + hexUTF16 + "B", s))
4335                  failCount++;
4336              if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
4337                  failCount++;
4338              if (!Pattern.matches("A" + hexCodePoint + "B", s))
4339                  failCount++;
4340              if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
4341                  failCount++;
4342          }
4343          report("unicodeHexNotation");
4344     }
4345 
    /**
     * Cross-checks the predefined character classes against reference
     * predicates. For every code point in [1, 0x30000) a one-code-point
     * string is matched against each class, and the outcome is compared with
     * the corresponding predicate from POSIX_ASCII, POSIX_Unicode or
     * java.lang.Character. All comparisons for one code point form a single
     * short-circuit condition, so at most one failure is counted per code
     * point. A few word-boundary alignment checks follow. Results are
     * reported under "unicodePredefinedClasses".
     *
     * POSIX_ASCII and POSIX_Unicode are defined outside this block;
     * presumably they are the test's own reference implementations.
     */
    private static void unicodeClassesTest() throws Exception {

        // Default (no flag) forms of the POSIX classes, \b and \w
        Matcher lower  = Pattern.compile("\\p{Lower}").matcher("");
        Matcher upper  = Pattern.compile("\\p{Upper}").matcher("");
        Matcher ASCII  = Pattern.compile("\\p{ASCII}").matcher("");
        Matcher alpha  = Pattern.compile("\\p{Alpha}").matcher("");
        Matcher digit  = Pattern.compile("\\p{Digit}").matcher("");
        Matcher alnum  = Pattern.compile("\\p{Alnum}").matcher("");
        Matcher punct  = Pattern.compile("\\p{Punct}").matcher("");
        Matcher graph  = Pattern.compile("\\p{Graph}").matcher("");
        Matcher print  = Pattern.compile("\\p{Print}").matcher("");
        Matcher blank  = Pattern.compile("\\p{Blank}").matcher("");
        Matcher cntrl  = Pattern.compile("\\p{Cntrl}").matcher("");
        Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
        Matcher space  = Pattern.compile("\\p{Space}").matcher("");
        Matcher bound  = Pattern.compile("\\b").matcher("");
        Matcher word   = Pattern.compile("\\w++").matcher("");
        // UNICODE_CHARACTER_CLASS
        Matcher lowerU  = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher upperU  = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher ASCIIU  = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher alphaU  = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher digitU  = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher alnumU  = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher punctU  = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher graphU  = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher printU  = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher blankU  = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher cntrlU  = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher spaceU  = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher boundU  = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher wordU   = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        // embedded flag (?U)
        // NOTE(review): these also pass UNICODE_CHARACTER_CLASS explicitly,
        // so the embedded flag is never exercised on its own - confirm intent.
        Matcher lowerEU  = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher graphEU  = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher wordEU   = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");

        // A word enclosed by word boundaries
        Matcher bwb    = Pattern.compile("\\b\\w\\b").matcher("");
        Matcher bwbU   = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher bwbEU  = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        // properties
        Matcher lowerP  = Pattern.compile("\\p{IsLowerCase}").matcher("");
        Matcher upperP  = Pattern.compile("\\p{IsUpperCase}").matcher("");
        Matcher titleP  = Pattern.compile("\\p{IsTitleCase}").matcher("");
        Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
        Matcher alphaP  = Pattern.compile("\\p{IsAlphabetic}").matcher("");
        Matcher ideogP  = Pattern.compile("\\p{IsIdeographic}").matcher("");
        Matcher cntrlP  = Pattern.compile("\\p{IsControl}").matcher("");
        Matcher spaceP  = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
        Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
        Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
        Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
        // javaMethod
        Matcher lowerJ  = Pattern.compile("\\p{javaLowerCase}").matcher("");
        Matcher upperJ  = Pattern.compile("\\p{javaUpperCase}").matcher("");
        Matcher alphaJ  = Pattern.compile("\\p{javaAlphabetic}").matcher("");
        Matcher ideogJ  = Pattern.compile("\\p{javaIdeographic}").matcher("");
        // GC/C
        Matcher gcC  = Pattern.compile("\\p{C}").matcher("");

        // Each clause below resets one matcher onto the current code point
        // and compares matches() with the reference predicate. The clauses
        // are joined with ||, so evaluation stops at the first disagreement.
        for (int cp = 1; cp < 0x30000; cp++) {
            String str = new String(Character.toChars(cp));
            int type = Character.getType(cp);
            if (// lower
                POSIX_ASCII.isLower(cp)   != lower.reset(str).matches()  ||
                Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
                Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
                Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
                Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
                // upper
                POSIX_ASCII.isUpper(cp)   != upper.reset(str).matches()  ||
                POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
                Character.isUpperCase(cp) != upperP.reset(str).matches() ||
                Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
                // alpha
                POSIX_ASCII.isAlpha(cp)   != alpha.reset(str).matches()  ||
                POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
                Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
                Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
                // digit
                POSIX_ASCII.isDigit(cp)   != digit.reset(str).matches()  ||
                Character.isDigit(cp)     != digitU.reset(str).matches() ||
                // alnum
                POSIX_ASCII.isAlnum(cp)   != alnum.reset(str).matches()  ||
                POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
                // punct
                POSIX_ASCII.isPunct(cp)   != punct.reset(str).matches()  ||
                POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
                // graph
                POSIX_ASCII.isGraph(cp)   != graph.reset(str).matches()  ||
                POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
                POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
                // blank
                POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
                                          != blank.reset(str).matches()  ||
                POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
                // print
                POSIX_ASCII.isPrint(cp)   != print.reset(str).matches()  ||
                POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
                // cntrl
                POSIX_ASCII.isCntrl(cp)   != cntrl.reset(str).matches()  ||
                POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
                (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
                // hexdigit
                POSIX_ASCII.isHexDigit(cp)   != xdigit.reset(str).matches()  ||
                POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
                // space
                POSIX_ASCII.isSpace(cp)   != space.reset(str).matches()  ||
                POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
                POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
                // word
                POSIX_ASCII.isWord(cp)   != word.reset(str).matches()  ||
                POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
                POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
                // bwordb
                POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
                POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
                // properties
                Character.isTitleCase(cp) != titleP.reset(str).matches() ||
                Character.isLetter(cp)    != letterP.reset(str).matches()||
                Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
                Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
                // IsAssigned must be the exact opposite of "type is UNASSIGNED",
                // hence == rather than != in this one clause
                (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
                POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
                POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() ||
                // gc_C
                (Character.CONTROL == type || Character.FORMAT == type ||
                 Character.PRIVATE_USE == type || Character.SURROGATE == type ||
                 Character.UNASSIGNED == type)
                != gcC.reset(str).matches()) {
                failCount++;
            }
        }

        // bounds/word align
        // twoFindIndexes is defined outside this block; presumably it checks
        // that the first two find() hits are at the two given indexes.
        twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
        if (!bwbU.reset("\u0180sherman\u0400").matches())
            failCount++;
        twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
        if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
            failCount++;
        twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
        if (!bwbU.reset("\u0724\u0739\u0724").matches())
            failCount++;
        if (!bwbEU.reset("\u0724\u0739\u0724").matches())
            failCount++;
        report("unicodePredefinedClasses");
    }
4495 
4496     private static void unicodeCharacterNameTest() throws Exception {
4497 
4498         for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) {
4499             if (!Character.isValidCodePoint(cp) ||
4500                 Character.getType(cp) == Character.UNASSIGNED)
4501                 continue;
4502             String str = new String(Character.toChars(cp));
4503             // single
4504             String p = "\\N{" + Character.getName(cp) + "}";
4505             if (!Pattern.compile(p).matcher(str).matches()) {
4506                 failCount++;
4507             }
4508             // class[c]
4509             p = "[\\N{" + Character.getName(cp) + "}]";
4510             if (!Pattern.compile(p).matcher(str).matches()) {
4511                 failCount++;
4512             }
4513         }
4514 
4515         // range
4516         for (int i = 0; i < 10; i++) {
4517             int start = generator.nextInt(20);
4518             int end = start + generator.nextInt(200);
4519             String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]";
4520             String str;
4521             for (int cp = start; cp < end; cp++) {
4522                 str = new String(Character.toChars(cp));
4523                 if (!Pattern.compile(p).matcher(str).matches()) {
4524                     failCount++;
4525                 }
4526             }
4527             str = new String(Character.toChars(end + 10));
4528             if (Pattern.compile(p).matcher(str).matches()) {
4529                 failCount++;
4530             }
4531         }
4532 
4533         // slice
4534         for (int i = 0; i < 10; i++) {
4535             int n = generator.nextInt(256);
4536             int[] buf = new int[n];
4537             StringBuffer sb = new StringBuffer(1024);
4538             for (int j = 0; j < n; j++) {
4539                 int cp = generator.nextInt(1000);
4540                 if (!Character.isValidCodePoint(cp) ||
4541                     Character.getType(cp) == Character.UNASSIGNED)
4542                     cp = 0x4e00;    // just use 4e00
4543                 sb.append("\\N{" + Character.getName(cp) + "}");
4544                 buf[j] = cp;
4545             }
4546             String p = sb.toString();
4547             String str = new String(buf, 0, buf.length);
4548             if (!Pattern.compile(p).matcher(str).matches()) {
4549                 failCount++;
4550             }
4551         }
4552         report("unicodeCharacterName");
4553     }
4554 
4555     private static void horizontalAndVerticalWSTest() throws Exception {
4556         String hws = new String (new char[] {
4557                                      0x09, 0x20, 0xa0, 0x1680, 0x180e,
4558                                      0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
4559                                      0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
4560                                      0x202f, 0x205f, 0x3000 });
4561         String vws = new String (new char[] {
4562                                      0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
4563         if (!Pattern.compile("\\h+").matcher(hws).matches() ||
4564             !Pattern.compile("[\\h]+").matcher(hws).matches())
4565             failCount++;
4566         if (Pattern.compile("\\H").matcher(hws).find() ||
4567             Pattern.compile("[\\H]").matcher(hws).find())
4568             failCount++;
4569         if (!Pattern.compile("\\v+").matcher(vws).matches() ||
4570             !Pattern.compile("[\\v]+").matcher(vws).matches())
4571             failCount++;
4572         if (Pattern.compile("\\V").matcher(vws).find() ||
4573             Pattern.compile("[\\V]").matcher(vws).find())
4574             failCount++;
4575         String prefix = "abcd";
4576         String suffix = "efgh";
4577         String ng = "A";
4578         for (int i = 0; i < hws.length(); i++) {
4579             String c = String.valueOf(hws.charAt(i));
4580             Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
4581             if (!m.find() || !c.equals(m.group()))
4582                 failCount++;
4583             m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
4584             if (!m.find() || !c.equals(m.group()))
4585                 failCount++;
4586 
4587             m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
4588             if (!m.find() || !ng.equals(m.group()))
4589                 failCount++;
4590             m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
4591             if (!m.find() || !ng.equals(m.group()))
4592                 failCount++;
4593         }
4594         for (int i = 0; i < vws.length(); i++) {
4595             String c = String.valueOf(vws.charAt(i));
4596             Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
4597             if (!m.find() || !c.equals(m.group()))
4598                 failCount++;
4599             m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
4600             if (!m.find() || !c.equals(m.group()))
4601                 failCount++;
4602 
4603             m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
4604             if (!m.find() || !ng.equals(m.group()))
4605                 failCount++;
4606             m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
4607             if (!m.find() || !ng.equals(m.group()))
4608                 failCount++;
4609         }
4610         // \v in range is interpreted as 0x0B. This is the undocumented behavior
4611         if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
4612             failCount++;
4613         report("horizontalAndVerticalWSTest");
4614     }
4615 
4616     private static void linebreakTest() throws Exception {
4617         String linebreaks = new String (new char[] {
4618             0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
4619         String crnl = "\r\n";
4620         if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() &&
4621               Pattern.compile("\\R").matcher(crnl).matches() &&
4622               Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() &&
4623               Pattern.compile("\\Rabc").matcher("\rabc").matches() &&
4624               Pattern.compile("\\R\\R").matcher(crnl).matches() &&  // backtracking
4625               Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking
4626               !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029
4627             failCount++;
4628         }
4629         report("linebreakTest");
4630     }
4631 
4632     // #7189363
4633     private static void branchTest() throws Exception {
4634         if (!Pattern.compile("(a)?bc|d").matcher("d").find() ||     // greedy
4635             !Pattern.compile("(a)+bc|d").matcher("d").find() ||
4636             !Pattern.compile("(a)*bc|d").matcher("d").find() ||
4637             !Pattern.compile("(a)??bc|d").matcher("d").find() ||    // reluctant
4638             !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
4639             !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
4640             !Pattern.compile("(a)?+bc|d").matcher("d").find() ||    // possessive
4641             !Pattern.compile("(a)++bc|d").matcher("d").find() ||
4642             !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
4643             !Pattern.compile("(a)?bc|d").matcher("d").matches() ||  // greedy
4644             !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
4645             !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
4646             !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
4647             !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
4648             !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
4649             !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
4650             !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
4651             !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
4652             !Pattern.compile("(a)?bc|de").matcher("de").find() ||   // others
4653             !Pattern.compile("(a)??bc|de").matcher("de").find() ||
4654             !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
4655             !Pattern.compile("(a)??bc|de").matcher("de").matches())
4656             failCount++;
4657         report("branchTest");
4658     }
4659 
4660     // This test is for 8007395
4661     private static void groupCurlyNotFoundSuppTest() throws Exception {
4662         String input = "test this as \ud83d\ude0d";
4663         for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
4664                                           "test(.)*(@[a-zA-Z.]+)",
4665                                           "test([^B])+(@[a-zA-Z.]+)",
4666                                           "test([^B])*(@[a-zA-Z.]+)",
4667                                           "test(\\P{IsControl})+(@[a-zA-Z.]+)",
4668                                           "test(\\P{IsControl})*(@[a-zA-Z.]+)",
4669                                         }) {
4670             Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
4671                                .matcher(input);
4672             try {
4673                 if (m.find()) {
4674                     failCount++;
4675                 }
4676             } catch (Exception x) {
4677                 failCount++;
4678             }
4679         }
4680         report("GroupCurly NotFoundSupp");
4681     }
4682 
4683     // This test is for 8023647
4684     private static void groupCurlyBackoffTest() throws Exception {
4685         if (!"abc1c".matches("(\\w)+1\\1") ||
4686             "abc11".matches("(\\w)+1\\1")) {
4687             failCount++;
4688         }
4689         report("GroupCurly backoff");
4690     }
4691 
4692     // This test is for 8012646
4693     private static void patternAsPredicate() throws Exception {
4694         Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
4695 
4696         if (p.test("")) {
4697             failCount++;
4698         }
4699         if (!p.test("word")) {
4700             failCount++;
4701         }
4702         if (p.test("1234")) {
4703             failCount++;
4704         }
4705         if (!p.test("word1234")) {
4706             failCount++;
4707         }
4708         report("Pattern.asPredicate");
4709     }
4710 
4711     // This test is for 8184692
4712     private static void patternAsMatchPredicate() throws Exception {
4713         Predicate<String> p = Pattern.compile("[a-z]+").asMatchPredicate();
4714 
4715         if (p.test("")) {
4716             failCount++;
4717         }
4718         if (!p.test("word")) {
4719             failCount++;
4720         }
4721         if (p.test("1234word")) {
4722             failCount++;
4723         }
4724         if (p.test("1234")) {
4725             failCount++;
4726         }
4727         report("Pattern.asMatchPredicate");
4728     }
4729 
4730 
4731     // This test is for 8035975
4732     private static void invalidFlags() throws Exception {
4733         for (int flag = 1; flag != 0; flag <<= 1) {
4734             switch (flag) {
4735             case Pattern.CASE_INSENSITIVE:
4736             case Pattern.MULTILINE:
4737             case Pattern.DOTALL:
4738             case Pattern.UNICODE_CASE:
4739             case Pattern.CANON_EQ:
4740             case Pattern.UNIX_LINES:
4741             case Pattern.LITERAL:
4742             case Pattern.UNICODE_CHARACTER_CLASS:
4743             case Pattern.COMMENTS:
4744                 // valid flag, continue
4745                 break;
4746             default:
4747                 try {
4748                     Pattern.compile(".", flag);
4749                     failCount++;
4750                 } catch (IllegalArgumentException expected) {
4751                 }
4752             }
4753         }
4754         report("Invalid compile flags");
4755     }
4756 
4757     // This test is for 8158482
4758     private static void embeddedFlags() throws Exception {
4759         try {
4760             Pattern.compile("(?i).(?-i).");
4761             Pattern.compile("(?m).(?-m).");
4762             Pattern.compile("(?s).(?-s).");
4763             Pattern.compile("(?d).(?-d).");
4764             Pattern.compile("(?u).(?-u).");
4765             Pattern.compile("(?c).(?-c).");
4766             Pattern.compile("(?x).(?-x).");
4767             Pattern.compile("(?U).(?-U).");
4768             Pattern.compile("(?imsducxU).(?-imsducxU).");
4769         } catch (PatternSyntaxException x) {
4770             failCount++;
4771         }
4772         report("Embedded flags");
4773     }
4774 
4775     private static void grapheme() throws Exception {
4776         Files.lines(UCDFiles.GRAPHEME_BREAK_TEST)
4777             .filter( ln -> ln.length() != 0 && !ln.startsWith("#") )
4778             .forEach( ln -> {
4779                     ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", "");
4780                     // System.out.println(str);
4781                     String[] strs = ln.split("\u00f7|\u00d7");
4782                     StringBuilder src = new StringBuilder();
4783                     ArrayList<String> graphemes = new ArrayList<>();
4784                     StringBuilder buf = new StringBuilder();
4785                     int offBk = 0;
4786                     for (String str : strs) {
4787                         if (str.length() == 0)  // first empty str
4788                             continue;
4789                         int cp = Integer.parseInt(str, 16);
4790                         src.appendCodePoint(cp);
4791                         buf.appendCodePoint(cp);
4792                         offBk += (str.length() + 1);
4793                         if (ln.charAt(offBk) == '\u00f7') {    // DIV
4794                             graphemes.add(buf.toString());
4795                             buf = new StringBuilder();
4796                         }
4797                     }
4798                     Pattern p = Pattern.compile("\\X");
4799                     Matcher m = p.matcher(src.toString());
4800                     Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}");
4801                     for (String g : graphemes) {
4802                         // System.out.printf("     grapheme:=[%s]%n", g);
4803                         // (1) test \\X directly
4804                         if (!m.find() || !m.group().equals(g)) {
4805                             System.out.println("Failed \\X [" + ln + "] : " + g);
4806                             failCount++;
4807                         }
4808                         // (2) test \\b{g} + \\X  via Scanner
4809                         boolean hasNext = s.hasNext(p);
4810                         // if (!s.hasNext() || !s.next().equals(next)) {
4811                         if (!s.hasNext(p) || !s.next(p).equals(g)) {
4812                             System.out.println("Failed b{g} [" + ln + "] : " + g);
4813                             failCount++;
4814                         }
4815                     }
4816                 });
4817         // some sanity checks
4818         if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() ||
4819             !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() ||
4820             !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches())
4821             failCount++;
4822         // make sure "\b{n}" still works
4823         if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches())
4824             failCount++;
4825         report("Unicode extended grapheme cluster");
4826     }
4827 
4828     // hangup/timeout if go into exponential backtracking
4829     private static void expoBacktracking() throws Exception {
4830 
4831         Object[][] patternMatchers = {
4832             // 6328855
4833             { "(.*\n*)*",
4834               "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)",
4835               false },
4836             // 6192895
4837             { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4838               "Hello World this is a test this is a test this is a test A",
4839               true },
4840             { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4841               "Hello World this is a test this is a test this is a test \u4e00 ",
4842               false },
4843             { " *([a-z0-9]+ *)+",
4844               "hello world this is a test this is a test this is a test A",
4845               false },
4846             // 4771934 [FIXED] #5013651?
4847             { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4848               "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com",
4849               true },
4850             // 4866249 [FIXED]
4851             { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>",
4852               "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">",
4853               true },
4854             { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4855               "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com",
4856               false },
4857             // 6345469
4858             { "((<[^>]+>)?(((\\s)?)*(\\&nbsp;)?)*((\\s)?)*)+",
4859               "&nbsp;&nbsp; < br/> &nbsp; < / p> <p> <html> <adfasfdasdf>&nbsp; </p>",
4860               true }, // --> matched
4861             { "((<[^>]+>)?(((\\s)?)*(\\&nbsp;)?)*((\\s)?)*)+",
4862               "&nbsp;&nbsp; < br/> &nbsp; < / p> <p> <html> <adfasfdasdf>&nbsp; p </p>",
4863               false },
4864             // 5026912
4865             { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$",
4866               "156580451111112225588087755221111111566969655555555",
4867               false},
4868             // 6988218
4869             { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')",
4870               "'%)) order by ANGEBOT.ID",
4871               false},    // find
4872             // 6693451
4873             { "^(\\s*foo\\s*)*$",
4874               "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo",
4875               true },
4876             { "^(\\s*foo\\s*)*$",
4877               "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo",
4878               false
4879             },
4880             // 7006761
4881             { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true},
4882             { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false},
4883             // 8140212
4884             { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)",
4885               "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()",
4886               false
4887             },
4888             { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true},
4889             { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false},
4890 
4891             { "(x+)*y",  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true },
4892             { "(x+)*y",  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4893 
4894             { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true},
4895             { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4896 
4897             { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false},
4898 
4899             /* not fixed
4900             //8132141   --->    second level exponential backtracking
4901             { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*",
4902               "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" },
4903             */
4904         };
4905 
4906         for (Object[] pm : patternMatchers) {
4907             String p = (String)pm[0];
4908             String s = (String)pm[1];
4909             boolean r = (Boolean)pm[2];
4910             if (r != Pattern.compile(p).matcher(s).matches()) {
4911                 failCount++;
4912             }
4913         }
4914     }
4915 
4916     private static void invalidGroupName() {
4917         // Invalid start of a group name
4918         for (String groupName : List.of("", ".", "0", "\u0040", "\u005b",
4919                 "\u0060", "\u007b", "\u0416")) {
4920             for (String pat : List.of("(?<" + groupName + ">)",
4921                     "\\k<" + groupName + ">")) {
4922                 try {
4923                     Pattern.compile(pat);
4924                     failCount++;
4925                 } catch (PatternSyntaxException e) {
4926                     if (!e.getMessage().startsWith(
4927                             "capturing group name does not start with a"
4928                             + " Latin letter")) {
4929                         failCount++;
4930                     }
4931                 }
4932             }
4933         }
4934         // Invalid char in a group name
4935         for (String groupName : List.of("a.", "b\u0040", "c\u005b",
4936                 "d\u0060", "e\u007b", "f\u0416")) {
4937             for (String pat : List.of("(?<" + groupName + ">)",
4938                     "\\k<" + groupName + ">")) {
4939                 try {
4940                     Pattern.compile(pat);
4941                     failCount++;
4942                 } catch (PatternSyntaxException e) {
4943                     if (!e.getMessage().startsWith(
4944                             "named capturing group is missing trailing '>'")) {
4945                         failCount++;
4946                     }
4947                 }
4948             }
4949         }
4950         report("Invalid capturing group names");
4951     }
4952 
4953     private static void illegalRepetitionRange() {
4954         // huge integers > (2^31 - 1)
4955         String n = BigInteger.valueOf(1L << 32)
4956             .toString();
4957         String m = BigInteger.valueOf(1L << 31)
4958             .add(new BigInteger(80, generator))
4959             .toString();
4960         for (String rep : List.of("", "x", ".", ",", "-1", "2,1",
4961                 n, n + ",", "0," + n, n + "," + m, m, m + ",", "0," + m)) {
4962             String pat = ".{" + rep + "}";
4963             try {
4964                 Pattern.compile(pat);
4965                 failCount++;
4966                 System.out.println("Expected to fail. Pattern: " + pat);
4967             } catch (PatternSyntaxException e) {
4968                 if (!e.getMessage().startsWith("Illegal repetition")) {
4969                     failCount++;
4970                     System.out.println("Unexpected error message: " + e.getMessage());
4971                 }
4972             } catch (Throwable t) {
4973                 failCount++;
4974                 System.out.println("Unexpected exception: " + t);
4975             }
4976         }
4977         report("illegalRepetitionRange");
4978     }
4979 
4980     private static void surrogatePairWithCanonEq() {
4981         try {
4982             Pattern.compile("\ud834\udd21", Pattern.CANON_EQ);
4983         } catch (Throwable t) {
4984             failCount++;
4985             System.out.println("Unexpected exception: " + t);
4986         }
4987         report("surrogatePairWithCanonEq");
4988     }
4989 
4990     private static void controlCharacters() {
4991         char[] contolCharsPairs = { '@', 0x00,
4992             'A', 0x01, 'B', 0x02, 'C', 0x03, 'D', 0x04, 'E', 0x05, 'F', 0x06,
4993             'G', 0x07, 'H', 0x08, 'I', 0x09, 'J', 0x0a, 'K', 0x0b, 'L', 0x0c,
4994             'M', 0x0d, 'N', 0x0e, 'O', 0x0f, 'P', 0x10, 'Q', 0x11, 'R', 0x12,
4995             'S', 0x13, 'T', 0x14, 'U', 0x15, 'V', 0x16, 'W', 0x17, 'X', 0x18,
4996             'Y', 0x19, 'Z', 0x1a,
4997             '[', 0x1b, '\\', 0x1c, ']', 0x1d, '^', 0x1e, '_', 0x1f, '?', 0x7f };
4998         var contolChars = new HashMap<Character, Integer>();
4999         for (int i = 0; i < contolCharsPairs.length; i += 2)
5000             contolChars.put(Character.valueOf(contolCharsPairs[i]),
5001                             Integer.valueOf(contolCharsPairs[i + 1]));
5002 
5003         for (char chP = 0; chP <= 0xff + 16; ++chP) {
5004             String pat = "\\c";
5005             if (chP < 0xff) {
5006                 // \cx with ASCII x
5007                 pat = "\\c" + Character.toString(chP);
5008             } else if (chP == 0xff) {
5009                 // incomplete \c at the end of pattern
5010                 pat = "\\c";
5011             } else if (chP <= 0xff + 8) {
5012                 // \cx with a random non-ASCII char x
5013                 int x = 0xff + generator.nextInt(0xff00 + 1);
5014                 pat = "\\c" + Character.toString(x);
5015             } else {
5016                 // \cx with a random non-ASCII codepoint x
5017                 int x = 0xff + generator.nextInt(Character.MAX_CODE_POINT + 1 - 0xff);
5018                 pat = "\\c" + Character.toString(x);
5019             }
5020             if (contolChars.containsKey(chP)) {
5021                 try {
5022                     Pattern p = Pattern.compile(pat);
5023                     for (int chS = 0; chS < 0xff; ++chS) {
5024                         Matcher m = p.matcher(Character.toString(chS));
5025                         if (m.matches() && contolChars.get(chP) != chS) {
5026                             failCount++;
5027                             System.out.println("Control character 0x" + Integer.toHexString(chS) +
5028                                                " unexpectedly matched pattern " + pat);
5029                         } else if (!m.matches() && contolChars.get(chP) == chS) {
5030                             failCount++;
5031                             System.out.println("Control character 0x" + Integer.toHexString(chS) +
5032                                                " failed to match pattern " + pat);
5033                         }
5034                         if (m.matches() && Character.getType(chS) != Character.CONTROL) {
5035                             failCount++;
5036                             System.out.println("Non-control character 0x" + Integer.toHexString(chS) +
5037                                                " unexpectedly matched pattern " + pat);
5038                         }
5039                     }
5040                 } catch (Throwable t) {
5041                     failCount++;
5042                     System.out.println("Failed to compile pattern " + pat +
5043                                        " due to exception: " + t);
5044                 }
5045             } else {
5046                 try {
5047                     Pattern p = Pattern.compile(pat);
5048                     failCount++;
5049                     System.out.println("Expected to throw an exception when compiling " + pat);
5050                 } catch (PatternSyntaxException expected) {
5051                 } catch (Throwable t) {
5052                     failCount++;
5053                     System.out.println("Unexpected exception when compiling " + pat +
5054                                        " : " + t);
5055                 }
5056             }
5057         }
5058         report("controlCharacters");
5059     }
5060 }