1 /*
   2  * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /**
  25  * @test
  26  * @summary tests RegExp framework (use -Dseed=X to set PRNG seed)
  27  * @author Mike McCloskey
  28  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
  29  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
  30  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  31  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  32  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  33  * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
  34  * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
  35  * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
  36  * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
  37  * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
  38  * 8194667 8197462 8184692 8221431 8224789 8228352
  39  *
  40  * @library /test/lib
  41  * @library /lib/testlibrary/java/lang
  42  * @build jdk.test.lib.RandomFactory
  43  * @run main RegExTest
  44  * @key randomness
  45  */
  46 
  47 import java.io.BufferedReader;
  48 import java.io.ByteArrayInputStream;
  49 import java.io.ByteArrayOutputStream;
  50 import java.io.File;
  51 import java.io.FileInputStream;
  52 import java.io.InputStreamReader;
  53 import java.io.ObjectInputStream;
  54 import java.io.ObjectOutputStream;
  55 import java.math.BigInteger;
  56 import java.nio.CharBuffer;
  57 import java.nio.file.Files;
  58 import java.util.ArrayList;
  59 import java.util.Arrays;
  60 import java.util.List;
  61 import java.util.Random;
  62 import java.util.Scanner;
  63 import java.util.function.Function;
  64 import java.util.function.Predicate;
  65 import java.util.regex.Matcher;
  66 import java.util.regex.MatchResult;
  67 import java.util.regex.Pattern;
  68 import java.util.regex.PatternSyntaxException;
  69 import jdk.test.lib.RandomFactory;
  70 
  71 /**
  72  * This is a test class created to check the operation of
  73  * the Pattern and Matcher classes.
  74  */
  75 public class RegExTest {
  76 
  77     private static Random generator = RandomFactory.getRandom();
  78     private static boolean failure = false;
  79     private static int failCount = 0;
  80     private static String firstFailure = null;
  81 
  82     /**
  83      * Main to interpret arguments and run several tests.
  84      *
  85      */
  86     public static void main(String[] args) throws Exception {
  87         // Most of the tests are in a file
  88         processFile("TestCases.txt");
  89         //processFile("PerlCases.txt");
  90         processFile("BMPTestCases.txt");
  91         processFile("SupplementaryTestCases.txt");
  92 
  93         // These test many randomly generated char patterns
  94         bm();
  95         slice();
  96 
  97         // These are hard to put into the file
  98         escapes();
  99         blankInput();
 100 
 101         // Substitition tests on randomly generated sequences
 102         globalSubstitute();
 103         stringbufferSubstitute();
 104         stringbuilderSubstitute();
 105 
 106         substitutionBasher();
 107         substitutionBasher2();
 108 
 109         // Canonical Equivalence
 110         ceTest();
 111 
 112         // Anchors
 113         anchorTest();
 114 
 115         // boolean match calls
 116         matchesTest();
 117         lookingAtTest();
 118 
 119         // Pattern API
 120         patternMatchesTest();
 121 
 122         // Misc
 123         lookbehindTest();
 124         nullArgumentTest();
 125         backRefTest();
 126         groupCaptureTest();
 127         caretTest();
 128         charClassTest();
 129         emptyPatternTest();
 130         findIntTest();
 131         group0Test();
 132         longPatternTest();
 133         octalTest();
 134         ampersandTest();
 135         negationTest();
 136         splitTest();
 137         appendTest();
 138         caseFoldingTest();
 139         commentsTest();
 140         unixLinesTest();
 141         replaceFirstTest();
 142         gTest();
 143         zTest();
 144         serializeTest();
 145         reluctantRepetitionTest();
 146         multilineDollarTest();
 147         dollarAtEndTest();
 148         caretBetweenTerminatorsTest();
 149         // This RFE rejected in Tiger numOccurrencesTest();
 150         javaCharClassTest();
 151         nonCaptureRepetitionTest();
 152         notCapturedGroupCurlyMatchTest();
 153         escapedSegmentTest();
 154         literalPatternTest();
 155         literalReplacementTest();
 156         regionTest();
 157         toStringTest();
 158         negatedCharClassTest();
 159         findFromTest();
 160         boundsTest();
 161         unicodeWordBoundsTest();
 162         caretAtEndTest();
 163         wordSearchTest();
 164         hitEndTest();
 165         toMatchResultTest();
 166         toMatchResultTest2();
 167         surrogatesInClassTest();
 168         removeQEQuotingTest();
 169         namedGroupCaptureTest();
 170         nonBmpClassComplementTest();
 171         unicodePropertiesTest();
 172         unicodeHexNotationTest();
 173         unicodeClassesTest();
 174         unicodeCharacterNameTest();
 175         horizontalAndVerticalWSTest();
 176         linebreakTest();
 177         branchTest();
 178         groupCurlyNotFoundSuppTest();
 179         groupCurlyBackoffTest();
 180         patternAsPredicate();
 181         patternAsMatchPredicate();
 182         invalidFlags();
 183         embeddedFlags();
 184         grapheme();
 185         expoBacktracking();
 186         invalidGroupName();
 187         illegalRepetitionRange();
 188         surrogatePairWithCanonEq();
 189 
 190         if (failure) {
 191             throw new
 192                 RuntimeException("RegExTest failed, 1st failure: " +
 193                                  firstFailure);
 194         } else {
 195             System.err.println("OKAY: All tests passed.");
 196         }
 197     }
 198 
 199     // Utility functions
 200 
 201     private static String getRandomAlphaString(int length) {
 202         StringBuffer buf = new StringBuffer(length);
 203         for (int i=0; i<length; i++) {
 204             char randChar = (char)(97 + generator.nextInt(26));
 205             buf.append(randChar);
 206         }
 207         return buf.toString();
 208     }
 209 
 210     private static void check(Matcher m, String expected) {
 211         m.find();
 212         if (!m.group().equals(expected))
 213             failCount++;
 214     }
 215 
 216     private static void check(Matcher m, String result, boolean expected) {
 217         m.find();
 218         if (m.group().equals(result) != expected)
 219             failCount++;
 220     }
 221 
 222     private static void check(Pattern p, String s, boolean expected) {
 223         if (p.matcher(s).find() != expected)
 224             failCount++;
 225     }
 226 
 227     private static void check(String p, String s, boolean expected) {
 228         Matcher matcher = Pattern.compile(p).matcher(s);
 229         if (matcher.find() != expected)
 230             failCount++;
 231     }
 232 
 233     private static void check(String p, char c, boolean expected) {
 234         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 235         Pattern pattern = Pattern.compile(propertyPattern);
 236         char[] ca = new char[1]; ca[0] = c;
 237         Matcher matcher = pattern.matcher(new String(ca));
 238         if (!matcher.find())
 239             failCount++;
 240     }
 241 
 242     private static void check(String p, int codePoint, boolean expected) {
 243         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 244         Pattern pattern = Pattern.compile(propertyPattern);
 245         char[] ca = Character.toChars(codePoint);
 246         Matcher matcher = pattern.matcher(new String(ca));
 247         if (!matcher.find())
 248             failCount++;
 249     }
 250 
 251     private static void check(String p, int flag, String input, String s,
 252                               boolean expected)
 253     {
 254         Pattern pattern = Pattern.compile(p, flag);
 255         Matcher matcher = pattern.matcher(input);
 256         if (expected)
 257             check(matcher, s, expected);
 258         else
 259             check(pattern, input, false);
 260     }
 261 
 262     private static void report(String testName) {
 263         int spacesToAdd = 30 - testName.length();
 264         StringBuffer paddedNameBuffer = new StringBuffer(testName);
 265         for (int i=0; i<spacesToAdd; i++)
 266             paddedNameBuffer.append(" ");
 267         String paddedName = paddedNameBuffer.toString();
 268         System.err.println(paddedName + ": " +
 269                            (failCount==0 ? "Passed":"Failed("+failCount+")"));
 270         if (failCount > 0) {
 271             failure = true;
 272 
 273             if (firstFailure == null) {
 274                 firstFailure = testName;
 275             }
 276         }
 277 
 278         failCount = 0;
 279     }
 280 
 281     /**
 282      * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
 283      * supplementary characters. This method does NOT fully take care
 284      * of the regex syntax.
 285      */
 286     private static String toSupplementaries(String s) {
 287         int length = s.length();
 288         StringBuffer sb = new StringBuffer(length * 2);
 289 
 290         for (int i = 0; i < length; ) {
 291             char c = s.charAt(i++);
 292             if (c == '\\') {
 293                 sb.append(c);
 294                 if (i < length) {
 295                     c = s.charAt(i++);
 296                     sb.append(c);
 297                     if (c == 'u') {
 298                         // assume no syntax error
 299                         sb.append(s.charAt(i++));
 300                         sb.append(s.charAt(i++));
 301                         sb.append(s.charAt(i++));
 302                         sb.append(s.charAt(i++));
 303                     }
 304                 }
 305             } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
 306                 sb.append('\ud800').append((char)('\udc00'+c));
 307             } else {
 308                 sb.append(c);
 309             }
 310         }
 311         return sb.toString();
 312     }
 313 
 314     // Regular expression tests
 315 
 316     // This is for bug 6178785
 317     // Test if an expected NPE gets thrown when passing in a null argument
 318     private static boolean check(Runnable test) {
 319         try {
 320             test.run();
 321             failCount++;
 322             return false;
 323         } catch (NullPointerException npe) {
 324             return true;
 325         }
 326     }
 327 
 328     private static void nullArgumentTest() {
 329         check(() -> Pattern.compile(null));
 330         check(() -> Pattern.matches(null, null));
 331         check(() -> Pattern.matches("xyz", null));
 332         check(() -> Pattern.quote(null));
 333         check(() -> Pattern.compile("xyz").split(null));
 334         check(() -> Pattern.compile("xyz").matcher(null));
 335 
 336         final Matcher m = Pattern.compile("xyz").matcher("xyz");
 337         m.matches();
 338         check(() -> m.appendTail((StringBuffer) null));
 339         check(() -> m.appendTail((StringBuilder)null));
 340         check(() -> m.replaceAll((String) null));
 341         check(() -> m.replaceAll((Function<MatchResult, String>)null));
 342         check(() -> m.replaceFirst((String)null));
 343         check(() -> m.replaceFirst((Function<MatchResult, String>) null));
 344         check(() -> m.appendReplacement((StringBuffer)null, null));
 345         check(() -> m.appendReplacement((StringBuilder)null, null));
 346         check(() -> m.reset(null));
 347         check(() -> Matcher.quoteReplacement(null));
 348         //check(() -> m.usePattern(null));
 349 
 350         report("Null Argument");
 351     }
 352 
 353     // This is for bug6635133
 354     // Test if surrogate pair in Unicode escapes can be handled correctly.
 355     private static void surrogatesInClassTest() throws Exception {
 356         Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
 357         Matcher matcher = pattern.matcher("\ud834\udd22");
 358         if (!matcher.find())
 359             failCount++;
 360 
 361         report("Surrogate pair in Unicode escape");
 362     }
 363 
 364     // This is for bug6990617
 365     // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
 366     // char encoding is only 2 or 3 digits instead of 4 and the first quoted
 367     // char is an octal digit.
 368     private static void removeQEQuotingTest() throws Exception {
 369         Pattern pattern =
 370             Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
 371         Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
 372         if (!matcher.find())
 373             failCount++;
 374 
 375         report("Remove Q/E Quoting");
 376     }
 377 
 378     // This is for bug 4988891
 379     // Test toMatchResult to see that it is a copy of the Matcher
 380     // that is not affected by subsequent operations on the original
 381     private static void toMatchResultTest() throws Exception {
 382         Pattern pattern = Pattern.compile("squid");
 383         Matcher matcher = pattern.matcher(
 384             "agiantsquidofdestinyasmallsquidoffate");
 385         matcher.find();
 386         int matcherStart1 = matcher.start();
 387         MatchResult mr = matcher.toMatchResult();
 388         if (mr == matcher)
 389             failCount++;
 390         int resultStart1 = mr.start();
 391         if (matcherStart1 != resultStart1)
 392             failCount++;
 393         matcher.find();
 394         int matcherStart2 = matcher.start();
 395         int resultStart2 = mr.start();
 396         if (matcherStart2 == resultStart2)
 397             failCount++;
 398         if (resultStart1 != resultStart2)
 399             failCount++;
 400         MatchResult mr2 = matcher.toMatchResult();
 401         if (mr == mr2)
 402             failCount++;
 403         if (mr2.start() != matcherStart2)
 404             failCount++;
 405         report("toMatchResult is a copy");
 406     }
 407 
 408     private static void checkExpectedISE(Runnable test) {
 409         try {
 410             test.run();
 411             failCount++;
 412         } catch (IllegalStateException x) {
 413         } catch (IndexOutOfBoundsException xx) {
 414             failCount++;
 415         }
 416     }
 417 
 418     private static void checkExpectedIOOE(Runnable test) {
 419         try {
 420             test.run();
 421             failCount++;
 422         } catch (IndexOutOfBoundsException x) {}
 423     }
 424 
 425     // This is for bug 8074678
 426     // Test the result of toMatchResult throws ISE if no match is availble
 427     private static void toMatchResultTest2() throws Exception {
 428         Matcher matcher = Pattern.compile("nomatch").matcher("hello world");
 429         matcher.find();
 430         MatchResult mr = matcher.toMatchResult();
 431 
 432         checkExpectedISE(() -> mr.start());
 433         checkExpectedISE(() -> mr.start(2));
 434         checkExpectedISE(() -> mr.end());
 435         checkExpectedISE(() -> mr.end(2));
 436         checkExpectedISE(() -> mr.group());
 437         checkExpectedISE(() -> mr.group(2));
 438 
 439         matcher = Pattern.compile("(match)").matcher("there is a match");
 440         matcher.find();
 441         MatchResult mr2 = matcher.toMatchResult();
 442         checkExpectedIOOE(() -> mr2.start(2));
 443         checkExpectedIOOE(() -> mr2.end(2));
 444         checkExpectedIOOE(() -> mr2.group(2));
 445 
 446         report("toMatchResult2 appropriate exceptions");
 447     }
 448 
 449     // This is for bug 5013885
 450     // Must test a slice to see if it reports hitEnd correctly
 451     private static void hitEndTest() throws Exception {
 452         // Basic test of Slice node
 453         Pattern p = Pattern.compile("^squidattack");
 454         Matcher m = p.matcher("squack");
 455         m.find();
 456         if (m.hitEnd())
 457             failCount++;
 458         m.reset("squid");
 459         m.find();
 460         if (!m.hitEnd())
 461             failCount++;
 462 
 463         // Test Slice, SliceA and SliceU nodes
 464         for (int i=0; i<3; i++) {
 465             int flags = 0;
 466             if (i==1) flags = Pattern.CASE_INSENSITIVE;
 467             if (i==2) flags = Pattern.UNICODE_CASE;
 468             p = Pattern.compile("^abc", flags);
 469             m = p.matcher("ad");
 470             m.find();
 471             if (m.hitEnd())
 472                 failCount++;
 473             m.reset("ab");
 474             m.find();
 475             if (!m.hitEnd())
 476                 failCount++;
 477         }
 478 
 479         // Test Boyer-Moore node
 480         p = Pattern.compile("catattack");
 481         m = p.matcher("attack");
 482         m.find();
 483         if (!m.hitEnd())
 484             failCount++;
 485 
 486         p = Pattern.compile("catattack");
 487         m = p.matcher("attackattackattackcatatta");
 488         m.find();
 489         if (!m.hitEnd())
 490             failCount++;
 491 
 492         // 8184706: Matching u+0d at EOL against \R should hit-end
 493         p = Pattern.compile("...\\R");
 494         m = p.matcher("cat" + (char)0x0a);
 495         m.find();
 496         if (m.hitEnd())
 497             failCount++;
 498 
 499         m = p.matcher("cat" + (char)0x0d);
 500         m.find();
 501         if (!m.hitEnd())
 502             failCount++;
 503 
 504         m = p.matcher("cat" + (char)0x0d + (char)0x0a);
 505         m.find();
 506         if (m.hitEnd())
 507             failCount++;
 508 
 509         report("hitEnd");
 510     }
 511 
 512     // This is for bug 4997476
 513     // It is weird code submitted by customer demonstrating a regression
 514     private static void wordSearchTest() throws Exception {
 515         String testString = new String("word1 word2 word3");
 516         Pattern p = Pattern.compile("\\b");
 517         Matcher m = p.matcher(testString);
 518         int position = 0;
 519         int start = 0;
 520         while (m.find(position)) {
 521             start = m.start();
 522             if (start == testString.length())
 523                 break;
 524             if (m.find(start+1)) {
 525                 position = m.start();
 526             } else {
 527                 position = testString.length();
 528             }
 529             if (testString.substring(start, position).equals(" "))
 530                 continue;
 531             if (!testString.substring(start, position-1).startsWith("word"))
 532                 failCount++;
 533         }
 534         report("Customer word search");
 535     }
 536 
 537     // This is for bug 4994840
 538     private static void caretAtEndTest() throws Exception {
 539         // Problem only occurs with multiline patterns
 540         // containing a beginning-of-line caret "^" followed
 541         // by an expression that also matches the empty string.
 542         Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
 543         Matcher matcher = pattern.matcher("\r");
 544         matcher.find();
 545         matcher.find();
 546         report("Caret at end");
 547     }
 548 
 549     // This test is for 4979006
 550     // Check to see if word boundary construct properly handles unicode
 551     // non spacing marks
 552     private static void unicodeWordBoundsTest() throws Exception {
 553         String spaces = "  ";
 554         String wordChar = "a";
 555         String nsm = "\u030a";
 556 
 557         assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
 558 
 559         Pattern pattern = Pattern.compile("\\b");
 560         Matcher matcher = pattern.matcher("");
 561         // S=other B=word character N=non spacing mark .=word boundary
 562         // SS.BB.SS
 563         String input = spaces + wordChar + wordChar + spaces;
 564         twoFindIndexes(input, matcher, 2, 4);
 565         // SS.BBN.SS
 566         input = spaces + wordChar +wordChar + nsm + spaces;
 567         twoFindIndexes(input, matcher, 2, 5);
 568         // SS.BN.SS
 569         input = spaces + wordChar + nsm + spaces;
 570         twoFindIndexes(input, matcher, 2, 4);
 571         // SS.BNN.SS
 572         input = spaces + wordChar + nsm + nsm + spaces;
 573         twoFindIndexes(input, matcher, 2, 5);
 574         // SSN.BB.SS
 575         input = spaces + nsm + wordChar + wordChar + spaces;
 576         twoFindIndexes(input, matcher, 3, 5);
 577         // SS.BNB.SS
 578         input = spaces + wordChar + nsm + wordChar + spaces;
 579         twoFindIndexes(input, matcher, 2, 5);
 580         // SSNNSS
 581         input = spaces + nsm + nsm + spaces;
 582         matcher.reset(input);
 583         if (matcher.find())
 584             failCount++;
 585         // SSN.BBN.SS
 586         input = spaces + nsm + wordChar + wordChar + nsm + spaces;
 587         twoFindIndexes(input, matcher, 3, 6);
 588 
 589         report("Unicode word boundary");
 590     }
 591 
 592     private static void twoFindIndexes(String input, Matcher matcher, int a,
 593                                        int b) throws Exception
 594     {
 595         matcher.reset(input);
 596         matcher.find();
 597         if (matcher.start() != a)
 598             failCount++;
 599         matcher.find();
 600         if (matcher.start() != b)
 601             failCount++;
 602     }
 603 
 604     // This test is for 6284152
 605     static void check(String regex, String input, String[] expected) {
 606         List<String> result = new ArrayList<String>();
 607         Pattern p = Pattern.compile(regex);
 608         Matcher m = p.matcher(input);
 609         while (m.find()) {
 610             result.add(m.group());
 611         }
 612         if (!Arrays.asList(expected).equals(result))
 613             failCount++;
 614     }
 615 
 616     private static void lookbehindTest() throws Exception {
 617         //Positive
 618         check("(?<=%.{0,5})foo\\d",
 619               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 620               new String[]{"foo1", "foo2", "foo3"});
 621 
 622         //boundary at end of the lookbehind sub-regex should work consistently
 623         //with the boundary just after the lookbehind sub-regex
 624         check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
 625         check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
 626         check("(?<!abc )\\bfoo", "abc foo", new String[0]);
 627         check("(?<!abc \\b)foo", "abc foo", new String[0]);
 628 
 629         //Negative
 630         check("(?<!%.{0,5})foo\\d",
 631               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 632               new String[] {"foo4", "foo5"});
 633 
 634         //Positive greedy
 635         check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
 636 
 637         //Positive reluctant
 638         check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
 639 
 640         //supplementary
 641         check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 642               new String[] {"fo\ud800\udc00o"});
 643         check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 644               new String[] {"fo\ud800\udc00o"});
 645         check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
 646               new String[] {"fo\ud800\udc00o"});
 647         check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
 648               new String[] {"fo\ud800\udc00o"});
 649         report("Lookbehind");
 650     }
 651 
 652     // This test is for 4938995
 653     // Check to see if weak region boundaries are transparent to
 654     // lookahead and lookbehind constructs
 655     private static void boundsTest() throws Exception {
 656         String fullMessage = "catdogcat";
 657         Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
 658         Matcher matcher = pattern.matcher("catdogca");
 659         matcher.useTransparentBounds(true);
 660         if (matcher.find())
 661             failCount++;
 662         matcher.reset("atdogcat");
 663         if (matcher.find())
 664             failCount++;
 665         matcher.reset(fullMessage);
 666         if (!matcher.find())
 667             failCount++;
 668         matcher.reset(fullMessage);
 669         matcher.region(0,9);
 670         if (!matcher.find())
 671             failCount++;
 672         matcher.reset(fullMessage);
 673         matcher.region(0,6);
 674         if (!matcher.find())
 675             failCount++;
 676         matcher.reset(fullMessage);
 677         matcher.region(3,6);
 678         if (!matcher.find())
 679             failCount++;
 680         matcher.useTransparentBounds(false);
 681         if (matcher.find())
 682             failCount++;
 683 
 684         // Negative lookahead/lookbehind
 685         pattern = Pattern.compile("(?<!cat)dog(?!cat)");
 686         matcher = pattern.matcher("dogcat");
 687         matcher.useTransparentBounds(true);
 688         matcher.region(0,3);
 689         if (matcher.find())
 690             failCount++;
 691         matcher.reset("catdog");
 692         matcher.region(3,6);
 693         if (matcher.find())
 694             failCount++;
 695         matcher.useTransparentBounds(false);
 696         matcher.reset("dogcat");
 697         matcher.region(0,3);
 698         if (!matcher.find())
 699             failCount++;
 700         matcher.reset("catdog");
 701         matcher.region(3,6);
 702         if (!matcher.find())
 703             failCount++;
 704 
 705         report("Region bounds transparency");
 706     }
 707 
 708     // This test is for 4945394
 709     private static void findFromTest() throws Exception {
 710         String message = "This is 40 $0 message.";
 711         Pattern pat = Pattern.compile("\\$0");
 712         Matcher match = pat.matcher(message);
 713         if (!match.find())
 714             failCount++;
 715         if (match.find())
 716             failCount++;
 717         if (match.find())
 718             failCount++;
 719         report("Check for alternating find");
 720     }
 721 
 722     // This test is for 4872664 and 4892980
 723     private static void negatedCharClassTest() throws Exception {
 724         Pattern pattern = Pattern.compile("[^>]");
 725         Matcher matcher = pattern.matcher("\u203A");
 726         if (!matcher.matches())
 727             failCount++;
 728         pattern = Pattern.compile("[^fr]");
 729         matcher = pattern.matcher("a");
 730         if (!matcher.find())
 731             failCount++;
 732         matcher.reset("\u203A");
 733         if (!matcher.find())
 734             failCount++;
 735         String s = "for";
 736         String result[] = s.split("[^fr]");
 737         if (!result[0].equals("f"))
 738             failCount++;
 739         if (!result[1].equals("r"))
 740             failCount++;
 741         s = "f\u203Ar";
 742         result = s.split("[^fr]");
 743         if (!result[0].equals("f"))
 744             failCount++;
 745         if (!result[1].equals("r"))
 746             failCount++;
 747 
 748         // Test adding to bits, subtracting a node, then adding to bits again
 749         pattern = Pattern.compile("[^f\u203Ar]");
 750         matcher = pattern.matcher("a");
 751         if (!matcher.find())
 752             failCount++;
 753         matcher.reset("f");
 754         if (matcher.find())
 755             failCount++;
 756         matcher.reset("\u203A");
 757         if (matcher.find())
 758             failCount++;
 759         matcher.reset("r");
 760         if (matcher.find())
 761             failCount++;
 762         matcher.reset("\u203B");
 763         if (!matcher.find())
 764             failCount++;
 765 
 766         // Test subtracting a node, adding to bits, subtracting again
 767         pattern = Pattern.compile("[^\u203Ar\u203B]");
 768         matcher = pattern.matcher("a");
 769         if (!matcher.find())
 770             failCount++;
 771         matcher.reset("\u203A");
 772         if (matcher.find())
 773             failCount++;
 774         matcher.reset("r");
 775         if (matcher.find())
 776             failCount++;
 777         matcher.reset("\u203B");
 778         if (matcher.find())
 779             failCount++;
 780         matcher.reset("\u203C");
 781         if (!matcher.find())
 782             failCount++;
 783 
 784         report("Negated Character Class");
 785     }
 786 
 787     // This test is for 4628291
 788     private static void toStringTest() throws Exception {
 789         Pattern pattern = Pattern.compile("b+");
 790         if (pattern.toString() != "b+")
 791             failCount++;
 792         Matcher matcher = pattern.matcher("aaabbbccc");
 793         String matcherString = matcher.toString(); // unspecified
 794         matcher.find();
 795         matcherString = matcher.toString(); // unspecified
 796         matcher.region(0,3);
 797         matcherString = matcher.toString(); // unspecified
 798         matcher.reset();
 799         matcherString = matcher.toString(); // unspecified
 800         report("toString");
 801     }
 802 
 803     // This test is for 4808962
 804     private static void literalPatternTest() throws Exception {
 805         int flags = Pattern.LITERAL;
 806 
 807         Pattern pattern = Pattern.compile("abc\\t$^", flags);
 808         check(pattern, "abc\\t$^", true);
 809 
 810         pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
 811         check(pattern, "abc\\t$^", true);
 812 
 813         pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
 814         check(pattern, "\\Qa^$bcabc\\E", true);
 815         check(pattern, "a^$bcabc", false);
 816 
 817         pattern = Pattern.compile("\\\\Q\\\\E");
 818         check(pattern, "\\Q\\E", true);
 819 
 820         pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
 821         check(pattern, "abcefg\\Q\\Ehij", true);
 822 
 823         pattern = Pattern.compile("\\\\\\Q\\\\E");
 824         check(pattern, "\\\\\\\\", true);
 825 
 826         pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
 827         check(pattern, "\\Qa^$bcabc\\E", true);
 828         check(pattern, "a^$bcabc", false);
 829 
 830         pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
 831         check(pattern, "\\Qabc\\Edef", true);
 832         check(pattern, "abcdef", false);
 833 
 834         pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
 835         check(pattern, "abc\\Edef", true);
 836         check(pattern, "abcdef", false);
 837 
 838         pattern = Pattern.compile(Pattern.quote("\\E"));
 839         check(pattern, "\\E", true);
 840 
 841         pattern = Pattern.compile("((((abc.+?:)", flags);
 842         check(pattern, "((((abc.+?:)", true);
 843 
 844         flags |= Pattern.MULTILINE;
 845 
 846         pattern = Pattern.compile("^cat$", flags);
 847         check(pattern, "abc^cat$def", true);
 848         check(pattern, "cat", false);
 849 
 850         flags |= Pattern.CASE_INSENSITIVE;
 851 
 852         pattern = Pattern.compile("abcdef", flags);
 853         check(pattern, "ABCDEF", true);
 854         check(pattern, "AbCdEf", true);
 855 
 856         flags |= Pattern.DOTALL;
 857 
 858         pattern = Pattern.compile("a...b", flags);
 859         check(pattern, "A...b", true);
 860         check(pattern, "Axxxb", false);
 861 
 862         flags |= Pattern.CANON_EQ;
 863 
 864         Pattern p = Pattern.compile("testa\u030a", flags);
 865         check(pattern, "testa\u030a", false);
 866         check(pattern, "test\u00e5", false);
 867 
 868         // Supplementary character test
 869         flags = Pattern.LITERAL;
 870 
 871         pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
 872         check(pattern, toSupplementaries("abc\\t$^"), true);
 873 
 874         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
 875         check(pattern, toSupplementaries("abc\\t$^"), true);
 876 
 877         pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
 878         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 879         check(pattern, toSupplementaries("a^$bcabc"), false);
 880 
 881         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
 882         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 883         check(pattern, toSupplementaries("a^$bcabc"), false);
 884 
 885         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
 886         check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
 887         check(pattern, toSupplementaries("abcdef"), false);
 888 
 889         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
 890         check(pattern, toSupplementaries("abc\\Edef"), true);
 891         check(pattern, toSupplementaries("abcdef"), false);
 892 
 893         pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
 894         check(pattern, toSupplementaries("((((abc.+?:)"), true);
 895 
 896         flags |= Pattern.MULTILINE;
 897 
 898         pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
 899         check(pattern, toSupplementaries("abc^cat$def"), true);
 900         check(pattern, toSupplementaries("cat"), false);
 901 
 902         flags |= Pattern.DOTALL;
 903 
 904         // note: this is case-sensitive.
 905         pattern = Pattern.compile(toSupplementaries("a...b"), flags);
 906         check(pattern, toSupplementaries("a...b"), true);
 907         check(pattern, toSupplementaries("axxxb"), false);
 908 
 909         flags |= Pattern.CANON_EQ;
 910 
 911         String t = toSupplementaries("test");
 912         p = Pattern.compile(t + "a\u030a", flags);
 913         check(pattern, t + "a\u030a", false);
 914         check(pattern, t + "\u00e5", false);
 915 
 916         report("Literal pattern");
 917     }
 918 
 919     // This test is for 4803179
 920     // This test is also for 4808962, replacement parts
 921     private static void literalReplacementTest() throws Exception {
 922         int flags = Pattern.LITERAL;
 923 
 924         Pattern pattern = Pattern.compile("abc", flags);
 925         Matcher matcher = pattern.matcher("zzzabczzz");
 926         String replaceTest = "$0";
 927         String result = matcher.replaceAll(replaceTest);
 928         if (!result.equals("zzzabczzz"))
 929             failCount++;
 930 
 931         matcher.reset();
 932         String literalReplacement = matcher.quoteReplacement(replaceTest);
 933         result = matcher.replaceAll(literalReplacement);
 934         if (!result.equals("zzz$0zzz"))
 935             failCount++;
 936 
 937         matcher.reset();
 938         replaceTest = "\\t$\\$";
 939         literalReplacement = matcher.quoteReplacement(replaceTest);
 940         result = matcher.replaceAll(literalReplacement);
 941         if (!result.equals("zzz\\t$\\$zzz"))
 942             failCount++;
 943 
 944         // Supplementary character test
 945         pattern = Pattern.compile(toSupplementaries("abc"), flags);
 946         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
 947         replaceTest = "$0";
 948         result = matcher.replaceAll(replaceTest);
 949         if (!result.equals(toSupplementaries("zzzabczzz")))
 950             failCount++;
 951 
 952         matcher.reset();
 953         literalReplacement = matcher.quoteReplacement(replaceTest);
 954         result = matcher.replaceAll(literalReplacement);
 955         if (!result.equals(toSupplementaries("zzz$0zzz")))
 956             failCount++;
 957 
 958         matcher.reset();
 959         replaceTest = "\\t$\\$";
 960         literalReplacement = matcher.quoteReplacement(replaceTest);
 961         result = matcher.replaceAll(literalReplacement);
 962         if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
 963             failCount++;
 964 
 965         // IAE should be thrown if backslash or '$' is the last character
 966         // in replacement string
 967         try {
 968             "\uac00".replaceAll("\uac00", "$");
 969             failCount++;
 970         } catch (IllegalArgumentException iie) {
 971         } catch (Exception e) {
 972             failCount++;
 973         }
 974         try {
 975             "\uac00".replaceAll("\uac00", "\\");
 976             failCount++;
 977         } catch (IllegalArgumentException iie) {
 978         } catch (Exception e) {
 979             failCount++;
 980         }
 981         report("Literal replacement");
 982     }
 983 
 984     // This test is for 4757029
 985     private static void regionTest() throws Exception {
 986         Pattern pattern = Pattern.compile("abc");
 987         Matcher matcher = pattern.matcher("abcdefabc");
 988 
 989         matcher.region(0,9);
 990         if (!matcher.find())
 991             failCount++;
 992         if (!matcher.find())
 993             failCount++;
 994         matcher.region(0,3);
 995         if (!matcher.find())
 996            failCount++;
 997         matcher.region(3,6);
 998         if (matcher.find())
 999            failCount++;
1000         matcher.region(0,2);
1001         if (matcher.find())
1002            failCount++;
1003 
1004         expectRegionFail(matcher, 1, -1);
1005         expectRegionFail(matcher, -1, -1);
1006         expectRegionFail(matcher, -1, 1);
1007         expectRegionFail(matcher, 5, 3);
1008         expectRegionFail(matcher, 5, 12);
1009         expectRegionFail(matcher, 12, 12);
1010 
1011         pattern = Pattern.compile("^abc$");
1012         matcher = pattern.matcher("zzzabczzz");
1013         matcher.region(0,9);
1014         if (matcher.find())
1015             failCount++;
1016         matcher.region(3,6);
1017         if (!matcher.find())
1018            failCount++;
1019         matcher.region(3,6);
1020         matcher.useAnchoringBounds(false);
1021         if (matcher.find())
1022            failCount++;
1023 
1024         // Supplementary character test
1025         pattern = Pattern.compile(toSupplementaries("abc"));
1026         matcher = pattern.matcher(toSupplementaries("abcdefabc"));
1027         matcher.region(0,9*2);
1028         if (!matcher.find())
1029             failCount++;
1030         if (!matcher.find())
1031             failCount++;
1032         matcher.region(0,3*2);
1033         if (!matcher.find())
1034            failCount++;
1035         matcher.region(1,3*2);
1036         if (matcher.find())
1037            failCount++;
1038         matcher.region(3*2,6*2);
1039         if (matcher.find())
1040            failCount++;
1041         matcher.region(0,2*2);
1042         if (matcher.find())
1043            failCount++;
1044         matcher.region(0,2*2+1);
1045         if (matcher.find())
1046            failCount++;
1047 
1048         expectRegionFail(matcher, 1*2, -1);
1049         expectRegionFail(matcher, -1, -1);
1050         expectRegionFail(matcher, -1, 1*2);
1051         expectRegionFail(matcher, 5*2, 3*2);
1052         expectRegionFail(matcher, 5*2, 12*2);
1053         expectRegionFail(matcher, 12*2, 12*2);
1054 
1055         pattern = Pattern.compile(toSupplementaries("^abc$"));
1056         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
1057         matcher.region(0,9*2);
1058         if (matcher.find())
1059             failCount++;
1060         matcher.region(3*2,6*2);
1061         if (!matcher.find())
1062            failCount++;
1063         matcher.region(3*2+1,6*2);
1064         if (matcher.find())
1065            failCount++;
1066         matcher.region(3*2,6*2-1);
1067         if (matcher.find())
1068            failCount++;
1069         matcher.region(3*2,6*2);
1070         matcher.useAnchoringBounds(false);
1071         if (matcher.find())
1072            failCount++;
1073         report("Regions");
1074     }
1075 
1076     private static void expectRegionFail(Matcher matcher, int index1,
1077                                          int index2)
1078     {
1079         try {
1080             matcher.region(index1, index2);
1081             failCount++;
1082         } catch (IndexOutOfBoundsException ioobe) {
1083             // Correct result
1084         } catch (IllegalStateException ise) {
1085             // Correct result
1086         }
1087     }
1088 
1089     // This test is for 4803197
1090     private static void escapedSegmentTest() throws Exception {
1091 
1092         Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
1093         check(pattern, "dir1\\dir2", true);
1094 
1095         pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
1096         check(pattern, "dir1\\dir2\\", true);
1097 
1098         pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
1099         check(pattern, "dir1\\dir2\\", true);
1100 
1101         // Supplementary character test
1102         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
1103         check(pattern, toSupplementaries("dir1\\dir2"), true);
1104 
1105         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
1106         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1107 
1108         pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1109         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1110 
1111         report("Escaped segment");
1112     }
1113 
1114     // This test is for 4792284
1115     private static void nonCaptureRepetitionTest() throws Exception {
1116         String input = "abcdefgh;";
1117 
1118         String[] patterns = new String[] {
1119             "(?:\\w{4})+;",
1120             "(?:\\w{8})*;",
1121             "(?:\\w{2}){2,4};",
1122             "(?:\\w{4}){2,};",   // only matches the
1123             ".*?(?:\\w{5})+;",   //     specified minimum
1124             ".*?(?:\\w{9})*;",   //     number of reps - OK
1125             "(?:\\w{4})+?;",     // lazy repetition - OK
1126             "(?:\\w{4})++;",     // possessive repetition - OK
1127             "(?:\\w{2,}?)+;",    // non-deterministic - OK
1128             "(\\w{4})+;",        // capturing group - OK
1129         };
1130 
1131         for (int i = 0; i < patterns.length; i++) {
1132             // Check find()
1133             check(patterns[i], 0, input, input, true);
1134             // Check matches()
1135             Pattern p = Pattern.compile(patterns[i]);
1136             Matcher m = p.matcher(input);
1137 
1138             if (m.matches()) {
1139                 if (!m.group(0).equals(input))
1140                     failCount++;
1141             } else {
1142                 failCount++;
1143             }
1144         }
1145 
1146         report("Non capturing repetition");
1147     }
1148 
1149     // This test is for 6358731
1150     private static void notCapturedGroupCurlyMatchTest() throws Exception {
1151         Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1152         Matcher matcher = pattern.matcher("abcd");
1153         if (!matcher.matches() ||
1154              matcher.group(1) != null ||
1155              !matcher.group(2).equals("abcd")) {
1156             failCount++;
1157         }
1158         report("Not captured GroupCurly");
1159     }
1160 
1161     // This test is for 4706545
1162     private static void javaCharClassTest() throws Exception {
1163         for (int i=0; i<1000; i++) {
1164             char c = (char)generator.nextInt();
1165             check("{javaLowerCase}", c, Character.isLowerCase(c));
1166             check("{javaUpperCase}", c, Character.isUpperCase(c));
1167             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1168             check("{javaTitleCase}", c, Character.isTitleCase(c));
1169             check("{javaDigit}", c, Character.isDigit(c));
1170             check("{javaDefined}", c, Character.isDefined(c));
1171             check("{javaLetter}", c, Character.isLetter(c));
1172             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1173             check("{javaJavaIdentifierStart}", c,
1174                   Character.isJavaIdentifierStart(c));
1175             check("{javaJavaIdentifierPart}", c,
1176                   Character.isJavaIdentifierPart(c));
1177             check("{javaUnicodeIdentifierStart}", c,
1178                   Character.isUnicodeIdentifierStart(c));
1179             check("{javaUnicodeIdentifierPart}", c,
1180                   Character.isUnicodeIdentifierPart(c));
1181             check("{javaIdentifierIgnorable}", c,
1182                   Character.isIdentifierIgnorable(c));
1183             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1184             check("{javaWhitespace}", c, Character.isWhitespace(c));
1185             check("{javaISOControl}", c, Character.isISOControl(c));
1186             check("{javaMirrored}", c, Character.isMirrored(c));
1187 
1188         }
1189 
1190         // Supplementary character test
1191         for (int i=0; i<1000; i++) {
1192             int c = generator.nextInt(Character.MAX_CODE_POINT
1193                                       - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1194                         + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1195             check("{javaLowerCase}", c, Character.isLowerCase(c));
1196             check("{javaUpperCase}", c, Character.isUpperCase(c));
1197             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1198             check("{javaTitleCase}", c, Character.isTitleCase(c));
1199             check("{javaDigit}", c, Character.isDigit(c));
1200             check("{javaDefined}", c, Character.isDefined(c));
1201             check("{javaLetter}", c, Character.isLetter(c));
1202             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1203             check("{javaJavaIdentifierStart}", c,
1204                   Character.isJavaIdentifierStart(c));
1205             check("{javaJavaIdentifierPart}", c,
1206                   Character.isJavaIdentifierPart(c));
1207             check("{javaUnicodeIdentifierStart}", c,
1208                   Character.isUnicodeIdentifierStart(c));
1209             check("{javaUnicodeIdentifierPart}", c,
1210                   Character.isUnicodeIdentifierPart(c));
1211             check("{javaIdentifierIgnorable}", c,
1212                   Character.isIdentifierIgnorable(c));
1213             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1214             check("{javaWhitespace}", c, Character.isWhitespace(c));
1215             check("{javaISOControl}", c, Character.isISOControl(c));
1216             check("{javaMirrored}", c, Character.isMirrored(c));
1217         }
1218 
1219         report("Java character classes");
1220     }
1221 
1222     // This test is for 4523620
1223     /*
1224     private static void numOccurrencesTest() throws Exception {
1225         Pattern pattern = Pattern.compile("aaa");
1226 
1227         if (pattern.numOccurrences("aaaaaa", false) != 2)
1228             failCount++;
1229         if (pattern.numOccurrences("aaaaaa", true) != 4)
1230             failCount++;
1231 
1232         pattern = Pattern.compile("^");
1233         if (pattern.numOccurrences("aaaaaa", false) != 1)
1234             failCount++;
1235         if (pattern.numOccurrences("aaaaaa", true) != 1)
1236             failCount++;
1237 
1238         report("Number of Occurrences");
1239     }
1240     */
1241 
1242     // This test is for 4776374
1243     private static void caretBetweenTerminatorsTest() throws Exception {
1244         int flags1 = Pattern.DOTALL;
1245         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1246         int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1247         int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1248 
1249         check("^....", flags1, "test\ntest", "test", true);
1250         check(".....^", flags1, "test\ntest", "test", false);
1251         check(".....^", flags1, "test\n", "test", false);
1252         check("....^", flags1, "test\r\n", "test", false);
1253 
1254         check("^....", flags2, "test\ntest", "test", true);
1255         check("....^", flags2, "test\ntest", "test", false);
1256         check(".....^", flags2, "test\n", "test", false);
1257         check("....^", flags2, "test\r\n", "test", false);
1258 
1259         check("^....", flags3, "test\ntest", "test", true);
1260         check(".....^", flags3, "test\ntest", "test\n", true);
1261         check(".....^", flags3, "test\u0085test", "test\u0085", false);
1262         check(".....^", flags3, "test\n", "test", false);
1263         check(".....^", flags3, "test\r\n", "test", false);
1264         check("......^", flags3, "test\r\ntest", "test\r\n", true);
1265 
1266         check("^....", flags4, "test\ntest", "test", true);
1267         check(".....^", flags3, "test\ntest", "test\n", true);
1268         check(".....^", flags4, "test\u0085test", "test\u0085", true);
1269         check(".....^", flags4, "test\n", "test\n", false);
1270         check(".....^", flags4, "test\r\n", "test\r", false);
1271 
1272         // Supplementary character test
1273         String t = toSupplementaries("test");
1274         check("^....", flags1, t+"\n"+t, t, true);
1275         check(".....^", flags1, t+"\n"+t, t, false);
1276         check(".....^", flags1, t+"\n", t, false);
1277         check("....^", flags1, t+"\r\n", t, false);
1278 
1279         check("^....", flags2, t+"\n"+t, t, true);
1280         check("....^", flags2, t+"\n"+t, t, false);
1281         check(".....^", flags2, t+"\n", t, false);
1282         check("....^", flags2, t+"\r\n", t, false);
1283 
1284         check("^....", flags3, t+"\n"+t, t, true);
1285         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1286         check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1287         check(".....^", flags3, t+"\n", t, false);
1288         check(".....^", flags3, t+"\r\n", t, false);
1289         check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1290 
1291         check("^....", flags4, t+"\n"+t, t, true);
1292         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1293         check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1294         check(".....^", flags4, t+"\n", t+"\n", false);
1295         check(".....^", flags4, t+"\r\n", t+"\r", false);
1296 
1297         report("Caret between terminators");
1298     }
1299 
1300     // This test is for 4727935
1301     private static void dollarAtEndTest() throws Exception {
1302         int flags1 = Pattern.DOTALL;
1303         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1304         int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1305 
1306         check("....$", flags1, "test\n", "test", true);
1307         check("....$", flags1, "test\r\n", "test", true);
1308         check(".....$", flags1, "test\n", "test\n", true);
1309         check(".....$", flags1, "test\u0085", "test\u0085", true);
1310         check("....$", flags1, "test\u0085", "test", true);
1311 
1312         check("....$", flags2, "test\n", "test", true);
1313         check(".....$", flags2, "test\n", "test\n", true);
1314         check(".....$", flags2, "test\u0085", "test\u0085", true);
1315         check("....$", flags2, "test\u0085", "est\u0085", true);
1316 
1317         check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1318         check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1319         check("....$blah", flags3, "test\nblah", "!!!!", false);
1320         check(".....$blah", flags3, "test\nblah", "!!!!", false);
1321 
1322         // Supplementary character test
1323         String t = toSupplementaries("test");
1324         String b = toSupplementaries("blah");
1325         check("....$", flags1, t+"\n", t, true);
1326         check("....$", flags1, t+"\r\n", t, true);
1327         check(".....$", flags1, t+"\n", t+"\n", true);
1328         check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1329         check("....$", flags1, t+"\u0085", t, true);
1330 
1331         check("....$", flags2, t+"\n", t, true);
1332         check(".....$", flags2, t+"\n", t+"\n", true);
1333         check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1334         check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1335 
1336         check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1337         check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1338         check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1339         check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1340 
1341         report("Dollar at End");
1342     }
1343 
1344     // This test is for 4711773
1345     private static void multilineDollarTest() throws Exception {
1346         Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1347         Matcher matcher = findCR.matcher("first bit\nsecond bit");
1348         matcher.find();
1349         if (matcher.start(0) != 9)
1350             failCount++;
1351         matcher.find();
1352         if (matcher.start(0) != 20)
1353             failCount++;
1354 
1355         // Supplementary character test
1356         matcher = findCR.matcher(toSupplementaries("first  bit\n second  bit")); // double BMP chars
1357         matcher.find();
1358         if (matcher.start(0) != 9*2)
1359             failCount++;
1360         matcher.find();
1361         if (matcher.start(0) != 20*2)
1362             failCount++;
1363 
1364         report("Multiline Dollar");
1365     }
1366 
1367     private static void reluctantRepetitionTest() throws Exception {
1368         Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1369         check(p, "1 word word word 2", true);
1370         check(p, "1 wor wo w 2", true);
1371         check(p, "1 word word 2", true);
1372         check(p, "1 word 2", true);
1373         check(p, "1 wo w w 2", true);
1374         check(p, "1 wo w 2", true);
1375         check(p, "1 wor w 2", true);
1376 
1377         p = Pattern.compile("([a-z])+?c");
1378         Matcher m = p.matcher("ababcdefdec");
1379         check(m, "ababc");
1380 
1381         // Supplementary character test
1382         p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1383         m = p.matcher(toSupplementaries("ababcdefdec"));
1384         check(m, toSupplementaries("ababc"));
1385 
1386         report("Reluctant Repetition");
1387     }
1388 
1389     private static Pattern serializedPattern(Pattern p) throws Exception {
1390         ByteArrayOutputStream baos = new ByteArrayOutputStream();
1391         ObjectOutputStream oos = new ObjectOutputStream(baos);
1392         oos.writeObject(p);
1393         oos.close();
1394         try (ObjectInputStream ois = new ObjectInputStream(
1395                 new ByteArrayInputStream(baos.toByteArray()))) {
1396             return (Pattern)ois.readObject();
1397         }
1398     }
1399 
1400     private static void serializeTest() throws Exception {
1401         String patternStr = "(b)";
1402         String matchStr = "b";
1403         Pattern pattern = Pattern.compile(patternStr);
1404         Pattern serializedPattern = serializedPattern(pattern);
1405         Matcher matcher = serializedPattern.matcher(matchStr);
1406         if (!matcher.matches())
1407             failCount++;
1408         if (matcher.groupCount() != 1)
1409             failCount++;
1410 
1411         pattern = Pattern.compile("a(?-i)b", Pattern.CASE_INSENSITIVE);
1412         serializedPattern = serializedPattern(pattern);
1413         if (!serializedPattern.matcher("Ab").matches())
1414             failCount++;
1415         if (serializedPattern.matcher("AB").matches())
1416             failCount++;
1417 
1418         report("Serialization");
1419     }
1420 
1421     private static void gTest() {
1422         Pattern pattern = Pattern.compile("\\G\\w");
1423         Matcher matcher = pattern.matcher("abc#x#x");
1424         matcher.find();
1425         matcher.find();
1426         matcher.find();
1427         if (matcher.find())
1428             failCount++;
1429 
1430         pattern = Pattern.compile("\\GA*");
1431         matcher = pattern.matcher("1A2AA3");
1432         matcher.find();
1433         if (matcher.find())
1434             failCount++;
1435 
1436         pattern = Pattern.compile("\\GA*");
1437         matcher = pattern.matcher("1A2AA3");
1438         if (!matcher.find(1))
1439             failCount++;
1440         matcher.find();
1441         if (matcher.find())
1442             failCount++;
1443 
1444         report("\\G");
1445     }
1446 
1447     private static void zTest() {
1448         Pattern pattern = Pattern.compile("foo\\Z");
1449         // Positives
1450         check(pattern, "foo\u0085", true);
1451         check(pattern, "foo\u2028", true);
1452         check(pattern, "foo\u2029", true);
1453         check(pattern, "foo\n", true);
1454         check(pattern, "foo\r", true);
1455         check(pattern, "foo\r\n", true);
1456         // Negatives
1457         check(pattern, "fooo", false);
1458         check(pattern, "foo\n\r", false);
1459 
1460         pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1461         // Positives
1462         check(pattern, "foo", true);
1463         check(pattern, "foo\n", true);
1464         // Negatives
1465         check(pattern, "foo\r", false);
1466         check(pattern, "foo\u0085", false);
1467         check(pattern, "foo\u2028", false);
1468         check(pattern, "foo\u2029", false);
1469 
1470         report("\\Z");
1471     }
1472 
1473     private static void replaceFirstTest() {
1474         Pattern pattern = Pattern.compile("(ab)(c*)");
1475         Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1476         if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1477             failCount++;
1478 
1479         matcher.reset("zzzabccczzzabcczzzabccczzz");
1480         if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1481             failCount++;
1482 
1483         matcher.reset("zzzabccczzzabcczzzabccczzz");
1484         String result = matcher.replaceFirst("$1");
1485         if (!result.equals("zzzabzzzabcczzzabccczzz"))
1486             failCount++;
1487 
1488         matcher.reset("zzzabccczzzabcczzzabccczzz");
1489         result = matcher.replaceFirst("$2");
1490         if (!result.equals("zzzccczzzabcczzzabccczzz"))
1491             failCount++;
1492 
1493         pattern = Pattern.compile("a*");
1494         matcher = pattern.matcher("aaaaaaaaaa");
1495         if (!matcher.replaceFirst("test").equals("test"))
1496             failCount++;
1497 
1498         pattern = Pattern.compile("a+");
1499         matcher = pattern.matcher("zzzaaaaaaaaaa");
1500         if (!matcher.replaceFirst("test").equals("zzztest"))
1501             failCount++;
1502 
1503         // Supplementary character test
1504         pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1505         matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1506         if (!matcher.replaceFirst(toSupplementaries("test"))
1507                 .equals(toSupplementaries("testzzzabcczzzabccc")))
1508             failCount++;
1509 
1510         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1511         if (!matcher.replaceFirst(toSupplementaries("test")).
1512             equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1513             failCount++;
1514 
1515         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1516         result = matcher.replaceFirst("$1");
1517         if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1518             failCount++;
1519 
1520         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1521         result = matcher.replaceFirst("$2");
1522         if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1523             failCount++;
1524 
1525         pattern = Pattern.compile(toSupplementaries("a*"));
1526         matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1527         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1528             failCount++;
1529 
1530         pattern = Pattern.compile(toSupplementaries("a+"));
1531         matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1532         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1533             failCount++;
1534 
1535         report("Replace First");
1536     }
1537 
1538     private static void unixLinesTest() {
1539         Pattern pattern = Pattern.compile(".*");
1540         Matcher matcher = pattern.matcher("aa\u2028blah");
1541         matcher.find();
1542         if (!matcher.group(0).equals("aa"))
1543             failCount++;
1544 
1545         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1546         matcher = pattern.matcher("aa\u2028blah");
1547         matcher.find();
1548         if (!matcher.group(0).equals("aa\u2028blah"))
1549             failCount++;
1550 
1551         pattern = Pattern.compile("[az]$",
1552                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1553         matcher = pattern.matcher("aa\u2028zz");
1554         check(matcher, "a\u2028", false);
1555 
1556         // Supplementary character test
1557         pattern = Pattern.compile(".*");
1558         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1559         matcher.find();
1560         if (!matcher.group(0).equals(toSupplementaries("aa")))
1561             failCount++;
1562 
1563         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1564         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1565         matcher.find();
1566         if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1567             failCount++;
1568 
1569         pattern = Pattern.compile(toSupplementaries("[az]$"),
1570                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1571         matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1572         check(matcher, toSupplementaries("a\u2028"), false);
1573 
1574         report("Unix Lines");
1575     }
1576 
1577     private static void commentsTest() {
1578         int flags = Pattern.COMMENTS;
1579 
1580         Pattern pattern = Pattern.compile("aa \\# aa", flags);
1581         Matcher matcher = pattern.matcher("aa#aa");
1582         if (!matcher.matches())
1583             failCount++;
1584 
1585         pattern = Pattern.compile("aa  # blah", flags);
1586         matcher = pattern.matcher("aa");
1587         if (!matcher.matches())
1588             failCount++;
1589 
1590         pattern = Pattern.compile("aa blah", flags);
1591         matcher = pattern.matcher("aablah");
1592         if (!matcher.matches())
1593              failCount++;
1594 
1595         pattern = Pattern.compile("aa  # blah blech  ", flags);
1596         matcher = pattern.matcher("aa");
1597         if (!matcher.matches())
1598             failCount++;
1599 
1600         pattern = Pattern.compile("aa  # blah\n  ", flags);
1601         matcher = pattern.matcher("aa");
1602         if (!matcher.matches())
1603             failCount++;
1604 
1605         pattern = Pattern.compile("aa  # blah\nbc # blech", flags);
1606         matcher = pattern.matcher("aabc");
1607         if (!matcher.matches())
1608              failCount++;
1609 
1610         pattern = Pattern.compile("aa  # blah\nbc# blech", flags);
1611         matcher = pattern.matcher("aabc");
1612         if (!matcher.matches())
1613              failCount++;
1614 
1615         pattern = Pattern.compile("aa  # blah\nbc\\# blech", flags);
1616         matcher = pattern.matcher("aabc#blech");
1617         if (!matcher.matches())
1618              failCount++;
1619 
1620         // Supplementary character test
1621         pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1622         matcher = pattern.matcher(toSupplementaries("aa#aa"));
1623         if (!matcher.matches())
1624             failCount++;
1625 
1626         pattern = Pattern.compile(toSupplementaries("aa  # blah"), flags);
1627         matcher = pattern.matcher(toSupplementaries("aa"));
1628         if (!matcher.matches())
1629             failCount++;
1630 
1631         pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1632         matcher = pattern.matcher(toSupplementaries("aablah"));
1633         if (!matcher.matches())
1634              failCount++;
1635 
1636         pattern = Pattern.compile(toSupplementaries("aa  # blah blech  "), flags);
1637         matcher = pattern.matcher(toSupplementaries("aa"));
1638         if (!matcher.matches())
1639             failCount++;
1640 
1641         pattern = Pattern.compile(toSupplementaries("aa  # blah\n  "), flags);
1642         matcher = pattern.matcher(toSupplementaries("aa"));
1643         if (!matcher.matches())
1644             failCount++;
1645 
1646         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc # blech"), flags);
1647         matcher = pattern.matcher(toSupplementaries("aabc"));
1648         if (!matcher.matches())
1649              failCount++;
1650 
1651         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc# blech"), flags);
1652         matcher = pattern.matcher(toSupplementaries("aabc"));
1653         if (!matcher.matches())
1654              failCount++;
1655 
1656         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc\\# blech"), flags);
1657         matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1658         if (!matcher.matches())
1659              failCount++;
1660 
1661         report("Comments");
1662     }
1663 
1664     private static void caseFoldingTest() { // bug 4504687
1665         int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1666         Pattern pattern = Pattern.compile("aa", flags);
1667         Matcher matcher = pattern.matcher("ab");
1668         if (matcher.matches())
1669             failCount++;
1670 
1671         pattern = Pattern.compile("aA", flags);
1672         matcher = pattern.matcher("ab");
1673         if (matcher.matches())
1674             failCount++;
1675 
1676         pattern = Pattern.compile("aa", flags);
1677         matcher = pattern.matcher("aB");
1678         if (matcher.matches())
1679             failCount++;
1680         matcher = pattern.matcher("Ab");
1681         if (matcher.matches())
1682             failCount++;
1683 
1684         // ASCII               "a"
1685         // Latin-1 Supplement  "a" + grave
1686         // Cyrillic            "a"
1687         String[] patterns = new String[] {
1688             //single
1689             "a", "\u00e0", "\u0430",
1690             //slice
1691             "ab", "\u00e0\u00e1", "\u0430\u0431",
1692             //class single
1693             "[a]", "[\u00e0]", "[\u0430]",
1694             //class range
1695             "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1696             //back reference
1697             "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1698         };
1699 
1700         String[] texts = new String[] {
1701             "A", "\u00c0", "\u0410",
1702             "AB", "\u00c0\u00c1", "\u0410\u0411",
1703             "A", "\u00c0", "\u0410",
1704             "B", "\u00c2", "\u0411",
1705             "aA", "\u00e0\u00c0", "\u0430\u0410"
1706         };
1707 
1708         boolean[] expected = new boolean[] {
1709             true, false, false,
1710             true, false, false,
1711             true, false, false,
1712             true, false, false,
1713             true, false, false
1714         };
1715 
1716         flags = Pattern.CASE_INSENSITIVE;
1717         for (int i = 0; i < patterns.length; i++) {
1718             pattern = Pattern.compile(patterns[i], flags);
1719             matcher = pattern.matcher(texts[i]);
1720             if (matcher.matches() != expected[i]) {
1721                 System.out.println("<1> Failed at " + i);
1722                 failCount++;
1723             }
1724         }
1725 
1726         flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1727         for (int i = 0; i < patterns.length; i++) {
1728             pattern = Pattern.compile(patterns[i], flags);
1729             matcher = pattern.matcher(texts[i]);
1730             if (!matcher.matches()) {
1731                 System.out.println("<2> Failed at " + i);
1732                 failCount++;
1733             }
1734         }
1735         // flag unicode_case alone should do nothing
1736         flags = Pattern.UNICODE_CASE;
1737         for (int i = 0; i < patterns.length; i++) {
1738             pattern = Pattern.compile(patterns[i], flags);
1739             matcher = pattern.matcher(texts[i]);
1740             if (matcher.matches()) {
1741                 System.out.println("<3> Failed at " + i);
1742                 failCount++;
1743             }
1744         }
1745 
1746         // Special cases: i, I, u+0131 and u+0130
1747         flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1748         pattern = Pattern.compile("[h-j]+", flags);
1749         if (!pattern.matcher("\u0131\u0130").matches())
1750             failCount++;
1751         report("Case Folding");
1752     }
1753 
1754     private static void appendTest() {
1755         Pattern pattern = Pattern.compile("(ab)(cd)");
1756         Matcher matcher = pattern.matcher("abcd");
1757         String result = matcher.replaceAll("$2$1");
1758         if (!result.equals("cdab"))
1759             failCount++;
1760 
1761         String  s1 = "Swap all: first = 123, second = 456";
1762         String  s2 = "Swap one: first = 123, second = 456";
1763         String  r  = "$3$2$1";
1764         pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1765         matcher = pattern.matcher(s1);
1766 
1767         result = matcher.replaceAll(r);
1768         if (!result.equals("Swap all: 123 = first, 456 = second"))
1769             failCount++;
1770 
1771         matcher = pattern.matcher(s2);
1772 
1773         if (matcher.find()) {
1774             StringBuffer sb = new StringBuffer();
1775             matcher.appendReplacement(sb, r);
1776             matcher.appendTail(sb);
1777             result = sb.toString();
1778             if (!result.equals("Swap one: 123 = first, second = 456"))
1779                 failCount++;
1780         }
1781 
1782         // Supplementary character test
1783         pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1784         matcher = pattern.matcher(toSupplementaries("abcd"));
1785         result = matcher.replaceAll("$2$1");
1786         if (!result.equals(toSupplementaries("cdab")))
1787             failCount++;
1788 
1789         s1 = toSupplementaries("Swap all: first = 123, second = 456");
1790         s2 = toSupplementaries("Swap one: first = 123, second = 456");
1791         r  = toSupplementaries("$3$2$1");
1792         pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1793         matcher = pattern.matcher(s1);
1794 
1795         result = matcher.replaceAll(r);
1796         if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1797             failCount++;
1798 
1799         matcher = pattern.matcher(s2);
1800 
1801         if (matcher.find()) {
1802             StringBuffer sb = new StringBuffer();
1803             matcher.appendReplacement(sb, r);
1804             matcher.appendTail(sb);
1805             result = sb.toString();
1806             if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1807                 failCount++;
1808         }
1809         report("Append");
1810     }
1811 
1812     private static void splitTest() {
1813         Pattern pattern = Pattern.compile(":");
1814         String[] result = pattern.split("foo:and:boo", 2);
1815         if (!result[0].equals("foo"))
1816             failCount++;
1817         if (!result[1].equals("and:boo"))
1818             failCount++;
1819         // Supplementary character test
1820         Pattern patternX = Pattern.compile(toSupplementaries("X"));
1821         result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1822         if (!result[0].equals(toSupplementaries("foo")))
1823             failCount++;
1824         if (!result[1].equals(toSupplementaries("andXboo")))
1825             failCount++;
1826 
1827         CharBuffer cb = CharBuffer.allocate(100);
1828         cb.put("foo:and:boo");
1829         cb.flip();
1830         result = pattern.split(cb);
1831         if (!result[0].equals("foo"))
1832             failCount++;
1833         if (!result[1].equals("and"))
1834             failCount++;
1835         if (!result[2].equals("boo"))
1836             failCount++;
1837 
1838         // Supplementary character test
1839         CharBuffer cbs = CharBuffer.allocate(100);
1840         cbs.put(toSupplementaries("fooXandXboo"));
1841         cbs.flip();
1842         result = patternX.split(cbs);
1843         if (!result[0].equals(toSupplementaries("foo")))
1844             failCount++;
1845         if (!result[1].equals(toSupplementaries("and")))
1846             failCount++;
1847         if (!result[2].equals(toSupplementaries("boo")))
1848             failCount++;
1849 
1850         String source = "0123456789";
1851         for (int limit=-2; limit<3; limit++) {
1852             for (int x=0; x<10; x++) {
1853                 result = source.split(Integer.toString(x), limit);
1854                 int expectedLength = limit < 1 ? 2 : limit;
1855 
1856                 if ((limit == 0) && (x == 9)) {
1857                     // expected dropping of ""
1858                     if (result.length != 1)
1859                         failCount++;
1860                     if (!result[0].equals("012345678")) {
1861                         failCount++;
1862                     }
1863                 } else {
1864                     if (result.length != expectedLength) {
1865                         failCount++;
1866                     }
1867                     if (!result[0].equals(source.substring(0,x))) {
1868                         if (limit != 1) {
1869                             failCount++;
1870                         } else {
1871                             if (!result[0].equals(source.substring(0,10))) {
1872                                 failCount++;
1873                             }
1874                         }
1875                     }
1876                     if (expectedLength > 1) { // Check segment 2
1877                         if (!result[1].equals(source.substring(x+1,10)))
1878                             failCount++;
1879                     }
1880                 }
1881             }
1882         }
1883         // Check the case for no match found
1884         for (int limit=-2; limit<3; limit++) {
1885             result = source.split("e", limit);
1886             if (result.length != 1)
1887                 failCount++;
1888             if (!result[0].equals(source))
1889                 failCount++;
1890         }
1891         // Check the case for limit == 0, source = "";
1892         // split() now returns 0-length for empty source "" see #6559590
1893         source = "";
1894         result = source.split("e", 0);
1895         if (result.length != 1)
1896             failCount++;
1897         if (!result[0].equals(source))
1898             failCount++;
1899 
1900         // Check both split() and splitAsStraem(), especially for zero-lenth
1901         // input and zero-lenth match cases
1902         String[][] input = new String[][] {
1903             { " ",           "Abc Efg Hij" },   // normal non-zero-match
1904             { " ",           " Abc Efg Hij" },  // leading empty str for non-zero-match
1905             { " ",           "Abc  Efg Hij" },  // non-zero-match in the middle
1906             { "(?=\\p{Lu})", "AbcEfgHij" },     // no leading empty str for zero-match
1907             { "(?=\\p{Lu})", "AbcEfg" },
1908             { "(?=\\p{Lu})", "Abc" },
1909             { " ",           "" },              // zero-length input
1910             { ".*",          "" },
1911 
1912             // some tests from PatternStreamTest.java
1913             { "4",       "awgqwefg1fefw4vssv1vvv1" },
1914             { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" },
1915             { "1",       "awgqwefg1fefw4vssv1vvv1" },
1916             { "1",       "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" },
1917             { "\u56da",  "1\u56da23\u56da456\u56da7890" },
1918             { "\u56da",  "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" },
1919             { "\u56da",  "" },
1920             { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs
1921             { "o",       "boo:and:foo" },
1922             { "o",       "booooo:and:fooooo" },
1923             { "o",       "fooooo:" },
1924         };
1925 
1926         String[][] expected = new String[][] {
1927             { "Abc", "Efg", "Hij" },
1928             { "", "Abc", "Efg", "Hij" },
1929             { "Abc", "", "Efg", "Hij" },
1930             { "Abc", "Efg", "Hij" },
1931             { "Abc", "Efg" },
1932             { "Abc" },
1933             { "" },
1934             { "" },
1935 
1936             { "awgqwefg1fefw", "vssv1vvv1" },
1937             { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" },
1938             { "awgqwefg", "fefw4vssv", "vvv" },
1939             { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" },
1940             { "1", "23", "456", "7890" },
1941             { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" },
1942             { "" },
1943             { "This", "is", "testing", "", "with", "different", "separators" },
1944             { "b", "", ":and:f" },
1945             { "b", "", "", "", "", ":and:f" },
1946             { "f", "", "", "", "", ":" },
1947         };
1948         for (int i = 0; i < input.length; i++) {
1949             pattern = Pattern.compile(input[i][0]);
1950             if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) {
1951                 failCount++;
1952             }
1953             if (input[i][1].length() > 0 &&  // splitAsStream() return empty resulting
1954                                              // array for zero-length input for now
1955                 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(),
1956                                expected[i])) {
1957                 failCount++;
1958             }
1959         }
1960         report("Split");
1961     }
1962 
1963     private static void negationTest() {
1964         Pattern pattern = Pattern.compile("[\\[@^]+");
1965         Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1966         if (!matcher.find())
1967             failCount++;
1968         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1969             failCount++;
1970         pattern = Pattern.compile("[@\\[^]+");
1971         matcher = pattern.matcher("@@@@[[[[^^^^");
1972         if (!matcher.find())
1973             failCount++;
1974         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1975             failCount++;
1976         pattern = Pattern.compile("[@\\[^@]+");
1977         matcher = pattern.matcher("@@@@[[[[^^^^");
1978         if (!matcher.find())
1979             failCount++;
1980         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1981             failCount++;
1982 
1983         pattern = Pattern.compile("\\)");
1984         matcher = pattern.matcher("xxx)xxx");
1985         if (!matcher.find())
1986             failCount++;
1987 
1988         report("Negation");
1989     }
1990 
1991     private static void ampersandTest() {
1992         Pattern pattern = Pattern.compile("[&@]+");
1993         check(pattern, "@@@@&&&&", true);
1994 
1995         pattern = Pattern.compile("[@&]+");
1996         check(pattern, "@@@@&&&&", true);
1997 
1998         pattern = Pattern.compile("[@\\&]+");
1999         check(pattern, "@@@@&&&&", true);
2000 
2001         report("Ampersand");
2002     }
2003 
2004     private static void octalTest() throws Exception {
2005         Pattern pattern = Pattern.compile("\\u0007");
2006         Matcher matcher = pattern.matcher("\u0007");
2007         if (!matcher.matches())
2008             failCount++;
2009         pattern = Pattern.compile("\\07");
2010         matcher = pattern.matcher("\u0007");
2011         if (!matcher.matches())
2012             failCount++;
2013         pattern = Pattern.compile("\\007");
2014         matcher = pattern.matcher("\u0007");
2015         if (!matcher.matches())
2016             failCount++;
2017         pattern = Pattern.compile("\\0007");
2018         matcher = pattern.matcher("\u0007");
2019         if (!matcher.matches())
2020             failCount++;
2021         pattern = Pattern.compile("\\040");
2022         matcher = pattern.matcher("\u0020");
2023         if (!matcher.matches())
2024             failCount++;
2025         pattern = Pattern.compile("\\0403");
2026         matcher = pattern.matcher("\u00203");
2027         if (!matcher.matches())
2028             failCount++;
2029         pattern = Pattern.compile("\\0103");
2030         matcher = pattern.matcher("\u0043");
2031         if (!matcher.matches())
2032             failCount++;
2033 
2034         report("Octal");
2035     }
2036 
2037     private static void longPatternTest() throws Exception {
2038         try {
2039             Pattern pattern = Pattern.compile(
2040                 "a 32-character-long pattern xxxx");
2041             pattern = Pattern.compile("a 33-character-long pattern xxxxx");
2042             pattern = Pattern.compile("a thirty four character long regex");
2043             StringBuffer patternToBe = new StringBuffer(101);
2044             for (int i=0; i<100; i++)
2045                 patternToBe.append((char)(97 + i%26));
2046             pattern = Pattern.compile(patternToBe.toString());
2047         } catch (PatternSyntaxException e) {
2048             failCount++;
2049         }
2050 
2051         // Supplementary character test
2052         try {
2053             Pattern pattern = Pattern.compile(
2054                 toSupplementaries("a 32-character-long pattern xxxx"));
2055             pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
2056             pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
2057             StringBuffer patternToBe = new StringBuffer(101*2);
2058             for (int i=0; i<100; i++)
2059                 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
2060                                                      + 97 + i%26));
2061             pattern = Pattern.compile(patternToBe.toString());
2062         } catch (PatternSyntaxException e) {
2063             failCount++;
2064         }
2065         report("LongPattern");
2066     }
2067 
2068     private static void group0Test() throws Exception {
2069         Pattern pattern = Pattern.compile("(tes)ting");
2070         Matcher matcher = pattern.matcher("testing");
2071         check(matcher, "testing");
2072 
2073         matcher.reset("testing");
2074         if (matcher.lookingAt()) {
2075             if (!matcher.group(0).equals("testing"))
2076                 failCount++;
2077         } else {
2078             failCount++;
2079         }
2080 
2081         matcher.reset("testing");
2082         if (matcher.matches()) {
2083             if (!matcher.group(0).equals("testing"))
2084                 failCount++;
2085         } else {
2086             failCount++;
2087         }
2088 
2089         pattern = Pattern.compile("(tes)ting");
2090         matcher = pattern.matcher("testing");
2091         if (matcher.lookingAt()) {
2092             if (!matcher.group(0).equals("testing"))
2093                 failCount++;
2094         } else {
2095             failCount++;
2096         }
2097 
2098         pattern = Pattern.compile("^(tes)ting");
2099         matcher = pattern.matcher("testing");
2100         if (matcher.matches()) {
2101             if (!matcher.group(0).equals("testing"))
2102                 failCount++;
2103         } else {
2104             failCount++;
2105         }
2106 
2107         // Supplementary character test
2108         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2109         matcher = pattern.matcher(toSupplementaries("testing"));
2110         check(matcher, toSupplementaries("testing"));
2111 
2112         matcher.reset(toSupplementaries("testing"));
2113         if (matcher.lookingAt()) {
2114             if (!matcher.group(0).equals(toSupplementaries("testing")))
2115                 failCount++;
2116         } else {
2117             failCount++;
2118         }
2119 
2120         matcher.reset(toSupplementaries("testing"));
2121         if (matcher.matches()) {
2122             if (!matcher.group(0).equals(toSupplementaries("testing")))
2123                 failCount++;
2124         } else {
2125             failCount++;
2126         }
2127 
2128         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2129         matcher = pattern.matcher(toSupplementaries("testing"));
2130         if (matcher.lookingAt()) {
2131             if (!matcher.group(0).equals(toSupplementaries("testing")))
2132                 failCount++;
2133         } else {
2134             failCount++;
2135         }
2136 
2137         pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
2138         matcher = pattern.matcher(toSupplementaries("testing"));
2139         if (matcher.matches()) {
2140             if (!matcher.group(0).equals(toSupplementaries("testing")))
2141                 failCount++;
2142         } else {
2143             failCount++;
2144         }
2145 
2146         report("Group0");
2147     }
2148 
2149     private static void findIntTest() throws Exception {
2150         Pattern p = Pattern.compile("blah");
2151         Matcher m = p.matcher("zzzzblahzzzzzblah");
2152         boolean result = m.find(2);
2153         if (!result)
2154             failCount++;
2155 
2156         p = Pattern.compile("$");
2157         m = p.matcher("1234567890");
2158         result = m.find(10);
2159         if (!result)
2160             failCount++;
2161         try {
2162             result = m.find(11);
2163             failCount++;
2164         } catch (IndexOutOfBoundsException e) {
2165             // correct result
2166         }
2167 
2168         // Supplementary character test
2169         p = Pattern.compile(toSupplementaries("blah"));
2170         m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
2171         result = m.find(2);
2172         if (!result)
2173             failCount++;
2174 
2175         report("FindInt");
2176     }
2177 
2178     private static void emptyPatternTest() throws Exception {
2179         Pattern p = Pattern.compile("");
2180         Matcher m = p.matcher("foo");
2181 
2182         // Should find empty pattern at beginning of input
2183         boolean result = m.find();
2184         if (result != true)
2185             failCount++;
2186         if (m.start() != 0)
2187             failCount++;
2188 
2189         // Should not match entire input if input is not empty
2190         m.reset();
2191         result = m.matches();
2192         if (result == true)
2193             failCount++;
2194 
2195         try {
2196             m.start(0);
2197             failCount++;
2198         } catch (IllegalStateException e) {
2199             // Correct result
2200         }
2201 
2202         // Should match entire input if input is empty
2203         m.reset("");
2204         result = m.matches();
2205         if (result != true)
2206             failCount++;
2207 
2208         result = Pattern.matches("", "");
2209         if (result != true)
2210             failCount++;
2211 
2212         result = Pattern.matches("", "foo");
2213         if (result == true)
2214             failCount++;
2215         report("EmptyPattern");
2216     }
2217 
2218     private static void charClassTest() throws Exception {
2219         Pattern pattern = Pattern.compile("blah[ab]]blech");
2220         check(pattern, "blahb]blech", true);
2221 
2222         pattern = Pattern.compile("[abc[def]]");
2223         check(pattern, "b", true);
2224 
2225         // Supplementary character tests
2226         pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2227         check(pattern, toSupplementaries("blahb]blech"), true);
2228 
2229         pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2230         check(pattern, toSupplementaries("b"), true);
2231 
2232         try {
2233             // u00ff when UNICODE_CASE
2234             pattern = Pattern.compile("[ab\u00ffcd]",
2235                                       Pattern.CASE_INSENSITIVE|
2236                                       Pattern.UNICODE_CASE);
2237             check(pattern, "ab\u00ffcd", true);
2238             check(pattern, "Ab\u0178Cd", true);
2239 
2240             // u00b5 when UNICODE_CASE
2241             pattern = Pattern.compile("[ab\u00b5cd]",
2242                                       Pattern.CASE_INSENSITIVE|
2243                                       Pattern.UNICODE_CASE);
2244             check(pattern, "ab\u00b5cd", true);
2245             check(pattern, "Ab\u039cCd", true);
2246         } catch (Exception e) { failCount++; }
2247 
2248         /* Special cases
2249            (1)LatinSmallLetterLongS u+017f
2250            (2)LatinSmallLetterDotlessI u+0131
2251            (3)LatineCapitalLetterIWithDotAbove u+0130
2252            (4)KelvinSign u+212a
2253            (5)AngstromSign u+212b
2254         */
2255         int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2256         pattern = Pattern.compile("[sik\u00c5]+", flags);
2257         if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2258             failCount++;
2259 
2260         report("CharClass");
2261     }
2262 
2263     private static void caretTest() throws Exception {
2264         Pattern pattern = Pattern.compile("\\w*");
2265         Matcher matcher = pattern.matcher("a#bc#def##g");
2266         check(matcher, "a");
2267         check(matcher, "");
2268         check(matcher, "bc");
2269         check(matcher, "");
2270         check(matcher, "def");
2271         check(matcher, "");
2272         check(matcher, "");
2273         check(matcher, "g");
2274         check(matcher, "");
2275         if (matcher.find())
2276             failCount++;
2277 
2278         pattern = Pattern.compile("^\\w*");
2279         matcher = pattern.matcher("a#bc#def##g");
2280         check(matcher, "a");
2281         if (matcher.find())
2282             failCount++;
2283 
2284         pattern = Pattern.compile("\\w");
2285         matcher = pattern.matcher("abc##x");
2286         check(matcher, "a");
2287         check(matcher, "b");
2288         check(matcher, "c");
2289         check(matcher, "x");
2290         if (matcher.find())
2291             failCount++;
2292 
2293         pattern = Pattern.compile("^\\w");
2294         matcher = pattern.matcher("abc##x");
2295         check(matcher, "a");
2296         if (matcher.find())
2297             failCount++;
2298 
2299         pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2300         matcher = pattern.matcher("abcdef-ghi\njklmno");
2301         check(matcher, "abc");
2302         if (matcher.find())
2303             failCount++;
2304 
2305         pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2306         matcher = pattern.matcher("abcdef-ghi\njklmno");
2307         check(matcher, "abc");
2308         check(matcher, "jkl");
2309         if (matcher.find())
2310             failCount++;
2311 
2312         pattern = Pattern.compile("^", Pattern.MULTILINE);
2313         matcher = pattern.matcher("this is some text");
2314         String result = matcher.replaceAll("X");
2315         if (!result.equals("Xthis is some text"))
2316             failCount++;
2317 
2318         pattern = Pattern.compile("^");
2319         matcher = pattern.matcher("this is some text");
2320         result = matcher.replaceAll("X");
2321         if (!result.equals("Xthis is some text"))
2322             failCount++;
2323 
2324         pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2325         matcher = pattern.matcher("this is some text\n");
2326         result = matcher.replaceAll("X");
2327         if (!result.equals("Xthis is some text\n"))
2328             failCount++;
2329 
2330         report("Caret");
2331     }
2332 
2333     private static void groupCaptureTest() throws Exception {
2334         // Independent group
2335         Pattern pattern = Pattern.compile("x+(?>y+)z+");
2336         Matcher matcher = pattern.matcher("xxxyyyzzz");
2337         matcher.find();
2338         try {
2339             String blah = matcher.group(1);
2340             failCount++;
2341         } catch (IndexOutOfBoundsException ioobe) {
2342             // Good result
2343         }
2344         // Pure group
2345         pattern = Pattern.compile("x+(?:y+)z+");
2346         matcher = pattern.matcher("xxxyyyzzz");
2347         matcher.find();
2348         try {
2349             String blah = matcher.group(1);
2350             failCount++;
2351         } catch (IndexOutOfBoundsException ioobe) {
2352             // Good result
2353         }
2354 
2355         // Supplementary character tests
2356         // Independent group
2357         pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2358         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2359         matcher.find();
2360         try {
2361             String blah = matcher.group(1);
2362             failCount++;
2363         } catch (IndexOutOfBoundsException ioobe) {
2364             // Good result
2365         }
2366         // Pure group
2367         pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2368         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2369         matcher.find();
2370         try {
2371             String blah = matcher.group(1);
2372             failCount++;
2373         } catch (IndexOutOfBoundsException ioobe) {
2374             // Good result
2375         }
2376 
2377         report("GroupCapture");
2378     }
2379 
2380     private static void backRefTest() throws Exception {
2381         Pattern pattern = Pattern.compile("(a*)bc\\1");
2382         check(pattern, "zzzaabcazzz", true);
2383 
2384         pattern = Pattern.compile("(a*)bc\\1");
2385         check(pattern, "zzzaabcaazzz", true);
2386 
2387         pattern = Pattern.compile("(abc)(def)\\1");
2388         check(pattern, "abcdefabc", true);
2389 
2390         pattern = Pattern.compile("(abc)(def)\\3");
2391         check(pattern, "abcdefabc", false);
2392 
2393         try {
2394             for (int i = 1; i < 10; i++) {
2395                 // Make sure backref 1-9 are always accepted
2396                 pattern = Pattern.compile("abcdef\\" + i);
2397                 // and fail to match if the target group does not exit
2398                 check(pattern, "abcdef", false);
2399             }
2400         } catch(PatternSyntaxException e) {
2401             failCount++;
2402         }
2403 
2404         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2405         check(pattern, "abcdefghija", false);
2406         check(pattern, "abcdefghija1", true);
2407 
2408         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2409         check(pattern, "abcdefghijkk", true);
2410 
2411         pattern = Pattern.compile("(a)bcdefghij\\11");
2412         check(pattern, "abcdefghija1", true);
2413 
2414         // Supplementary character tests
2415         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2416         check(pattern, toSupplementaries("zzzaabcazzz"), true);
2417 
2418         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2419         check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2420 
2421         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2422         check(pattern, toSupplementaries("abcdefabc"), true);
2423 
2424         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2425         check(pattern, toSupplementaries("abcdefabc"), false);
2426 
2427         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2428         check(pattern, toSupplementaries("abcdefghija"), false);
2429         check(pattern, toSupplementaries("abcdefghija1"), true);
2430 
2431         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2432         check(pattern, toSupplementaries("abcdefghijkk"), true);
2433 
2434         report("BackRef");
2435     }
2436 
2437     /**
2438      * Unicode Technical Report #18, section 2.6 End of Line
2439      * There is no empty line to be matched in the sequence \u000D\u000A
2440      * but there is an empty line in the sequence \u000A\u000D.
2441      */
2442     private static void anchorTest() throws Exception {
2443         Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2444         Matcher m = p.matcher("blah1\r\nblah2");
2445         m.find();
2446         m.find();
2447         if (!m.group().equals("blah2"))
2448             failCount++;
2449 
2450         m.reset("blah1\n\rblah2");
2451         m.find();
2452         m.find();
2453         m.find();
2454         if (!m.group().equals("blah2"))
2455             failCount++;
2456 
2457         // Test behavior of $ with \r\n at end of input
2458         p = Pattern.compile(".+$");
2459         m = p.matcher("blah1\r\n");
2460         if (!m.find())
2461             failCount++;
2462        if (!m.group().equals("blah1"))
2463             failCount++;
2464         if (m.find())
2465             failCount++;
2466 
2467         // Test behavior of $ with \r\n at end of input in multiline
2468         p = Pattern.compile(".+$", Pattern.MULTILINE);
2469         m = p.matcher("blah1\r\n");
2470         if (!m.find())
2471             failCount++;
2472         if (m.find())
2473             failCount++;
2474 
2475         // Test for $ recognition of \u0085 for bug 4527731
2476         p = Pattern.compile(".+$", Pattern.MULTILINE);
2477         m = p.matcher("blah1\u0085");
2478         if (!m.find())
2479             failCount++;
2480 
2481         // Supplementary character test
2482         p = Pattern.compile("^.*$", Pattern.MULTILINE);
2483         m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2484         m.find();
2485         m.find();
2486         if (!m.group().equals(toSupplementaries("blah2")))
2487             failCount++;
2488 
2489         m.reset(toSupplementaries("blah1\n\rblah2"));
2490         m.find();
2491         m.find();
2492         m.find();
2493         if (!m.group().equals(toSupplementaries("blah2")))
2494             failCount++;
2495 
2496         // Test behavior of $ with \r\n at end of input
2497         p = Pattern.compile(".+$");
2498         m = p.matcher(toSupplementaries("blah1\r\n"));
2499         if (!m.find())
2500             failCount++;
2501         if (!m.group().equals(toSupplementaries("blah1")))
2502             failCount++;
2503         if (m.find())
2504             failCount++;
2505 
2506         // Test behavior of $ with \r\n at end of input in multiline
2507         p = Pattern.compile(".+$", Pattern.MULTILINE);
2508         m = p.matcher(toSupplementaries("blah1\r\n"));
2509         if (!m.find())
2510             failCount++;
2511         if (m.find())
2512             failCount++;
2513 
2514         // Test for $ recognition of \u0085 for bug 4527731
2515         p = Pattern.compile(".+$", Pattern.MULTILINE);
2516         m = p.matcher(toSupplementaries("blah1\u0085"));
2517         if (!m.find())
2518             failCount++;
2519 
2520         report("Anchors");
2521     }
2522 
2523     /**
2524      * A basic sanity test of Matcher.lookingAt().
2525      */
2526     private static void lookingAtTest() throws Exception {
2527         Pattern p = Pattern.compile("(ab)(c*)");
2528         Matcher m = p.matcher("abccczzzabcczzzabccc");
2529 
2530         if (!m.lookingAt())
2531             failCount++;
2532 
2533         if (!m.group().equals(m.group(0)))
2534             failCount++;
2535 
2536         m = p.matcher("zzzabccczzzabcczzzabccczzz");
2537         if (m.lookingAt())
2538             failCount++;
2539 
2540         // Supplementary character test
2541         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2542         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2543 
2544         if (!m.lookingAt())
2545             failCount++;
2546 
2547         if (!m.group().equals(m.group(0)))
2548             failCount++;
2549 
2550         m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2551         if (m.lookingAt())
2552             failCount++;
2553 
2554         report("Looking At");
2555     }
2556 
2557     /**
2558      * A basic sanity test of Matcher.matches().
2559      */
2560     private static void matchesTest() throws Exception {
2561         // matches()
2562         Pattern p = Pattern.compile("ulb(c*)");
2563         Matcher m = p.matcher("ulbcccccc");
2564         if (!m.matches())
2565             failCount++;
2566 
2567         // find() but not matches()
2568         m.reset("zzzulbcccccc");
2569         if (m.matches())
2570             failCount++;
2571 
2572         // lookingAt() but not matches()
2573         m.reset("ulbccccccdef");
2574         if (m.matches())
2575             failCount++;
2576 
2577         // matches()
2578         p = Pattern.compile("a|ad");
2579         m = p.matcher("ad");
2580         if (!m.matches())
2581             failCount++;
2582 
2583         // Supplementary character test
2584         // matches()
2585         p = Pattern.compile(toSupplementaries("ulb(c*)"));
2586         m = p.matcher(toSupplementaries("ulbcccccc"));
2587         if (!m.matches())
2588             failCount++;
2589 
2590         // find() but not matches()
2591         m.reset(toSupplementaries("zzzulbcccccc"));
2592         if (m.matches())
2593             failCount++;
2594 
2595         // lookingAt() but not matches()
2596         m.reset(toSupplementaries("ulbccccccdef"));
2597         if (m.matches())
2598             failCount++;
2599 
2600         // matches()
2601         p = Pattern.compile(toSupplementaries("a|ad"));
2602         m = p.matcher(toSupplementaries("ad"));
2603         if (!m.matches())
2604             failCount++;
2605 
2606         report("Matches");
2607     }
2608 
2609     /**
2610      * A basic sanity test of Pattern.matches().
2611      */
2612     private static void patternMatchesTest() throws Exception {
2613         // matches()
2614         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2615                              toSupplementaries("ulbcccccc")))
2616             failCount++;
2617 
2618         // find() but not matches()
2619         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2620                             toSupplementaries("zzzulbcccccc")))
2621             failCount++;
2622 
2623         // lookingAt() but not matches()
2624         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2625                             toSupplementaries("ulbccccccdef")))
2626             failCount++;
2627 
2628         // Supplementary character test
2629         // matches()
2630         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2631                              toSupplementaries("ulbcccccc")))
2632             failCount++;
2633 
2634         // find() but not matches()
2635         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2636                             toSupplementaries("zzzulbcccccc")))
2637             failCount++;
2638 
2639         // lookingAt() but not matches()
2640         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2641                             toSupplementaries("ulbccccccdef")))
2642             failCount++;
2643 
2644         report("Pattern Matches");
2645     }
2646 
2647     /**
2648      * Canonical equivalence testing. Tests the ability of the engine
2649      * to match sequences that are not explicitly specified in the
2650      * pattern when they are considered equivalent by the Unicode Standard.
2651      */
2652     private static void ceTest() throws Exception {
2653         // Decomposed char outside char classes
2654         Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2655         Matcher m = p.matcher("test\u00e5");
2656         if (!m.matches())
2657             failCount++;
2658 
2659         m.reset("testa\u030a");
2660         if (!m.matches())
2661             failCount++;
2662 
2663         // Composed char outside char classes
2664         p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2665         m = p.matcher("test\u00e5");
2666         if (!m.matches())
2667             failCount++;
2668 
2669         m.reset("testa\u030a");
2670         if (!m.find())
2671             failCount++;
2672 
2673         // Decomposed char inside a char class
2674         p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2675         m = p.matcher("test\u00e5");
2676         if (!m.find())
2677             failCount++;
2678 
2679         m.reset("testa\u030a");
2680         if (!m.find())
2681             failCount++;
2682 
2683         // Composed char inside a char class
2684         p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2685         m = p.matcher("test\u00e5");
2686         if (!m.find())
2687             failCount++;
2688 
2689         m.reset("testa\u0300");
2690         if (!m.find())
2691             failCount++;
2692 
2693         m.reset("testa\u030a");
2694         if (!m.find())
2695             failCount++;
2696 
2697         // Marks that cannot legally change order and be equivalent
2698         p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2699         check(p, "testa\u0308\u0300", true);
2700         check(p, "testa\u0300\u0308", false);
2701 
2702         // Marks that can legally change order and be equivalent
2703         p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2704         check(p, "testa\u0308\u0323", true);
2705         check(p, "testa\u0323\u0308", true);
2706 
2707         // Test all equivalences of the sequence a\u0308\u0323\u0300
2708         p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2709         check(p, "testa\u0308\u0323\u0300", true);
2710         check(p, "testa\u0323\u0308\u0300", true);
2711         check(p, "testa\u0308\u0300\u0323", true);
2712         check(p, "test\u00e4\u0323\u0300", true);
2713         check(p, "test\u00e4\u0300\u0323", true);
2714 
2715         Object[][] data = new Object[][] {
2716 
2717         // JDK-4867170
2718         { "[\u1f80-\u1f82]", "ab\u1f80cd",             "f", true },
2719         { "[\u1f80-\u1f82]", "ab\u1f81cd",             "f", true },
2720         { "[\u1f80-\u1f82]", "ab\u1f82cd",             "f", true },
2721         { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true },
2722         { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true },
2723         { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd",       "f", true },
2724         { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd",       "f", true },
2725 
2726         { "\\p{IsGreek}",    "ab\u1f80cd",             "f", true },
2727         { "\\p{IsGreek}",    "ab\u1f81cd",             "f", true },
2728         { "\\p{IsGreek}",    "ab\u1f82cd",             "f", true },
2729         { "\\p{IsGreek}",    "ab\u03b1\u0314\u0345cd", "f", true },
2730         { "\\p{IsGreek}",    "ab\u1f01\u0345cd",       "f", true },
2731 
2732         // backtracking, force to match "\u1f80", instead of \u1f82"
2733         { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true },
2734 
2735         { "[\\p{IsGreek}]",  "\u03b1\u0314\u0345",     "m", true },
2736         { "\\p{IsGreek}",    "\u03b1\u0314\u0345",     "m", true },
2737 
2738         { "[^\u1f80-\u1f82]","\u1f81",                 "m", false },
2739         { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345",     "m", false },
2740         { "[^\u1f01\u0345]", "\u1f81",                 "f", false },
2741 
2742         { "[^\u1f81]+",      "\u1f80\u1f82",           "f", true },
2743         { "[\u1f80]",        "ab\u1f80cd",             "f", true },
2744         { "\u1f80",          "ab\u1f80cd",             "f", true },
2745         { "\u1f00\u0345\u0300",  "\u1f82", "m", true },
2746         { "\u1f80",          "-\u1f00\u0345\u0300-",   "f", true },
2747         { "\u1f82",          "\u1f00\u0345\u0300",     "m", true },
2748         { "\u1f82",          "\u1f80\u0300",           "m", true },
2749 
2750         // JDK-7080302       # compile failed
2751         { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true},
2752 
2753         // JDK-6728861, same cause as above one
2754         { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true},
2755 
2756         // JDK-6995635
2757         { "(\u00e9)", "e\u0301", "m", true },
2758 
2759         // JDK-6736245
2760         // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc
2761         { "\u2ADC", "\u2ADC", "m", true},          // NFC
2762         { "\u2ADC", "\u2ADD\u0338", "m", true},    // NFD
2763 
2764         //  4916384.
2765         // Decomposed hangul (jamos) works inside clazz
2766         { "[\u1100\u1161]", "\u1100\u1161", "m", true},
2767         { "[\u1100\u1161]", "\uac00", "m", true},
2768 
2769         { "[\uac00]", "\u1100\u1161", "m", true},
2770         { "[\uac00]", "\uac00", "m", true},
2771 
2772         // Decomposed hangul (jamos)
2773         { "\u1100\u1161", "\u1100\u1161", "m", true},
2774         { "\u1100\u1161", "\uac00", "m", true},
2775 
2776         // Composed hangul
2777         { "\uac00",  "\u1100\u1161", "m", true },
2778         { "\uac00",  "\uac00", "m", true },
2779 
2780         /* Need a NFDSlice to nfd the source to solve this issue
2781            u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f>  -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2782            u+1d1bc -> nfd: <u+1d1ba><u+1d165>           -> nfc: <u+1d1ba><u+1d165>
2783            <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2784 
2785         // Decomposed supplementary outside char classes
2786         // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true },
2787         // Composed supplementary outside char classes
2788         // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true },
2789         */
2790         { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true },
2791         { "test\ud834\uddc0",             "test\ud834\uddbc\ud834\udd6f", "m", true },
2792 
2793         { "test\ud834\uddc0",             "test\ud834\uddc0",             "m", true },
2794         { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0",             "m", true },
2795         };
2796 
2797         int failCount = 0;
2798         for (Object[] d : data) {
2799             String pn = (String)d[0];
2800             String tt = (String)d[1];
2801             boolean isFind = "f".equals(((String)d[2]));
2802             boolean expected = (boolean)d[3];
2803             boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find()
2804                                  : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches();
2805             if (ret != expected) {
2806                 failCount++;
2807                 continue;
2808             }
2809         }
2810         report("Canonical Equivalence");
2811     }
2812 
2813     /**
2814      * A basic sanity test of Matcher.replaceAll().
2815      */
2816     private static void globalSubstitute() throws Exception {
2817         // Global substitution with a literal
2818         Pattern p = Pattern.compile("(ab)(c*)");
2819         Matcher m = p.matcher("abccczzzabcczzzabccc");
2820         if (!m.replaceAll("test").equals("testzzztestzzztest"))
2821             failCount++;
2822 
2823         m.reset("zzzabccczzzabcczzzabccczzz");
2824         if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2825             failCount++;
2826 
2827         // Global substitution with groups
2828         m.reset("zzzabccczzzabcczzzabccczzz");
2829         String result = m.replaceAll("$1");
2830         if (!result.equals("zzzabzzzabzzzabzzz"))
2831             failCount++;
2832 
2833         // Supplementary character test
2834         // Global substitution with a literal
2835         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2836         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2837         if (!m.replaceAll(toSupplementaries("test")).
2838             equals(toSupplementaries("testzzztestzzztest")))
2839             failCount++;
2840 
2841         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2842         if (!m.replaceAll(toSupplementaries("test")).
2843             equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2844             failCount++;
2845 
2846         // Global substitution with groups
2847         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2848         result = m.replaceAll("$1");
2849         if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2850             failCount++;
2851 
2852         report("Global Substitution");
2853     }
2854 
2855     /**
2856      * Tests the usage of Matcher.appendReplacement() with literal
2857      * and group substitutions.
2858      */
2859     private static void stringbufferSubstitute() throws Exception {
2860         // SB substitution with literal
2861         String blah = "zzzblahzzz";
2862         Pattern p = Pattern.compile("blah");
2863         Matcher m = p.matcher(blah);
2864         StringBuffer result = new StringBuffer();
2865         try {
2866             m.appendReplacement(result, "blech");
2867             failCount++;
2868         } catch (IllegalStateException e) {
2869         }
2870         m.find();
2871         m.appendReplacement(result, "blech");
2872         if (!result.toString().equals("zzzblech"))
2873             failCount++;
2874 
2875         m.appendTail(result);
2876         if (!result.toString().equals("zzzblechzzz"))
2877             failCount++;
2878 
2879         // SB substitution with groups
2880         blah = "zzzabcdzzz";
2881         p = Pattern.compile("(ab)(cd)*");
2882         m = p.matcher(blah);
2883         result = new StringBuffer();
2884         try {
2885             m.appendReplacement(result, "$1");
2886             failCount++;
2887         } catch (IllegalStateException e) {
2888         }
2889         m.find();
2890         m.appendReplacement(result, "$1");
2891         if (!result.toString().equals("zzzab"))
2892             failCount++;
2893 
2894         m.appendTail(result);
2895         if (!result.toString().equals("zzzabzzz"))
2896             failCount++;
2897 
2898         // SB substitution with 3 groups
2899         blah = "zzzabcdcdefzzz";
2900         p = Pattern.compile("(ab)(cd)*(ef)");
2901         m = p.matcher(blah);
2902         result = new StringBuffer();
2903         try {
2904             m.appendReplacement(result, "$1w$2w$3");
2905             failCount++;
2906         } catch (IllegalStateException e) {
2907         }
2908         m.find();
2909         m.appendReplacement(result, "$1w$2w$3");
2910         if (!result.toString().equals("zzzabwcdwef"))
2911             failCount++;
2912 
2913         m.appendTail(result);
2914         if (!result.toString().equals("zzzabwcdwefzzz"))
2915             failCount++;
2916 
2917         // SB substitution with groups and three matches
2918         // skipping middle match
2919         blah = "zzzabcdzzzabcddzzzabcdzzz";
2920         p = Pattern.compile("(ab)(cd*)");
2921         m = p.matcher(blah);
2922         result = new StringBuffer();
2923         try {
2924             m.appendReplacement(result, "$1");
2925             failCount++;
2926         } catch (IllegalStateException e) {
2927         }
2928         m.find();
2929         m.appendReplacement(result, "$1");
2930         if (!result.toString().equals("zzzab"))
2931             failCount++;
2932 
2933         m.find();
2934         m.find();
2935         m.appendReplacement(result, "$2");
2936         if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2937             failCount++;
2938 
2939         m.appendTail(result);
2940         if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2941             failCount++;
2942 
2943         // Check to make sure escaped $ is ignored
2944         blah = "zzzabcdcdefzzz";
2945         p = Pattern.compile("(ab)(cd)*(ef)");
2946         m = p.matcher(blah);
2947         result = new StringBuffer();
2948         m.find();
2949         m.appendReplacement(result, "$1w\\$2w$3");
2950         if (!result.toString().equals("zzzabw$2wef"))
2951             failCount++;
2952 
2953         m.appendTail(result);
2954         if (!result.toString().equals("zzzabw$2wefzzz"))
2955             failCount++;
2956 
2957         // Check to make sure a reference to nonexistent group causes error
2958         blah = "zzzabcdcdefzzz";
2959         p = Pattern.compile("(ab)(cd)*(ef)");
2960         m = p.matcher(blah);
2961         result = new StringBuffer();
2962         m.find();
2963         try {
2964             m.appendReplacement(result, "$1w$5w$3");
2965             failCount++;
2966         } catch (IndexOutOfBoundsException ioobe) {
2967             // Correct result
2968         }
2969 
2970         // Check double digit group references
2971         blah = "zzz123456789101112zzz";
2972         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2973         m = p.matcher(blah);
2974         result = new StringBuffer();
2975         m.find();
2976         m.appendReplacement(result, "$1w$11w$3");
2977         if (!result.toString().equals("zzz1w11w3"))
2978             failCount++;
2979 
2980         // Check to make sure it backs off $15 to $1 if only three groups
2981         blah = "zzzabcdcdefzzz";
2982         p = Pattern.compile("(ab)(cd)*(ef)");
2983         m = p.matcher(blah);
2984         result = new StringBuffer();
2985         m.find();
2986         m.appendReplacement(result, "$1w$15w$3");
2987         if (!result.toString().equals("zzzabwab5wef"))
2988             failCount++;
2989 
2990 
2991         // Supplementary character test
2992         // SB substitution with literal
2993         blah = toSupplementaries("zzzblahzzz");
2994         p = Pattern.compile(toSupplementaries("blah"));
2995         m = p.matcher(blah);
2996         result = new StringBuffer();
2997         try {
2998             m.appendReplacement(result, toSupplementaries("blech"));
2999             failCount++;
3000         } catch (IllegalStateException e) {
3001         }
3002         m.find();
3003         m.appendReplacement(result, toSupplementaries("blech"));
3004         if (!result.toString().equals(toSupplementaries("zzzblech")))
3005             failCount++;
3006 
3007         m.appendTail(result);
3008         if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
3009             failCount++;
3010 
3011         // SB substitution with groups
3012         blah = toSupplementaries("zzzabcdzzz");
3013         p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
3014         m = p.matcher(blah);
3015         result = new StringBuffer();
3016         try {
3017             m.appendReplacement(result, "$1");
3018             failCount++;
3019         } catch (IllegalStateException e) {
3020         }
3021         m.find();
3022         m.appendReplacement(result, "$1");
3023         if (!result.toString().equals(toSupplementaries("zzzab")))
3024             failCount++;
3025 
3026         m.appendTail(result);
3027         if (!result.toString().equals(toSupplementaries("zzzabzzz")))
3028             failCount++;
3029 
3030         // SB substitution with 3 groups
3031         blah = toSupplementaries("zzzabcdcdefzzz");
3032         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3033         m = p.matcher(blah);
3034         result = new StringBuffer();
3035         try {
3036             m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3037             failCount++;
3038         } catch (IllegalStateException e) {
3039         }
3040         m.find();
3041         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3042         if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
3043             failCount++;
3044 
3045         m.appendTail(result);
3046         if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
3047             failCount++;
3048 
3049         // SB substitution with groups and three matches
3050         // skipping middle match
3051         blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
3052         p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
3053         m = p.matcher(blah);
3054         result = new StringBuffer();
3055         try {
3056             m.appendReplacement(result, "$1");
3057             failCount++;
3058         } catch (IllegalStateException e) {
3059         }
3060         m.find();
3061         m.appendReplacement(result, "$1");
3062         if (!result.toString().equals(toSupplementaries("zzzab")))
3063             failCount++;
3064 
3065         m.find();
3066         m.find();
3067         m.appendReplacement(result, "$2");
3068         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
3069             failCount++;
3070 
3071         m.appendTail(result);
3072         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
3073             failCount++;
3074 
3075         // Check to make sure escaped $ is ignored
3076         blah = toSupplementaries("zzzabcdcdefzzz");
3077         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3078         m = p.matcher(blah);
3079         result = new StringBuffer();
3080         m.find();
3081         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
3082         if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
3083             failCount++;
3084 
3085         m.appendTail(result);
3086         if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
3087             failCount++;
3088 
3089         // Check to make sure a reference to nonexistent group causes error
3090         blah = toSupplementaries("zzzabcdcdefzzz");
3091         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3092         m = p.matcher(blah);
3093         result = new StringBuffer();
3094         m.find();
3095         try {
3096             m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
3097             failCount++;
3098         } catch (IndexOutOfBoundsException ioobe) {
3099             // Correct result
3100         }
3101 
3102         // Check double digit group references
3103         blah = toSupplementaries("zzz123456789101112zzz");
3104         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3105         m = p.matcher(blah);
3106         result = new StringBuffer();
3107         m.find();
3108         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
3109         if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
3110             failCount++;
3111 
3112         // Check to make sure it backs off $15 to $1 if only three groups
3113         blah = toSupplementaries("zzzabcdcdefzzz");
3114         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3115         m = p.matcher(blah);
3116         result = new StringBuffer();
3117         m.find();
3118         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3119         if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
3120             failCount++;
3121 
3122         // Check nothing has been appended into the output buffer if
3123         // the replacement string triggers IllegalArgumentException.
3124         p = Pattern.compile("(abc)");
3125         m = p.matcher("abcd");
3126         result = new StringBuffer();
3127         m.find();
3128         try {
3129             m.appendReplacement(result, ("xyz$g"));
3130             failCount++;
3131         } catch (IllegalArgumentException iae) {
3132             if (result.length() != 0)
3133                 failCount++;
3134         }
3135 
3136         report("SB Substitution");
3137     }
3138 
3139     /**
3140      * Tests the usage of Matcher.appendReplacement() with literal
3141      * and group substitutions.
3142      */
3143     private static void stringbuilderSubstitute() throws Exception {
3144         // SB substitution with literal
3145         String blah = "zzzblahzzz";
3146         Pattern p = Pattern.compile("blah");
3147         Matcher m = p.matcher(blah);
3148         StringBuilder result = new StringBuilder();
3149         try {
3150             m.appendReplacement(result, "blech");
3151             failCount++;
3152         } catch (IllegalStateException e) {
3153         }
3154         m.find();
3155         m.appendReplacement(result, "blech");
3156         if (!result.toString().equals("zzzblech"))
3157             failCount++;
3158 
3159         m.appendTail(result);
3160         if (!result.toString().equals("zzzblechzzz"))
3161             failCount++;
3162 
3163         // SB substitution with groups
3164         blah = "zzzabcdzzz";
3165         p = Pattern.compile("(ab)(cd)*");
3166         m = p.matcher(blah);
3167         result = new StringBuilder();
3168         try {
3169             m.appendReplacement(result, "$1");
3170             failCount++;
3171         } catch (IllegalStateException e) {
3172         }
3173         m.find();
3174         m.appendReplacement(result, "$1");
3175         if (!result.toString().equals("zzzab"))
3176             failCount++;
3177 
3178         m.appendTail(result);
3179         if (!result.toString().equals("zzzabzzz"))
3180             failCount++;
3181 
3182         // SB substitution with 3 groups
3183         blah = "zzzabcdcdefzzz";
3184         p = Pattern.compile("(ab)(cd)*(ef)");
3185         m = p.matcher(blah);
3186         result = new StringBuilder();
3187         try {
3188             m.appendReplacement(result, "$1w$2w$3");
3189             failCount++;
3190         } catch (IllegalStateException e) {
3191         }
3192         m.find();
3193         m.appendReplacement(result, "$1w$2w$3");
3194         if (!result.toString().equals("zzzabwcdwef"))
3195             failCount++;
3196 
3197         m.appendTail(result);
3198         if (!result.toString().equals("zzzabwcdwefzzz"))
3199             failCount++;
3200 
3201         // SB substitution with groups and three matches
3202         // skipping middle match
3203         blah = "zzzabcdzzzabcddzzzabcdzzz";
3204         p = Pattern.compile("(ab)(cd*)");
3205         m = p.matcher(blah);
3206         result = new StringBuilder();
3207         try {
3208             m.appendReplacement(result, "$1");
3209             failCount++;
3210         } catch (IllegalStateException e) {
3211         }
3212         m.find();
3213         m.appendReplacement(result, "$1");
3214         if (!result.toString().equals("zzzab"))
3215             failCount++;
3216 
3217         m.find();
3218         m.find();
3219         m.appendReplacement(result, "$2");
3220         if (!result.toString().equals("zzzabzzzabcddzzzcd"))
3221             failCount++;
3222 
3223         m.appendTail(result);
3224         if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
3225             failCount++;
3226 
3227         // Check to make sure escaped $ is ignored
3228         blah = "zzzabcdcdefzzz";
3229         p = Pattern.compile("(ab)(cd)*(ef)");
3230         m = p.matcher(blah);
3231         result = new StringBuilder();
3232         m.find();
3233         m.appendReplacement(result, "$1w\\$2w$3");
3234         if (!result.toString().equals("zzzabw$2wef"))
3235             failCount++;
3236 
3237         m.appendTail(result);
3238         if (!result.toString().equals("zzzabw$2wefzzz"))
3239             failCount++;
3240 
3241         // Check to make sure a reference to nonexistent group causes error
3242         blah = "zzzabcdcdefzzz";
3243         p = Pattern.compile("(ab)(cd)*(ef)");
3244         m = p.matcher(blah);
3245         result = new StringBuilder();
3246         m.find();
3247         try {
3248             m.appendReplacement(result, "$1w$5w$3");
3249             failCount++;
3250         } catch (IndexOutOfBoundsException ioobe) {
3251             // Correct result
3252         }
3253 
3254         // Check double digit group references
3255         blah = "zzz123456789101112zzz";
3256         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3257         m = p.matcher(blah);
3258         result = new StringBuilder();
3259         m.find();
3260         m.appendReplacement(result, "$1w$11w$3");
3261         if (!result.toString().equals("zzz1w11w3"))
3262             failCount++;
3263 
3264         // Check to make sure it backs off $15 to $1 if only three groups
3265         blah = "zzzabcdcdefzzz";
3266         p = Pattern.compile("(ab)(cd)*(ef)");
3267         m = p.matcher(blah);
3268         result = new StringBuilder();
3269         m.find();
3270         m.appendReplacement(result, "$1w$15w$3");
3271         if (!result.toString().equals("zzzabwab5wef"))
3272             failCount++;
3273 
3274 
3275         // Supplementary character test
3276         // SB substitution with literal
3277         blah = toSupplementaries("zzzblahzzz");
3278         p = Pattern.compile(toSupplementaries("blah"));
3279         m = p.matcher(blah);
3280         result = new StringBuilder();
3281         try {
3282             m.appendReplacement(result, toSupplementaries("blech"));
3283             failCount++;
3284         } catch (IllegalStateException e) {
3285         }
3286         m.find();
3287         m.appendReplacement(result, toSupplementaries("blech"));
3288         if (!result.toString().equals(toSupplementaries("zzzblech")))
3289             failCount++;
3290         m.appendTail(result);
3291         if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
3292             failCount++;
3293 
3294         // SB substitution with groups
3295         blah = toSupplementaries("zzzabcdzzz");
3296         p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
3297         m = p.matcher(blah);
3298         result = new StringBuilder();
3299         try {
3300             m.appendReplacement(result, "$1");
3301             failCount++;
3302         } catch (IllegalStateException e) {
3303         }
3304         m.find();
3305         m.appendReplacement(result, "$1");
3306         if (!result.toString().equals(toSupplementaries("zzzab")))
3307             failCount++;
3308 
3309         m.appendTail(result);
3310         if (!result.toString().equals(toSupplementaries("zzzabzzz")))
3311             failCount++;
3312 
3313         // SB substitution with 3 groups
3314         blah = toSupplementaries("zzzabcdcdefzzz");
3315         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3316         m = p.matcher(blah);
3317         result = new StringBuilder();
3318         try {
3319             m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3320             failCount++;
3321         } catch (IllegalStateException e) {
3322         }
3323         m.find();
3324         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3325         if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
3326             failCount++;
3327 
3328         m.appendTail(result);
3329         if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
3330             failCount++;
3331 
3332         // SB substitution with groups and three matches
3333         // skipping middle match
3334         blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
3335         p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
3336         m = p.matcher(blah);
3337         result = new StringBuilder();
3338         try {
3339             m.appendReplacement(result, "$1");
3340             failCount++;
3341         } catch (IllegalStateException e) {
3342         }
3343         m.find();
3344         m.appendReplacement(result, "$1");
3345         if (!result.toString().equals(toSupplementaries("zzzab")))
3346             failCount++;
3347 
3348         m.find();
3349         m.find();
3350         m.appendReplacement(result, "$2");
3351         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
3352             failCount++;
3353 
3354         m.appendTail(result);
3355         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
3356             failCount++;
3357 
3358         // Check to make sure escaped $ is ignored
3359         blah = toSupplementaries("zzzabcdcdefzzz");
3360         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3361         m = p.matcher(blah);
3362         result = new StringBuilder();
3363         m.find();
3364         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
3365         if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
3366             failCount++;
3367 
3368         m.appendTail(result);
3369         if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
3370             failCount++;
3371 
3372         // Check to make sure a reference to nonexistent group causes error
3373         blah = toSupplementaries("zzzabcdcdefzzz");
3374         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3375         m = p.matcher(blah);
3376         result = new StringBuilder();
3377         m.find();
3378         try {
3379             m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
3380             failCount++;
3381         } catch (IndexOutOfBoundsException ioobe) {
3382             // Correct result
3383         }
3384         // Check double digit group references
3385         blah = toSupplementaries("zzz123456789101112zzz");
3386         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3387         m = p.matcher(blah);
3388         result = new StringBuilder();
3389         m.find();
3390         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
3391         if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
3392             failCount++;
3393 
3394         // Check to make sure it backs off $15 to $1 if only three groups
3395         blah = toSupplementaries("zzzabcdcdefzzz");
3396         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3397         m = p.matcher(blah);
3398         result = new StringBuilder();
3399         m.find();
3400         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3401         if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
3402             failCount++;
3403         // Check nothing has been appended into the output buffer if
3404         // the replacement string triggers IllegalArgumentException.
3405         p = Pattern.compile("(abc)");
3406         m = p.matcher("abcd");
3407         result = new StringBuilder();
3408         m.find();
3409         try {
3410             m.appendReplacement(result, ("xyz$g"));
3411             failCount++;
3412         } catch (IllegalArgumentException iae) {
3413             if (result.length() != 0)
3414                 failCount++;
3415         }
3416         report("SB Substitution 2");
3417     }
3418 
3419     /*
3420      * 5 groups of characters are created to make a substitution string.
3421      * A base string will be created including random lead chars, the
3422      * substitution string, and random trailing chars.
3423      * A pattern containing the 5 groups is searched for and replaced with:
3424      * random group + random string + random group.
3425      * The results are checked for correctness.
3426      */
3427     private static void substitutionBasher() {
3428         for (int runs = 0; runs<1000; runs++) {
3429             // Create a base string to work in
3430             int leadingChars = generator.nextInt(10);
3431             StringBuffer baseBuffer = new StringBuffer(100);
3432             String leadingString = getRandomAlphaString(leadingChars);
3433             baseBuffer.append(leadingString);
3434 
3435             // Create 5 groups of random number of random chars
3436             // Create the string to substitute
3437             // Create the pattern string to search for
3438             StringBuffer bufferToSub = new StringBuffer(25);
3439             StringBuffer bufferToPat = new StringBuffer(50);
3440             String[] groups = new String[5];
3441             for(int i=0; i<5; i++) {
3442                 int aGroupSize = generator.nextInt(5)+1;
3443                 groups[i] = getRandomAlphaString(aGroupSize);
3444                 bufferToSub.append(groups[i]);
3445                 bufferToPat.append('(');
3446                 bufferToPat.append(groups[i]);
3447                 bufferToPat.append(')');
3448             }
3449             String stringToSub = bufferToSub.toString();
3450             String pattern = bufferToPat.toString();
3451 
3452             // Place sub string into working string at random index
3453             baseBuffer.append(stringToSub);
3454 
3455             // Append random chars to end
3456             int trailingChars = generator.nextInt(10);
3457             String trailingString = getRandomAlphaString(trailingChars);
3458             baseBuffer.append(trailingString);
3459             String baseString = baseBuffer.toString();
3460 
3461             // Create test pattern and matcher
3462             Pattern p = Pattern.compile(pattern);
3463             Matcher m = p.matcher(baseString);
3464 
3465             // Reject candidate if pattern happens to start early
3466             m.find();
3467             if (m.start() < leadingChars)
3468                 continue;
3469 
3470             // Reject candidate if more than one match
3471             if (m.find())
3472                 continue;
3473 
3474             // Construct a replacement string with :
3475             // random group + random string + random group
3476             StringBuffer bufferToRep = new StringBuffer();
3477             int groupIndex1 = generator.nextInt(5);
3478             bufferToRep.append("$" + (groupIndex1 + 1));
3479             String randomMidString = getRandomAlphaString(5);
3480             bufferToRep.append(randomMidString);
3481             int groupIndex2 = generator.nextInt(5);
3482             bufferToRep.append("$" + (groupIndex2 + 1));
3483             String replacement = bufferToRep.toString();
3484 
3485             // Do the replacement
3486             String result = m.replaceAll(replacement);
3487 
3488             // Construct expected result
3489             StringBuffer bufferToRes = new StringBuffer();
3490             bufferToRes.append(leadingString);
3491             bufferToRes.append(groups[groupIndex1]);
3492             bufferToRes.append(randomMidString);
3493             bufferToRes.append(groups[groupIndex2]);
3494             bufferToRes.append(trailingString);
3495             String expectedResult = bufferToRes.toString();
3496 
3497             // Check results
3498             if (!result.equals(expectedResult))
3499                 failCount++;
3500         }
3501 
3502         report("Substitution Basher");
3503     }
3504 
3505     /*
3506      * 5 groups of characters are created to make a substitution string.
3507      * A base string will be created including random lead chars, the
3508      * substitution string, and random trailing chars.
3509      * A pattern containing the 5 groups is searched for and replaced with:
3510      * random group + random string + random group.
3511      * The results are checked for correctness.
3512      */
3513     private static void substitutionBasher2() {
3514         for (int runs = 0; runs<1000; runs++) {
3515             // Create a base string to work in
3516             int leadingChars = generator.nextInt(10);
3517             StringBuilder baseBuffer = new StringBuilder(100);
3518             String leadingString = getRandomAlphaString(leadingChars);
3519             baseBuffer.append(leadingString);
3520 
3521             // Create 5 groups of random number of random chars
3522             // Create the string to substitute
3523             // Create the pattern string to search for
3524             StringBuilder bufferToSub = new StringBuilder(25);
3525             StringBuilder bufferToPat = new StringBuilder(50);
3526             String[] groups = new String[5];
3527             for(int i=0; i<5; i++) {
3528                 int aGroupSize = generator.nextInt(5)+1;
3529                 groups[i] = getRandomAlphaString(aGroupSize);
3530                 bufferToSub.append(groups[i]);
3531                 bufferToPat.append('(');
3532                 bufferToPat.append(groups[i]);
3533                 bufferToPat.append(')');
3534             }
3535             String stringToSub = bufferToSub.toString();
3536             String pattern = bufferToPat.toString();
3537 
3538             // Place sub string into working string at random index
3539             baseBuffer.append(stringToSub);
3540 
3541             // Append random chars to end
3542             int trailingChars = generator.nextInt(10);
3543             String trailingString = getRandomAlphaString(trailingChars);
3544             baseBuffer.append(trailingString);
3545             String baseString = baseBuffer.toString();
3546 
3547             // Create test pattern and matcher
3548             Pattern p = Pattern.compile(pattern);
3549             Matcher m = p.matcher(baseString);
3550 
3551             // Reject candidate if pattern happens to start early
3552             m.find();
3553             if (m.start() < leadingChars)
3554                 continue;
3555 
3556             // Reject candidate if more than one match
3557             if (m.find())
3558                 continue;
3559 
3560             // Construct a replacement string with :
3561             // random group + random string + random group
3562             StringBuilder bufferToRep = new StringBuilder();
3563             int groupIndex1 = generator.nextInt(5);
3564             bufferToRep.append("$" + (groupIndex1 + 1));
3565             String randomMidString = getRandomAlphaString(5);
3566             bufferToRep.append(randomMidString);
3567             int groupIndex2 = generator.nextInt(5);
3568             bufferToRep.append("$" + (groupIndex2 + 1));
3569             String replacement = bufferToRep.toString();
3570 
3571             // Do the replacement
3572             String result = m.replaceAll(replacement);
3573 
3574             // Construct expected result
3575             StringBuilder bufferToRes = new StringBuilder();
3576             bufferToRes.append(leadingString);
3577             bufferToRes.append(groups[groupIndex1]);
3578             bufferToRes.append(randomMidString);
3579             bufferToRes.append(groups[groupIndex2]);
3580             bufferToRes.append(trailingString);
3581             String expectedResult = bufferToRes.toString();
3582 
3583             // Check results
3584             if (!result.equals(expectedResult)) {
3585                 failCount++;
3586             }
3587         }
3588 
3589         report("Substitution Basher 2");
3590     }
3591 
3592     /**
3593      * Checks the handling of some escape sequences that the Pattern
3594      * class should process instead of the java compiler. These are
3595      * not in the file because the escapes should be be processed
3596      * by the Pattern class when the regex is compiled.
3597      */
3598     private static void escapes() throws Exception {
3599         Pattern p = Pattern.compile("\\043");
3600         Matcher m = p.matcher("#");
3601         if (!m.find())
3602             failCount++;
3603 
3604         p = Pattern.compile("\\x23");
3605         m = p.matcher("#");
3606         if (!m.find())
3607             failCount++;
3608 
3609         p = Pattern.compile("\\u0023");
3610         m = p.matcher("#");
3611         if (!m.find())
3612             failCount++;
3613 
3614         report("Escape sequences");
3615     }
3616 
3617     /**
3618      * Checks the handling of blank input situations. These
3619      * tests are incompatible with my test file format.
3620      */
3621     private static void blankInput() throws Exception {
3622         Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3623         Matcher m = p.matcher("");
3624         if (m.find())
3625             failCount++;
3626 
3627         p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3628         m = p.matcher("");
3629         if (!m.find())
3630             failCount++;
3631 
3632         p = Pattern.compile("abc");
3633         m = p.matcher("");
3634         if (m.find())
3635             failCount++;
3636 
3637         p = Pattern.compile("a*");
3638         m = p.matcher("");
3639         if (!m.find())
3640             failCount++;
3641 
3642         report("Blank input");
3643     }
3644 
3645     /**
3646      * Tests the Boyer-Moore pattern matching of a character sequence
3647      * on randomly generated patterns.
3648      */
3649     private static void bm() throws Exception {
3650         doBnM('a');
3651         report("Boyer Moore (ASCII)");
3652 
3653         doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3654         report("Boyer Moore (Supplementary)");
3655     }
3656 
3657     private static void doBnM(int baseCharacter) throws Exception {
3658         int achar=0;
3659 
3660         for (int i=0; i<100; i++) {
3661             // Create a short pattern to search for
3662             int patternLength = generator.nextInt(7) + 4;
3663             StringBuffer patternBuffer = new StringBuffer(patternLength);
3664             String pattern;
3665             retry: for (;;) {
3666                 for (int x=0; x<patternLength; x++) {
3667                     int ch = baseCharacter + generator.nextInt(26);
3668                     if (Character.isSupplementaryCodePoint(ch)) {
3669                         patternBuffer.append(Character.toChars(ch));
3670                     } else {
3671                         patternBuffer.append((char)ch);
3672                     }
3673                 }
3674                 pattern = patternBuffer.toString();
3675 
3676                 // Avoid patterns that start and end with the same substring
3677                 // See JDK-6854417
3678                 for (int x=1; x < pattern.length(); x++) {
3679                     if (pattern.startsWith(pattern.substring(x)))
3680                         continue retry;
3681                 }
3682                 break;
3683             }
3684             Pattern p = Pattern.compile(pattern);
3685 
3686             // Create a buffer with random ASCII chars that does
3687             // not match the sample
3688             String toSearch = null;
3689             StringBuffer s = null;
3690             Matcher m = p.matcher("");
3691             do {
3692                 s = new StringBuffer(100);
3693                 for (int x=0; x<100; x++) {
3694                     int ch = baseCharacter + generator.nextInt(26);
3695                     if (Character.isSupplementaryCodePoint(ch)) {
3696                         s.append(Character.toChars(ch));
3697                     } else {
3698                         s.append((char)ch);
3699                     }
3700                 }
3701                 toSearch = s.toString();
3702                 m.reset(toSearch);
3703             } while (m.find());
3704 
3705             // Insert the pattern at a random spot
3706             int insertIndex = generator.nextInt(99);
3707             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3708                 insertIndex++;
3709             s = s.insert(insertIndex, pattern);
3710             toSearch = s.toString();
3711 
3712             // Make sure that the pattern is found
3713             m.reset(toSearch);
3714             if (!m.find())
3715                 failCount++;
3716 
3717             // Make sure that the match text is the pattern
3718             if (!m.group().equals(pattern))
3719                 failCount++;
3720 
3721             // Make sure match occured at insertion point
3722             if (m.start() != insertIndex)
3723                 failCount++;
3724         }
3725     }
3726 
3727     /**
3728      * Tests the matching of slices on randomly generated patterns.
3729      * The Boyer-Moore optimization is not done on these patterns
3730      * because it uses unicode case folding.
3731      */
3732     private static void slice() throws Exception {
3733         doSlice(Character.MAX_VALUE);
3734         report("Slice");
3735 
3736         doSlice(Character.MAX_CODE_POINT);
3737         report("Slice (Supplementary)");
3738     }
3739 
3740     private static void doSlice(int maxCharacter) throws Exception {
3741         Random generator = new Random();
3742         int achar=0;
3743 
3744         for (int i=0; i<100; i++) {
3745             // Create a short pattern to search for
3746             int patternLength = generator.nextInt(7) + 4;
3747             StringBuffer patternBuffer = new StringBuffer(patternLength);
3748             for (int x=0; x<patternLength; x++) {
3749                 int randomChar = 0;
3750                 while (!Character.isLetterOrDigit(randomChar))
3751                     randomChar = generator.nextInt(maxCharacter);
3752                 if (Character.isSupplementaryCodePoint(randomChar)) {
3753                     patternBuffer.append(Character.toChars(randomChar));
3754                 } else {
3755                     patternBuffer.append((char) randomChar);
3756                 }
3757             }
3758             String pattern =  patternBuffer.toString();
3759             Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3760 
3761             // Create a buffer with random chars that does not match the sample
3762             String toSearch = null;
3763             StringBuffer s = null;
3764             Matcher m = p.matcher("");
3765             do {
3766                 s = new StringBuffer(100);
3767                 for (int x=0; x<100; x++) {
3768                     int randomChar = 0;
3769                     while (!Character.isLetterOrDigit(randomChar))
3770                         randomChar = generator.nextInt(maxCharacter);
3771                     if (Character.isSupplementaryCodePoint(randomChar)) {
3772                         s.append(Character.toChars(randomChar));
3773                     } else {
3774                         s.append((char) randomChar);
3775                     }
3776                 }
3777                 toSearch = s.toString();
3778                 m.reset(toSearch);
3779             } while (m.find());
3780 
3781             // Insert the pattern at a random spot
3782             int insertIndex = generator.nextInt(99);
3783             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3784                 insertIndex++;
3785             s = s.insert(insertIndex, pattern);
3786             toSearch = s.toString();
3787 
3788             // Make sure that the pattern is found
3789             m.reset(toSearch);
3790             if (!m.find())
3791                 failCount++;
3792 
3793             // Make sure that the match text is the pattern
3794             if (!m.group().equals(pattern))
3795                 failCount++;
3796 
3797             // Make sure match occured at insertion point
3798             if (m.start() != insertIndex)
3799                 failCount++;
3800         }
3801     }
3802 
3803     private static void explainFailure(String pattern, String data,
3804                                        String expected, String actual) {
3805         System.err.println("----------------------------------------");
3806         System.err.println("Pattern = "+pattern);
3807         System.err.println("Data = "+data);
3808         System.err.println("Expected = " + expected);
3809         System.err.println("Actual   = " + actual);
3810     }
3811 
3812     private static void explainFailure(String pattern, String data,
3813                                        Throwable t) {
3814         System.err.println("----------------------------------------");
3815         System.err.println("Pattern = "+pattern);
3816         System.err.println("Data = "+data);
3817         t.printStackTrace(System.err);
3818     }
3819 
3820     // Testing examples from a file
3821 
3822     /**
3823      * Goes through the file "TestCases.txt" and creates many patterns
3824      * described in the file, matching the patterns against input lines in
3825      * the file, and comparing the results against the correct results
3826      * also found in the file. The file format is described in comments
3827      * at the head of the file.
3828      */
3829     private static void processFile(String fileName) throws Exception {
3830         File testCases = new File(System.getProperty("test.src", "."),
3831                                   fileName);
3832         FileInputStream in = new FileInputStream(testCases);
3833         BufferedReader r = new BufferedReader(new InputStreamReader(in));
3834 
3835         // Process next test case.
3836         String aLine;
3837         while((aLine = r.readLine()) != null) {
3838             // Read a line for pattern
3839             String patternString = grabLine(r);
3840             Pattern p = null;
3841             try {
3842                 p = compileTestPattern(patternString);
3843             } catch (PatternSyntaxException e) {
3844                 String dataString = grabLine(r);
3845                 String expectedResult = grabLine(r);
3846                 if (expectedResult.startsWith("error"))
3847                     continue;
3848                 explainFailure(patternString, dataString, e);
3849                 failCount++;
3850                 continue;
3851             }
3852 
3853             // Read a line for input string
3854             String dataString = grabLine(r);
3855             Matcher m = p.matcher(dataString);
3856             StringBuffer result = new StringBuffer();
3857 
3858             // Check for IllegalStateExceptions before a match
3859             failCount += preMatchInvariants(m);
3860 
3861             boolean found = m.find();
3862 
3863             if (found)
3864                 failCount += postTrueMatchInvariants(m);
3865             else
3866                 failCount += postFalseMatchInvariants(m);
3867 
3868             if (found) {
3869                 result.append("true ");
3870                 result.append(m.group(0) + " ");
3871             } else {
3872                 result.append("false ");
3873             }
3874 
3875             result.append(m.groupCount());
3876 
3877             if (found) {
3878                 for (int i=1; i<m.groupCount()+1; i++)
3879                     if (m.group(i) != null)
3880                         result.append(" " +m.group(i));
3881             }
3882 
3883             // Read a line for the expected result
3884             String expectedResult = grabLine(r);
3885 
3886             if (!result.toString().equals(expectedResult)) {
3887                 explainFailure(patternString, dataString, expectedResult, result.toString());
3888                 failCount++;
3889             }
3890         }
3891 
3892         report(fileName);
3893     }
3894 
3895     private static int preMatchInvariants(Matcher m) {
3896         int failCount = 0;
3897         try {
3898             m.start();
3899             failCount++;
3900         } catch (IllegalStateException ise) {}
3901         try {
3902             m.end();
3903             failCount++;
3904         } catch (IllegalStateException ise) {}
3905         try {
3906             m.group();
3907             failCount++;
3908         } catch (IllegalStateException ise) {}
3909         return failCount;
3910     }
3911 
3912     private static int postFalseMatchInvariants(Matcher m) {
3913         int failCount = 0;
3914         try {
3915             m.group();
3916             failCount++;
3917         } catch (IllegalStateException ise) {}
3918         try {
3919             m.start();
3920             failCount++;
3921         } catch (IllegalStateException ise) {}
3922         try {
3923             m.end();
3924             failCount++;
3925         } catch (IllegalStateException ise) {}
3926         return failCount;
3927     }
3928 
3929     private static int postTrueMatchInvariants(Matcher m) {
3930         int failCount = 0;
3931         //assert(m.start() = m.start(0);
3932         if (m.start() != m.start(0))
3933             failCount++;
3934         //assert(m.end() = m.end(0);
3935         if (m.start() != m.start(0))
3936             failCount++;
3937         //assert(m.group() = m.group(0);
3938         if (!m.group().equals(m.group(0)))
3939             failCount++;
3940         try {
3941             m.group(50);
3942             failCount++;
3943         } catch (IndexOutOfBoundsException ise) {}
3944 
3945         return failCount;
3946     }
3947 
3948     private static Pattern compileTestPattern(String patternString) {
3949         if (!patternString.startsWith("'")) {
3950             return Pattern.compile(patternString);
3951         }
3952         int break1 = patternString.lastIndexOf("'");
3953         String flagString = patternString.substring(
3954                                           break1+1, patternString.length());
3955         patternString = patternString.substring(1, break1);
3956 
3957         if (flagString.equals("i"))
3958             return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3959 
3960         if (flagString.equals("m"))
3961             return Pattern.compile(patternString, Pattern.MULTILINE);
3962 
3963         return Pattern.compile(patternString);
3964     }
3965 
3966     /**
3967      * Reads a line from the input file. Keeps reading lines until a non
3968      * empty non comment line is read. If the line contains a \n then
3969      * these two characters are replaced by a newline char. If a \\uxxxx
3970      * sequence is read then the sequence is replaced by the unicode char.
3971      */
3972     private static String grabLine(BufferedReader r) throws Exception {
3973         int index = 0;
3974         String line = r.readLine();
3975         while (line.startsWith("//") || line.length() < 1)
3976             line = r.readLine();
3977         while ((index = line.indexOf("\\n")) != -1) {
3978             StringBuffer temp = new StringBuffer(line);
3979             temp.replace(index, index+2, "\n");
3980             line = temp.toString();
3981         }
3982         while ((index = line.indexOf("\\u")) != -1) {
3983             StringBuffer temp = new StringBuffer(line);
3984             String value = temp.substring(index+2, index+6);
3985             char aChar = (char)Integer.parseInt(value, 16);
3986             String unicodeChar = "" + aChar;
3987             temp.replace(index, index+6, unicodeChar);
3988             line = temp.toString();
3989         }
3990 
3991         return line;
3992     }
3993 
3994     private static void check(Pattern p, String s, String g, String expected) {
3995         Matcher m = p.matcher(s);
3996         m.find();
3997         if (!m.group(g).equals(expected) ||
3998             s.charAt(m.start(g)) != expected.charAt(0) ||
3999             s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1))
4000             failCount++;
4001     }
4002 
4003     private static void checkReplaceFirst(String p, String s, String r, String expected)
4004     {
4005         if (!expected.equals(Pattern.compile(p)
4006                                     .matcher(s)
4007                                     .replaceFirst(r)))
4008             failCount++;
4009     }
4010 
4011     private static void checkReplaceAll(String p, String s, String r, String expected)
4012     {
4013         if (!expected.equals(Pattern.compile(p)
4014                                     .matcher(s)
4015                                     .replaceAll(r)))
4016             failCount++;
4017     }
4018 
4019     private static void checkExpectedFail(String p) {
4020         try {
4021             Pattern.compile(p);
4022         } catch (PatternSyntaxException pse) {
4023             //pse.printStackTrace();
4024             return;
4025         }
4026         failCount++;
4027     }
4028 
4029     private static void checkExpectedIAE(Matcher m, String g) {
4030         m.find();
4031         try {
4032             m.group(g);
4033         } catch (IllegalArgumentException x) {
4034             //iae.printStackTrace();
4035             try {
4036                 m.start(g);
4037             } catch (IllegalArgumentException xx) {
4038                 try {
4039                     m.start(g);
4040                 } catch (IllegalArgumentException xxx) {
4041                     return;
4042                 }
4043             }
4044         }
4045         failCount++;
4046     }
4047 
4048     private static void checkExpectedNPE(Matcher m) {
4049         m.find();
4050         try {
4051             m.group(null);
4052         } catch (NullPointerException x) {
4053             try {
4054                 m.start(null);
4055             } catch (NullPointerException xx) {
4056                 try {
4057                     m.end(null);
4058                 } catch (NullPointerException xxx) {
4059                     return;
4060                 }
4061             }
4062         }
4063         failCount++;
4064     }
4065 
4066     private static void namedGroupCaptureTest() throws Exception {
4067         check(Pattern.compile("x+(?<gname>y+)z+"),
4068               "xxxyyyzzz",
4069               "gname",
4070               "yyy");
4071 
4072         check(Pattern.compile("x+(?<gname8>y+)z+"),
4073               "xxxyyyzzz",
4074               "gname8",
4075               "yyy");
4076 
4077         //backref
4078         Pattern pattern = Pattern.compile("(a*)bc\\1");
4079         check(pattern, "zzzaabcazzz", true);  // found "abca"
4080 
4081         check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
4082               "zzzaabcaazzz", true);
4083 
4084         check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
4085               "abcdefabc", true);
4086 
4087         check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
4088               "abcdefghijkk", true);
4089 
4090         // Supplementary character tests
4091         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
4092               toSupplementaries("zzzaabcazzz"), true);
4093 
4094         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
4095               toSupplementaries("zzzaabcaazzz"), true);
4096 
4097         check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
4098               toSupplementaries("abcdefabc"), true);
4099 
4100         check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
4101                               "(?<gname>" +
4102                               toSupplementaries("k)") + "\\k<gname>"),
4103               toSupplementaries("abcdefghijkk"), true);
4104 
4105         check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
4106               "xxxyyyzzzyyy",
4107               "gname",
4108               "yyy");
4109 
4110         //replaceFirst/All
4111         checkReplaceFirst("(?<gn>ab)(c*)",
4112                           "abccczzzabcczzzabccc",
4113                           "${gn}",
4114                           "abzzzabcczzzabccc");
4115 
4116         checkReplaceAll("(?<gn>ab)(c*)",
4117                         "abccczzzabcczzzabccc",
4118                         "${gn}",
4119                         "abzzzabzzzab");
4120 
4121 
4122         checkReplaceFirst("(?<gn>ab)(c*)",
4123                           "zzzabccczzzabcczzzabccczzz",
4124                           "${gn}",
4125                           "zzzabzzzabcczzzabccczzz");
4126 
4127         checkReplaceAll("(?<gn>ab)(c*)",
4128                         "zzzabccczzzabcczzzabccczzz",
4129                         "${gn}",
4130                         "zzzabzzzabzzzabzzz");
4131 
4132         checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
4133                           "zzzabccczzzabcczzzabccczzz",
4134                           "${gn2}",
4135                           "zzzccczzzabcczzzabccczzz");
4136 
4137         checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
4138                         "zzzabccczzzabcczzzabccczzz",
4139                         "${gn2}",
4140                         "zzzccczzzcczzzccczzz");
4141 
4142         //toSupplementaries("(ab)(c*)"));
4143         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4144                            ")(?<gn2>" + toSupplementaries("c") + "*)",
4145                           toSupplementaries("abccczzzabcczzzabccc"),
4146                           "${gn1}",
4147                           toSupplementaries("abzzzabcczzzabccc"));
4148 
4149 
4150         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4151                         ")(?<gn2>" + toSupplementaries("c") + "*)",
4152                         toSupplementaries("abccczzzabcczzzabccc"),
4153                         "${gn1}",
4154                         toSupplementaries("abzzzabzzzab"));
4155 
4156         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4157                            ")(?<gn2>" + toSupplementaries("c") + "*)",
4158                           toSupplementaries("abccczzzabcczzzabccc"),
4159                           "${gn2}",
4160                           toSupplementaries("ccczzzabcczzzabccc"));
4161 
4162 
4163         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4164                         ")(?<gn2>" + toSupplementaries("c") + "*)",
4165                         toSupplementaries("abccczzzabcczzzabccc"),
4166                         "${gn2}",
4167                         toSupplementaries("ccczzzcczzzccc"));
4168 
4169         checkReplaceFirst("(?<dog>Dog)AndCat",
4170                           "zzzDogAndCatzzzDogAndCatzzz",
4171                           "${dog}",
4172                           "zzzDogzzzDogAndCatzzz");
4173 
4174 
4175         checkReplaceAll("(?<dog>Dog)AndCat",
4176                           "zzzDogAndCatzzzDogAndCatzzz",
4177                           "${dog}",
4178                           "zzzDogzzzDogzzz");
4179 
4180         // backref in Matcher & String
4181         if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
4182             !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
4183             failCount++;
4184 
4185         // negative
4186         checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
4187         checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
4188         checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
4189         checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
4190         checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
4191         checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
4192                          "gnameX");
4193         checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
4194         report("NamedGroupCapture");
4195     }
4196 
4197     // This is for bug 6919132
4198     private static void nonBmpClassComplementTest() throws Exception {
4199         Pattern p = Pattern.compile("\\P{Lu}");
4200         Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4201 
4202         if (m.find() && m.start() == 1)
4203             failCount++;
4204 
4205         // from a unicode category
4206         p = Pattern.compile("\\P{Lu}");
4207         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4208         if (m.find())
4209             failCount++;
4210         if (!m.hitEnd())
4211             failCount++;
4212 
4213         // block
4214         p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
4215         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4216         if (m.find() && m.start() == 1)
4217             failCount++;
4218 
4219         p = Pattern.compile("\\P{sc=GRANTHA}");
4220         m = p.matcher(new String(new int[] {0x11350}, 0, 1));
4221         if (m.find() && m.start() == 1)
4222             failCount++;
4223 
4224         report("NonBmpClassComplement");
4225     }
4226 
    /**
     * Checks the script and block property syntaxes of {@code \p{...}}.
     * First a handful of equivalent spellings are matched against single
     * ASCII letters; then, for the code points visited by the loop, the
     * pattern built from {@code Character.UnicodeScript.of(cp)} and from
     * {@code Character.UnicodeBlock.of(cp)} must match that code point while
     * a pattern for a different script/block must not. Mismatches are added
     * to {@code failCount}; the result is reported under "unicodeProperties".
     */
    private static void unicodePropertiesTest() throws Exception {
        // different forms
        // (a single failure is recorded if any of the spellings fails)
        if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
            !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
            !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
            !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
            !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
            !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
            !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
            !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
            !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
            !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
            failCount++;

        // Script matchers. lastSM/lastScript remember the matcher and script
        // of the previous code point so a run of code points in one script
        // reuses a single compiled pattern.
        Matcher common  = Pattern.compile("\\p{script=Common}").matcher("");
        Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
        Matcher lastSM  = common;
        Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);

        // Block matchers, cached the same way through lastBM/lastBlock.
        Matcher latin  = Pattern.compile("\\p{block=basic_latin}").matcher("");
        Matcher greek  = Pattern.compile("\\p{InGreek}").matcher("");
        Matcher lastBM = latin;
        Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);

        // Starts at 1 (code point 0 only seeds the caches above) and stops
        // before Character.MAX_CODE_POINT itself.
        for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
            // NOTE(review): from 0x30000 up this skips the code points whose
            // bits 4-6 are all zero (16 of every 128) and tests the rest; the
            // comment below suggests sparse sampling was intended - confirm.
            if (cp >= 0x30000 && (cp & 0x70) == 0){
                continue;  // only pick couple code points, they are the same
            }

            // Unicode Script
            Character.UnicodeScript script = Character.UnicodeScript.of(cp);
            Matcher m;
            String str = new String(Character.toChars(cp));
            if (script == lastScript) {
                 m = lastSM;
                 m.reset(str);
            } else {
                 m  = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
            }
            // The code point must match its own script ...
            if (!m.matches()) {
                failCount++;
            }
            // ... and must not match another one: Unknown is the counter
            // example for COMMON code points, Common for everything else.
            Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
            other.reset(str);
            if (other.matches()) {
                failCount++;
            }
            lastSM = m;
            lastScript = script;

            // Unicode Block
            Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
            if (block == null) {
                // No block for this code point: skip the block checks and
                // leave lastBM/lastBlock untouched.
                //System.out.printf("Not a Block: cp=%x%n", cp);
                continue;
            }
            if (block == lastBlock) {
                 m = lastBM;
                 m.reset(str);
            } else {
                 m  = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
            }
            // The code point must match its own block ...
            if (!m.matches()) {
                failCount++;
            }
            // ... and must not match another one: Greek is the counter
            // example for BASIC_LATIN code points, Basic Latin otherwise.
            other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
            other.reset(str);
            if (other.matches()) {
                failCount++;
            }
            lastBM = m;
            lastBlock = block;
        }
        report("unicodeProperties");
    }
4302 
4303     private static void unicodeHexNotationTest() throws Exception {
4304 
4305         // negative
4306         checkExpectedFail("\\x{-23}");
4307         checkExpectedFail("\\x{110000}");
4308         checkExpectedFail("\\x{}");
4309         checkExpectedFail("\\x{AB[ef]");
4310 
4311         // codepoint
4312         check("^\\x{1033c}$",              "\uD800\uDF3C", true);
4313         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
4314         check("^\\x{D800}\\x{DF3c}+$",     "\uD800\uDF3C", false);
4315         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
4316 
4317         // in class
4318         check("^[\\x{D800}\\x{DF3c}]+$",   "\uD800\uDF3C", false);
4319         check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
4320         check("^[\\x{D800}\\x{DF3C}]+$",   "\uD800\uDF3C", false);
4321         check("^[\\x{DF3C}\\x{D800}]+$",   "\uD800\uDF3C", false);
4322         check("^[\\x{D800}\\x{DF3C}]+$",   "\uDF3C\uD800", true);
4323         check("^[\\x{DF3C}\\x{D800}]+$",   "\uDF3C\uD800", true);
4324 
4325         for (int cp = 0; cp <= 0x10FFFF; cp++) {
4326              String s = "A" + new String(Character.toChars(cp)) + "B";
4327              String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
4328                                              : String.format("\\u%04x\\u%04x",
4329                                                (int) Character.toChars(cp)[0],
4330                                                (int) Character.toChars(cp)[1]);
4331              String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
4332              if (!Pattern.matches("A" + hexUTF16 + "B", s))
4333                  failCount++;
4334              if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
4335                  failCount++;
4336              if (!Pattern.matches("A" + hexCodePoint + "B", s))
4337                  failCount++;
4338              if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
4339                  failCount++;
4340          }
4341          report("unicodeHexNotation");
4342     }
4343 
    /**
     * Compares the predefined/POSIX character classes, in plain mode, with
     * {@code UNICODE_CHARACTER_CLASS}, with the embedded {@code (?U)} flag,
     * as {@code \p{IsXxx}} binary properties and as {@code \p{javaXxx}}
     * methods, against reference predicates for every code point from 1 up
     * to (not including) 0x30000. A few word-boundary cases follow. The
     * result is reported under "unicodePredefinedClasses".
     *
     * POSIX_ASCII and POSIX_Unicode are declared outside this method;
     * presumably they are the reference implementations of the ASCII and
     * Unicode class definitions - verify against their declarations.
     */
    private static void unicodeClassesTest() throws Exception {

        // Plain (ASCII-only) forms. All matchers start on "" and are reset
        // to the string under test inside the loop.
        Matcher lower  = Pattern.compile("\\p{Lower}").matcher("");
        Matcher upper  = Pattern.compile("\\p{Upper}").matcher("");
        Matcher ASCII  = Pattern.compile("\\p{ASCII}").matcher("");
        Matcher alpha  = Pattern.compile("\\p{Alpha}").matcher("");
        Matcher digit  = Pattern.compile("\\p{Digit}").matcher("");
        Matcher alnum  = Pattern.compile("\\p{Alnum}").matcher("");
        Matcher punct  = Pattern.compile("\\p{Punct}").matcher("");
        Matcher graph  = Pattern.compile("\\p{Graph}").matcher("");
        Matcher print  = Pattern.compile("\\p{Print}").matcher("");
        Matcher blank  = Pattern.compile("\\p{Blank}").matcher("");
        Matcher cntrl  = Pattern.compile("\\p{Cntrl}").matcher("");
        Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
        Matcher space  = Pattern.compile("\\p{Space}").matcher("");
        Matcher bound  = Pattern.compile("\\b").matcher("");
        Matcher word   = Pattern.compile("\\w++").matcher("");
        // UNICODE_CHARACTER_CLASS
        Matcher lowerU  = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher upperU  = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher ASCIIU  = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher alphaU  = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher digitU  = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher alnumU  = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher punctU  = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher graphU  = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher printU  = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher blankU  = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher cntrlU  = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher spaceU  = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher boundU  = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher wordU   = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        // embedded flag (?U)
        // NOTE(review): these also pass UNICODE_CHARACTER_CLASS as a compile
        // flag, so the embedded flag is not exercised on its own - confirm
        // whether that is intended.
        Matcher lowerEU  = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher graphEU  = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher wordEU   = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");

        // Word between two boundaries; note the plain form matches a single
        // \w while the Unicode forms use the possessive \w++.
        Matcher bwb    = Pattern.compile("\\b\\w\\b").matcher("");
        Matcher bwbU   = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher bwbEU  = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        // properties
        Matcher lowerP  = Pattern.compile("\\p{IsLowerCase}").matcher("");
        Matcher upperP  = Pattern.compile("\\p{IsUpperCase}").matcher("");
        Matcher titleP  = Pattern.compile("\\p{IsTitleCase}").matcher("");
        Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
        Matcher alphaP  = Pattern.compile("\\p{IsAlphabetic}").matcher("");
        Matcher ideogP  = Pattern.compile("\\p{IsIdeographic}").matcher("");
        Matcher cntrlP  = Pattern.compile("\\p{IsControl}").matcher("");
        Matcher spaceP  = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
        Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
        Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
        Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
        // javaMethod
        Matcher lowerJ  = Pattern.compile("\\p{javaLowerCase}").matcher("");
        Matcher upperJ  = Pattern.compile("\\p{javaUpperCase}").matcher("");
        Matcher alphaJ  = Pattern.compile("\\p{javaAlphabetic}").matcher("");
        Matcher ideogJ  = Pattern.compile("\\p{javaIdeographic}").matcher("");
        // GC/C
        Matcher gcC  = Pattern.compile("\\p{C}").matcher("");

        // One string per code point. The whole condition is a single
        // short-circuit OR, so at most one failure is recorded per code
        // point and comparisons after the first mismatch are not evaluated.
        for (int cp = 1; cp < 0x30000; cp++) {
            String str = new String(Character.toChars(cp));
            int type = Character.getType(cp);
            if (// lower
                POSIX_ASCII.isLower(cp)   != lower.reset(str).matches()  ||
                Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
                Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
                Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
                Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
                // upper
                POSIX_ASCII.isUpper(cp)   != upper.reset(str).matches()  ||
                POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
                Character.isUpperCase(cp) != upperP.reset(str).matches() ||
                Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
                // alpha
                POSIX_ASCII.isAlpha(cp)   != alpha.reset(str).matches()  ||
                POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
                Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
                Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
                // digit
                POSIX_ASCII.isDigit(cp)   != digit.reset(str).matches()  ||
                Character.isDigit(cp)     != digitU.reset(str).matches() ||
                // alnum
                POSIX_ASCII.isAlnum(cp)   != alnum.reset(str).matches()  ||
                POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
                // punct
                POSIX_ASCII.isPunct(cp)   != punct.reset(str).matches()  ||
                POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
                // graph
                POSIX_ASCII.isGraph(cp)   != graph.reset(str).matches()  ||
                POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
                POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
                // blank
                POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
                                          != blank.reset(str).matches()  ||
                POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
                // print
                POSIX_ASCII.isPrint(cp)   != print.reset(str).matches()  ||
                POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
                // cntrl
                POSIX_ASCII.isCntrl(cp)   != cntrl.reset(str).matches()  ||
                POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
                (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
                // hexdigit
                POSIX_ASCII.isHexDigit(cp)   != xdigit.reset(str).matches()  ||
                POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
                // space
                POSIX_ASCII.isSpace(cp)   != space.reset(str).matches()  ||
                POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
                POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
                // word
                POSIX_ASCII.isWord(cp)   != word.reset(str).matches()  ||
                POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
                POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
                // bwordb
                POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
                POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
                // properties
                Character.isTitleCase(cp) != titleP.reset(str).matches() ||
                Character.isLetter(cp)    != letterP.reset(str).matches()||
                Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
                Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
                // "==" on purpose: IsAssigned must be the opposite of
                // "type is UNASSIGNED", so equality is the failure case.
                (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
                POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
                POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() ||
                // gc_C
                (Character.CONTROL == type || Character.FORMAT == type ||
                 Character.PRIVATE_USE == type || Character.SURROGATE == type ||
                 Character.UNASSIGNED == type)
                != gcC.reset(str).matches()) {
                failCount++;
            }
        }

        // bounds/word align
        // twoFindIndexes is declared elsewhere; presumably it checks that two
        // successive finds of the given matcher land on the two given
        // indexes - verify. Each input is also matched as a whole word by
        // the Unicode-aware \b\w++\b matcher.
        twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
        if (!bwbU.reset("\u0180sherman\u0400").matches())
            failCount++;
        twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
        if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
            failCount++;
        twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
        if (!bwbU.reset("\u0724\u0739\u0724").matches())
            failCount++;
        // Same input through the (?U) variant; this is the only use of bwbEU.
        if (!bwbEU.reset("\u0724\u0739\u0724").matches())
            failCount++;
        report("unicodePredefinedClasses");
    }
4493 
4494     private static void unicodeCharacterNameTest() throws Exception {
4495 
4496         for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) {
4497             if (!Character.isValidCodePoint(cp) ||
4498                 Character.getType(cp) == Character.UNASSIGNED)
4499                 continue;
4500             String str = new String(Character.toChars(cp));
4501             // single
4502             String p = "\\N{" + Character.getName(cp) + "}";
4503             if (!Pattern.compile(p).matcher(str).matches()) {
4504                 failCount++;
4505             }
4506             // class[c]
4507             p = "[\\N{" + Character.getName(cp) + "}]";
4508             if (!Pattern.compile(p).matcher(str).matches()) {
4509                 failCount++;
4510             }
4511         }
4512 
4513         // range
4514         for (int i = 0; i < 10; i++) {
4515             int start = generator.nextInt(20);
4516             int end = start + generator.nextInt(200);
4517             String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]";
4518             String str;
4519             for (int cp = start; cp < end; cp++) {
4520                 str = new String(Character.toChars(cp));
4521                 if (!Pattern.compile(p).matcher(str).matches()) {
4522                     failCount++;
4523                 }
4524             }
4525             str = new String(Character.toChars(end + 10));
4526             if (Pattern.compile(p).matcher(str).matches()) {
4527                 failCount++;
4528             }
4529         }
4530 
4531         // slice
4532         for (int i = 0; i < 10; i++) {
4533             int n = generator.nextInt(256);
4534             int[] buf = new int[n];
4535             StringBuffer sb = new StringBuffer(1024);
4536             for (int j = 0; j < n; j++) {
4537                 int cp = generator.nextInt(1000);
4538                 if (!Character.isValidCodePoint(cp) ||
4539                     Character.getType(cp) == Character.UNASSIGNED)
4540                     cp = 0x4e00;    // just use 4e00
4541                 sb.append("\\N{" + Character.getName(cp) + "}");
4542                 buf[j] = cp;
4543             }
4544             String p = sb.toString();
4545             String str = new String(buf, 0, buf.length);
4546             if (!Pattern.compile(p).matcher(str).matches()) {
4547                 failCount++;
4548             }
4549         }
4550         report("unicodeCharacterName");
4551     }
4552 
4553     private static void horizontalAndVerticalWSTest() throws Exception {
4554         String hws = new String (new char[] {
4555                                      0x09, 0x20, 0xa0, 0x1680, 0x180e,
4556                                      0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
4557                                      0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
4558                                      0x202f, 0x205f, 0x3000 });
4559         String vws = new String (new char[] {
4560                                      0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
4561         if (!Pattern.compile("\\h+").matcher(hws).matches() ||
4562             !Pattern.compile("[\\h]+").matcher(hws).matches())
4563             failCount++;
4564         if (Pattern.compile("\\H").matcher(hws).find() ||
4565             Pattern.compile("[\\H]").matcher(hws).find())
4566             failCount++;
4567         if (!Pattern.compile("\\v+").matcher(vws).matches() ||
4568             !Pattern.compile("[\\v]+").matcher(vws).matches())
4569             failCount++;
4570         if (Pattern.compile("\\V").matcher(vws).find() ||
4571             Pattern.compile("[\\V]").matcher(vws).find())
4572             failCount++;
4573         String prefix = "abcd";
4574         String suffix = "efgh";
4575         String ng = "A";
4576         for (int i = 0; i < hws.length(); i++) {
4577             String c = String.valueOf(hws.charAt(i));
4578             Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
4579             if (!m.find() || !c.equals(m.group()))
4580                 failCount++;
4581             m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
4582             if (!m.find() || !c.equals(m.group()))
4583                 failCount++;
4584 
4585             m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
4586             if (!m.find() || !ng.equals(m.group()))
4587                 failCount++;
4588             m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
4589             if (!m.find() || !ng.equals(m.group()))
4590                 failCount++;
4591         }
4592         for (int i = 0; i < vws.length(); i++) {
4593             String c = String.valueOf(vws.charAt(i));
4594             Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
4595             if (!m.find() || !c.equals(m.group()))
4596                 failCount++;
4597             m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
4598             if (!m.find() || !c.equals(m.group()))
4599                 failCount++;
4600 
4601             m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
4602             if (!m.find() || !ng.equals(m.group()))
4603                 failCount++;
4604             m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
4605             if (!m.find() || !ng.equals(m.group()))
4606                 failCount++;
4607         }
4608         // \v in range is interpreted as 0x0B. This is the undocumented behavior
4609         if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
4610             failCount++;
4611         report("horizontalAndVerticalWSTest");
4612     }
4613 
4614     private static void linebreakTest() throws Exception {
4615         String linebreaks = new String (new char[] {
4616             0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
4617         String crnl = "\r\n";
4618         if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() &&
4619               Pattern.compile("\\R").matcher(crnl).matches() &&
4620               Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() &&
4621               Pattern.compile("\\Rabc").matcher("\rabc").matches() &&
4622               Pattern.compile("\\R\\R").matcher(crnl).matches() &&  // backtracking
4623               Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking
4624               !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029
4625             failCount++;
4626         }
4627         report("linebreakTest");
4628     }
4629 
4630     // #7189363
4631     private static void branchTest() throws Exception {
4632         if (!Pattern.compile("(a)?bc|d").matcher("d").find() ||     // greedy
4633             !Pattern.compile("(a)+bc|d").matcher("d").find() ||
4634             !Pattern.compile("(a)*bc|d").matcher("d").find() ||
4635             !Pattern.compile("(a)??bc|d").matcher("d").find() ||    // reluctant
4636             !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
4637             !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
4638             !Pattern.compile("(a)?+bc|d").matcher("d").find() ||    // possessive
4639             !Pattern.compile("(a)++bc|d").matcher("d").find() ||
4640             !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
4641             !Pattern.compile("(a)?bc|d").matcher("d").matches() ||  // greedy
4642             !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
4643             !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
4644             !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
4645             !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
4646             !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
4647             !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
4648             !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
4649             !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
4650             !Pattern.compile("(a)?bc|de").matcher("de").find() ||   // others
4651             !Pattern.compile("(a)??bc|de").matcher("de").find() ||
4652             !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
4653             !Pattern.compile("(a)??bc|de").matcher("de").matches())
4654             failCount++;
4655         report("branchTest");
4656     }
4657 
4658     // This test is for 8007395
4659     private static void groupCurlyNotFoundSuppTest() throws Exception {
4660         String input = "test this as \ud83d\ude0d";
4661         for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
4662                                           "test(.)*(@[a-zA-Z.]+)",
4663                                           "test([^B])+(@[a-zA-Z.]+)",
4664                                           "test([^B])*(@[a-zA-Z.]+)",
4665                                           "test(\\P{IsControl})+(@[a-zA-Z.]+)",
4666                                           "test(\\P{IsControl})*(@[a-zA-Z.]+)",
4667                                         }) {
4668             Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
4669                                .matcher(input);
4670             try {
4671                 if (m.find()) {
4672                     failCount++;
4673                 }
4674             } catch (Exception x) {
4675                 failCount++;
4676             }
4677         }
4678         report("GroupCurly NotFoundSupp");
4679     }
4680 
4681     // This test is for 8023647
4682     private static void groupCurlyBackoffTest() throws Exception {
4683         if (!"abc1c".matches("(\\w)+1\\1") ||
4684             "abc11".matches("(\\w)+1\\1")) {
4685             failCount++;
4686         }
4687         report("GroupCurly backoff");
4688     }
4689 
4690     // This test is for 8012646
4691     private static void patternAsPredicate() throws Exception {
4692         Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
4693 
4694         if (p.test("")) {
4695             failCount++;
4696         }
4697         if (!p.test("word")) {
4698             failCount++;
4699         }
4700         if (p.test("1234")) {
4701             failCount++;
4702         }
4703         if (!p.test("word1234")) {
4704             failCount++;
4705         }
4706         report("Pattern.asPredicate");
4707     }
4708 
4709     // This test is for 8184692
4710     private static void patternAsMatchPredicate() throws Exception {
4711         Predicate<String> p = Pattern.compile("[a-z]+").asMatchPredicate();
4712 
4713         if (p.test("")) {
4714             failCount++;
4715         }
4716         if (!p.test("word")) {
4717             failCount++;
4718         }
4719         if (p.test("1234word")) {
4720             failCount++;
4721         }
4722         if (p.test("1234")) {
4723             failCount++;
4724         }
4725         report("Pattern.asMatchPredicate");
4726     }
4727 
4728 
4729     // This test is for 8035975
4730     private static void invalidFlags() throws Exception {
4731         for (int flag = 1; flag != 0; flag <<= 1) {
4732             switch (flag) {
4733             case Pattern.CASE_INSENSITIVE:
4734             case Pattern.MULTILINE:
4735             case Pattern.DOTALL:
4736             case Pattern.UNICODE_CASE:
4737             case Pattern.CANON_EQ:
4738             case Pattern.UNIX_LINES:
4739             case Pattern.LITERAL:
4740             case Pattern.UNICODE_CHARACTER_CLASS:
4741             case Pattern.COMMENTS:
4742                 // valid flag, continue
4743                 break;
4744             default:
4745                 try {
4746                     Pattern.compile(".", flag);
4747                     failCount++;
4748                 } catch (IllegalArgumentException expected) {
4749                 }
4750             }
4751         }
4752         report("Invalid compile flags");
4753     }
4754 
4755     // This test is for 8158482
4756     private static void embeddedFlags() throws Exception {
4757         try {
4758             Pattern.compile("(?i).(?-i).");
4759             Pattern.compile("(?m).(?-m).");
4760             Pattern.compile("(?s).(?-s).");
4761             Pattern.compile("(?d).(?-d).");
4762             Pattern.compile("(?u).(?-u).");
4763             Pattern.compile("(?c).(?-c).");
4764             Pattern.compile("(?x).(?-x).");
4765             Pattern.compile("(?U).(?-U).");
4766             Pattern.compile("(?imsducxU).(?-imsducxU).");
4767         } catch (PatternSyntaxException x) {
4768             failCount++;
4769         }
4770         report("Embedded flags");
4771     }
4772 
4773     private static void grapheme() throws Exception {
4774         Files.lines(UCDFiles.GRAPHEME_BREAK_TEST)
4775             .filter( ln -> ln.length() != 0 && !ln.startsWith("#") )
4776             .forEach( ln -> {
4777                     ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", "");
4778                     // System.out.println(str);
4779                     String[] strs = ln.split("\u00f7|\u00d7");
4780                     StringBuilder src = new StringBuilder();
4781                     ArrayList<String> graphemes = new ArrayList<>();
4782                     StringBuilder buf = new StringBuilder();
4783                     int offBk = 0;
4784                     for (String str : strs) {
4785                         if (str.length() == 0)  // first empty str
4786                             continue;
4787                         int cp = Integer.parseInt(str, 16);
4788                         src.appendCodePoint(cp);
4789                         buf.appendCodePoint(cp);
4790                         offBk += (str.length() + 1);
4791                         if (ln.charAt(offBk) == '\u00f7') {    // DIV
4792                             graphemes.add(buf.toString());
4793                             buf = new StringBuilder();
4794                         }
4795                     }
4796                     Pattern p = Pattern.compile("\\X");
4797                     Matcher m = p.matcher(src.toString());
4798                     Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}");
4799                     for (String g : graphemes) {
4800                         // System.out.printf("     grapheme:=[%s]%n", g);
4801                         // (1) test \\X directly
4802                         if (!m.find() || !m.group().equals(g)) {
4803                             System.out.println("Failed \\X [" + ln + "] : " + g);
4804                             failCount++;
4805                         }
4806                         // (2) test \\b{g} + \\X  via Scanner
4807                         boolean hasNext = s.hasNext(p);
4808                         // if (!s.hasNext() || !s.next().equals(next)) {
4809                         if (!s.hasNext(p) || !s.next(p).equals(g)) {
4810                             System.out.println("Failed b{g} [" + ln + "] : " + g);
4811                             failCount++;
4812                         }
4813                     }
4814                 });
4815         // some sanity checks
4816         if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() ||
4817             !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() ||
4818             !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches())
4819             failCount++;
4820         // make sure "\b{n}" still works
4821         if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches())
4822             failCount++;
4823         report("Unicode extended grapheme cluster");
4824     }
4825 
    // Hangs or times out if the matcher goes into exponential backtracking
4827     private static void expoBacktracking() throws Exception {
4828 
4829         Object[][] patternMatchers = {
4830             // 6328855
4831             { "(.*\n*)*",
4832               "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)",
4833               false },
4834             // 6192895
4835             { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4836               "Hello World this is a test this is a test this is a test A",
4837               true },
4838             { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4839               "Hello World this is a test this is a test this is a test \u4e00 ",
4840               false },
4841             { " *([a-z0-9]+ *)+",
4842               "hello world this is a test this is a test this is a test A",
4843               false },
4844             // 4771934 [FIXED] #5013651?
4845             { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4846               "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com",
4847               true },
4848             // 4866249 [FIXED]
4849             { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>",
4850               "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">",
4851               true },
4852             { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4853               "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com",
4854               false },
4855             // 6345469
4856             { "((<[^>]+>)?(((\\s)?)*(\\&nbsp;)?)*((\\s)?)*)+",
4857               "&nbsp;&nbsp; < br/> &nbsp; < / p> <p> <html> <adfasfdasdf>&nbsp; </p>",
4858               true }, // --> matched
4859             { "((<[^>]+>)?(((\\s)?)*(\\&nbsp;)?)*((\\s)?)*)+",
4860               "&nbsp;&nbsp; < br/> &nbsp; < / p> <p> <html> <adfasfdasdf>&nbsp; p </p>",
4861               false },
4862             // 5026912
4863             { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$",
4864               "156580451111112225588087755221111111566969655555555",
4865               false},
4866             // 6988218
4867             { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')",
4868               "'%)) order by ANGEBOT.ID",
4869               false},    // find
4870             // 6693451
4871             { "^(\\s*foo\\s*)*$",
4872               "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo",
4873               true },
4874             { "^(\\s*foo\\s*)*$",
4875               "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo",
4876               false
4877             },
4878             // 7006761
4879             { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true},
4880             { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false},
4881             // 8140212
4882             { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)",
4883               "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()",
4884               false
4885             },
4886             { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true},
4887             { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false},
4888 
4889             { "(x+)*y",  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true },
4890             { "(x+)*y",  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4891 
4892             { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true},
4893             { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4894 
4895             { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false},
4896 
4897             /* not fixed
4898             //8132141   --->    second level exponential backtracking
4899             { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*",
4900               "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" },
4901             */
4902         };
4903 
4904         for (Object[] pm : patternMatchers) {
4905             String p = (String)pm[0];
4906             String s = (String)pm[1];
4907             boolean r = (Boolean)pm[2];
4908             if (r != Pattern.compile(p).matcher(s).matches()) {
4909                 failCount++;
4910             }
4911         }
4912     }
4913 
4914     private static void invalidGroupName() {
4915         // Invalid start of a group name
4916         for (String groupName : List.of("", ".", "0", "\u0040", "\u005b",
4917                 "\u0060", "\u007b", "\u0416")) {
4918             for (String pat : List.of("(?<" + groupName + ">)",
4919                     "\\k<" + groupName + ">")) {
4920                 try {
4921                     Pattern.compile(pat);
4922                     failCount++;
4923                 } catch (PatternSyntaxException e) {
4924                     if (!e.getMessage().startsWith(
4925                             "capturing group name does not start with a"
4926                             + " Latin letter")) {
4927                         failCount++;
4928                     }
4929                 }
4930             }
4931         }
4932         // Invalid char in a group name
4933         for (String groupName : List.of("a.", "b\u0040", "c\u005b",
4934                 "d\u0060", "e\u007b", "f\u0416")) {
4935             for (String pat : List.of("(?<" + groupName + ">)",
4936                     "\\k<" + groupName + ">")) {
4937                 try {
4938                     Pattern.compile(pat);
4939                     failCount++;
4940                 } catch (PatternSyntaxException e) {
4941                     if (!e.getMessage().startsWith(
4942                             "named capturing group is missing trailing '>'")) {
4943                         failCount++;
4944                     }
4945                 }
4946             }
4947         }
4948         report("Invalid capturing group names");
4949     }
4950 
4951     private static void illegalRepetitionRange() {
4952         // huge integers > (2^31 - 1)
4953         String n = BigInteger.valueOf(1L << 32)
4954             .toString();
4955         String m = BigInteger.valueOf(1L << 31)
4956             .add(new BigInteger(80, generator))
4957             .toString();
4958         for (String rep : List.of("", "x", ".", ",", "-1", "2,1",
4959                 n, n + ",", "0," + n, n + "," + m, m, m + ",", "0," + m)) {
4960             String pat = ".{" + rep + "}";
4961             try {
4962                 Pattern.compile(pat);
4963                 failCount++;
4964                 System.out.println("Expected to fail. Pattern: " + pat);
4965             } catch (PatternSyntaxException e) {
4966                 if (!e.getMessage().startsWith("Illegal repetition")) {
4967                     failCount++;
4968                     System.out.println("Unexpected error message: " + e.getMessage());
4969                 }
4970             } catch (Throwable t) {
4971                 failCount++;
4972                 System.out.println("Unexpected exception: " + t);
4973             }
4974         }
4975         report("illegalRepetitionRange");
4976     }
4977 
4978     private static void surrogatePairWithCanonEq() {
4979         try {
4980             Pattern.compile("\ud834\udd21", Pattern.CANON_EQ);
4981         } catch (Throwable t) {
4982             failCount++;
4983             System.out.println("Unexpected exception: " + t);
4984         }
4985         report("surrogatePairWithCanonEq");
4986     }
4987 }