1 /*
   2  * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /**
  25  * @test
  26  * @summary tests RegExp framework (use -Dseed=X to set PRNG seed)
  27  * @author Mike McCloskey
  28  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
  29  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
  30  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  31  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  32  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  33  * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
  34  * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
  35  * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
  36  * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
  37  * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
  38  * 8194667 8197462 8184692 8221431 8224789
  39  *
  40  * @library /test/lib
  41  * @library /lib/testlibrary/java/lang
  42  * @build jdk.test.lib.RandomFactory
  43  * @run main RegExTest
  44  * @key randomness
  45  */
  46 
  47 import java.util.function.Function;
  48 import java.util.regex.*;
  49 import java.util.Random;
  50 import java.util.Scanner;
  51 import java.io.*;
  52 import java.nio.file.*;
  53 import java.util.*;
  54 import java.nio.CharBuffer;
  55 import java.util.function.Predicate;
  56 import jdk.test.lib.RandomFactory;
  57 
  58 /**
  59  * This is a test class created to check the operation of
  60  * the Pattern and Matcher classes.
  61  */
  62 public class RegExTest {
  63 
    // Shared PRNG used by the randomized tests; obtained from the
    // jdk.test.lib RandomFactory (the test header notes -Dseed=X sets the seed).
    private static Random generator = RandomFactory.getRandom();
    // Set by report() once any test has recorded a failure; read at the end of main().
    private static boolean failure = false;
    // Failures counted by the test currently running; report() prints it and resets it to 0.
    private static int failCount = 0;
    // Name passed to report() by the first test that failed, or null if none has failed.
    private static String firstFailure = null;
  68 
    /**
     * Test entry point: runs every regex test below in a fixed order and
     * fails the run if any of them recorded a failure.
     *
     * The command-line arguments are not read by this method. The order of
     * the calls matters: the tests share one random generator, and the name
     * reported on failure is that of the first test that failed.
     *
     * @param args unused
     * @throws Exception if a test propagates one; a RuntimeException naming
     *         the first failed test is thrown when the {@code failure} flag
     *         is set after all tests have run
     */
    public static void main(String[] args) throws Exception {
        // Most of the tests are in a file
        processFile("TestCases.txt");
        //processFile("PerlCases.txt");
        processFile("BMPTestCases.txt");
        processFile("SupplementaryTestCases.txt");

        // These test many randomly generated char patterns
        bm();
        slice();

        // These are hard to put into the file
        escapes();
        blankInput();

        // Substitution tests on randomly generated sequences
        globalSubstitute();
        stringbufferSubstitute();
        stringbuilderSubstitute();

        substitutionBasher();
        substitutionBasher2();

        // Canonical Equivalence
        ceTest();

        // Anchors
        anchorTest();

        // boolean match calls
        matchesTest();
        lookingAtTest();

        // Pattern API
        patternMatchesTest();

        // Misc
        lookbehindTest();
        nullArgumentTest();
        backRefTest();
        groupCaptureTest();
        caretTest();
        charClassTest();
        emptyPatternTest();
        findIntTest();
        group0Test();
        longPatternTest();
        octalTest();
        ampersandTest();
        negationTest();
        splitTest();
        appendTest();
        caseFoldingTest();
        commentsTest();
        unixLinesTest();
        replaceFirstTest();
        gTest();
        zTest();
        serializeTest();
        reluctantRepetitionTest();
        multilineDollarTest();
        dollarAtEndTest();
        caretBetweenTerminatorsTest();
        // This RFE rejected in Tiger numOccurrencesTest();
        javaCharClassTest();
        nonCaptureRepetitionTest();
        notCapturedGroupCurlyMatchTest();
        escapedSegmentTest();
        literalPatternTest();
        literalReplacementTest();
        regionTest();
        toStringTest();
        negatedCharClassTest();
        findFromTest();
        boundsTest();
        unicodeWordBoundsTest();
        caretAtEndTest();
        wordSearchTest();
        hitEndTest();
        toMatchResultTest();
        toMatchResultTest2();
        surrogatesInClassTest();
        removeQEQuotingTest();
        namedGroupCaptureTest();
        nonBmpClassComplementTest();
        unicodePropertiesTest();
        unicodeHexNotationTest();
        unicodeClassesTest();
        unicodeCharacterNameTest();
        horizontalAndVerticalWSTest();
        linebreakTest();
        branchTest();
        groupCurlyNotFoundSuppTest();
        groupCurlyBackoffTest();
        patternAsPredicate();
        patternAsMatchPredicate();
        invalidFlags();
        embeddedFlags();
        grapheme();
        expoBacktracking();
        invalidGroupName();
        illegalRepetitionRange();

        // report() sets 'failure' and 'firstFailure' when a test counted failures
        if (failure) {
            throw new
                RuntimeException("RegExTest failed, 1st failure: " +
                                 firstFailure);
        } else {
            System.err.println("OKAY: All tests passed.");
        }
    }
 184 
 185     // Utility functions
 186 
 187     private static String getRandomAlphaString(int length) {
 188         StringBuffer buf = new StringBuffer(length);
 189         for (int i=0; i<length; i++) {
 190             char randChar = (char)(97 + generator.nextInt(26));
 191             buf.append(randChar);
 192         }
 193         return buf.toString();
 194     }
 195 
 196     private static void check(Matcher m, String expected) {
 197         m.find();
 198         if (!m.group().equals(expected))
 199             failCount++;
 200     }
 201 
 202     private static void check(Matcher m, String result, boolean expected) {
 203         m.find();
 204         if (m.group().equals(result) != expected)
 205             failCount++;
 206     }
 207 
 208     private static void check(Pattern p, String s, boolean expected) {
 209         if (p.matcher(s).find() != expected)
 210             failCount++;
 211     }
 212 
 213     private static void check(String p, String s, boolean expected) {
 214         Matcher matcher = Pattern.compile(p).matcher(s);
 215         if (matcher.find() != expected)
 216             failCount++;
 217     }
 218 
 219     private static void check(String p, char c, boolean expected) {
 220         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 221         Pattern pattern = Pattern.compile(propertyPattern);
 222         char[] ca = new char[1]; ca[0] = c;
 223         Matcher matcher = pattern.matcher(new String(ca));
 224         if (!matcher.find())
 225             failCount++;
 226     }
 227 
 228     private static void check(String p, int codePoint, boolean expected) {
 229         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 230         Pattern pattern = Pattern.compile(propertyPattern);
 231         char[] ca = Character.toChars(codePoint);
 232         Matcher matcher = pattern.matcher(new String(ca));
 233         if (!matcher.find())
 234             failCount++;
 235     }
 236 
 237     private static void check(String p, int flag, String input, String s,
 238                               boolean expected)
 239     {
 240         Pattern pattern = Pattern.compile(p, flag);
 241         Matcher matcher = pattern.matcher(input);
 242         if (expected)
 243             check(matcher, s, expected);
 244         else
 245             check(pattern, input, false);
 246     }
 247 
 248     private static void report(String testName) {
 249         int spacesToAdd = 30 - testName.length();
 250         StringBuffer paddedNameBuffer = new StringBuffer(testName);
 251         for (int i=0; i<spacesToAdd; i++)
 252             paddedNameBuffer.append(" ");
 253         String paddedName = paddedNameBuffer.toString();
 254         System.err.println(paddedName + ": " +
 255                            (failCount==0 ? "Passed":"Failed("+failCount+")"));
 256         if (failCount > 0) {
 257             failure = true;
 258 
 259             if (firstFailure == null) {
 260                 firstFailure = testName;
 261             }
 262         }
 263 
 264         failCount = 0;
 265     }
 266 
 267     /**
 268      * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
 269      * supplementary characters. This method does NOT fully take care
 270      * of the regex syntax.
 271      */
 272     private static String toSupplementaries(String s) {
 273         int length = s.length();
 274         StringBuffer sb = new StringBuffer(length * 2);
 275 
 276         for (int i = 0; i < length; ) {
 277             char c = s.charAt(i++);
 278             if (c == '\\') {
 279                 sb.append(c);
 280                 if (i < length) {
 281                     c = s.charAt(i++);
 282                     sb.append(c);
 283                     if (c == 'u') {
 284                         // assume no syntax error
 285                         sb.append(s.charAt(i++));
 286                         sb.append(s.charAt(i++));
 287                         sb.append(s.charAt(i++));
 288                         sb.append(s.charAt(i++));
 289                     }
 290                 }
 291             } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
 292                 sb.append('\ud800').append((char)('\udc00'+c));
 293             } else {
 294                 sb.append(c);
 295             }
 296         }
 297         return sb.toString();
 298     }
 299 
 300     // Regular expression tests
 301 
 302     // This is for bug 6178785
 303     // Test if an expected NPE gets thrown when passing in a null argument
 304     private static boolean check(Runnable test) {
 305         try {
 306             test.run();
 307             failCount++;
 308             return false;
 309         } catch (NullPointerException npe) {
 310             return true;
 311         }
 312     }
 313 
 314     private static void nullArgumentTest() {
 315         check(() -> Pattern.compile(null));
 316         check(() -> Pattern.matches(null, null));
 317         check(() -> Pattern.matches("xyz", null));
 318         check(() -> Pattern.quote(null));
 319         check(() -> Pattern.compile("xyz").split(null));
 320         check(() -> Pattern.compile("xyz").matcher(null));
 321 
 322         final Matcher m = Pattern.compile("xyz").matcher("xyz");
 323         m.matches();
 324         check(() -> m.appendTail((StringBuffer) null));
 325         check(() -> m.appendTail((StringBuilder)null));
 326         check(() -> m.replaceAll((String) null));
 327         check(() -> m.replaceAll((Function<MatchResult, String>)null));
 328         check(() -> m.replaceFirst((String)null));
 329         check(() -> m.replaceFirst((Function<MatchResult, String>) null));
 330         check(() -> m.appendReplacement((StringBuffer)null, null));
 331         check(() -> m.appendReplacement((StringBuilder)null, null));
 332         check(() -> m.reset(null));
 333         check(() -> Matcher.quoteReplacement(null));
 334         //check(() -> m.usePattern(null));
 335 
 336         report("Null Argument");
 337     }
 338 
 339     // This is for bug6635133
 340     // Test if surrogate pair in Unicode escapes can be handled correctly.
 341     private static void surrogatesInClassTest() throws Exception {
 342         Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
 343         Matcher matcher = pattern.matcher("\ud834\udd22");
 344         if (!matcher.find())
 345             failCount++;
 346 
 347         report("Surrogate pair in Unicode escape");
 348     }
 349 
 350     // This is for bug6990617
 351     // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
 352     // char encoding is only 2 or 3 digits instead of 4 and the first quoted
 353     // char is an octal digit.
 354     private static void removeQEQuotingTest() throws Exception {
 355         Pattern pattern =
 356             Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
 357         Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
 358         if (!matcher.find())
 359             failCount++;
 360 
 361         report("Remove Q/E Quoting");
 362     }
 363 
 364     // This is for bug 4988891
 365     // Test toMatchResult to see that it is a copy of the Matcher
 366     // that is not affected by subsequent operations on the original
 367     private static void toMatchResultTest() throws Exception {
 368         Pattern pattern = Pattern.compile("squid");
 369         Matcher matcher = pattern.matcher(
 370             "agiantsquidofdestinyasmallsquidoffate");
 371         matcher.find();
 372         int matcherStart1 = matcher.start();
 373         MatchResult mr = matcher.toMatchResult();
 374         if (mr == matcher)
 375             failCount++;
 376         int resultStart1 = mr.start();
 377         if (matcherStart1 != resultStart1)
 378             failCount++;
 379         matcher.find();
 380         int matcherStart2 = matcher.start();
 381         int resultStart2 = mr.start();
 382         if (matcherStart2 == resultStart2)
 383             failCount++;
 384         if (resultStart1 != resultStart2)
 385             failCount++;
 386         MatchResult mr2 = matcher.toMatchResult();
 387         if (mr == mr2)
 388             failCount++;
 389         if (mr2.start() != matcherStart2)
 390             failCount++;
 391         report("toMatchResult is a copy");
 392     }
 393 
 394     private static void checkExpectedISE(Runnable test) {
 395         try {
 396             test.run();
 397             failCount++;
 398         } catch (IllegalStateException x) {
 399         } catch (IndexOutOfBoundsException xx) {
 400             failCount++;
 401         }
 402     }
 403 
 404     private static void checkExpectedIOOE(Runnable test) {
 405         try {
 406             test.run();
 407             failCount++;
 408         } catch (IndexOutOfBoundsException x) {}
 409     }
 410 
 411     // This is for bug 8074678
 412     // Test the result of toMatchResult throws ISE if no match is availble
 413     private static void toMatchResultTest2() throws Exception {
 414         Matcher matcher = Pattern.compile("nomatch").matcher("hello world");
 415         matcher.find();
 416         MatchResult mr = matcher.toMatchResult();
 417 
 418         checkExpectedISE(() -> mr.start());
 419         checkExpectedISE(() -> mr.start(2));
 420         checkExpectedISE(() -> mr.end());
 421         checkExpectedISE(() -> mr.end(2));
 422         checkExpectedISE(() -> mr.group());
 423         checkExpectedISE(() -> mr.group(2));
 424 
 425         matcher = Pattern.compile("(match)").matcher("there is a match");
 426         matcher.find();
 427         MatchResult mr2 = matcher.toMatchResult();
 428         checkExpectedIOOE(() -> mr2.start(2));
 429         checkExpectedIOOE(() -> mr2.end(2));
 430         checkExpectedIOOE(() -> mr2.group(2));
 431 
 432         report("toMatchResult2 appropriate exceptions");
 433     }
 434 
 435     // This is for bug 5013885
 436     // Must test a slice to see if it reports hitEnd correctly
 437     private static void hitEndTest() throws Exception {
 438         // Basic test of Slice node
 439         Pattern p = Pattern.compile("^squidattack");
 440         Matcher m = p.matcher("squack");
 441         m.find();
 442         if (m.hitEnd())
 443             failCount++;
 444         m.reset("squid");
 445         m.find();
 446         if (!m.hitEnd())
 447             failCount++;
 448 
 449         // Test Slice, SliceA and SliceU nodes
 450         for (int i=0; i<3; i++) {
 451             int flags = 0;
 452             if (i==1) flags = Pattern.CASE_INSENSITIVE;
 453             if (i==2) flags = Pattern.UNICODE_CASE;
 454             p = Pattern.compile("^abc", flags);
 455             m = p.matcher("ad");
 456             m.find();
 457             if (m.hitEnd())
 458                 failCount++;
 459             m.reset("ab");
 460             m.find();
 461             if (!m.hitEnd())
 462                 failCount++;
 463         }
 464 
 465         // Test Boyer-Moore node
 466         p = Pattern.compile("catattack");
 467         m = p.matcher("attack");
 468         m.find();
 469         if (!m.hitEnd())
 470             failCount++;
 471 
 472         p = Pattern.compile("catattack");
 473         m = p.matcher("attackattackattackcatatta");
 474         m.find();
 475         if (!m.hitEnd())
 476             failCount++;
 477 
 478         // 8184706: Matching u+0d at EOL against \R should hit-end
 479         p = Pattern.compile("...\\R");
 480         m = p.matcher("cat" + (char)0x0a);
 481         m.find();
 482         if (m.hitEnd())
 483             failCount++;
 484 
 485         m = p.matcher("cat" + (char)0x0d);
 486         m.find();
 487         if (!m.hitEnd())
 488             failCount++;
 489 
 490         m = p.matcher("cat" + (char)0x0d + (char)0x0a);
 491         m.find();
 492         if (m.hitEnd())
 493             failCount++;
 494 
 495         report("hitEnd");
 496     }
 497 
 498     // This is for bug 4997476
 499     // It is weird code submitted by customer demonstrating a regression
 500     private static void wordSearchTest() throws Exception {
 501         String testString = new String("word1 word2 word3");
 502         Pattern p = Pattern.compile("\\b");
 503         Matcher m = p.matcher(testString);
 504         int position = 0;
 505         int start = 0;
 506         while (m.find(position)) {
 507             start = m.start();
 508             if (start == testString.length())
 509                 break;
 510             if (m.find(start+1)) {
 511                 position = m.start();
 512             } else {
 513                 position = testString.length();
 514             }
 515             if (testString.substring(start, position).equals(" "))
 516                 continue;
 517             if (!testString.substring(start, position-1).startsWith("word"))
 518                 failCount++;
 519         }
 520         report("Customer word search");
 521     }
 522 
 523     // This is for bug 4994840
 524     private static void caretAtEndTest() throws Exception {
 525         // Problem only occurs with multiline patterns
 526         // containing a beginning-of-line caret "^" followed
 527         // by an expression that also matches the empty string.
 528         Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
 529         Matcher matcher = pattern.matcher("\r");
 530         matcher.find();
 531         matcher.find();
 532         report("Caret at end");
 533     }
 534 
 535     // This test is for 4979006
 536     // Check to see if word boundary construct properly handles unicode
 537     // non spacing marks
 538     private static void unicodeWordBoundsTest() throws Exception {
 539         String spaces = "  ";
 540         String wordChar = "a";
 541         String nsm = "\u030a";
 542 
 543         assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
 544 
 545         Pattern pattern = Pattern.compile("\\b");
 546         Matcher matcher = pattern.matcher("");
 547         // S=other B=word character N=non spacing mark .=word boundary
 548         // SS.BB.SS
 549         String input = spaces + wordChar + wordChar + spaces;
 550         twoFindIndexes(input, matcher, 2, 4);
 551         // SS.BBN.SS
 552         input = spaces + wordChar +wordChar + nsm + spaces;
 553         twoFindIndexes(input, matcher, 2, 5);
 554         // SS.BN.SS
 555         input = spaces + wordChar + nsm + spaces;
 556         twoFindIndexes(input, matcher, 2, 4);
 557         // SS.BNN.SS
 558         input = spaces + wordChar + nsm + nsm + spaces;
 559         twoFindIndexes(input, matcher, 2, 5);
 560         // SSN.BB.SS
 561         input = spaces + nsm + wordChar + wordChar + spaces;
 562         twoFindIndexes(input, matcher, 3, 5);
 563         // SS.BNB.SS
 564         input = spaces + wordChar + nsm + wordChar + spaces;
 565         twoFindIndexes(input, matcher, 2, 5);
 566         // SSNNSS
 567         input = spaces + nsm + nsm + spaces;
 568         matcher.reset(input);
 569         if (matcher.find())
 570             failCount++;
 571         // SSN.BBN.SS
 572         input = spaces + nsm + wordChar + wordChar + nsm + spaces;
 573         twoFindIndexes(input, matcher, 3, 6);
 574 
 575         report("Unicode word boundary");
 576     }
 577 
 578     private static void twoFindIndexes(String input, Matcher matcher, int a,
 579                                        int b) throws Exception
 580     {
 581         matcher.reset(input);
 582         matcher.find();
 583         if (matcher.start() != a)
 584             failCount++;
 585         matcher.find();
 586         if (matcher.start() != b)
 587             failCount++;
 588     }
 589 
 590     // This test is for 6284152
 591     static void check(String regex, String input, String[] expected) {
 592         List<String> result = new ArrayList<String>();
 593         Pattern p = Pattern.compile(regex);
 594         Matcher m = p.matcher(input);
 595         while (m.find()) {
 596             result.add(m.group());
 597         }
 598         if (!Arrays.asList(expected).equals(result))
 599             failCount++;
 600     }
 601 
 602     private static void lookbehindTest() throws Exception {
 603         //Positive
 604         check("(?<=%.{0,5})foo\\d",
 605               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 606               new String[]{"foo1", "foo2", "foo3"});
 607 
 608         //boundary at end of the lookbehind sub-regex should work consistently
 609         //with the boundary just after the lookbehind sub-regex
 610         check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
 611         check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
 612         check("(?<!abc )\\bfoo", "abc foo", new String[0]);
 613         check("(?<!abc \\b)foo", "abc foo", new String[0]);
 614 
 615         //Negative
 616         check("(?<!%.{0,5})foo\\d",
 617               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 618               new String[] {"foo4", "foo5"});
 619 
 620         //Positive greedy
 621         check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
 622 
 623         //Positive reluctant
 624         check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
 625 
 626         //supplementary
 627         check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 628               new String[] {"fo\ud800\udc00o"});
 629         check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 630               new String[] {"fo\ud800\udc00o"});
 631         check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
 632               new String[] {"fo\ud800\udc00o"});
 633         check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
 634               new String[] {"fo\ud800\udc00o"});
 635         report("Lookbehind");
 636     }
 637 
 638     // This test is for 4938995
 639     // Check to see if weak region boundaries are transparent to
 640     // lookahead and lookbehind constructs
 641     private static void boundsTest() throws Exception {
 642         String fullMessage = "catdogcat";
 643         Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
 644         Matcher matcher = pattern.matcher("catdogca");
 645         matcher.useTransparentBounds(true);
 646         if (matcher.find())
 647             failCount++;
 648         matcher.reset("atdogcat");
 649         if (matcher.find())
 650             failCount++;
 651         matcher.reset(fullMessage);
 652         if (!matcher.find())
 653             failCount++;
 654         matcher.reset(fullMessage);
 655         matcher.region(0,9);
 656         if (!matcher.find())
 657             failCount++;
 658         matcher.reset(fullMessage);
 659         matcher.region(0,6);
 660         if (!matcher.find())
 661             failCount++;
 662         matcher.reset(fullMessage);
 663         matcher.region(3,6);
 664         if (!matcher.find())
 665             failCount++;
 666         matcher.useTransparentBounds(false);
 667         if (matcher.find())
 668             failCount++;
 669 
 670         // Negative lookahead/lookbehind
 671         pattern = Pattern.compile("(?<!cat)dog(?!cat)");
 672         matcher = pattern.matcher("dogcat");
 673         matcher.useTransparentBounds(true);
 674         matcher.region(0,3);
 675         if (matcher.find())
 676             failCount++;
 677         matcher.reset("catdog");
 678         matcher.region(3,6);
 679         if (matcher.find())
 680             failCount++;
 681         matcher.useTransparentBounds(false);
 682         matcher.reset("dogcat");
 683         matcher.region(0,3);
 684         if (!matcher.find())
 685             failCount++;
 686         matcher.reset("catdog");
 687         matcher.region(3,6);
 688         if (!matcher.find())
 689             failCount++;
 690 
 691         report("Region bounds transparency");
 692     }
 693 
 694     // This test is for 4945394
 695     private static void findFromTest() throws Exception {
 696         String message = "This is 40 $0 message.";
 697         Pattern pat = Pattern.compile("\\$0");
 698         Matcher match = pat.matcher(message);
 699         if (!match.find())
 700             failCount++;
 701         if (match.find())
 702             failCount++;
 703         if (match.find())
 704             failCount++;
 705         report("Check for alternating find");
 706     }
 707 
 708     // This test is for 4872664 and 4892980
 709     private static void negatedCharClassTest() throws Exception {
 710         Pattern pattern = Pattern.compile("[^>]");
 711         Matcher matcher = pattern.matcher("\u203A");
 712         if (!matcher.matches())
 713             failCount++;
 714         pattern = Pattern.compile("[^fr]");
 715         matcher = pattern.matcher("a");
 716         if (!matcher.find())
 717             failCount++;
 718         matcher.reset("\u203A");
 719         if (!matcher.find())
 720             failCount++;
 721         String s = "for";
 722         String result[] = s.split("[^fr]");
 723         if (!result[0].equals("f"))
 724             failCount++;
 725         if (!result[1].equals("r"))
 726             failCount++;
 727         s = "f\u203Ar";
 728         result = s.split("[^fr]");
 729         if (!result[0].equals("f"))
 730             failCount++;
 731         if (!result[1].equals("r"))
 732             failCount++;
 733 
 734         // Test adding to bits, subtracting a node, then adding to bits again
 735         pattern = Pattern.compile("[^f\u203Ar]");
 736         matcher = pattern.matcher("a");
 737         if (!matcher.find())
 738             failCount++;
 739         matcher.reset("f");
 740         if (matcher.find())
 741             failCount++;
 742         matcher.reset("\u203A");
 743         if (matcher.find())
 744             failCount++;
 745         matcher.reset("r");
 746         if (matcher.find())
 747             failCount++;
 748         matcher.reset("\u203B");
 749         if (!matcher.find())
 750             failCount++;
 751 
 752         // Test subtracting a node, adding to bits, subtracting again
 753         pattern = Pattern.compile("[^\u203Ar\u203B]");
 754         matcher = pattern.matcher("a");
 755         if (!matcher.find())
 756             failCount++;
 757         matcher.reset("\u203A");
 758         if (matcher.find())
 759             failCount++;
 760         matcher.reset("r");
 761         if (matcher.find())
 762             failCount++;
 763         matcher.reset("\u203B");
 764         if (matcher.find())
 765             failCount++;
 766         matcher.reset("\u203C");
 767         if (!matcher.find())
 768             failCount++;
 769 
 770         report("Negated Character Class");
 771     }
 772 
 773     // This test is for 4628291
 774     private static void toStringTest() throws Exception {
 775         Pattern pattern = Pattern.compile("b+");
 776         if (pattern.toString() != "b+")
 777             failCount++;
 778         Matcher matcher = pattern.matcher("aaabbbccc");
 779         String matcherString = matcher.toString(); // unspecified
 780         matcher.find();
 781         matcherString = matcher.toString(); // unspecified
 782         matcher.region(0,3);
 783         matcherString = matcher.toString(); // unspecified
 784         matcher.reset();
 785         matcherString = matcher.toString(); // unspecified
 786         report("toString");
 787     }
 788 
 789     // This test is for 4808962
 790     private static void literalPatternTest() throws Exception {
 791         int flags = Pattern.LITERAL;
 792 
 793         Pattern pattern = Pattern.compile("abc\\t$^", flags);
 794         check(pattern, "abc\\t$^", true);
 795 
 796         pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
 797         check(pattern, "abc\\t$^", true);
 798 
 799         pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
 800         check(pattern, "\\Qa^$bcabc\\E", true);
 801         check(pattern, "a^$bcabc", false);
 802 
 803         pattern = Pattern.compile("\\\\Q\\\\E");
 804         check(pattern, "\\Q\\E", true);
 805 
 806         pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
 807         check(pattern, "abcefg\\Q\\Ehij", true);
 808 
 809         pattern = Pattern.compile("\\\\\\Q\\\\E");
 810         check(pattern, "\\\\\\\\", true);
 811 
 812         pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
 813         check(pattern, "\\Qa^$bcabc\\E", true);
 814         check(pattern, "a^$bcabc", false);
 815 
 816         pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
 817         check(pattern, "\\Qabc\\Edef", true);
 818         check(pattern, "abcdef", false);
 819 
 820         pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
 821         check(pattern, "abc\\Edef", true);
 822         check(pattern, "abcdef", false);
 823 
 824         pattern = Pattern.compile(Pattern.quote("\\E"));
 825         check(pattern, "\\E", true);
 826 
 827         pattern = Pattern.compile("((((abc.+?:)", flags);
 828         check(pattern, "((((abc.+?:)", true);
 829 
 830         flags |= Pattern.MULTILINE;
 831 
 832         pattern = Pattern.compile("^cat$", flags);
 833         check(pattern, "abc^cat$def", true);
 834         check(pattern, "cat", false);
 835 
 836         flags |= Pattern.CASE_INSENSITIVE;
 837 
 838         pattern = Pattern.compile("abcdef", flags);
 839         check(pattern, "ABCDEF", true);
 840         check(pattern, "AbCdEf", true);
 841 
 842         flags |= Pattern.DOTALL;
 843 
 844         pattern = Pattern.compile("a...b", flags);
 845         check(pattern, "A...b", true);
 846         check(pattern, "Axxxb", false);
 847 
 848         flags |= Pattern.CANON_EQ;
 849 
 850         Pattern p = Pattern.compile("testa\u030a", flags);
 851         check(pattern, "testa\u030a", false);
 852         check(pattern, "test\u00e5", false);
 853 
 854         // Supplementary character test
 855         flags = Pattern.LITERAL;
 856 
 857         pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
 858         check(pattern, toSupplementaries("abc\\t$^"), true);
 859 
 860         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
 861         check(pattern, toSupplementaries("abc\\t$^"), true);
 862 
 863         pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
 864         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 865         check(pattern, toSupplementaries("a^$bcabc"), false);
 866 
 867         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
 868         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 869         check(pattern, toSupplementaries("a^$bcabc"), false);
 870 
 871         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
 872         check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
 873         check(pattern, toSupplementaries("abcdef"), false);
 874 
 875         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
 876         check(pattern, toSupplementaries("abc\\Edef"), true);
 877         check(pattern, toSupplementaries("abcdef"), false);
 878 
 879         pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
 880         check(pattern, toSupplementaries("((((abc.+?:)"), true);
 881 
 882         flags |= Pattern.MULTILINE;
 883 
 884         pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
 885         check(pattern, toSupplementaries("abc^cat$def"), true);
 886         check(pattern, toSupplementaries("cat"), false);
 887 
 888         flags |= Pattern.DOTALL;
 889 
 890         // note: this is case-sensitive.
 891         pattern = Pattern.compile(toSupplementaries("a...b"), flags);
 892         check(pattern, toSupplementaries("a...b"), true);
 893         check(pattern, toSupplementaries("axxxb"), false);
 894 
 895         flags |= Pattern.CANON_EQ;
 896 
 897         String t = toSupplementaries("test");
 898         p = Pattern.compile(t + "a\u030a", flags);
 899         check(pattern, t + "a\u030a", false);
 900         check(pattern, t + "\u00e5", false);
 901 
 902         report("Literal pattern");
 903     }
 904 
 905     // This test is for 4803179
 906     // This test is also for 4808962, replacement parts
 907     private static void literalReplacementTest() throws Exception {
 908         int flags = Pattern.LITERAL;
 909 
 910         Pattern pattern = Pattern.compile("abc", flags);
 911         Matcher matcher = pattern.matcher("zzzabczzz");
 912         String replaceTest = "$0";
 913         String result = matcher.replaceAll(replaceTest);
 914         if (!result.equals("zzzabczzz"))
 915             failCount++;
 916 
 917         matcher.reset();
 918         String literalReplacement = matcher.quoteReplacement(replaceTest);
 919         result = matcher.replaceAll(literalReplacement);
 920         if (!result.equals("zzz$0zzz"))
 921             failCount++;
 922 
 923         matcher.reset();
 924         replaceTest = "\\t$\\$";
 925         literalReplacement = matcher.quoteReplacement(replaceTest);
 926         result = matcher.replaceAll(literalReplacement);
 927         if (!result.equals("zzz\\t$\\$zzz"))
 928             failCount++;
 929 
 930         // Supplementary character test
 931         pattern = Pattern.compile(toSupplementaries("abc"), flags);
 932         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
 933         replaceTest = "$0";
 934         result = matcher.replaceAll(replaceTest);
 935         if (!result.equals(toSupplementaries("zzzabczzz")))
 936             failCount++;
 937 
 938         matcher.reset();
 939         literalReplacement = matcher.quoteReplacement(replaceTest);
 940         result = matcher.replaceAll(literalReplacement);
 941         if (!result.equals(toSupplementaries("zzz$0zzz")))
 942             failCount++;
 943 
 944         matcher.reset();
 945         replaceTest = "\\t$\\$";
 946         literalReplacement = matcher.quoteReplacement(replaceTest);
 947         result = matcher.replaceAll(literalReplacement);
 948         if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
 949             failCount++;
 950 
 951         // IAE should be thrown if backslash or '$' is the last character
 952         // in replacement string
 953         try {
 954             "\uac00".replaceAll("\uac00", "$");
 955             failCount++;
 956         } catch (IllegalArgumentException iie) {
 957         } catch (Exception e) {
 958             failCount++;
 959         }
 960         try {
 961             "\uac00".replaceAll("\uac00", "\\");
 962             failCount++;
 963         } catch (IllegalArgumentException iie) {
 964         } catch (Exception e) {
 965             failCount++;
 966         }
 967         report("Literal replacement");
 968     }
 969 
 970     // This test is for 4757029
 971     private static void regionTest() throws Exception {
 972         Pattern pattern = Pattern.compile("abc");
 973         Matcher matcher = pattern.matcher("abcdefabc");
 974 
 975         matcher.region(0,9);
 976         if (!matcher.find())
 977             failCount++;
 978         if (!matcher.find())
 979             failCount++;
 980         matcher.region(0,3);
 981         if (!matcher.find())
 982            failCount++;
 983         matcher.region(3,6);
 984         if (matcher.find())
 985            failCount++;
 986         matcher.region(0,2);
 987         if (matcher.find())
 988            failCount++;
 989 
 990         expectRegionFail(matcher, 1, -1);
 991         expectRegionFail(matcher, -1, -1);
 992         expectRegionFail(matcher, -1, 1);
 993         expectRegionFail(matcher, 5, 3);
 994         expectRegionFail(matcher, 5, 12);
 995         expectRegionFail(matcher, 12, 12);
 996 
 997         pattern = Pattern.compile("^abc$");
 998         matcher = pattern.matcher("zzzabczzz");
 999         matcher.region(0,9);
1000         if (matcher.find())
1001             failCount++;
1002         matcher.region(3,6);
1003         if (!matcher.find())
1004            failCount++;
1005         matcher.region(3,6);
1006         matcher.useAnchoringBounds(false);
1007         if (matcher.find())
1008            failCount++;
1009 
1010         // Supplementary character test
1011         pattern = Pattern.compile(toSupplementaries("abc"));
1012         matcher = pattern.matcher(toSupplementaries("abcdefabc"));
1013         matcher.region(0,9*2);
1014         if (!matcher.find())
1015             failCount++;
1016         if (!matcher.find())
1017             failCount++;
1018         matcher.region(0,3*2);
1019         if (!matcher.find())
1020            failCount++;
1021         matcher.region(1,3*2);
1022         if (matcher.find())
1023            failCount++;
1024         matcher.region(3*2,6*2);
1025         if (matcher.find())
1026            failCount++;
1027         matcher.region(0,2*2);
1028         if (matcher.find())
1029            failCount++;
1030         matcher.region(0,2*2+1);
1031         if (matcher.find())
1032            failCount++;
1033 
1034         expectRegionFail(matcher, 1*2, -1);
1035         expectRegionFail(matcher, -1, -1);
1036         expectRegionFail(matcher, -1, 1*2);
1037         expectRegionFail(matcher, 5*2, 3*2);
1038         expectRegionFail(matcher, 5*2, 12*2);
1039         expectRegionFail(matcher, 12*2, 12*2);
1040 
1041         pattern = Pattern.compile(toSupplementaries("^abc$"));
1042         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
1043         matcher.region(0,9*2);
1044         if (matcher.find())
1045             failCount++;
1046         matcher.region(3*2,6*2);
1047         if (!matcher.find())
1048            failCount++;
1049         matcher.region(3*2+1,6*2);
1050         if (matcher.find())
1051            failCount++;
1052         matcher.region(3*2,6*2-1);
1053         if (matcher.find())
1054            failCount++;
1055         matcher.region(3*2,6*2);
1056         matcher.useAnchoringBounds(false);
1057         if (matcher.find())
1058            failCount++;
1059         report("Regions");
1060     }
1061 
1062     private static void expectRegionFail(Matcher matcher, int index1,
1063                                          int index2)
1064     {
1065         try {
1066             matcher.region(index1, index2);
1067             failCount++;
1068         } catch (IndexOutOfBoundsException ioobe) {
1069             // Correct result
1070         } catch (IllegalStateException ise) {
1071             // Correct result
1072         }
1073     }
1074 
1075     // This test is for 4803197
1076     private static void escapedSegmentTest() throws Exception {
1077 
1078         Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
1079         check(pattern, "dir1\\dir2", true);
1080 
1081         pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
1082         check(pattern, "dir1\\dir2\\", true);
1083 
1084         pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
1085         check(pattern, "dir1\\dir2\\", true);
1086 
1087         // Supplementary character test
1088         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
1089         check(pattern, toSupplementaries("dir1\\dir2"), true);
1090 
1091         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
1092         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1093 
1094         pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1095         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1096 
1097         report("Escaped segment");
1098     }
1099 
1100     // This test is for 4792284
1101     private static void nonCaptureRepetitionTest() throws Exception {
1102         String input = "abcdefgh;";
1103 
1104         String[] patterns = new String[] {
1105             "(?:\\w{4})+;",
1106             "(?:\\w{8})*;",
1107             "(?:\\w{2}){2,4};",
1108             "(?:\\w{4}){2,};",   // only matches the
1109             ".*?(?:\\w{5})+;",   //     specified minimum
1110             ".*?(?:\\w{9})*;",   //     number of reps - OK
1111             "(?:\\w{4})+?;",     // lazy repetition - OK
1112             "(?:\\w{4})++;",     // possessive repetition - OK
1113             "(?:\\w{2,}?)+;",    // non-deterministic - OK
1114             "(\\w{4})+;",        // capturing group - OK
1115         };
1116 
1117         for (int i = 0; i < patterns.length; i++) {
1118             // Check find()
1119             check(patterns[i], 0, input, input, true);
1120             // Check matches()
1121             Pattern p = Pattern.compile(patterns[i]);
1122             Matcher m = p.matcher(input);
1123 
1124             if (m.matches()) {
1125                 if (!m.group(0).equals(input))
1126                     failCount++;
1127             } else {
1128                 failCount++;
1129             }
1130         }
1131 
1132         report("Non capturing repetition");
1133     }
1134 
1135     // This test is for 6358731
1136     private static void notCapturedGroupCurlyMatchTest() throws Exception {
1137         Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1138         Matcher matcher = pattern.matcher("abcd");
1139         if (!matcher.matches() ||
1140              matcher.group(1) != null ||
1141              !matcher.group(2).equals("abcd")) {
1142             failCount++;
1143         }
1144         report("Not captured GroupCurly");
1145     }
1146 
1147     // This test is for 4706545
1148     private static void javaCharClassTest() throws Exception {
1149         for (int i=0; i<1000; i++) {
1150             char c = (char)generator.nextInt();
1151             check("{javaLowerCase}", c, Character.isLowerCase(c));
1152             check("{javaUpperCase}", c, Character.isUpperCase(c));
1153             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1154             check("{javaTitleCase}", c, Character.isTitleCase(c));
1155             check("{javaDigit}", c, Character.isDigit(c));
1156             check("{javaDefined}", c, Character.isDefined(c));
1157             check("{javaLetter}", c, Character.isLetter(c));
1158             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1159             check("{javaJavaIdentifierStart}", c,
1160                   Character.isJavaIdentifierStart(c));
1161             check("{javaJavaIdentifierPart}", c,
1162                   Character.isJavaIdentifierPart(c));
1163             check("{javaUnicodeIdentifierStart}", c,
1164                   Character.isUnicodeIdentifierStart(c));
1165             check("{javaUnicodeIdentifierPart}", c,
1166                   Character.isUnicodeIdentifierPart(c));
1167             check("{javaIdentifierIgnorable}", c,
1168                   Character.isIdentifierIgnorable(c));
1169             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1170             check("{javaWhitespace}", c, Character.isWhitespace(c));
1171             check("{javaISOControl}", c, Character.isISOControl(c));
1172             check("{javaMirrored}", c, Character.isMirrored(c));
1173 
1174         }
1175 
1176         // Supplementary character test
1177         for (int i=0; i<1000; i++) {
1178             int c = generator.nextInt(Character.MAX_CODE_POINT
1179                                       - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1180                         + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1181             check("{javaLowerCase}", c, Character.isLowerCase(c));
1182             check("{javaUpperCase}", c, Character.isUpperCase(c));
1183             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1184             check("{javaTitleCase}", c, Character.isTitleCase(c));
1185             check("{javaDigit}", c, Character.isDigit(c));
1186             check("{javaDefined}", c, Character.isDefined(c));
1187             check("{javaLetter}", c, Character.isLetter(c));
1188             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1189             check("{javaJavaIdentifierStart}", c,
1190                   Character.isJavaIdentifierStart(c));
1191             check("{javaJavaIdentifierPart}", c,
1192                   Character.isJavaIdentifierPart(c));
1193             check("{javaUnicodeIdentifierStart}", c,
1194                   Character.isUnicodeIdentifierStart(c));
1195             check("{javaUnicodeIdentifierPart}", c,
1196                   Character.isUnicodeIdentifierPart(c));
1197             check("{javaIdentifierIgnorable}", c,
1198                   Character.isIdentifierIgnorable(c));
1199             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1200             check("{javaWhitespace}", c, Character.isWhitespace(c));
1201             check("{javaISOControl}", c, Character.isISOControl(c));
1202             check("{javaMirrored}", c, Character.isMirrored(c));
1203         }
1204 
1205         report("Java character classes");
1206     }
1207 
1208     // This test is for 4523620
1209     /*
1210     private static void numOccurrencesTest() throws Exception {
1211         Pattern pattern = Pattern.compile("aaa");
1212 
1213         if (pattern.numOccurrences("aaaaaa", false) != 2)
1214             failCount++;
1215         if (pattern.numOccurrences("aaaaaa", true) != 4)
1216             failCount++;
1217 
1218         pattern = Pattern.compile("^");
1219         if (pattern.numOccurrences("aaaaaa", false) != 1)
1220             failCount++;
1221         if (pattern.numOccurrences("aaaaaa", true) != 1)
1222             failCount++;
1223 
1224         report("Number of Occurrences");
1225     }
1226     */
1227 
1228     // This test is for 4776374
1229     private static void caretBetweenTerminatorsTest() throws Exception {
1230         int flags1 = Pattern.DOTALL;
1231         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1232         int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1233         int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1234 
1235         check("^....", flags1, "test\ntest", "test", true);
1236         check(".....^", flags1, "test\ntest", "test", false);
1237         check(".....^", flags1, "test\n", "test", false);
1238         check("....^", flags1, "test\r\n", "test", false);
1239 
1240         check("^....", flags2, "test\ntest", "test", true);
1241         check("....^", flags2, "test\ntest", "test", false);
1242         check(".....^", flags2, "test\n", "test", false);
1243         check("....^", flags2, "test\r\n", "test", false);
1244 
1245         check("^....", flags3, "test\ntest", "test", true);
1246         check(".....^", flags3, "test\ntest", "test\n", true);
1247         check(".....^", flags3, "test\u0085test", "test\u0085", false);
1248         check(".....^", flags3, "test\n", "test", false);
1249         check(".....^", flags3, "test\r\n", "test", false);
1250         check("......^", flags3, "test\r\ntest", "test\r\n", true);
1251 
1252         check("^....", flags4, "test\ntest", "test", true);
1253         check(".....^", flags3, "test\ntest", "test\n", true);
1254         check(".....^", flags4, "test\u0085test", "test\u0085", true);
1255         check(".....^", flags4, "test\n", "test\n", false);
1256         check(".....^", flags4, "test\r\n", "test\r", false);
1257 
1258         // Supplementary character test
1259         String t = toSupplementaries("test");
1260         check("^....", flags1, t+"\n"+t, t, true);
1261         check(".....^", flags1, t+"\n"+t, t, false);
1262         check(".....^", flags1, t+"\n", t, false);
1263         check("....^", flags1, t+"\r\n", t, false);
1264 
1265         check("^....", flags2, t+"\n"+t, t, true);
1266         check("....^", flags2, t+"\n"+t, t, false);
1267         check(".....^", flags2, t+"\n", t, false);
1268         check("....^", flags2, t+"\r\n", t, false);
1269 
1270         check("^....", flags3, t+"\n"+t, t, true);
1271         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1272         check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1273         check(".....^", flags3, t+"\n", t, false);
1274         check(".....^", flags3, t+"\r\n", t, false);
1275         check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1276 
1277         check("^....", flags4, t+"\n"+t, t, true);
1278         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1279         check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1280         check(".....^", flags4, t+"\n", t+"\n", false);
1281         check(".....^", flags4, t+"\r\n", t+"\r", false);
1282 
1283         report("Caret between terminators");
1284     }
1285 
1286     // This test is for 4727935
1287     private static void dollarAtEndTest() throws Exception {
1288         int flags1 = Pattern.DOTALL;
1289         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1290         int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1291 
1292         check("....$", flags1, "test\n", "test", true);
1293         check("....$", flags1, "test\r\n", "test", true);
1294         check(".....$", flags1, "test\n", "test\n", true);
1295         check(".....$", flags1, "test\u0085", "test\u0085", true);
1296         check("....$", flags1, "test\u0085", "test", true);
1297 
1298         check("....$", flags2, "test\n", "test", true);
1299         check(".....$", flags2, "test\n", "test\n", true);
1300         check(".....$", flags2, "test\u0085", "test\u0085", true);
1301         check("....$", flags2, "test\u0085", "est\u0085", true);
1302 
1303         check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1304         check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1305         check("....$blah", flags3, "test\nblah", "!!!!", false);
1306         check(".....$blah", flags3, "test\nblah", "!!!!", false);
1307 
1308         // Supplementary character test
1309         String t = toSupplementaries("test");
1310         String b = toSupplementaries("blah");
1311         check("....$", flags1, t+"\n", t, true);
1312         check("....$", flags1, t+"\r\n", t, true);
1313         check(".....$", flags1, t+"\n", t+"\n", true);
1314         check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1315         check("....$", flags1, t+"\u0085", t, true);
1316 
1317         check("....$", flags2, t+"\n", t, true);
1318         check(".....$", flags2, t+"\n", t+"\n", true);
1319         check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1320         check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1321 
1322         check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1323         check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1324         check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1325         check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1326 
1327         report("Dollar at End");
1328     }
1329 
1330     // This test is for 4711773
1331     private static void multilineDollarTest() throws Exception {
1332         Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1333         Matcher matcher = findCR.matcher("first bit\nsecond bit");
1334         matcher.find();
1335         if (matcher.start(0) != 9)
1336             failCount++;
1337         matcher.find();
1338         if (matcher.start(0) != 20)
1339             failCount++;
1340 
1341         // Supplementary character test
1342         matcher = findCR.matcher(toSupplementaries("first  bit\n second  bit")); // double BMP chars
1343         matcher.find();
1344         if (matcher.start(0) != 9*2)
1345             failCount++;
1346         matcher.find();
1347         if (matcher.start(0) != 20*2)
1348             failCount++;
1349 
1350         report("Multiline Dollar");
1351     }
1352 
1353     private static void reluctantRepetitionTest() throws Exception {
1354         Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1355         check(p, "1 word word word 2", true);
1356         check(p, "1 wor wo w 2", true);
1357         check(p, "1 word word 2", true);
1358         check(p, "1 word 2", true);
1359         check(p, "1 wo w w 2", true);
1360         check(p, "1 wo w 2", true);
1361         check(p, "1 wor w 2", true);
1362 
1363         p = Pattern.compile("([a-z])+?c");
1364         Matcher m = p.matcher("ababcdefdec");
1365         check(m, "ababc");
1366 
1367         // Supplementary character test
1368         p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1369         m = p.matcher(toSupplementaries("ababcdefdec"));
1370         check(m, toSupplementaries("ababc"));
1371 
1372         report("Reluctant Repetition");
1373     }
1374 
1375     private static Pattern serializedPattern(Pattern p) throws Exception {
1376         ByteArrayOutputStream baos = new ByteArrayOutputStream();
1377         ObjectOutputStream oos = new ObjectOutputStream(baos);
1378         oos.writeObject(p);
1379         oos.close();
1380         try (ObjectInputStream ois = new ObjectInputStream(
1381                 new ByteArrayInputStream(baos.toByteArray()))) {
1382             return (Pattern)ois.readObject();
1383         }
1384     }
1385 
1386     private static void serializeTest() throws Exception {
1387         String patternStr = "(b)";
1388         String matchStr = "b";
1389         Pattern pattern = Pattern.compile(patternStr);
1390         Pattern serializedPattern = serializedPattern(pattern);
1391         Matcher matcher = serializedPattern.matcher(matchStr);
1392         if (!matcher.matches())
1393             failCount++;
1394         if (matcher.groupCount() != 1)
1395             failCount++;
1396 
1397         pattern = Pattern.compile("a(?-i)b", Pattern.CASE_INSENSITIVE);
1398         serializedPattern = serializedPattern(pattern);
1399         if (!serializedPattern.matcher("Ab").matches())
1400             failCount++;
1401         if (serializedPattern.matcher("AB").matches())
1402             failCount++;
1403 
1404         report("Serialization");
1405     }
1406 
1407     private static void gTest() {
1408         Pattern pattern = Pattern.compile("\\G\\w");
1409         Matcher matcher = pattern.matcher("abc#x#x");
1410         matcher.find();
1411         matcher.find();
1412         matcher.find();
1413         if (matcher.find())
1414             failCount++;
1415 
1416         pattern = Pattern.compile("\\GA*");
1417         matcher = pattern.matcher("1A2AA3");
1418         matcher.find();
1419         if (matcher.find())
1420             failCount++;
1421 
1422         pattern = Pattern.compile("\\GA*");
1423         matcher = pattern.matcher("1A2AA3");
1424         if (!matcher.find(1))
1425             failCount++;
1426         matcher.find();
1427         if (matcher.find())
1428             failCount++;
1429 
1430         report("\\G");
1431     }
1432 
1433     private static void zTest() {
1434         Pattern pattern = Pattern.compile("foo\\Z");
1435         // Positives
1436         check(pattern, "foo\u0085", true);
1437         check(pattern, "foo\u2028", true);
1438         check(pattern, "foo\u2029", true);
1439         check(pattern, "foo\n", true);
1440         check(pattern, "foo\r", true);
1441         check(pattern, "foo\r\n", true);
1442         // Negatives
1443         check(pattern, "fooo", false);
1444         check(pattern, "foo\n\r", false);
1445 
1446         pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1447         // Positives
1448         check(pattern, "foo", true);
1449         check(pattern, "foo\n", true);
1450         // Negatives
1451         check(pattern, "foo\r", false);
1452         check(pattern, "foo\u0085", false);
1453         check(pattern, "foo\u2028", false);
1454         check(pattern, "foo\u2029", false);
1455 
1456         report("\\Z");
1457     }
1458 
1459     private static void replaceFirstTest() {
1460         Pattern pattern = Pattern.compile("(ab)(c*)");
1461         Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1462         if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1463             failCount++;
1464 
1465         matcher.reset("zzzabccczzzabcczzzabccczzz");
1466         if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1467             failCount++;
1468 
1469         matcher.reset("zzzabccczzzabcczzzabccczzz");
1470         String result = matcher.replaceFirst("$1");
1471         if (!result.equals("zzzabzzzabcczzzabccczzz"))
1472             failCount++;
1473 
1474         matcher.reset("zzzabccczzzabcczzzabccczzz");
1475         result = matcher.replaceFirst("$2");
1476         if (!result.equals("zzzccczzzabcczzzabccczzz"))
1477             failCount++;
1478 
1479         pattern = Pattern.compile("a*");
1480         matcher = pattern.matcher("aaaaaaaaaa");
1481         if (!matcher.replaceFirst("test").equals("test"))
1482             failCount++;
1483 
1484         pattern = Pattern.compile("a+");
1485         matcher = pattern.matcher("zzzaaaaaaaaaa");
1486         if (!matcher.replaceFirst("test").equals("zzztest"))
1487             failCount++;
1488 
1489         // Supplementary character test
1490         pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1491         matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1492         if (!matcher.replaceFirst(toSupplementaries("test"))
1493                 .equals(toSupplementaries("testzzzabcczzzabccc")))
1494             failCount++;
1495 
1496         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1497         if (!matcher.replaceFirst(toSupplementaries("test")).
1498             equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1499             failCount++;
1500 
1501         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1502         result = matcher.replaceFirst("$1");
1503         if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1504             failCount++;
1505 
1506         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1507         result = matcher.replaceFirst("$2");
1508         if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1509             failCount++;
1510 
1511         pattern = Pattern.compile(toSupplementaries("a*"));
1512         matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1513         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1514             failCount++;
1515 
1516         pattern = Pattern.compile(toSupplementaries("a+"));
1517         matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1518         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1519             failCount++;
1520 
1521         report("Replace First");
1522     }
1523 
1524     private static void unixLinesTest() {
1525         Pattern pattern = Pattern.compile(".*");
1526         Matcher matcher = pattern.matcher("aa\u2028blah");
1527         matcher.find();
1528         if (!matcher.group(0).equals("aa"))
1529             failCount++;
1530 
1531         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1532         matcher = pattern.matcher("aa\u2028blah");
1533         matcher.find();
1534         if (!matcher.group(0).equals("aa\u2028blah"))
1535             failCount++;
1536 
1537         pattern = Pattern.compile("[az]$",
1538                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1539         matcher = pattern.matcher("aa\u2028zz");
1540         check(matcher, "a\u2028", false);
1541 
1542         // Supplementary character test
1543         pattern = Pattern.compile(".*");
1544         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1545         matcher.find();
1546         if (!matcher.group(0).equals(toSupplementaries("aa")))
1547             failCount++;
1548 
1549         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1550         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1551         matcher.find();
1552         if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1553             failCount++;
1554 
1555         pattern = Pattern.compile(toSupplementaries("[az]$"),
1556                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1557         matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1558         check(matcher, toSupplementaries("a\u2028"), false);
1559 
1560         report("Unix Lines");
1561     }
1562 
1563     private static void commentsTest() {
1564         int flags = Pattern.COMMENTS;
1565 
1566         Pattern pattern = Pattern.compile("aa \\# aa", flags);
1567         Matcher matcher = pattern.matcher("aa#aa");
1568         if (!matcher.matches())
1569             failCount++;
1570 
1571         pattern = Pattern.compile("aa  # blah", flags);
1572         matcher = pattern.matcher("aa");
1573         if (!matcher.matches())
1574             failCount++;
1575 
1576         pattern = Pattern.compile("aa blah", flags);
1577         matcher = pattern.matcher("aablah");
1578         if (!matcher.matches())
1579              failCount++;
1580 
1581         pattern = Pattern.compile("aa  # blah blech  ", flags);
1582         matcher = pattern.matcher("aa");
1583         if (!matcher.matches())
1584             failCount++;
1585 
1586         pattern = Pattern.compile("aa  # blah\n  ", flags);
1587         matcher = pattern.matcher("aa");
1588         if (!matcher.matches())
1589             failCount++;
1590 
1591         pattern = Pattern.compile("aa  # blah\nbc # blech", flags);
1592         matcher = pattern.matcher("aabc");
1593         if (!matcher.matches())
1594              failCount++;
1595 
1596         pattern = Pattern.compile("aa  # blah\nbc# blech", flags);
1597         matcher = pattern.matcher("aabc");
1598         if (!matcher.matches())
1599              failCount++;
1600 
1601         pattern = Pattern.compile("aa  # blah\nbc\\# blech", flags);
1602         matcher = pattern.matcher("aabc#blech");
1603         if (!matcher.matches())
1604              failCount++;
1605 
1606         // Supplementary character test
1607         pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1608         matcher = pattern.matcher(toSupplementaries("aa#aa"));
1609         if (!matcher.matches())
1610             failCount++;
1611 
1612         pattern = Pattern.compile(toSupplementaries("aa  # blah"), flags);
1613         matcher = pattern.matcher(toSupplementaries("aa"));
1614         if (!matcher.matches())
1615             failCount++;
1616 
1617         pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1618         matcher = pattern.matcher(toSupplementaries("aablah"));
1619         if (!matcher.matches())
1620              failCount++;
1621 
1622         pattern = Pattern.compile(toSupplementaries("aa  # blah blech  "), flags);
1623         matcher = pattern.matcher(toSupplementaries("aa"));
1624         if (!matcher.matches())
1625             failCount++;
1626 
1627         pattern = Pattern.compile(toSupplementaries("aa  # blah\n  "), flags);
1628         matcher = pattern.matcher(toSupplementaries("aa"));
1629         if (!matcher.matches())
1630             failCount++;
1631 
1632         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc # blech"), flags);
1633         matcher = pattern.matcher(toSupplementaries("aabc"));
1634         if (!matcher.matches())
1635              failCount++;
1636 
1637         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc# blech"), flags);
1638         matcher = pattern.matcher(toSupplementaries("aabc"));
1639         if (!matcher.matches())
1640              failCount++;
1641 
1642         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc\\# blech"), flags);
1643         matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1644         if (!matcher.matches())
1645              failCount++;
1646 
1647         report("Comments");
1648     }
1649 
1650     private static void caseFoldingTest() { // bug 4504687
1651         int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1652         Pattern pattern = Pattern.compile("aa", flags);
1653         Matcher matcher = pattern.matcher("ab");
1654         if (matcher.matches())
1655             failCount++;
1656 
1657         pattern = Pattern.compile("aA", flags);
1658         matcher = pattern.matcher("ab");
1659         if (matcher.matches())
1660             failCount++;
1661 
1662         pattern = Pattern.compile("aa", flags);
1663         matcher = pattern.matcher("aB");
1664         if (matcher.matches())
1665             failCount++;
1666         matcher = pattern.matcher("Ab");
1667         if (matcher.matches())
1668             failCount++;
1669 
1670         // ASCII               "a"
1671         // Latin-1 Supplement  "a" + grave
1672         // Cyrillic            "a"
1673         String[] patterns = new String[] {
1674             //single
1675             "a", "\u00e0", "\u0430",
1676             //slice
1677             "ab", "\u00e0\u00e1", "\u0430\u0431",
1678             //class single
1679             "[a]", "[\u00e0]", "[\u0430]",
1680             //class range
1681             "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1682             //back reference
1683             "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1684         };
1685 
1686         String[] texts = new String[] {
1687             "A", "\u00c0", "\u0410",
1688             "AB", "\u00c0\u00c1", "\u0410\u0411",
1689             "A", "\u00c0", "\u0410",
1690             "B", "\u00c2", "\u0411",
1691             "aA", "\u00e0\u00c0", "\u0430\u0410"
1692         };
1693 
1694         boolean[] expected = new boolean[] {
1695             true, false, false,
1696             true, false, false,
1697             true, false, false,
1698             true, false, false,
1699             true, false, false
1700         };
1701 
1702         flags = Pattern.CASE_INSENSITIVE;
1703         for (int i = 0; i < patterns.length; i++) {
1704             pattern = Pattern.compile(patterns[i], flags);
1705             matcher = pattern.matcher(texts[i]);
1706             if (matcher.matches() != expected[i]) {
1707                 System.out.println("<1> Failed at " + i);
1708                 failCount++;
1709             }
1710         }
1711 
1712         flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1713         for (int i = 0; i < patterns.length; i++) {
1714             pattern = Pattern.compile(patterns[i], flags);
1715             matcher = pattern.matcher(texts[i]);
1716             if (!matcher.matches()) {
1717                 System.out.println("<2> Failed at " + i);
1718                 failCount++;
1719             }
1720         }
1721         // flag unicode_case alone should do nothing
1722         flags = Pattern.UNICODE_CASE;
1723         for (int i = 0; i < patterns.length; i++) {
1724             pattern = Pattern.compile(patterns[i], flags);
1725             matcher = pattern.matcher(texts[i]);
1726             if (matcher.matches()) {
1727                 System.out.println("<3> Failed at " + i);
1728                 failCount++;
1729             }
1730         }
1731 
1732         // Special cases: i, I, u+0131 and u+0130
1733         flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1734         pattern = Pattern.compile("[h-j]+", flags);
1735         if (!pattern.matcher("\u0131\u0130").matches())
1736             failCount++;
1737         report("Case Folding");
1738     }
1739 
1740     private static void appendTest() {
1741         Pattern pattern = Pattern.compile("(ab)(cd)");
1742         Matcher matcher = pattern.matcher("abcd");
1743         String result = matcher.replaceAll("$2$1");
1744         if (!result.equals("cdab"))
1745             failCount++;
1746 
1747         String  s1 = "Swap all: first = 123, second = 456";
1748         String  s2 = "Swap one: first = 123, second = 456";
1749         String  r  = "$3$2$1";
1750         pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1751         matcher = pattern.matcher(s1);
1752 
1753         result = matcher.replaceAll(r);
1754         if (!result.equals("Swap all: 123 = first, 456 = second"))
1755             failCount++;
1756 
1757         matcher = pattern.matcher(s2);
1758 
1759         if (matcher.find()) {
1760             StringBuffer sb = new StringBuffer();
1761             matcher.appendReplacement(sb, r);
1762             matcher.appendTail(sb);
1763             result = sb.toString();
1764             if (!result.equals("Swap one: 123 = first, second = 456"))
1765                 failCount++;
1766         }
1767 
1768         // Supplementary character test
1769         pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1770         matcher = pattern.matcher(toSupplementaries("abcd"));
1771         result = matcher.replaceAll("$2$1");
1772         if (!result.equals(toSupplementaries("cdab")))
1773             failCount++;
1774 
1775         s1 = toSupplementaries("Swap all: first = 123, second = 456");
1776         s2 = toSupplementaries("Swap one: first = 123, second = 456");
1777         r  = toSupplementaries("$3$2$1");
1778         pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1779         matcher = pattern.matcher(s1);
1780 
1781         result = matcher.replaceAll(r);
1782         if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1783             failCount++;
1784 
1785         matcher = pattern.matcher(s2);
1786 
1787         if (matcher.find()) {
1788             StringBuffer sb = new StringBuffer();
1789             matcher.appendReplacement(sb, r);
1790             matcher.appendTail(sb);
1791             result = sb.toString();
1792             if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1793                 failCount++;
1794         }
1795         report("Append");
1796     }
1797 
1798     private static void splitTest() {
1799         Pattern pattern = Pattern.compile(":");
1800         String[] result = pattern.split("foo:and:boo", 2);
1801         if (!result[0].equals("foo"))
1802             failCount++;
1803         if (!result[1].equals("and:boo"))
1804             failCount++;
1805         // Supplementary character test
1806         Pattern patternX = Pattern.compile(toSupplementaries("X"));
1807         result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1808         if (!result[0].equals(toSupplementaries("foo")))
1809             failCount++;
1810         if (!result[1].equals(toSupplementaries("andXboo")))
1811             failCount++;
1812 
1813         CharBuffer cb = CharBuffer.allocate(100);
1814         cb.put("foo:and:boo");
1815         cb.flip();
1816         result = pattern.split(cb);
1817         if (!result[0].equals("foo"))
1818             failCount++;
1819         if (!result[1].equals("and"))
1820             failCount++;
1821         if (!result[2].equals("boo"))
1822             failCount++;
1823 
1824         // Supplementary character test
1825         CharBuffer cbs = CharBuffer.allocate(100);
1826         cbs.put(toSupplementaries("fooXandXboo"));
1827         cbs.flip();
1828         result = patternX.split(cbs);
1829         if (!result[0].equals(toSupplementaries("foo")))
1830             failCount++;
1831         if (!result[1].equals(toSupplementaries("and")))
1832             failCount++;
1833         if (!result[2].equals(toSupplementaries("boo")))
1834             failCount++;
1835 
1836         String source = "0123456789";
1837         for (int limit=-2; limit<3; limit++) {
1838             for (int x=0; x<10; x++) {
1839                 result = source.split(Integer.toString(x), limit);
1840                 int expectedLength = limit < 1 ? 2 : limit;
1841 
1842                 if ((limit == 0) && (x == 9)) {
1843                     // expected dropping of ""
1844                     if (result.length != 1)
1845                         failCount++;
1846                     if (!result[0].equals("012345678")) {
1847                         failCount++;
1848                     }
1849                 } else {
1850                     if (result.length != expectedLength) {
1851                         failCount++;
1852                     }
1853                     if (!result[0].equals(source.substring(0,x))) {
1854                         if (limit != 1) {
1855                             failCount++;
1856                         } else {
1857                             if (!result[0].equals(source.substring(0,10))) {
1858                                 failCount++;
1859                             }
1860                         }
1861                     }
1862                     if (expectedLength > 1) { // Check segment 2
1863                         if (!result[1].equals(source.substring(x+1,10)))
1864                             failCount++;
1865                     }
1866                 }
1867             }
1868         }
1869         // Check the case for no match found
1870         for (int limit=-2; limit<3; limit++) {
1871             result = source.split("e", limit);
1872             if (result.length != 1)
1873                 failCount++;
1874             if (!result[0].equals(source))
1875                 failCount++;
1876         }
1877         // Check the case for limit == 0, source = "";
1878         // split() now returns 0-length for empty source "" see #6559590
1879         source = "";
1880         result = source.split("e", 0);
1881         if (result.length != 1)
1882             failCount++;
1883         if (!result[0].equals(source))
1884             failCount++;
1885 
1886         // Check both split() and splitAsStraem(), especially for zero-lenth
1887         // input and zero-lenth match cases
1888         String[][] input = new String[][] {
1889             { " ",           "Abc Efg Hij" },   // normal non-zero-match
1890             { " ",           " Abc Efg Hij" },  // leading empty str for non-zero-match
1891             { " ",           "Abc  Efg Hij" },  // non-zero-match in the middle
1892             { "(?=\\p{Lu})", "AbcEfgHij" },     // no leading empty str for zero-match
1893             { "(?=\\p{Lu})", "AbcEfg" },
1894             { "(?=\\p{Lu})", "Abc" },
1895             { " ",           "" },              // zero-length input
1896             { ".*",          "" },
1897 
1898             // some tests from PatternStreamTest.java
1899             { "4",       "awgqwefg1fefw4vssv1vvv1" },
1900             { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" },
1901             { "1",       "awgqwefg1fefw4vssv1vvv1" },
1902             { "1",       "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" },
1903             { "\u56da",  "1\u56da23\u56da456\u56da7890" },
1904             { "\u56da",  "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" },
1905             { "\u56da",  "" },
1906             { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs
1907             { "o",       "boo:and:foo" },
1908             { "o",       "booooo:and:fooooo" },
1909             { "o",       "fooooo:" },
1910         };
1911 
1912         String[][] expected = new String[][] {
1913             { "Abc", "Efg", "Hij" },
1914             { "", "Abc", "Efg", "Hij" },
1915             { "Abc", "", "Efg", "Hij" },
1916             { "Abc", "Efg", "Hij" },
1917             { "Abc", "Efg" },
1918             { "Abc" },
1919             { "" },
1920             { "" },
1921 
1922             { "awgqwefg1fefw", "vssv1vvv1" },
1923             { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" },
1924             { "awgqwefg", "fefw4vssv", "vvv" },
1925             { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" },
1926             { "1", "23", "456", "7890" },
1927             { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" },
1928             { "" },
1929             { "This", "is", "testing", "", "with", "different", "separators" },
1930             { "b", "", ":and:f" },
1931             { "b", "", "", "", "", ":and:f" },
1932             { "f", "", "", "", "", ":" },
1933         };
1934         for (int i = 0; i < input.length; i++) {
1935             pattern = Pattern.compile(input[i][0]);
1936             if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) {
1937                 failCount++;
1938             }
1939             if (input[i][1].length() > 0 &&  // splitAsStream() return empty resulting
1940                                              // array for zero-length input for now
1941                 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(),
1942                                expected[i])) {
1943                 failCount++;
1944             }
1945         }
1946         report("Split");
1947     }
1948 
1949     private static void negationTest() {
1950         Pattern pattern = Pattern.compile("[\\[@^]+");
1951         Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1952         if (!matcher.find())
1953             failCount++;
1954         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1955             failCount++;
1956         pattern = Pattern.compile("[@\\[^]+");
1957         matcher = pattern.matcher("@@@@[[[[^^^^");
1958         if (!matcher.find())
1959             failCount++;
1960         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1961             failCount++;
1962         pattern = Pattern.compile("[@\\[^@]+");
1963         matcher = pattern.matcher("@@@@[[[[^^^^");
1964         if (!matcher.find())
1965             failCount++;
1966         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1967             failCount++;
1968 
1969         pattern = Pattern.compile("\\)");
1970         matcher = pattern.matcher("xxx)xxx");
1971         if (!matcher.find())
1972             failCount++;
1973 
1974         report("Negation");
1975     }
1976 
1977     private static void ampersandTest() {
1978         Pattern pattern = Pattern.compile("[&@]+");
1979         check(pattern, "@@@@&&&&", true);
1980 
1981         pattern = Pattern.compile("[@&]+");
1982         check(pattern, "@@@@&&&&", true);
1983 
1984         pattern = Pattern.compile("[@\\&]+");
1985         check(pattern, "@@@@&&&&", true);
1986 
1987         report("Ampersand");
1988     }
1989 
1990     private static void octalTest() throws Exception {
1991         Pattern pattern = Pattern.compile("\\u0007");
1992         Matcher matcher = pattern.matcher("\u0007");
1993         if (!matcher.matches())
1994             failCount++;
1995         pattern = Pattern.compile("\\07");
1996         matcher = pattern.matcher("\u0007");
1997         if (!matcher.matches())
1998             failCount++;
1999         pattern = Pattern.compile("\\007");
2000         matcher = pattern.matcher("\u0007");
2001         if (!matcher.matches())
2002             failCount++;
2003         pattern = Pattern.compile("\\0007");
2004         matcher = pattern.matcher("\u0007");
2005         if (!matcher.matches())
2006             failCount++;
2007         pattern = Pattern.compile("\\040");
2008         matcher = pattern.matcher("\u0020");
2009         if (!matcher.matches())
2010             failCount++;
2011         pattern = Pattern.compile("\\0403");
2012         matcher = pattern.matcher("\u00203");
2013         if (!matcher.matches())
2014             failCount++;
2015         pattern = Pattern.compile("\\0103");
2016         matcher = pattern.matcher("\u0043");
2017         if (!matcher.matches())
2018             failCount++;
2019 
2020         report("Octal");
2021     }
2022 
2023     private static void longPatternTest() throws Exception {
2024         try {
2025             Pattern pattern = Pattern.compile(
2026                 "a 32-character-long pattern xxxx");
2027             pattern = Pattern.compile("a 33-character-long pattern xxxxx");
2028             pattern = Pattern.compile("a thirty four character long regex");
2029             StringBuffer patternToBe = new StringBuffer(101);
2030             for (int i=0; i<100; i++)
2031                 patternToBe.append((char)(97 + i%26));
2032             pattern = Pattern.compile(patternToBe.toString());
2033         } catch (PatternSyntaxException e) {
2034             failCount++;
2035         }
2036 
2037         // Supplementary character test
2038         try {
2039             Pattern pattern = Pattern.compile(
2040                 toSupplementaries("a 32-character-long pattern xxxx"));
2041             pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
2042             pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
2043             StringBuffer patternToBe = new StringBuffer(101*2);
2044             for (int i=0; i<100; i++)
2045                 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
2046                                                      + 97 + i%26));
2047             pattern = Pattern.compile(patternToBe.toString());
2048         } catch (PatternSyntaxException e) {
2049             failCount++;
2050         }
2051         report("LongPattern");
2052     }
2053 
2054     private static void group0Test() throws Exception {
2055         Pattern pattern = Pattern.compile("(tes)ting");
2056         Matcher matcher = pattern.matcher("testing");
2057         check(matcher, "testing");
2058 
2059         matcher.reset("testing");
2060         if (matcher.lookingAt()) {
2061             if (!matcher.group(0).equals("testing"))
2062                 failCount++;
2063         } else {
2064             failCount++;
2065         }
2066 
2067         matcher.reset("testing");
2068         if (matcher.matches()) {
2069             if (!matcher.group(0).equals("testing"))
2070                 failCount++;
2071         } else {
2072             failCount++;
2073         }
2074 
2075         pattern = Pattern.compile("(tes)ting");
2076         matcher = pattern.matcher("testing");
2077         if (matcher.lookingAt()) {
2078             if (!matcher.group(0).equals("testing"))
2079                 failCount++;
2080         } else {
2081             failCount++;
2082         }
2083 
2084         pattern = Pattern.compile("^(tes)ting");
2085         matcher = pattern.matcher("testing");
2086         if (matcher.matches()) {
2087             if (!matcher.group(0).equals("testing"))
2088                 failCount++;
2089         } else {
2090             failCount++;
2091         }
2092 
2093         // Supplementary character test
2094         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2095         matcher = pattern.matcher(toSupplementaries("testing"));
2096         check(matcher, toSupplementaries("testing"));
2097 
2098         matcher.reset(toSupplementaries("testing"));
2099         if (matcher.lookingAt()) {
2100             if (!matcher.group(0).equals(toSupplementaries("testing")))
2101                 failCount++;
2102         } else {
2103             failCount++;
2104         }
2105 
2106         matcher.reset(toSupplementaries("testing"));
2107         if (matcher.matches()) {
2108             if (!matcher.group(0).equals(toSupplementaries("testing")))
2109                 failCount++;
2110         } else {
2111             failCount++;
2112         }
2113 
2114         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2115         matcher = pattern.matcher(toSupplementaries("testing"));
2116         if (matcher.lookingAt()) {
2117             if (!matcher.group(0).equals(toSupplementaries("testing")))
2118                 failCount++;
2119         } else {
2120             failCount++;
2121         }
2122 
2123         pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
2124         matcher = pattern.matcher(toSupplementaries("testing"));
2125         if (matcher.matches()) {
2126             if (!matcher.group(0).equals(toSupplementaries("testing")))
2127                 failCount++;
2128         } else {
2129             failCount++;
2130         }
2131 
2132         report("Group0");
2133     }
2134 
2135     private static void findIntTest() throws Exception {
2136         Pattern p = Pattern.compile("blah");
2137         Matcher m = p.matcher("zzzzblahzzzzzblah");
2138         boolean result = m.find(2);
2139         if (!result)
2140             failCount++;
2141 
2142         p = Pattern.compile("$");
2143         m = p.matcher("1234567890");
2144         result = m.find(10);
2145         if (!result)
2146             failCount++;
2147         try {
2148             result = m.find(11);
2149             failCount++;
2150         } catch (IndexOutOfBoundsException e) {
2151             // correct result
2152         }
2153 
2154         // Supplementary character test
2155         p = Pattern.compile(toSupplementaries("blah"));
2156         m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
2157         result = m.find(2);
2158         if (!result)
2159             failCount++;
2160 
2161         report("FindInt");
2162     }
2163 
2164     private static void emptyPatternTest() throws Exception {
2165         Pattern p = Pattern.compile("");
2166         Matcher m = p.matcher("foo");
2167 
2168         // Should find empty pattern at beginning of input
2169         boolean result = m.find();
2170         if (result != true)
2171             failCount++;
2172         if (m.start() != 0)
2173             failCount++;
2174 
2175         // Should not match entire input if input is not empty
2176         m.reset();
2177         result = m.matches();
2178         if (result == true)
2179             failCount++;
2180 
2181         try {
2182             m.start(0);
2183             failCount++;
2184         } catch (IllegalStateException e) {
2185             // Correct result
2186         }
2187 
2188         // Should match entire input if input is empty
2189         m.reset("");
2190         result = m.matches();
2191         if (result != true)
2192             failCount++;
2193 
2194         result = Pattern.matches("", "");
2195         if (result != true)
2196             failCount++;
2197 
2198         result = Pattern.matches("", "foo");
2199         if (result == true)
2200             failCount++;
2201         report("EmptyPattern");
2202     }
2203 
2204     private static void charClassTest() throws Exception {
2205         Pattern pattern = Pattern.compile("blah[ab]]blech");
2206         check(pattern, "blahb]blech", true);
2207 
2208         pattern = Pattern.compile("[abc[def]]");
2209         check(pattern, "b", true);
2210 
2211         // Supplementary character tests
2212         pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2213         check(pattern, toSupplementaries("blahb]blech"), true);
2214 
2215         pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2216         check(pattern, toSupplementaries("b"), true);
2217 
2218         try {
2219             // u00ff when UNICODE_CASE
2220             pattern = Pattern.compile("[ab\u00ffcd]",
2221                                       Pattern.CASE_INSENSITIVE|
2222                                       Pattern.UNICODE_CASE);
2223             check(pattern, "ab\u00ffcd", true);
2224             check(pattern, "Ab\u0178Cd", true);
2225 
2226             // u00b5 when UNICODE_CASE
2227             pattern = Pattern.compile("[ab\u00b5cd]",
2228                                       Pattern.CASE_INSENSITIVE|
2229                                       Pattern.UNICODE_CASE);
2230             check(pattern, "ab\u00b5cd", true);
2231             check(pattern, "Ab\u039cCd", true);
2232         } catch (Exception e) { failCount++; }
2233 
2234         /* Special cases
2235            (1)LatinSmallLetterLongS u+017f
2236            (2)LatinSmallLetterDotlessI u+0131
2237            (3)LatineCapitalLetterIWithDotAbove u+0130
2238            (4)KelvinSign u+212a
2239            (5)AngstromSign u+212b
2240         */
2241         int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2242         pattern = Pattern.compile("[sik\u00c5]+", flags);
2243         if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2244             failCount++;
2245 
2246         report("CharClass");
2247     }
2248 
2249     private static void caretTest() throws Exception {
2250         Pattern pattern = Pattern.compile("\\w*");
2251         Matcher matcher = pattern.matcher("a#bc#def##g");
2252         check(matcher, "a");
2253         check(matcher, "");
2254         check(matcher, "bc");
2255         check(matcher, "");
2256         check(matcher, "def");
2257         check(matcher, "");
2258         check(matcher, "");
2259         check(matcher, "g");
2260         check(matcher, "");
2261         if (matcher.find())
2262             failCount++;
2263 
2264         pattern = Pattern.compile("^\\w*");
2265         matcher = pattern.matcher("a#bc#def##g");
2266         check(matcher, "a");
2267         if (matcher.find())
2268             failCount++;
2269 
2270         pattern = Pattern.compile("\\w");
2271         matcher = pattern.matcher("abc##x");
2272         check(matcher, "a");
2273         check(matcher, "b");
2274         check(matcher, "c");
2275         check(matcher, "x");
2276         if (matcher.find())
2277             failCount++;
2278 
2279         pattern = Pattern.compile("^\\w");
2280         matcher = pattern.matcher("abc##x");
2281         check(matcher, "a");
2282         if (matcher.find())
2283             failCount++;
2284 
2285         pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2286         matcher = pattern.matcher("abcdef-ghi\njklmno");
2287         check(matcher, "abc");
2288         if (matcher.find())
2289             failCount++;
2290 
2291         pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2292         matcher = pattern.matcher("abcdef-ghi\njklmno");
2293         check(matcher, "abc");
2294         check(matcher, "jkl");
2295         if (matcher.find())
2296             failCount++;
2297 
2298         pattern = Pattern.compile("^", Pattern.MULTILINE);
2299         matcher = pattern.matcher("this is some text");
2300         String result = matcher.replaceAll("X");
2301         if (!result.equals("Xthis is some text"))
2302             failCount++;
2303 
2304         pattern = Pattern.compile("^");
2305         matcher = pattern.matcher("this is some text");
2306         result = matcher.replaceAll("X");
2307         if (!result.equals("Xthis is some text"))
2308             failCount++;
2309 
2310         pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2311         matcher = pattern.matcher("this is some text\n");
2312         result = matcher.replaceAll("X");
2313         if (!result.equals("Xthis is some text\n"))
2314             failCount++;
2315 
2316         report("Caret");
2317     }
2318 
2319     private static void groupCaptureTest() throws Exception {
2320         // Independent group
2321         Pattern pattern = Pattern.compile("x+(?>y+)z+");
2322         Matcher matcher = pattern.matcher("xxxyyyzzz");
2323         matcher.find();
2324         try {
2325             String blah = matcher.group(1);
2326             failCount++;
2327         } catch (IndexOutOfBoundsException ioobe) {
2328             // Good result
2329         }
2330         // Pure group
2331         pattern = Pattern.compile("x+(?:y+)z+");
2332         matcher = pattern.matcher("xxxyyyzzz");
2333         matcher.find();
2334         try {
2335             String blah = matcher.group(1);
2336             failCount++;
2337         } catch (IndexOutOfBoundsException ioobe) {
2338             // Good result
2339         }
2340 
2341         // Supplementary character tests
2342         // Independent group
2343         pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2344         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2345         matcher.find();
2346         try {
2347             String blah = matcher.group(1);
2348             failCount++;
2349         } catch (IndexOutOfBoundsException ioobe) {
2350             // Good result
2351         }
2352         // Pure group
2353         pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2354         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2355         matcher.find();
2356         try {
2357             String blah = matcher.group(1);
2358             failCount++;
2359         } catch (IndexOutOfBoundsException ioobe) {
2360             // Good result
2361         }
2362 
2363         report("GroupCapture");
2364     }
2365 
2366     private static void backRefTest() throws Exception {
2367         Pattern pattern = Pattern.compile("(a*)bc\\1");
2368         check(pattern, "zzzaabcazzz", true);
2369 
2370         pattern = Pattern.compile("(a*)bc\\1");
2371         check(pattern, "zzzaabcaazzz", true);
2372 
2373         pattern = Pattern.compile("(abc)(def)\\1");
2374         check(pattern, "abcdefabc", true);
2375 
2376         pattern = Pattern.compile("(abc)(def)\\3");
2377         check(pattern, "abcdefabc", false);
2378 
2379         try {
2380             for (int i = 1; i < 10; i++) {
2381                 // Make sure backref 1-9 are always accepted
2382                 pattern = Pattern.compile("abcdef\\" + i);
2383                 // and fail to match if the target group does not exit
2384                 check(pattern, "abcdef", false);
2385             }
2386         } catch(PatternSyntaxException e) {
2387             failCount++;
2388         }
2389 
2390         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2391         check(pattern, "abcdefghija", false);
2392         check(pattern, "abcdefghija1", true);
2393 
2394         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2395         check(pattern, "abcdefghijkk", true);
2396 
2397         pattern = Pattern.compile("(a)bcdefghij\\11");
2398         check(pattern, "abcdefghija1", true);
2399 
2400         // Supplementary character tests
2401         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2402         check(pattern, toSupplementaries("zzzaabcazzz"), true);
2403 
2404         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2405         check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2406 
2407         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2408         check(pattern, toSupplementaries("abcdefabc"), true);
2409 
2410         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2411         check(pattern, toSupplementaries("abcdefabc"), false);
2412 
2413         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2414         check(pattern, toSupplementaries("abcdefghija"), false);
2415         check(pattern, toSupplementaries("abcdefghija1"), true);
2416 
2417         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2418         check(pattern, toSupplementaries("abcdefghijkk"), true);
2419 
2420         report("BackRef");
2421     }
2422 
2423     /**
2424      * Unicode Technical Report #18, section 2.6 End of Line
2425      * There is no empty line to be matched in the sequence \u000D\u000A
2426      * but there is an empty line in the sequence \u000A\u000D.
2427      */
2428     private static void anchorTest() throws Exception {
2429         Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2430         Matcher m = p.matcher("blah1\r\nblah2");
2431         m.find();
2432         m.find();
2433         if (!m.group().equals("blah2"))
2434             failCount++;
2435 
2436         m.reset("blah1\n\rblah2");
2437         m.find();
2438         m.find();
2439         m.find();
2440         if (!m.group().equals("blah2"))
2441             failCount++;
2442 
2443         // Test behavior of $ with \r\n at end of input
2444         p = Pattern.compile(".+$");
2445         m = p.matcher("blah1\r\n");
2446         if (!m.find())
2447             failCount++;
2448        if (!m.group().equals("blah1"))
2449             failCount++;
2450         if (m.find())
2451             failCount++;
2452 
2453         // Test behavior of $ with \r\n at end of input in multiline
2454         p = Pattern.compile(".+$", Pattern.MULTILINE);
2455         m = p.matcher("blah1\r\n");
2456         if (!m.find())
2457             failCount++;
2458         if (m.find())
2459             failCount++;
2460 
2461         // Test for $ recognition of \u0085 for bug 4527731
2462         p = Pattern.compile(".+$", Pattern.MULTILINE);
2463         m = p.matcher("blah1\u0085");
2464         if (!m.find())
2465             failCount++;
2466 
2467         // Supplementary character test
2468         p = Pattern.compile("^.*$", Pattern.MULTILINE);
2469         m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2470         m.find();
2471         m.find();
2472         if (!m.group().equals(toSupplementaries("blah2")))
2473             failCount++;
2474 
2475         m.reset(toSupplementaries("blah1\n\rblah2"));
2476         m.find();
2477         m.find();
2478         m.find();
2479         if (!m.group().equals(toSupplementaries("blah2")))
2480             failCount++;
2481 
2482         // Test behavior of $ with \r\n at end of input
2483         p = Pattern.compile(".+$");
2484         m = p.matcher(toSupplementaries("blah1\r\n"));
2485         if (!m.find())
2486             failCount++;
2487         if (!m.group().equals(toSupplementaries("blah1")))
2488             failCount++;
2489         if (m.find())
2490             failCount++;
2491 
2492         // Test behavior of $ with \r\n at end of input in multiline
2493         p = Pattern.compile(".+$", Pattern.MULTILINE);
2494         m = p.matcher(toSupplementaries("blah1\r\n"));
2495         if (!m.find())
2496             failCount++;
2497         if (m.find())
2498             failCount++;
2499 
2500         // Test for $ recognition of \u0085 for bug 4527731
2501         p = Pattern.compile(".+$", Pattern.MULTILINE);
2502         m = p.matcher(toSupplementaries("blah1\u0085"));
2503         if (!m.find())
2504             failCount++;
2505 
2506         report("Anchors");
2507     }
2508 
2509     /**
2510      * A basic sanity test of Matcher.lookingAt().
2511      */
2512     private static void lookingAtTest() throws Exception {
2513         Pattern p = Pattern.compile("(ab)(c*)");
2514         Matcher m = p.matcher("abccczzzabcczzzabccc");
2515 
2516         if (!m.lookingAt())
2517             failCount++;
2518 
2519         if (!m.group().equals(m.group(0)))
2520             failCount++;
2521 
2522         m = p.matcher("zzzabccczzzabcczzzabccczzz");
2523         if (m.lookingAt())
2524             failCount++;
2525 
2526         // Supplementary character test
2527         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2528         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2529 
2530         if (!m.lookingAt())
2531             failCount++;
2532 
2533         if (!m.group().equals(m.group(0)))
2534             failCount++;
2535 
2536         m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2537         if (m.lookingAt())
2538             failCount++;
2539 
2540         report("Looking At");
2541     }
2542 
2543     /**
2544      * A basic sanity test of Matcher.matches().
2545      */
2546     private static void matchesTest() throws Exception {
2547         // matches()
2548         Pattern p = Pattern.compile("ulb(c*)");
2549         Matcher m = p.matcher("ulbcccccc");
2550         if (!m.matches())
2551             failCount++;
2552 
2553         // find() but not matches()
2554         m.reset("zzzulbcccccc");
2555         if (m.matches())
2556             failCount++;
2557 
2558         // lookingAt() but not matches()
2559         m.reset("ulbccccccdef");
2560         if (m.matches())
2561             failCount++;
2562 
2563         // matches()
2564         p = Pattern.compile("a|ad");
2565         m = p.matcher("ad");
2566         if (!m.matches())
2567             failCount++;
2568 
2569         // Supplementary character test
2570         // matches()
2571         p = Pattern.compile(toSupplementaries("ulb(c*)"));
2572         m = p.matcher(toSupplementaries("ulbcccccc"));
2573         if (!m.matches())
2574             failCount++;
2575 
2576         // find() but not matches()
2577         m.reset(toSupplementaries("zzzulbcccccc"));
2578         if (m.matches())
2579             failCount++;
2580 
2581         // lookingAt() but not matches()
2582         m.reset(toSupplementaries("ulbccccccdef"));
2583         if (m.matches())
2584             failCount++;
2585 
2586         // matches()
2587         p = Pattern.compile(toSupplementaries("a|ad"));
2588         m = p.matcher(toSupplementaries("ad"));
2589         if (!m.matches())
2590             failCount++;
2591 
2592         report("Matches");
2593     }
2594 
2595     /**
2596      * A basic sanity test of Pattern.matches().
2597      */
2598     private static void patternMatchesTest() throws Exception {
2599         // matches()
2600         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2601                              toSupplementaries("ulbcccccc")))
2602             failCount++;
2603 
2604         // find() but not matches()
2605         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2606                             toSupplementaries("zzzulbcccccc")))
2607             failCount++;
2608 
2609         // lookingAt() but not matches()
2610         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2611                             toSupplementaries("ulbccccccdef")))
2612             failCount++;
2613 
2614         // Supplementary character test
2615         // matches()
2616         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2617                              toSupplementaries("ulbcccccc")))
2618             failCount++;
2619 
2620         // find() but not matches()
2621         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2622                             toSupplementaries("zzzulbcccccc")))
2623             failCount++;
2624 
2625         // lookingAt() but not matches()
2626         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2627                             toSupplementaries("ulbccccccdef")))
2628             failCount++;
2629 
2630         report("Pattern Matches");
2631     }
2632 
2633     /**
2634      * Canonical equivalence testing. Tests the ability of the engine
2635      * to match sequences that are not explicitly specified in the
2636      * pattern when they are considered equivalent by the Unicode Standard.
2637      */
2638     private static void ceTest() throws Exception {
2639         // Decomposed char outside char classes
2640         Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2641         Matcher m = p.matcher("test\u00e5");
2642         if (!m.matches())
2643             failCount++;
2644 
2645         m.reset("testa\u030a");
2646         if (!m.matches())
2647             failCount++;
2648 
2649         // Composed char outside char classes
2650         p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2651         m = p.matcher("test\u00e5");
2652         if (!m.matches())
2653             failCount++;
2654 
2655         m.reset("testa\u030a");
2656         if (!m.find())
2657             failCount++;
2658 
2659         // Decomposed char inside a char class
2660         p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2661         m = p.matcher("test\u00e5");
2662         if (!m.find())
2663             failCount++;
2664 
2665         m.reset("testa\u030a");
2666         if (!m.find())
2667             failCount++;
2668 
2669         // Composed char inside a char class
2670         p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2671         m = p.matcher("test\u00e5");
2672         if (!m.find())
2673             failCount++;
2674 
2675         m.reset("testa\u0300");
2676         if (!m.find())
2677             failCount++;
2678 
2679         m.reset("testa\u030a");
2680         if (!m.find())
2681             failCount++;
2682 
2683         // Marks that cannot legally change order and be equivalent
2684         p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2685         check(p, "testa\u0308\u0300", true);
2686         check(p, "testa\u0300\u0308", false);
2687 
2688         // Marks that can legally change order and be equivalent
2689         p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2690         check(p, "testa\u0308\u0323", true);
2691         check(p, "testa\u0323\u0308", true);
2692 
2693         // Test all equivalences of the sequence a\u0308\u0323\u0300
2694         p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2695         check(p, "testa\u0308\u0323\u0300", true);
2696         check(p, "testa\u0323\u0308\u0300", true);
2697         check(p, "testa\u0308\u0300\u0323", true);
2698         check(p, "test\u00e4\u0323\u0300", true);
2699         check(p, "test\u00e4\u0300\u0323", true);
2700 
2701         Object[][] data = new Object[][] {
2702 
2703         // JDK-4867170
2704         { "[\u1f80-\u1f82]", "ab\u1f80cd",             "f", true },
2705         { "[\u1f80-\u1f82]", "ab\u1f81cd",             "f", true },
2706         { "[\u1f80-\u1f82]", "ab\u1f82cd",             "f", true },
2707         { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true },
2708         { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true },
2709         { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd",       "f", true },
2710         { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd",       "f", true },
2711 
2712         { "\\p{IsGreek}",    "ab\u1f80cd",             "f", true },
2713         { "\\p{IsGreek}",    "ab\u1f81cd",             "f", true },
2714         { "\\p{IsGreek}",    "ab\u1f82cd",             "f", true },
2715         { "\\p{IsGreek}",    "ab\u03b1\u0314\u0345cd", "f", true },
2716         { "\\p{IsGreek}",    "ab\u1f01\u0345cd",       "f", true },
2717 
2718         // backtracking, force to match "\u1f80", instead of \u1f82"
2719         { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true },
2720 
2721         { "[\\p{IsGreek}]",  "\u03b1\u0314\u0345",     "m", true },
2722         { "\\p{IsGreek}",    "\u03b1\u0314\u0345",     "m", true },
2723 
2724         { "[^\u1f80-\u1f82]","\u1f81",                 "m", false },
2725         { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345",     "m", false },
2726         { "[^\u1f01\u0345]", "\u1f81",                 "f", false },
2727 
2728         { "[^\u1f81]+",      "\u1f80\u1f82",           "f", true },
2729         { "[\u1f80]",        "ab\u1f80cd",             "f", true },
2730         { "\u1f80",          "ab\u1f80cd",             "f", true },
2731         { "\u1f00\u0345\u0300",  "\u1f82", "m", true },
2732         { "\u1f80",          "-\u1f00\u0345\u0300-",   "f", true },
2733         { "\u1f82",          "\u1f00\u0345\u0300",     "m", true },
2734         { "\u1f82",          "\u1f80\u0300",           "m", true },
2735 
2736         // JDK-7080302       # compile failed
2737         { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true},
2738 
2739         // JDK-6728861, same cause as above one
2740         { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true},
2741 
2742         // JDK-6995635
2743         { "(\u00e9)", "e\u0301", "m", true },
2744 
2745         // JDK-6736245
2746         // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc
2747         { "\u2ADC", "\u2ADC", "m", true},          // NFC
2748         { "\u2ADC", "\u2ADD\u0338", "m", true},    // NFD
2749 
2750         //  4916384.
2751         // Decomposed hangul (jamos) works inside clazz
2752         { "[\u1100\u1161]", "\u1100\u1161", "m", true},
2753         { "[\u1100\u1161]", "\uac00", "m", true},
2754 
2755         { "[\uac00]", "\u1100\u1161", "m", true},
2756         { "[\uac00]", "\uac00", "m", true},
2757 
2758         // Decomposed hangul (jamos)
2759         { "\u1100\u1161", "\u1100\u1161", "m", true},
2760         { "\u1100\u1161", "\uac00", "m", true},
2761 
2762         // Composed hangul
2763         { "\uac00",  "\u1100\u1161", "m", true },
2764         { "\uac00",  "\uac00", "m", true },
2765 
2766         /* Need a NFDSlice to nfd the source to solve this issue
2767            u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f>  -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2768            u+1d1bc -> nfd: <u+1d1ba><u+1d165>           -> nfc: <u+1d1ba><u+1d165>
2769            <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2770 
2771         // Decomposed supplementary outside char classes
2772         // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true },
2773         // Composed supplementary outside char classes
2774         // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true },
2775         */
2776         { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true },
2777         { "test\ud834\uddc0",             "test\ud834\uddbc\ud834\udd6f", "m", true },
2778 
2779         { "test\ud834\uddc0",             "test\ud834\uddc0",             "m", true },
2780         { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0",             "m", true },
2781         };
2782 
2783         int failCount = 0;
2784         for (Object[] d : data) {
2785             String pn = (String)d[0];
2786             String tt = (String)d[1];
2787             boolean isFind = "f".equals(((String)d[2]));
2788             boolean expected = (boolean)d[3];
2789             boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find()
2790                                  : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches();
2791             if (ret != expected) {
2792                 failCount++;
2793                 continue;
2794             }
2795         }
2796         report("Canonical Equivalence");
2797     }
2798 
2799     /**
2800      * A basic sanity test of Matcher.replaceAll().
2801      */
2802     private static void globalSubstitute() throws Exception {
2803         // Global substitution with a literal
2804         Pattern p = Pattern.compile("(ab)(c*)");
2805         Matcher m = p.matcher("abccczzzabcczzzabccc");
2806         if (!m.replaceAll("test").equals("testzzztestzzztest"))
2807             failCount++;
2808 
2809         m.reset("zzzabccczzzabcczzzabccczzz");
2810         if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2811             failCount++;
2812 
2813         // Global substitution with groups
2814         m.reset("zzzabccczzzabcczzzabccczzz");
2815         String result = m.replaceAll("$1");
2816         if (!result.equals("zzzabzzzabzzzabzzz"))
2817             failCount++;
2818 
2819         // Supplementary character test
2820         // Global substitution with a literal
2821         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2822         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2823         if (!m.replaceAll(toSupplementaries("test")).
2824             equals(toSupplementaries("testzzztestzzztest")))
2825             failCount++;
2826 
2827         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2828         if (!m.replaceAll(toSupplementaries("test")).
2829             equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2830             failCount++;
2831 
2832         // Global substitution with groups
2833         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2834         result = m.replaceAll("$1");
2835         if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2836             failCount++;
2837 
2838         report("Global Substitution");
2839     }
2840 
2841     /**
2842      * Tests the usage of Matcher.appendReplacement() with literal
2843      * and group substitutions.
2844      */
2845     private static void stringbufferSubstitute() throws Exception {
2846         // SB substitution with literal
2847         String blah = "zzzblahzzz";
2848         Pattern p = Pattern.compile("blah");
2849         Matcher m = p.matcher(blah);
2850         StringBuffer result = new StringBuffer();
2851         try {
2852             m.appendReplacement(result, "blech");
2853             failCount++;
2854         } catch (IllegalStateException e) {
2855         }
2856         m.find();
2857         m.appendReplacement(result, "blech");
2858         if (!result.toString().equals("zzzblech"))
2859             failCount++;
2860 
2861         m.appendTail(result);
2862         if (!result.toString().equals("zzzblechzzz"))
2863             failCount++;
2864 
2865         // SB substitution with groups
2866         blah = "zzzabcdzzz";
2867         p = Pattern.compile("(ab)(cd)*");
2868         m = p.matcher(blah);
2869         result = new StringBuffer();
2870         try {
2871             m.appendReplacement(result, "$1");
2872             failCount++;
2873         } catch (IllegalStateException e) {
2874         }
2875         m.find();
2876         m.appendReplacement(result, "$1");
2877         if (!result.toString().equals("zzzab"))
2878             failCount++;
2879 
2880         m.appendTail(result);
2881         if (!result.toString().equals("zzzabzzz"))
2882             failCount++;
2883 
2884         // SB substitution with 3 groups
2885         blah = "zzzabcdcdefzzz";
2886         p = Pattern.compile("(ab)(cd)*(ef)");
2887         m = p.matcher(blah);
2888         result = new StringBuffer();
2889         try {
2890             m.appendReplacement(result, "$1w$2w$3");
2891             failCount++;
2892         } catch (IllegalStateException e) {
2893         }
2894         m.find();
2895         m.appendReplacement(result, "$1w$2w$3");
2896         if (!result.toString().equals("zzzabwcdwef"))
2897             failCount++;
2898 
2899         m.appendTail(result);
2900         if (!result.toString().equals("zzzabwcdwefzzz"))
2901             failCount++;
2902 
2903         // SB substitution with groups and three matches
2904         // skipping middle match
2905         blah = "zzzabcdzzzabcddzzzabcdzzz";
2906         p = Pattern.compile("(ab)(cd*)");
2907         m = p.matcher(blah);
2908         result = new StringBuffer();
2909         try {
2910             m.appendReplacement(result, "$1");
2911             failCount++;
2912         } catch (IllegalStateException e) {
2913         }
2914         m.find();
2915         m.appendReplacement(result, "$1");
2916         if (!result.toString().equals("zzzab"))
2917             failCount++;
2918 
2919         m.find();
2920         m.find();
2921         m.appendReplacement(result, "$2");
2922         if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2923             failCount++;
2924 
2925         m.appendTail(result);
2926         if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2927             failCount++;
2928 
2929         // Check to make sure escaped $ is ignored
2930         blah = "zzzabcdcdefzzz";
2931         p = Pattern.compile("(ab)(cd)*(ef)");
2932         m = p.matcher(blah);
2933         result = new StringBuffer();
2934         m.find();
2935         m.appendReplacement(result, "$1w\\$2w$3");
2936         if (!result.toString().equals("zzzabw$2wef"))
2937             failCount++;
2938 
2939         m.appendTail(result);
2940         if (!result.toString().equals("zzzabw$2wefzzz"))
2941             failCount++;
2942 
2943         // Check to make sure a reference to nonexistent group causes error
2944         blah = "zzzabcdcdefzzz";
2945         p = Pattern.compile("(ab)(cd)*(ef)");
2946         m = p.matcher(blah);
2947         result = new StringBuffer();
2948         m.find();
2949         try {
2950             m.appendReplacement(result, "$1w$5w$3");
2951             failCount++;
2952         } catch (IndexOutOfBoundsException ioobe) {
2953             // Correct result
2954         }
2955 
2956         // Check double digit group references
2957         blah = "zzz123456789101112zzz";
2958         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2959         m = p.matcher(blah);
2960         result = new StringBuffer();
2961         m.find();
2962         m.appendReplacement(result, "$1w$11w$3");
2963         if (!result.toString().equals("zzz1w11w3"))
2964             failCount++;
2965 
2966         // Check to make sure it backs off $15 to $1 if only three groups
2967         blah = "zzzabcdcdefzzz";
2968         p = Pattern.compile("(ab)(cd)*(ef)");
2969         m = p.matcher(blah);
2970         result = new StringBuffer();
2971         m.find();
2972         m.appendReplacement(result, "$1w$15w$3");
2973         if (!result.toString().equals("zzzabwab5wef"))
2974             failCount++;
2975 
2976 
2977         // Supplementary character test
2978         // SB substitution with literal
2979         blah = toSupplementaries("zzzblahzzz");
2980         p = Pattern.compile(toSupplementaries("blah"));
2981         m = p.matcher(blah);
2982         result = new StringBuffer();
2983         try {
2984             m.appendReplacement(result, toSupplementaries("blech"));
2985             failCount++;
2986         } catch (IllegalStateException e) {
2987         }
2988         m.find();
2989         m.appendReplacement(result, toSupplementaries("blech"));
2990         if (!result.toString().equals(toSupplementaries("zzzblech")))
2991             failCount++;
2992 
2993         m.appendTail(result);
2994         if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2995             failCount++;
2996 
2997         // SB substitution with groups
2998         blah = toSupplementaries("zzzabcdzzz");
2999         p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
3000         m = p.matcher(blah);
3001         result = new StringBuffer();
3002         try {
3003             m.appendReplacement(result, "$1");
3004             failCount++;
3005         } catch (IllegalStateException e) {
3006         }
3007         m.find();
3008         m.appendReplacement(result, "$1");
3009         if (!result.toString().equals(toSupplementaries("zzzab")))
3010             failCount++;
3011 
3012         m.appendTail(result);
3013         if (!result.toString().equals(toSupplementaries("zzzabzzz")))
3014             failCount++;
3015 
3016         // SB substitution with 3 groups
3017         blah = toSupplementaries("zzzabcdcdefzzz");
3018         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3019         m = p.matcher(blah);
3020         result = new StringBuffer();
3021         try {
3022             m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3023             failCount++;
3024         } catch (IllegalStateException e) {
3025         }
3026         m.find();
3027         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3028         if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
3029             failCount++;
3030 
3031         m.appendTail(result);
3032         if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
3033             failCount++;
3034 
3035         // SB substitution with groups and three matches
3036         // skipping middle match
3037         blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
3038         p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
3039         m = p.matcher(blah);
3040         result = new StringBuffer();
3041         try {
3042             m.appendReplacement(result, "$1");
3043             failCount++;
3044         } catch (IllegalStateException e) {
3045         }
3046         m.find();
3047         m.appendReplacement(result, "$1");
3048         if (!result.toString().equals(toSupplementaries("zzzab")))
3049             failCount++;
3050 
3051         m.find();
3052         m.find();
3053         m.appendReplacement(result, "$2");
3054         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
3055             failCount++;
3056 
3057         m.appendTail(result);
3058         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
3059             failCount++;
3060 
3061         // Check to make sure escaped $ is ignored
3062         blah = toSupplementaries("zzzabcdcdefzzz");
3063         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3064         m = p.matcher(blah);
3065         result = new StringBuffer();
3066         m.find();
3067         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
3068         if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
3069             failCount++;
3070 
3071         m.appendTail(result);
3072         if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
3073             failCount++;
3074 
3075         // Check to make sure a reference to nonexistent group causes error
3076         blah = toSupplementaries("zzzabcdcdefzzz");
3077         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3078         m = p.matcher(blah);
3079         result = new StringBuffer();
3080         m.find();
3081         try {
3082             m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
3083             failCount++;
3084         } catch (IndexOutOfBoundsException ioobe) {
3085             // Correct result
3086         }
3087 
3088         // Check double digit group references
3089         blah = toSupplementaries("zzz123456789101112zzz");
3090         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3091         m = p.matcher(blah);
3092         result = new StringBuffer();
3093         m.find();
3094         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
3095         if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
3096             failCount++;
3097 
3098         // Check to make sure it backs off $15 to $1 if only three groups
3099         blah = toSupplementaries("zzzabcdcdefzzz");
3100         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3101         m = p.matcher(blah);
3102         result = new StringBuffer();
3103         m.find();
3104         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3105         if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
3106             failCount++;
3107 
3108         // Check nothing has been appended into the output buffer if
3109         // the replacement string triggers IllegalArgumentException.
3110         p = Pattern.compile("(abc)");
3111         m = p.matcher("abcd");
3112         result = new StringBuffer();
3113         m.find();
3114         try {
3115             m.appendReplacement(result, ("xyz$g"));
3116             failCount++;
3117         } catch (IllegalArgumentException iae) {
3118             if (result.length() != 0)
3119                 failCount++;
3120         }
3121 
3122         report("SB Substitution");
3123     }
3124 
3125     /**
3126      * Tests the usage of Matcher.appendReplacement() with literal
3127      * and group substitutions.
3128      */
3129     private static void stringbuilderSubstitute() throws Exception {
3130         // SB substitution with literal
3131         String blah = "zzzblahzzz";
3132         Pattern p = Pattern.compile("blah");
3133         Matcher m = p.matcher(blah);
3134         StringBuilder result = new StringBuilder();
3135         try {
3136             m.appendReplacement(result, "blech");
3137             failCount++;
3138         } catch (IllegalStateException e) {
3139         }
3140         m.find();
3141         m.appendReplacement(result, "blech");
3142         if (!result.toString().equals("zzzblech"))
3143             failCount++;
3144 
3145         m.appendTail(result);
3146         if (!result.toString().equals("zzzblechzzz"))
3147             failCount++;
3148 
3149         // SB substitution with groups
3150         blah = "zzzabcdzzz";
3151         p = Pattern.compile("(ab)(cd)*");
3152         m = p.matcher(blah);
3153         result = new StringBuilder();
3154         try {
3155             m.appendReplacement(result, "$1");
3156             failCount++;
3157         } catch (IllegalStateException e) {
3158         }
3159         m.find();
3160         m.appendReplacement(result, "$1");
3161         if (!result.toString().equals("zzzab"))
3162             failCount++;
3163 
3164         m.appendTail(result);
3165         if (!result.toString().equals("zzzabzzz"))
3166             failCount++;
3167 
3168         // SB substitution with 3 groups
3169         blah = "zzzabcdcdefzzz";
3170         p = Pattern.compile("(ab)(cd)*(ef)");
3171         m = p.matcher(blah);
3172         result = new StringBuilder();
3173         try {
3174             m.appendReplacement(result, "$1w$2w$3");
3175             failCount++;
3176         } catch (IllegalStateException e) {
3177         }
3178         m.find();
3179         m.appendReplacement(result, "$1w$2w$3");
3180         if (!result.toString().equals("zzzabwcdwef"))
3181             failCount++;
3182 
3183         m.appendTail(result);
3184         if (!result.toString().equals("zzzabwcdwefzzz"))
3185             failCount++;
3186 
3187         // SB substitution with groups and three matches
3188         // skipping middle match
3189         blah = "zzzabcdzzzabcddzzzabcdzzz";
3190         p = Pattern.compile("(ab)(cd*)");
3191         m = p.matcher(blah);
3192         result = new StringBuilder();
3193         try {
3194             m.appendReplacement(result, "$1");
3195             failCount++;
3196         } catch (IllegalStateException e) {
3197         }
3198         m.find();
3199         m.appendReplacement(result, "$1");
3200         if (!result.toString().equals("zzzab"))
3201             failCount++;
3202 
3203         m.find();
3204         m.find();
3205         m.appendReplacement(result, "$2");
3206         if (!result.toString().equals("zzzabzzzabcddzzzcd"))
3207             failCount++;
3208 
3209         m.appendTail(result);
3210         if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
3211             failCount++;
3212 
3213         // Check to make sure escaped $ is ignored
3214         blah = "zzzabcdcdefzzz";
3215         p = Pattern.compile("(ab)(cd)*(ef)");
3216         m = p.matcher(blah);
3217         result = new StringBuilder();
3218         m.find();
3219         m.appendReplacement(result, "$1w\\$2w$3");
3220         if (!result.toString().equals("zzzabw$2wef"))
3221             failCount++;
3222 
3223         m.appendTail(result);
3224         if (!result.toString().equals("zzzabw$2wefzzz"))
3225             failCount++;
3226 
3227         // Check to make sure a reference to nonexistent group causes error
3228         blah = "zzzabcdcdefzzz";
3229         p = Pattern.compile("(ab)(cd)*(ef)");
3230         m = p.matcher(blah);
3231         result = new StringBuilder();
3232         m.find();
3233         try {
3234             m.appendReplacement(result, "$1w$5w$3");
3235             failCount++;
3236         } catch (IndexOutOfBoundsException ioobe) {
3237             // Correct result
3238         }
3239 
3240         // Check double digit group references
3241         blah = "zzz123456789101112zzz";
3242         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3243         m = p.matcher(blah);
3244         result = new StringBuilder();
3245         m.find();
3246         m.appendReplacement(result, "$1w$11w$3");
3247         if (!result.toString().equals("zzz1w11w3"))
3248             failCount++;
3249 
3250         // Check to make sure it backs off $15 to $1 if only three groups
3251         blah = "zzzabcdcdefzzz";
3252         p = Pattern.compile("(ab)(cd)*(ef)");
3253         m = p.matcher(blah);
3254         result = new StringBuilder();
3255         m.find();
3256         m.appendReplacement(result, "$1w$15w$3");
3257         if (!result.toString().equals("zzzabwab5wef"))
3258             failCount++;
3259 
3260 
3261         // Supplementary character test
3262         // SB substitution with literal
3263         blah = toSupplementaries("zzzblahzzz");
3264         p = Pattern.compile(toSupplementaries("blah"));
3265         m = p.matcher(blah);
3266         result = new StringBuilder();
3267         try {
3268             m.appendReplacement(result, toSupplementaries("blech"));
3269             failCount++;
3270         } catch (IllegalStateException e) {
3271         }
3272         m.find();
3273         m.appendReplacement(result, toSupplementaries("blech"));
3274         if (!result.toString().equals(toSupplementaries("zzzblech")))
3275             failCount++;
3276         m.appendTail(result);
3277         if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
3278             failCount++;
3279 
3280         // SB substitution with groups
3281         blah = toSupplementaries("zzzabcdzzz");
3282         p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
3283         m = p.matcher(blah);
3284         result = new StringBuilder();
3285         try {
3286             m.appendReplacement(result, "$1");
3287             failCount++;
3288         } catch (IllegalStateException e) {
3289         }
3290         m.find();
3291         m.appendReplacement(result, "$1");
3292         if (!result.toString().equals(toSupplementaries("zzzab")))
3293             failCount++;
3294 
3295         m.appendTail(result);
3296         if (!result.toString().equals(toSupplementaries("zzzabzzz")))
3297             failCount++;
3298 
3299         // SB substitution with 3 groups
3300         blah = toSupplementaries("zzzabcdcdefzzz");
3301         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3302         m = p.matcher(blah);
3303         result = new StringBuilder();
3304         try {
3305             m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3306             failCount++;
3307         } catch (IllegalStateException e) {
3308         }
3309         m.find();
3310         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3311         if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
3312             failCount++;
3313 
3314         m.appendTail(result);
3315         if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
3316             failCount++;
3317 
3318         // SB substitution with groups and three matches
3319         // skipping middle match
3320         blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
3321         p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
3322         m = p.matcher(blah);
3323         result = new StringBuilder();
3324         try {
3325             m.appendReplacement(result, "$1");
3326             failCount++;
3327         } catch (IllegalStateException e) {
3328         }
3329         m.find();
3330         m.appendReplacement(result, "$1");
3331         if (!result.toString().equals(toSupplementaries("zzzab")))
3332             failCount++;
3333 
3334         m.find();
3335         m.find();
3336         m.appendReplacement(result, "$2");
3337         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
3338             failCount++;
3339 
3340         m.appendTail(result);
3341         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
3342             failCount++;
3343 
3344         // Check to make sure escaped $ is ignored
3345         blah = toSupplementaries("zzzabcdcdefzzz");
3346         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3347         m = p.matcher(blah);
3348         result = new StringBuilder();
3349         m.find();
3350         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
3351         if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
3352             failCount++;
3353 
3354         m.appendTail(result);
3355         if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
3356             failCount++;
3357 
3358         // Check to make sure a reference to nonexistent group causes error
3359         blah = toSupplementaries("zzzabcdcdefzzz");
3360         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3361         m = p.matcher(blah);
3362         result = new StringBuilder();
3363         m.find();
3364         try {
3365             m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
3366             failCount++;
3367         } catch (IndexOutOfBoundsException ioobe) {
3368             // Correct result
3369         }
3370         // Check double digit group references
3371         blah = toSupplementaries("zzz123456789101112zzz");
3372         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3373         m = p.matcher(blah);
3374         result = new StringBuilder();
3375         m.find();
3376         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
3377         if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
3378             failCount++;
3379 
3380         // Check to make sure it backs off $15 to $1 if only three groups
3381         blah = toSupplementaries("zzzabcdcdefzzz");
3382         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3383         m = p.matcher(blah);
3384         result = new StringBuilder();
3385         m.find();
3386         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3387         if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
3388             failCount++;
3389         // Check nothing has been appended into the output buffer if
3390         // the replacement string triggers IllegalArgumentException.
3391         p = Pattern.compile("(abc)");
3392         m = p.matcher("abcd");
3393         result = new StringBuilder();
3394         m.find();
3395         try {
3396             m.appendReplacement(result, ("xyz$g"));
3397             failCount++;
3398         } catch (IllegalArgumentException iae) {
3399             if (result.length() != 0)
3400                 failCount++;
3401         }
3402         report("SB Substitution 2");
3403     }
3404 
3405     /*
3406      * 5 groups of characters are created to make a substitution string.
3407      * A base string will be created including random lead chars, the
3408      * substitution string, and random trailing chars.
3409      * A pattern containing the 5 groups is searched for and replaced with:
3410      * random group + random string + random group.
3411      * The results are checked for correctness.
3412      */
3413     private static void substitutionBasher() {
3414         for (int runs = 0; runs<1000; runs++) {
3415             // Create a base string to work in
3416             int leadingChars = generator.nextInt(10);
3417             StringBuffer baseBuffer = new StringBuffer(100);
3418             String leadingString = getRandomAlphaString(leadingChars);
3419             baseBuffer.append(leadingString);
3420 
3421             // Create 5 groups of random number of random chars
3422             // Create the string to substitute
3423             // Create the pattern string to search for
3424             StringBuffer bufferToSub = new StringBuffer(25);
3425             StringBuffer bufferToPat = new StringBuffer(50);
3426             String[] groups = new String[5];
3427             for(int i=0; i<5; i++) {
3428                 int aGroupSize = generator.nextInt(5)+1;
3429                 groups[i] = getRandomAlphaString(aGroupSize);
3430                 bufferToSub.append(groups[i]);
3431                 bufferToPat.append('(');
3432                 bufferToPat.append(groups[i]);
3433                 bufferToPat.append(')');
3434             }
3435             String stringToSub = bufferToSub.toString();
3436             String pattern = bufferToPat.toString();
3437 
3438             // Place sub string into working string at random index
3439             baseBuffer.append(stringToSub);
3440 
3441             // Append random chars to end
3442             int trailingChars = generator.nextInt(10);
3443             String trailingString = getRandomAlphaString(trailingChars);
3444             baseBuffer.append(trailingString);
3445             String baseString = baseBuffer.toString();
3446 
3447             // Create test pattern and matcher
3448             Pattern p = Pattern.compile(pattern);
3449             Matcher m = p.matcher(baseString);
3450 
3451             // Reject candidate if pattern happens to start early
3452             m.find();
3453             if (m.start() < leadingChars)
3454                 continue;
3455 
3456             // Reject candidate if more than one match
3457             if (m.find())
3458                 continue;
3459 
3460             // Construct a replacement string with :
3461             // random group + random string + random group
3462             StringBuffer bufferToRep = new StringBuffer();
3463             int groupIndex1 = generator.nextInt(5);
3464             bufferToRep.append("$" + (groupIndex1 + 1));
3465             String randomMidString = getRandomAlphaString(5);
3466             bufferToRep.append(randomMidString);
3467             int groupIndex2 = generator.nextInt(5);
3468             bufferToRep.append("$" + (groupIndex2 + 1));
3469             String replacement = bufferToRep.toString();
3470 
3471             // Do the replacement
3472             String result = m.replaceAll(replacement);
3473 
3474             // Construct expected result
3475             StringBuffer bufferToRes = new StringBuffer();
3476             bufferToRes.append(leadingString);
3477             bufferToRes.append(groups[groupIndex1]);
3478             bufferToRes.append(randomMidString);
3479             bufferToRes.append(groups[groupIndex2]);
3480             bufferToRes.append(trailingString);
3481             String expectedResult = bufferToRes.toString();
3482 
3483             // Check results
3484             if (!result.equals(expectedResult))
3485                 failCount++;
3486         }
3487 
3488         report("Substitution Basher");
3489     }
3490 
3491     /*
3492      * 5 groups of characters are created to make a substitution string.
3493      * A base string will be created including random lead chars, the
3494      * substitution string, and random trailing chars.
3495      * A pattern containing the 5 groups is searched for and replaced with:
3496      * random group + random string + random group.
3497      * The results are checked for correctness.
3498      */
3499     private static void substitutionBasher2() {
3500         for (int runs = 0; runs<1000; runs++) {
3501             // Create a base string to work in
3502             int leadingChars = generator.nextInt(10);
3503             StringBuilder baseBuffer = new StringBuilder(100);
3504             String leadingString = getRandomAlphaString(leadingChars);
3505             baseBuffer.append(leadingString);
3506 
3507             // Create 5 groups of random number of random chars
3508             // Create the string to substitute
3509             // Create the pattern string to search for
3510             StringBuilder bufferToSub = new StringBuilder(25);
3511             StringBuilder bufferToPat = new StringBuilder(50);
3512             String[] groups = new String[5];
3513             for(int i=0; i<5; i++) {
3514                 int aGroupSize = generator.nextInt(5)+1;
3515                 groups[i] = getRandomAlphaString(aGroupSize);
3516                 bufferToSub.append(groups[i]);
3517                 bufferToPat.append('(');
3518                 bufferToPat.append(groups[i]);
3519                 bufferToPat.append(')');
3520             }
3521             String stringToSub = bufferToSub.toString();
3522             String pattern = bufferToPat.toString();
3523 
3524             // Place sub string into working string at random index
3525             baseBuffer.append(stringToSub);
3526 
3527             // Append random chars to end
3528             int trailingChars = generator.nextInt(10);
3529             String trailingString = getRandomAlphaString(trailingChars);
3530             baseBuffer.append(trailingString);
3531             String baseString = baseBuffer.toString();
3532 
3533             // Create test pattern and matcher
3534             Pattern p = Pattern.compile(pattern);
3535             Matcher m = p.matcher(baseString);
3536 
3537             // Reject candidate if pattern happens to start early
3538             m.find();
3539             if (m.start() < leadingChars)
3540                 continue;
3541 
3542             // Reject candidate if more than one match
3543             if (m.find())
3544                 continue;
3545 
3546             // Construct a replacement string with :
3547             // random group + random string + random group
3548             StringBuilder bufferToRep = new StringBuilder();
3549             int groupIndex1 = generator.nextInt(5);
3550             bufferToRep.append("$" + (groupIndex1 + 1));
3551             String randomMidString = getRandomAlphaString(5);
3552             bufferToRep.append(randomMidString);
3553             int groupIndex2 = generator.nextInt(5);
3554             bufferToRep.append("$" + (groupIndex2 + 1));
3555             String replacement = bufferToRep.toString();
3556 
3557             // Do the replacement
3558             String result = m.replaceAll(replacement);
3559 
3560             // Construct expected result
3561             StringBuilder bufferToRes = new StringBuilder();
3562             bufferToRes.append(leadingString);
3563             bufferToRes.append(groups[groupIndex1]);
3564             bufferToRes.append(randomMidString);
3565             bufferToRes.append(groups[groupIndex2]);
3566             bufferToRes.append(trailingString);
3567             String expectedResult = bufferToRes.toString();
3568 
3569             // Check results
3570             if (!result.equals(expectedResult)) {
3571                 failCount++;
3572             }
3573         }
3574 
3575         report("Substitution Basher 2");
3576     }
3577 
3578     /**
3579      * Checks the handling of some escape sequences that the Pattern
3580      * class should process instead of the java compiler. These are
3581      * not in the file because the escapes should be be processed
3582      * by the Pattern class when the regex is compiled.
3583      */
3584     private static void escapes() throws Exception {
3585         Pattern p = Pattern.compile("\\043");
3586         Matcher m = p.matcher("#");
3587         if (!m.find())
3588             failCount++;
3589 
3590         p = Pattern.compile("\\x23");
3591         m = p.matcher("#");
3592         if (!m.find())
3593             failCount++;
3594 
3595         p = Pattern.compile("\\u0023");
3596         m = p.matcher("#");
3597         if (!m.find())
3598             failCount++;
3599 
3600         report("Escape sequences");
3601     }
3602 
3603     /**
3604      * Checks the handling of blank input situations. These
3605      * tests are incompatible with my test file format.
3606      */
3607     private static void blankInput() throws Exception {
3608         Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3609         Matcher m = p.matcher("");
3610         if (m.find())
3611             failCount++;
3612 
3613         p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3614         m = p.matcher("");
3615         if (!m.find())
3616             failCount++;
3617 
3618         p = Pattern.compile("abc");
3619         m = p.matcher("");
3620         if (m.find())
3621             failCount++;
3622 
3623         p = Pattern.compile("a*");
3624         m = p.matcher("");
3625         if (!m.find())
3626             failCount++;
3627 
3628         report("Blank input");
3629     }
3630 
3631     /**
3632      * Tests the Boyer-Moore pattern matching of a character sequence
3633      * on randomly generated patterns.
3634      */
3635     private static void bm() throws Exception {
3636         doBnM('a');
3637         report("Boyer Moore (ASCII)");
3638 
3639         doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3640         report("Boyer Moore (Supplementary)");
3641     }
3642 
3643     private static void doBnM(int baseCharacter) throws Exception {
3644         int achar=0;
3645 
3646         for (int i=0; i<100; i++) {
3647             // Create a short pattern to search for
3648             int patternLength = generator.nextInt(7) + 4;
3649             StringBuffer patternBuffer = new StringBuffer(patternLength);
3650             String pattern;
3651             retry: for (;;) {
3652                 for (int x=0; x<patternLength; x++) {
3653                     int ch = baseCharacter + generator.nextInt(26);
3654                     if (Character.isSupplementaryCodePoint(ch)) {
3655                         patternBuffer.append(Character.toChars(ch));
3656                     } else {
3657                         patternBuffer.append((char)ch);
3658                     }
3659                 }
3660                 pattern = patternBuffer.toString();
3661 
3662                 // Avoid patterns that start and end with the same substring
3663                 // See JDK-6854417
3664                 for (int x=1; x < pattern.length(); x++) {
3665                     if (pattern.startsWith(pattern.substring(x)))
3666                         continue retry;
3667                 }
3668                 break;
3669             }
3670             Pattern p = Pattern.compile(pattern);
3671 
3672             // Create a buffer with random ASCII chars that does
3673             // not match the sample
3674             String toSearch = null;
3675             StringBuffer s = null;
3676             Matcher m = p.matcher("");
3677             do {
3678                 s = new StringBuffer(100);
3679                 for (int x=0; x<100; x++) {
3680                     int ch = baseCharacter + generator.nextInt(26);
3681                     if (Character.isSupplementaryCodePoint(ch)) {
3682                         s.append(Character.toChars(ch));
3683                     } else {
3684                         s.append((char)ch);
3685                     }
3686                 }
3687                 toSearch = s.toString();
3688                 m.reset(toSearch);
3689             } while (m.find());
3690 
3691             // Insert the pattern at a random spot
3692             int insertIndex = generator.nextInt(99);
3693             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3694                 insertIndex++;
3695             s = s.insert(insertIndex, pattern);
3696             toSearch = s.toString();
3697 
3698             // Make sure that the pattern is found
3699             m.reset(toSearch);
3700             if (!m.find())
3701                 failCount++;
3702 
3703             // Make sure that the match text is the pattern
3704             if (!m.group().equals(pattern))
3705                 failCount++;
3706 
3707             // Make sure match occured at insertion point
3708             if (m.start() != insertIndex)
3709                 failCount++;
3710         }
3711     }
3712 
3713     /**
3714      * Tests the matching of slices on randomly generated patterns.
3715      * The Boyer-Moore optimization is not done on these patterns
3716      * because it uses unicode case folding.
3717      */
3718     private static void slice() throws Exception {
3719         doSlice(Character.MAX_VALUE);
3720         report("Slice");
3721 
3722         doSlice(Character.MAX_CODE_POINT);
3723         report("Slice (Supplementary)");
3724     }
3725 
3726     private static void doSlice(int maxCharacter) throws Exception {
3727         Random generator = new Random();
3728         int achar=0;
3729 
3730         for (int i=0; i<100; i++) {
3731             // Create a short pattern to search for
3732             int patternLength = generator.nextInt(7) + 4;
3733             StringBuffer patternBuffer = new StringBuffer(patternLength);
3734             for (int x=0; x<patternLength; x++) {
3735                 int randomChar = 0;
3736                 while (!Character.isLetterOrDigit(randomChar))
3737                     randomChar = generator.nextInt(maxCharacter);
3738                 if (Character.isSupplementaryCodePoint(randomChar)) {
3739                     patternBuffer.append(Character.toChars(randomChar));
3740                 } else {
3741                     patternBuffer.append((char) randomChar);
3742                 }
3743             }
3744             String pattern =  patternBuffer.toString();
3745             Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3746 
3747             // Create a buffer with random chars that does not match the sample
3748             String toSearch = null;
3749             StringBuffer s = null;
3750             Matcher m = p.matcher("");
3751             do {
3752                 s = new StringBuffer(100);
3753                 for (int x=0; x<100; x++) {
3754                     int randomChar = 0;
3755                     while (!Character.isLetterOrDigit(randomChar))
3756                         randomChar = generator.nextInt(maxCharacter);
3757                     if (Character.isSupplementaryCodePoint(randomChar)) {
3758                         s.append(Character.toChars(randomChar));
3759                     } else {
3760                         s.append((char) randomChar);
3761                     }
3762                 }
3763                 toSearch = s.toString();
3764                 m.reset(toSearch);
3765             } while (m.find());
3766 
3767             // Insert the pattern at a random spot
3768             int insertIndex = generator.nextInt(99);
3769             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3770                 insertIndex++;
3771             s = s.insert(insertIndex, pattern);
3772             toSearch = s.toString();
3773 
3774             // Make sure that the pattern is found
3775             m.reset(toSearch);
3776             if (!m.find())
3777                 failCount++;
3778 
3779             // Make sure that the match text is the pattern
3780             if (!m.group().equals(pattern))
3781                 failCount++;
3782 
3783             // Make sure match occured at insertion point
3784             if (m.start() != insertIndex)
3785                 failCount++;
3786         }
3787     }
3788 
3789     private static void explainFailure(String pattern, String data,
3790                                        String expected, String actual) {
3791         System.err.println("----------------------------------------");
3792         System.err.println("Pattern = "+pattern);
3793         System.err.println("Data = "+data);
3794         System.err.println("Expected = " + expected);
3795         System.err.println("Actual   = " + actual);
3796     }
3797 
3798     private static void explainFailure(String pattern, String data,
3799                                        Throwable t) {
3800         System.err.println("----------------------------------------");
3801         System.err.println("Pattern = "+pattern);
3802         System.err.println("Data = "+data);
3803         t.printStackTrace(System.err);
3804     }
3805 
3806     // Testing examples from a file
3807 
3808     /**
3809      * Goes through the file "TestCases.txt" and creates many patterns
3810      * described in the file, matching the patterns against input lines in
3811      * the file, and comparing the results against the correct results
3812      * also found in the file. The file format is described in comments
3813      * at the head of the file.
3814      */
3815     private static void processFile(String fileName) throws Exception {
3816         File testCases = new File(System.getProperty("test.src", "."),
3817                                   fileName);
3818         FileInputStream in = new FileInputStream(testCases);
3819         BufferedReader r = new BufferedReader(new InputStreamReader(in));
3820 
3821         // Process next test case.
3822         String aLine;
3823         while((aLine = r.readLine()) != null) {
3824             // Read a line for pattern
3825             String patternString = grabLine(r);
3826             Pattern p = null;
3827             try {
3828                 p = compileTestPattern(patternString);
3829             } catch (PatternSyntaxException e) {
3830                 String dataString = grabLine(r);
3831                 String expectedResult = grabLine(r);
3832                 if (expectedResult.startsWith("error"))
3833                     continue;
3834                 explainFailure(patternString, dataString, e);
3835                 failCount++;
3836                 continue;
3837             }
3838 
3839             // Read a line for input string
3840             String dataString = grabLine(r);
3841             Matcher m = p.matcher(dataString);
3842             StringBuffer result = new StringBuffer();
3843 
3844             // Check for IllegalStateExceptions before a match
3845             failCount += preMatchInvariants(m);
3846 
3847             boolean found = m.find();
3848 
3849             if (found)
3850                 failCount += postTrueMatchInvariants(m);
3851             else
3852                 failCount += postFalseMatchInvariants(m);
3853 
3854             if (found) {
3855                 result.append("true ");
3856                 result.append(m.group(0) + " ");
3857             } else {
3858                 result.append("false ");
3859             }
3860 
3861             result.append(m.groupCount());
3862 
3863             if (found) {
3864                 for (int i=1; i<m.groupCount()+1; i++)
3865                     if (m.group(i) != null)
3866                         result.append(" " +m.group(i));
3867             }
3868 
3869             // Read a line for the expected result
3870             String expectedResult = grabLine(r);
3871 
3872             if (!result.toString().equals(expectedResult)) {
3873                 explainFailure(patternString, dataString, expectedResult, result.toString());
3874                 failCount++;
3875             }
3876         }
3877 
3878         report(fileName);
3879     }
3880 
3881     private static int preMatchInvariants(Matcher m) {
3882         int failCount = 0;
3883         try {
3884             m.start();
3885             failCount++;
3886         } catch (IllegalStateException ise) {}
3887         try {
3888             m.end();
3889             failCount++;
3890         } catch (IllegalStateException ise) {}
3891         try {
3892             m.group();
3893             failCount++;
3894         } catch (IllegalStateException ise) {}
3895         return failCount;
3896     }
3897 
3898     private static int postFalseMatchInvariants(Matcher m) {
3899         int failCount = 0;
3900         try {
3901             m.group();
3902             failCount++;
3903         } catch (IllegalStateException ise) {}
3904         try {
3905             m.start();
3906             failCount++;
3907         } catch (IllegalStateException ise) {}
3908         try {
3909             m.end();
3910             failCount++;
3911         } catch (IllegalStateException ise) {}
3912         return failCount;
3913     }
3914 
3915     private static int postTrueMatchInvariants(Matcher m) {
3916         int failCount = 0;
3917         //assert(m.start() = m.start(0);
3918         if (m.start() != m.start(0))
3919             failCount++;
3920         //assert(m.end() = m.end(0);
3921         if (m.start() != m.start(0))
3922             failCount++;
3923         //assert(m.group() = m.group(0);
3924         if (!m.group().equals(m.group(0)))
3925             failCount++;
3926         try {
3927             m.group(50);
3928             failCount++;
3929         } catch (IndexOutOfBoundsException ise) {}
3930 
3931         return failCount;
3932     }
3933 
3934     private static Pattern compileTestPattern(String patternString) {
3935         if (!patternString.startsWith("'")) {
3936             return Pattern.compile(patternString);
3937         }
3938         int break1 = patternString.lastIndexOf("'");
3939         String flagString = patternString.substring(
3940                                           break1+1, patternString.length());
3941         patternString = patternString.substring(1, break1);
3942 
3943         if (flagString.equals("i"))
3944             return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3945 
3946         if (flagString.equals("m"))
3947             return Pattern.compile(patternString, Pattern.MULTILINE);
3948 
3949         return Pattern.compile(patternString);
3950     }
3951 
3952     /**
3953      * Reads a line from the input file. Keeps reading lines until a non
3954      * empty non comment line is read. If the line contains a \n then
3955      * these two characters are replaced by a newline char. If a \\uxxxx
3956      * sequence is read then the sequence is replaced by the unicode char.
3957      */
3958     private static String grabLine(BufferedReader r) throws Exception {
3959         int index = 0;
3960         String line = r.readLine();
3961         while (line.startsWith("//") || line.length() < 1)
3962             line = r.readLine();
3963         while ((index = line.indexOf("\\n")) != -1) {
3964             StringBuffer temp = new StringBuffer(line);
3965             temp.replace(index, index+2, "\n");
3966             line = temp.toString();
3967         }
3968         while ((index = line.indexOf("\\u")) != -1) {
3969             StringBuffer temp = new StringBuffer(line);
3970             String value = temp.substring(index+2, index+6);
3971             char aChar = (char)Integer.parseInt(value, 16);
3972             String unicodeChar = "" + aChar;
3973             temp.replace(index, index+6, unicodeChar);
3974             line = temp.toString();
3975         }
3976 
3977         return line;
3978     }
3979 
3980     private static void check(Pattern p, String s, String g, String expected) {
3981         Matcher m = p.matcher(s);
3982         m.find();
3983         if (!m.group(g).equals(expected) ||
3984             s.charAt(m.start(g)) != expected.charAt(0) ||
3985             s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1))
3986             failCount++;
3987     }
3988 
3989     private static void checkReplaceFirst(String p, String s, String r, String expected)
3990     {
3991         if (!expected.equals(Pattern.compile(p)
3992                                     .matcher(s)
3993                                     .replaceFirst(r)))
3994             failCount++;
3995     }
3996 
3997     private static void checkReplaceAll(String p, String s, String r, String expected)
3998     {
3999         if (!expected.equals(Pattern.compile(p)
4000                                     .matcher(s)
4001                                     .replaceAll(r)))
4002             failCount++;
4003     }
4004 
4005     private static void checkExpectedFail(String p) {
4006         try {
4007             Pattern.compile(p);
4008         } catch (PatternSyntaxException pse) {
4009             //pse.printStackTrace();
4010             return;
4011         }
4012         failCount++;
4013     }
4014 
4015     private static void checkExpectedIAE(Matcher m, String g) {
4016         m.find();
4017         try {
4018             m.group(g);
4019         } catch (IllegalArgumentException x) {
4020             //iae.printStackTrace();
4021             try {
4022                 m.start(g);
4023             } catch (IllegalArgumentException xx) {
4024                 try {
4025                     m.start(g);
4026                 } catch (IllegalArgumentException xxx) {
4027                     return;
4028                 }
4029             }
4030         }
4031         failCount++;
4032     }
4033 
4034     private static void checkExpectedNPE(Matcher m) {
4035         m.find();
4036         try {
4037             m.group(null);
4038         } catch (NullPointerException x) {
4039             try {
4040                 m.start(null);
4041             } catch (NullPointerException xx) {
4042                 try {
4043                     m.end(null);
4044                 } catch (NullPointerException xxx) {
4045                     return;
4046                 }
4047             }
4048         }
4049         failCount++;
4050     }
4051 
4052     private static void namedGroupCaptureTest() throws Exception {
4053         check(Pattern.compile("x+(?<gname>y+)z+"),
4054               "xxxyyyzzz",
4055               "gname",
4056               "yyy");
4057 
4058         check(Pattern.compile("x+(?<gname8>y+)z+"),
4059               "xxxyyyzzz",
4060               "gname8",
4061               "yyy");
4062 
4063         //backref
4064         Pattern pattern = Pattern.compile("(a*)bc\\1");
4065         check(pattern, "zzzaabcazzz", true);  // found "abca"
4066 
4067         check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
4068               "zzzaabcaazzz", true);
4069 
4070         check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
4071               "abcdefabc", true);
4072 
4073         check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
4074               "abcdefghijkk", true);
4075 
4076         // Supplementary character tests
4077         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
4078               toSupplementaries("zzzaabcazzz"), true);
4079 
4080         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
4081               toSupplementaries("zzzaabcaazzz"), true);
4082 
4083         check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
4084               toSupplementaries("abcdefabc"), true);
4085 
4086         check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
4087                               "(?<gname>" +
4088                               toSupplementaries("k)") + "\\k<gname>"),
4089               toSupplementaries("abcdefghijkk"), true);
4090 
4091         check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
4092               "xxxyyyzzzyyy",
4093               "gname",
4094               "yyy");
4095 
4096         //replaceFirst/All
4097         checkReplaceFirst("(?<gn>ab)(c*)",
4098                           "abccczzzabcczzzabccc",
4099                           "${gn}",
4100                           "abzzzabcczzzabccc");
4101 
4102         checkReplaceAll("(?<gn>ab)(c*)",
4103                         "abccczzzabcczzzabccc",
4104                         "${gn}",
4105                         "abzzzabzzzab");
4106 
4107 
4108         checkReplaceFirst("(?<gn>ab)(c*)",
4109                           "zzzabccczzzabcczzzabccczzz",
4110                           "${gn}",
4111                           "zzzabzzzabcczzzabccczzz");
4112 
4113         checkReplaceAll("(?<gn>ab)(c*)",
4114                         "zzzabccczzzabcczzzabccczzz",
4115                         "${gn}",
4116                         "zzzabzzzabzzzabzzz");
4117 
4118         checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
4119                           "zzzabccczzzabcczzzabccczzz",
4120                           "${gn2}",
4121                           "zzzccczzzabcczzzabccczzz");
4122 
4123         checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
4124                         "zzzabccczzzabcczzzabccczzz",
4125                         "${gn2}",
4126                         "zzzccczzzcczzzccczzz");
4127 
4128         //toSupplementaries("(ab)(c*)"));
4129         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4130                            ")(?<gn2>" + toSupplementaries("c") + "*)",
4131                           toSupplementaries("abccczzzabcczzzabccc"),
4132                           "${gn1}",
4133                           toSupplementaries("abzzzabcczzzabccc"));
4134 
4135 
4136         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4137                         ")(?<gn2>" + toSupplementaries("c") + "*)",
4138                         toSupplementaries("abccczzzabcczzzabccc"),
4139                         "${gn1}",
4140                         toSupplementaries("abzzzabzzzab"));
4141 
4142         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4143                            ")(?<gn2>" + toSupplementaries("c") + "*)",
4144                           toSupplementaries("abccczzzabcczzzabccc"),
4145                           "${gn2}",
4146                           toSupplementaries("ccczzzabcczzzabccc"));
4147 
4148 
4149         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4150                         ")(?<gn2>" + toSupplementaries("c") + "*)",
4151                         toSupplementaries("abccczzzabcczzzabccc"),
4152                         "${gn2}",
4153                         toSupplementaries("ccczzzcczzzccc"));
4154 
4155         checkReplaceFirst("(?<dog>Dog)AndCat",
4156                           "zzzDogAndCatzzzDogAndCatzzz",
4157                           "${dog}",
4158                           "zzzDogzzzDogAndCatzzz");
4159 
4160 
4161         checkReplaceAll("(?<dog>Dog)AndCat",
4162                           "zzzDogAndCatzzzDogAndCatzzz",
4163                           "${dog}",
4164                           "zzzDogzzzDogzzz");
4165 
4166         // backref in Matcher & String
4167         if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
4168             !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
4169             failCount++;
4170 
4171         // negative
4172         checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
4173         checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
4174         checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
4175         checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
4176         checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
4177         checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
4178                          "gnameX");
4179         checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
4180         report("NamedGroupCapture");
4181     }
4182 
4183     // This is for bug 6919132
4184     private static void nonBmpClassComplementTest() throws Exception {
4185         Pattern p = Pattern.compile("\\P{Lu}");
4186         Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4187 
4188         if (m.find() && m.start() == 1)
4189             failCount++;
4190 
4191         // from a unicode category
4192         p = Pattern.compile("\\P{Lu}");
4193         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4194         if (m.find())
4195             failCount++;
4196         if (!m.hitEnd())
4197             failCount++;
4198 
4199         // block
4200         p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
4201         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4202         if (m.find() && m.start() == 1)
4203             failCount++;
4204 
4205         p = Pattern.compile("\\P{sc=GRANTHA}");
4206         m = p.matcher(new String(new int[] {0x11350}, 0, 1));
4207         if (m.find() && m.start() == 1)
4208             failCount++;
4209 
4210         report("NonBmpClassComplement");
4211     }
4212 
4213     private static void unicodePropertiesTest() throws Exception {
4214         // different forms
4215         if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
4216             !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
4217             !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
4218             !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
4219             !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
4220             !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
4221             !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
4222             !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
4223             !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
4224             !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
4225             failCount++;
4226 
4227         Matcher common  = Pattern.compile("\\p{script=Common}").matcher("");
4228         Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
4229         Matcher lastSM  = common;
4230         Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
4231 
4232         Matcher latin  = Pattern.compile("\\p{block=basic_latin}").matcher("");
4233         Matcher greek  = Pattern.compile("\\p{InGreek}").matcher("");
4234         Matcher lastBM = latin;
4235         Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
4236 
4237         for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
4238             if (cp >= 0x30000 && (cp & 0x70) == 0){
4239                 continue;  // only pick couple code points, they are the same
4240             }
4241 
4242             // Unicode Script
4243             Character.UnicodeScript script = Character.UnicodeScript.of(cp);
4244             Matcher m;
4245             String str = new String(Character.toChars(cp));
4246             if (script == lastScript) {
4247                  m = lastSM;
4248                  m.reset(str);
4249             } else {
4250                  m  = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
4251             }
4252             if (!m.matches()) {
4253                 failCount++;
4254             }
4255             Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
4256             other.reset(str);
4257             if (other.matches()) {
4258                 failCount++;
4259             }
4260             lastSM = m;
4261             lastScript = script;
4262 
4263             // Unicode Block
4264             Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
4265             if (block == null) {
4266                 //System.out.printf("Not a Block: cp=%x%n", cp);
4267                 continue;
4268             }
4269             if (block == lastBlock) {
4270                  m = lastBM;
4271                  m.reset(str);
4272             } else {
4273                  m  = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
4274             }
4275             if (!m.matches()) {
4276                 failCount++;
4277             }
4278             other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
4279             other.reset(str);
4280             if (other.matches()) {
4281                 failCount++;
4282             }
4283             lastBM = m;
4284             lastBlock = block;
4285         }
4286         report("unicodeProperties");
4287     }
4288 
4289     private static void unicodeHexNotationTest() throws Exception {
4290 
4291         // negative
4292         checkExpectedFail("\\x{-23}");
4293         checkExpectedFail("\\x{110000}");
4294         checkExpectedFail("\\x{}");
4295         checkExpectedFail("\\x{AB[ef]");
4296 
4297         // codepoint
4298         check("^\\x{1033c}$",              "\uD800\uDF3C", true);
4299         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
4300         check("^\\x{D800}\\x{DF3c}+$",     "\uD800\uDF3C", false);
4301         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
4302 
4303         // in class
4304         check("^[\\x{D800}\\x{DF3c}]+$",   "\uD800\uDF3C", false);
4305         check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
4306         check("^[\\x{D800}\\x{DF3C}]+$",   "\uD800\uDF3C", false);
4307         check("^[\\x{DF3C}\\x{D800}]+$",   "\uD800\uDF3C", false);
4308         check("^[\\x{D800}\\x{DF3C}]+$",   "\uDF3C\uD800", true);
4309         check("^[\\x{DF3C}\\x{D800}]+$",   "\uDF3C\uD800", true);
4310 
4311         for (int cp = 0; cp <= 0x10FFFF; cp++) {
4312              String s = "A" + new String(Character.toChars(cp)) + "B";
4313              String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
4314                                              : String.format("\\u%04x\\u%04x",
4315                                                (int) Character.toChars(cp)[0],
4316                                                (int) Character.toChars(cp)[1]);
4317              String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
4318              if (!Pattern.matches("A" + hexUTF16 + "B", s))
4319                  failCount++;
4320              if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
4321                  failCount++;
4322              if (!Pattern.matches("A" + hexCodePoint + "B", s))
4323                  failCount++;
4324              if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
4325                  failCount++;
4326          }
4327          report("unicodeHexNotation");
4328     }
4329 
    /**
     * Cross-checks the predefined character classes against reference
     * predicates for every code point in [1, 0x30000), then runs a few
     * fixed word-boundary samples.
     *
     * Each class is compiled in up to four flavours: plain, with the
     * UNICODE_CHARACTER_CLASS flag, as an Is-prefixed property and as a
     * java-prefixed method class. Each flavour is compared with the
     * predicate named next to it in the big condition below.
     */
    private static void unicodeClassesTest() throws Exception {

        // plain classes, no flags; compared against POSIX_ASCII below
        Matcher lower  = Pattern.compile("\\p{Lower}").matcher("");
        Matcher upper  = Pattern.compile("\\p{Upper}").matcher("");
        Matcher ASCII  = Pattern.compile("\\p{ASCII}").matcher("");
        Matcher alpha  = Pattern.compile("\\p{Alpha}").matcher("");
        Matcher digit  = Pattern.compile("\\p{Digit}").matcher("");
        Matcher alnum  = Pattern.compile("\\p{Alnum}").matcher("");
        Matcher punct  = Pattern.compile("\\p{Punct}").matcher("");
        Matcher graph  = Pattern.compile("\\p{Graph}").matcher("");
        Matcher print  = Pattern.compile("\\p{Print}").matcher("");
        Matcher blank  = Pattern.compile("\\p{Blank}").matcher("");
        Matcher cntrl  = Pattern.compile("\\p{Cntrl}").matcher("");
        Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
        Matcher space  = Pattern.compile("\\p{Space}").matcher("");
        Matcher bound  = Pattern.compile("\\b").matcher("");
        Matcher word   = Pattern.compile("\\w++").matcher("");
        // UNICODE_CHARACTER_CLASS
        Matcher lowerU  = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher upperU  = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher ASCIIU  = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher alphaU  = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher digitU  = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher alnumU  = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher punctU  = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher graphU  = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher printU  = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher blankU  = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher cntrlU  = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher spaceU  = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher boundU  = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher wordU   = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        // embedded flag (?U)
        // NOTE(review): these also pass UNICODE_CHARACTER_CLASS to compile(),
        // so the embedded flag is never exercised on its own.
        Matcher lowerEU  = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher graphEU  = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher wordEU   = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");

        // a word enclosed by word boundaries
        Matcher bwb    = Pattern.compile("\\b\\w\\b").matcher("");
        Matcher bwbU   = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher bwbEU  = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        // properties
        Matcher lowerP  = Pattern.compile("\\p{IsLowerCase}").matcher("");
        Matcher upperP  = Pattern.compile("\\p{IsUpperCase}").matcher("");
        Matcher titleP  = Pattern.compile("\\p{IsTitleCase}").matcher("");
        Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
        Matcher alphaP  = Pattern.compile("\\p{IsAlphabetic}").matcher("");
        Matcher ideogP  = Pattern.compile("\\p{IsIdeographic}").matcher("");
        Matcher cntrlP  = Pattern.compile("\\p{IsControl}").matcher("");
        Matcher spaceP  = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
        Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
        Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
        Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
        // javaMethod
        Matcher lowerJ  = Pattern.compile("\\p{javaLowerCase}").matcher("");
        Matcher upperJ  = Pattern.compile("\\p{javaUpperCase}").matcher("");
        Matcher alphaJ  = Pattern.compile("\\p{javaAlphabetic}").matcher("");
        Matcher ideogJ  = Pattern.compile("\\p{javaIdeographic}").matcher("");
        // GC/C
        Matcher gcC  = Pattern.compile("\\p{C}").matcher("");

        // All comparisons for one code point are joined with ||, so a code
        // point adds at most one to failCount, and evaluation stops at the
        // first comparison that disagrees.
        for (int cp = 1; cp < 0x30000; cp++) {
            String str = new String(Character.toChars(cp));
            int type = Character.getType(cp);
            if (// lower
                POSIX_ASCII.isLower(cp)   != lower.reset(str).matches()  ||
                Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
                Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
                Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
                Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
                // upper
                POSIX_ASCII.isUpper(cp)   != upper.reset(str).matches()  ||
                POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
                Character.isUpperCase(cp) != upperP.reset(str).matches() ||
                Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
                // alpha
                POSIX_ASCII.isAlpha(cp)   != alpha.reset(str).matches()  ||
                POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
                Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
                Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
                // digit
                POSIX_ASCII.isDigit(cp)   != digit.reset(str).matches()  ||
                Character.isDigit(cp)     != digitU.reset(str).matches() ||
                // alnum
                POSIX_ASCII.isAlnum(cp)   != alnum.reset(str).matches()  ||
                POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
                // punct
                POSIX_ASCII.isPunct(cp)   != punct.reset(str).matches()  ||
                POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
                // graph
                POSIX_ASCII.isGraph(cp)   != graph.reset(str).matches()  ||
                POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
                POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
                // blank
                POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
                                          != blank.reset(str).matches()  ||
                POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
                // print
                POSIX_ASCII.isPrint(cp)   != print.reset(str).matches()  ||
                POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
                // cntrl
                POSIX_ASCII.isCntrl(cp)   != cntrl.reset(str).matches()  ||
                POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
                (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
                // hexdigit
                POSIX_ASCII.isHexDigit(cp)   != xdigit.reset(str).matches()  ||
                POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
                // space
                POSIX_ASCII.isSpace(cp)   != space.reset(str).matches()  ||
                POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
                POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
                // word
                POSIX_ASCII.isWord(cp)   != word.reset(str).matches()  ||
                POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
                POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
                // bwordb
                POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
                POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
                // properties
                Character.isTitleCase(cp) != titleP.reset(str).matches() ||
                Character.isLetter(cp)    != letterP.reset(str).matches()||
                Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
                Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
                // IsAssigned must be the opposite of "type is UNASSIGNED",
                // hence == rather than != on this one line
                (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
                POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
                POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() ||
                // gc_C
                (Character.CONTROL == type || Character.FORMAT == type ||
                 Character.PRIVATE_USE == type || Character.SURROGATE == type ||
                 Character.UNASSIGNED == type)
                != gcC.reset(str).matches()) {
                failCount++;
            }
        }

        // bounds/word align
        // Each sample is one word padded with a space on both sides; the
        // two indexes handed to twoFindIndexes are the positions right
        // after the leading space and right before the trailing one.
        // The same word without padding must match the
        // UNICODE_CHARACTER_CLASS boundary-word-boundary pattern as a whole.
        twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
        if (!bwbU.reset("\u0180sherman\u0400").matches())
            failCount++;
        twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
        if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
            failCount++;
        twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
        if (!bwbU.reset("\u0724\u0739\u0724").matches())
            failCount++;
        if (!bwbEU.reset("\u0724\u0739\u0724").matches())
            failCount++;
        report("unicodePredefinedClasses");
    }
4479 
4480     private static void unicodeCharacterNameTest() throws Exception {
4481 
4482         for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) {
4483             if (!Character.isValidCodePoint(cp) ||
4484                 Character.getType(cp) == Character.UNASSIGNED)
4485                 continue;
4486             String str = new String(Character.toChars(cp));
4487             // single
4488             String p = "\\N{" + Character.getName(cp) + "}";
4489             if (!Pattern.compile(p).matcher(str).matches()) {
4490                 failCount++;
4491             }
4492             // class[c]
4493             p = "[\\N{" + Character.getName(cp) + "}]";
4494             if (!Pattern.compile(p).matcher(str).matches()) {
4495                 failCount++;
4496             }
4497         }
4498 
4499         // range
4500         for (int i = 0; i < 10; i++) {
4501             int start = generator.nextInt(20);
4502             int end = start + generator.nextInt(200);
4503             String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]";
4504             String str;
4505             for (int cp = start; cp < end; cp++) {
4506                 str = new String(Character.toChars(cp));
4507                 if (!Pattern.compile(p).matcher(str).matches()) {
4508                     failCount++;
4509                 }
4510             }
4511             str = new String(Character.toChars(end + 10));
4512             if (Pattern.compile(p).matcher(str).matches()) {
4513                 failCount++;
4514             }
4515         }
4516 
4517         // slice
4518         for (int i = 0; i < 10; i++) {
4519             int n = generator.nextInt(256);
4520             int[] buf = new int[n];
4521             StringBuffer sb = new StringBuffer(1024);
4522             for (int j = 0; j < n; j++) {
4523                 int cp = generator.nextInt(1000);
4524                 if (!Character.isValidCodePoint(cp) ||
4525                     Character.getType(cp) == Character.UNASSIGNED)
4526                     cp = 0x4e00;    // just use 4e00
4527                 sb.append("\\N{" + Character.getName(cp) + "}");
4528                 buf[j] = cp;
4529             }
4530             String p = sb.toString();
4531             String str = new String(buf, 0, buf.length);
4532             if (!Pattern.compile(p).matcher(str).matches()) {
4533                 failCount++;
4534             }
4535         }
4536         report("unicodeCharacterName");
4537     }
4538 
4539     private static void horizontalAndVerticalWSTest() throws Exception {
4540         String hws = new String (new char[] {
4541                                      0x09, 0x20, 0xa0, 0x1680, 0x180e,
4542                                      0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
4543                                      0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
4544                                      0x202f, 0x205f, 0x3000 });
4545         String vws = new String (new char[] {
4546                                      0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
4547         if (!Pattern.compile("\\h+").matcher(hws).matches() ||
4548             !Pattern.compile("[\\h]+").matcher(hws).matches())
4549             failCount++;
4550         if (Pattern.compile("\\H").matcher(hws).find() ||
4551             Pattern.compile("[\\H]").matcher(hws).find())
4552             failCount++;
4553         if (!Pattern.compile("\\v+").matcher(vws).matches() ||
4554             !Pattern.compile("[\\v]+").matcher(vws).matches())
4555             failCount++;
4556         if (Pattern.compile("\\V").matcher(vws).find() ||
4557             Pattern.compile("[\\V]").matcher(vws).find())
4558             failCount++;
4559         String prefix = "abcd";
4560         String suffix = "efgh";
4561         String ng = "A";
4562         for (int i = 0; i < hws.length(); i++) {
4563             String c = String.valueOf(hws.charAt(i));
4564             Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
4565             if (!m.find() || !c.equals(m.group()))
4566                 failCount++;
4567             m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
4568             if (!m.find() || !c.equals(m.group()))
4569                 failCount++;
4570 
4571             m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
4572             if (!m.find() || !ng.equals(m.group()))
4573                 failCount++;
4574             m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
4575             if (!m.find() || !ng.equals(m.group()))
4576                 failCount++;
4577         }
4578         for (int i = 0; i < vws.length(); i++) {
4579             String c = String.valueOf(vws.charAt(i));
4580             Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
4581             if (!m.find() || !c.equals(m.group()))
4582                 failCount++;
4583             m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
4584             if (!m.find() || !c.equals(m.group()))
4585                 failCount++;
4586 
4587             m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
4588             if (!m.find() || !ng.equals(m.group()))
4589                 failCount++;
4590             m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
4591             if (!m.find() || !ng.equals(m.group()))
4592                 failCount++;
4593         }
4594         // \v in range is interpreted as 0x0B. This is the undocumented behavior
4595         if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
4596             failCount++;
4597         report("horizontalAndVerticalWSTest");
4598     }
4599 
4600     private static void linebreakTest() throws Exception {
4601         String linebreaks = new String (new char[] {
4602             0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
4603         String crnl = "\r\n";
4604         if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() &&
4605               Pattern.compile("\\R").matcher(crnl).matches() &&
4606               Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() &&
4607               Pattern.compile("\\Rabc").matcher("\rabc").matches() &&
4608               Pattern.compile("\\R\\R").matcher(crnl).matches() &&  // backtracking
4609               Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking
4610               !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029
4611             failCount++;
4612         }
4613         report("linebreakTest");
4614     }
4615 
4616     // #7189363
4617     private static void branchTest() throws Exception {
4618         if (!Pattern.compile("(a)?bc|d").matcher("d").find() ||     // greedy
4619             !Pattern.compile("(a)+bc|d").matcher("d").find() ||
4620             !Pattern.compile("(a)*bc|d").matcher("d").find() ||
4621             !Pattern.compile("(a)??bc|d").matcher("d").find() ||    // reluctant
4622             !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
4623             !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
4624             !Pattern.compile("(a)?+bc|d").matcher("d").find() ||    // possessive
4625             !Pattern.compile("(a)++bc|d").matcher("d").find() ||
4626             !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
4627             !Pattern.compile("(a)?bc|d").matcher("d").matches() ||  // greedy
4628             !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
4629             !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
4630             !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
4631             !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
4632             !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
4633             !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
4634             !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
4635             !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
4636             !Pattern.compile("(a)?bc|de").matcher("de").find() ||   // others
4637             !Pattern.compile("(a)??bc|de").matcher("de").find() ||
4638             !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
4639             !Pattern.compile("(a)??bc|de").matcher("de").matches())
4640             failCount++;
4641         report("branchTest");
4642     }
4643 
4644     // This test is for 8007395
4645     private static void groupCurlyNotFoundSuppTest() throws Exception {
4646         String input = "test this as \ud83d\ude0d";
4647         for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
4648                                           "test(.)*(@[a-zA-Z.]+)",
4649                                           "test([^B])+(@[a-zA-Z.]+)",
4650                                           "test([^B])*(@[a-zA-Z.]+)",
4651                                           "test(\\P{IsControl})+(@[a-zA-Z.]+)",
4652                                           "test(\\P{IsControl})*(@[a-zA-Z.]+)",
4653                                         }) {
4654             Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
4655                                .matcher(input);
4656             try {
4657                 if (m.find()) {
4658                     failCount++;
4659                 }
4660             } catch (Exception x) {
4661                 failCount++;
4662             }
4663         }
4664         report("GroupCurly NotFoundSupp");
4665     }
4666 
4667     // This test is for 8023647
4668     private static void groupCurlyBackoffTest() throws Exception {
4669         if (!"abc1c".matches("(\\w)+1\\1") ||
4670             "abc11".matches("(\\w)+1\\1")) {
4671             failCount++;
4672         }
4673         report("GroupCurly backoff");
4674     }
4675 
4676     // This test is for 8012646
4677     private static void patternAsPredicate() throws Exception {
4678         Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
4679 
4680         if (p.test("")) {
4681             failCount++;
4682         }
4683         if (!p.test("word")) {
4684             failCount++;
4685         }
4686         if (p.test("1234")) {
4687             failCount++;
4688         }
4689         if (!p.test("word1234")) {
4690             failCount++;
4691         }
4692         report("Pattern.asPredicate");
4693     }
4694 
4695     // This test is for 8184692
4696     private static void patternAsMatchPredicate() throws Exception {
4697         Predicate<String> p = Pattern.compile("[a-z]+").asMatchPredicate();
4698 
4699         if (p.test("")) {
4700             failCount++;
4701         }
4702         if (!p.test("word")) {
4703             failCount++;
4704         }
4705         if (p.test("1234word")) {
4706             failCount++;
4707         }
4708         if (p.test("1234")) {
4709             failCount++;
4710         }
4711         report("Pattern.asMatchPredicate");
4712     }
4713 
4714 
4715     // This test is for 8035975
4716     private static void invalidFlags() throws Exception {
4717         for (int flag = 1; flag != 0; flag <<= 1) {
4718             switch (flag) {
4719             case Pattern.CASE_INSENSITIVE:
4720             case Pattern.MULTILINE:
4721             case Pattern.DOTALL:
4722             case Pattern.UNICODE_CASE:
4723             case Pattern.CANON_EQ:
4724             case Pattern.UNIX_LINES:
4725             case Pattern.LITERAL:
4726             case Pattern.UNICODE_CHARACTER_CLASS:
4727             case Pattern.COMMENTS:
4728                 // valid flag, continue
4729                 break;
4730             default:
4731                 try {
4732                     Pattern.compile(".", flag);
4733                     failCount++;
4734                 } catch (IllegalArgumentException expected) {
4735                 }
4736             }
4737         }
4738         report("Invalid compile flags");
4739     }
4740 
4741     // This test is for 8158482
4742     private static void embeddedFlags() throws Exception {
4743         try {
4744             Pattern.compile("(?i).(?-i).");
4745             Pattern.compile("(?m).(?-m).");
4746             Pattern.compile("(?s).(?-s).");
4747             Pattern.compile("(?d).(?-d).");
4748             Pattern.compile("(?u).(?-u).");
4749             Pattern.compile("(?c).(?-c).");
4750             Pattern.compile("(?x).(?-x).");
4751             Pattern.compile("(?U).(?-U).");
4752             Pattern.compile("(?imsducxU).(?-imsducxU).");
4753         } catch (PatternSyntaxException x) {
4754             failCount++;
4755         }
4756         report("Embedded flags");
4757     }
4758 
4759     private static void grapheme() throws Exception {
4760         Files.lines(UCDFiles.GRAPHEME_BREAK_TEST)
4761             .filter( ln -> ln.length() != 0 && !ln.startsWith("#") )
4762             .forEach( ln -> {
4763                     ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", "");
4764                     // System.out.println(str);
4765                     String[] strs = ln.split("\u00f7|\u00d7");
4766                     StringBuilder src = new StringBuilder();
4767                     ArrayList<String> graphemes = new ArrayList<>();
4768                     StringBuilder buf = new StringBuilder();
4769                     int offBk = 0;
4770                     for (String str : strs) {
4771                         if (str.length() == 0)  // first empty str
4772                             continue;
4773                         int cp = Integer.parseInt(str, 16);
4774                         src.appendCodePoint(cp);
4775                         buf.appendCodePoint(cp);
4776                         offBk += (str.length() + 1);
4777                         if (ln.charAt(offBk) == '\u00f7') {    // DIV
4778                             graphemes.add(buf.toString());
4779                             buf = new StringBuilder();
4780                         }
4781                     }
4782                     Pattern p = Pattern.compile("\\X");
4783                     Matcher m = p.matcher(src.toString());
4784                     Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}");
4785                     for (String g : graphemes) {
4786                         // System.out.printf("     grapheme:=[%s]%n", g);
4787                         // (1) test \\X directly
4788                         if (!m.find() || !m.group().equals(g)) {
4789                             System.out.println("Failed \\X [" + ln + "] : " + g);
4790                             failCount++;
4791                         }
4792                         // (2) test \\b{g} + \\X  via Scanner
4793                         boolean hasNext = s.hasNext(p);
4794                         // if (!s.hasNext() || !s.next().equals(next)) {
4795                         if (!s.hasNext(p) || !s.next(p).equals(g)) {
4796                             System.out.println("Failed b{g} [" + ln + "] : " + g);
4797                             failCount++;
4798                         }
4799                     }
4800                 });
4801         // some sanity checks
4802         if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() ||
4803             !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() ||
4804             !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches())
4805             failCount++;
4806         // make sure "\b{n}" still works
4807         if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches())
4808             failCount++;
4809         report("Unicode extended grapheme cluster");
4810     }
4811 
4812     // hangup/timeout if go into exponential backtracking
4813     private static void expoBacktracking() throws Exception {
4814 
4815         Object[][] patternMatchers = {
4816             // 6328855
4817             { "(.*\n*)*",
4818               "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)",
4819               false },
4820             // 6192895
4821             { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4822               "Hello World this is a test this is a test this is a test A",
4823               true },
4824             { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4825               "Hello World this is a test this is a test this is a test \u4e00 ",
4826               false },
4827             { " *([a-z0-9]+ *)+",
4828               "hello world this is a test this is a test this is a test A",
4829               false },
4830             // 4771934 [FIXED] #5013651?
4831             { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4832               "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com",
4833               true },
4834             // 4866249 [FIXED]
4835             { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>",
4836               "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">",
4837               true },
4838             { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4839               "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com",
4840               false },
4841             // 6345469
4842             { "((<[^>]+>)?(((\\s)?)*(\\&nbsp;)?)*((\\s)?)*)+",
4843               "&nbsp;&nbsp; < br/> &nbsp; < / p> <p> <html> <adfasfdasdf>&nbsp; </p>",
4844               true }, // --> matched
4845             { "((<[^>]+>)?(((\\s)?)*(\\&nbsp;)?)*((\\s)?)*)+",
4846               "&nbsp;&nbsp; < br/> &nbsp; < / p> <p> <html> <adfasfdasdf>&nbsp; p </p>",
4847               false },
4848             // 5026912
4849             { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$",
4850               "156580451111112225588087755221111111566969655555555",
4851               false},
4852             // 6988218
4853             { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')",
4854               "'%)) order by ANGEBOT.ID",
4855               false},    // find
4856             // 6693451
4857             { "^(\\s*foo\\s*)*$",
4858               "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo",
4859               true },
4860             { "^(\\s*foo\\s*)*$",
4861               "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo",
4862               false
4863             },
4864             // 7006761
4865             { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true},
4866             { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false},
4867             // 8140212
4868             { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)",
4869               "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()",
4870               false
4871             },
4872             { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true},
4873             { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false},
4874 
4875             { "(x+)*y",  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true },
4876             { "(x+)*y",  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4877 
4878             { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true},
4879             { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4880 
4881             { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false},
4882 
4883             /* not fixed
4884             //8132141   --->    second level exponential backtracking
4885             { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*",
4886               "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" },
4887             */
4888         };
4889 
4890         for (Object[] pm : patternMatchers) {
4891             String p = (String)pm[0];
4892             String s = (String)pm[1];
4893             boolean r = (Boolean)pm[2];
4894             if (r != Pattern.compile(p).matcher(s).matches()) {
4895                 failCount++;
4896             }
4897         }
4898     }
4899 
4900     private static void invalidGroupName() {
4901         // Invalid start of a group name
4902         for (String groupName : List.of("", ".", "0", "\u0040", "\u005b",
4903                 "\u0060", "\u007b", "\u0416")) {
4904             for (String pat : List.of("(?<" + groupName + ">)",
4905                     "\\k<" + groupName + ">")) {
4906                 try {
4907                     Pattern.compile(pat);
4908                     failCount++;
4909                 } catch (PatternSyntaxException e) {
4910                     if (!e.getMessage().startsWith(
4911                             "capturing group name does not start with a"
4912                             + " Latin letter")) {
4913                         failCount++;
4914                     }
4915                 }
4916             }
4917         }
4918         // Invalid char in a group name
4919         for (String groupName : List.of("a.", "b\u0040", "c\u005b",
4920                 "d\u0060", "e\u007b", "f\u0416")) {
4921             for (String pat : List.of("(?<" + groupName + ">)",
4922                     "\\k<" + groupName + ">")) {
4923                 try {
4924                     Pattern.compile(pat);
4925                     failCount++;
4926                 } catch (PatternSyntaxException e) {
4927                     if (!e.getMessage().startsWith(
4928                             "named capturing group is missing trailing '>'")) {
4929                         failCount++;
4930                     }
4931                 }
4932             }
4933         }
4934         report("Invalid capturing group names");
4935     }
4936 
4937     private static void illegalRepetitionRange() {
4938         for (String rep : List.of("", "x", ".", ",", "-1", "2147483648",
4939                 "4294967296", "4294967297", "9223372032559808512",
4940                 "18446744073709551615", "420000000000000000000",
4941                 "4294967296,", "4294967297,", "4294967296,", "4294967297,",
4942                 "9223372032559808512,", "18446744073709551615,",
4943                 "420000000000000000000,", "4294967296,4294967296",
4944                 "4294967296,4294967298", "4294967297,4294967299",
4945                 "4294967297,42", "9223372032559808514,42",
4946                 "84467440737095516150123456,84467440737095516150123457",
4947                 "0,4294967296", "42,4294967397", "1,9223372032559808514",
4948                 "0,84467440737095515690237952"))
4949         {
4950             String pat = ".{" + rep + "}";
4951             try {
4952                 Pattern.compile(pat);
4953                 failCount++;
4954                 System.out.println("Expected to fail. Pattern: " + pat);
4955             } catch (PatternSyntaxException e) {
4956                 if (!e.getMessage().startsWith("Illegal repetition")) {
4957                     failCount++;
4958                     System.out.println("Unexpected error message: " + e.getMessage());
4959                 }
4960             } catch (Throwable t) {
4961                 failCount++;
4962                 System.out.println("Unexpected exception: " + t);
4963             }
4964         }
4965         report("illegalRepetitionRange");
4966     }
4967 }