rev 57941 : [mq]: 8235812-Unicode-linebreak-with-quantifier-does-not-match-valid-input

   1 /*
   2  * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /**
  25  * @test
  26  * @summary tests RegExp framework (use -Dseed=X to set PRNG seed)
  27  * @author Mike McCloskey
  28  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
  29  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
  30  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  31  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  32  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  33  * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
  34  * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
  35  * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
  36  * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
  37  * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
  38  * 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812
  39  *
  40  * @library /test/lib
  41  * @library /lib/testlibrary/java/lang
  42  * @build jdk.test.lib.RandomFactory
  43  * @run main RegExTest
  44  * @key randomness
  45  */
  46 
  47 import java.io.BufferedReader;
  48 import java.io.ByteArrayInputStream;
  49 import java.io.ByteArrayOutputStream;
  50 import java.io.File;
  51 import java.io.FileInputStream;
  52 import java.io.InputStreamReader;
  53 import java.io.ObjectInputStream;
  54 import java.io.ObjectOutputStream;
  55 import java.math.BigInteger;
  56 import java.nio.CharBuffer;
  57 import java.nio.file.Files;
  58 import java.util.ArrayList;
  59 import java.util.Arrays;
  60 import java.util.HashMap;
  61 import java.util.List;
  62 import java.util.Map;
  63 import java.util.Random;
  64 import java.util.Scanner;
  65 import java.util.function.Function;
  66 import java.util.function.Predicate;
  67 import java.util.regex.Matcher;
  68 import java.util.regex.MatchResult;
  69 import java.util.regex.Pattern;
  70 import java.util.regex.PatternSyntaxException;
  71 import jdk.test.lib.RandomFactory;
  72 
  73 /**
  74  * This is a test class created to check the operation of
  75  * the Pattern and Matcher classes.
  76  */
  77 public class RegExTest {
  78 
  79     private static Random generator = RandomFactory.getRandom();
  80     private static boolean failure = false;
  81     private static int failCount = 0;
  82     private static String firstFailure = null;
  83 
  84     /**
  85      * Main to interpret arguments and run several tests.
  86      *
  87      */
  88     public static void main(String[] args) throws Exception {
  89         // Most of the tests are in a file
  90         processFile("TestCases.txt");
  91         //processFile("PerlCases.txt");
  92         processFile("BMPTestCases.txt");
  93         processFile("SupplementaryTestCases.txt");
  94 
  95         // These test many randomly generated char patterns
  96         bm();
  97         slice();
  98 
  99         // These are hard to put into the file
 100         escapes();
 101         blankInput();
 102 
 103         // Substitition tests on randomly generated sequences
 104         globalSubstitute();
 105         stringbufferSubstitute();
 106         stringbuilderSubstitute();
 107 
 108         substitutionBasher();
 109         substitutionBasher2();
 110 
 111         // Canonical Equivalence
 112         ceTest();
 113 
 114         // Anchors
 115         anchorTest();
 116 
 117         // boolean match calls
 118         matchesTest();
 119         lookingAtTest();
 120 
 121         // Pattern API
 122         patternMatchesTest();
 123 
 124         // Misc
 125         lookbehindTest();
 126         nullArgumentTest();
 127         backRefTest();
 128         groupCaptureTest();
 129         caretTest();
 130         charClassTest();
 131         emptyPatternTest();
 132         findIntTest();
 133         group0Test();
 134         longPatternTest();
 135         octalTest();
 136         ampersandTest();
 137         negationTest();
 138         splitTest();
 139         appendTest();
 140         caseFoldingTest();
 141         commentsTest();
 142         unixLinesTest();
 143         replaceFirstTest();
 144         gTest();
 145         zTest();
 146         serializeTest();
 147         reluctantRepetitionTest();
 148         multilineDollarTest();
 149         dollarAtEndTest();
 150         caretBetweenTerminatorsTest();
 151         // This RFE rejected in Tiger numOccurrencesTest();
 152         javaCharClassTest();
 153         nonCaptureRepetitionTest();
 154         notCapturedGroupCurlyMatchTest();
 155         escapedSegmentTest();
 156         literalPatternTest();
 157         literalReplacementTest();
 158         regionTest();
 159         toStringTest();
 160         negatedCharClassTest();
 161         findFromTest();
 162         boundsTest();
 163         unicodeWordBoundsTest();
 164         caretAtEndTest();
 165         wordSearchTest();
 166         hitEndTest();
 167         toMatchResultTest();
 168         toMatchResultTest2();
 169         surrogatesInClassTest();
 170         removeQEQuotingTest();
 171         namedGroupCaptureTest();
 172         nonBmpClassComplementTest();
 173         unicodePropertiesTest();
 174         unicodeHexNotationTest();
 175         unicodeClassesTest();
 176         unicodeCharacterNameTest();
 177         horizontalAndVerticalWSTest();
 178         linebreakTest();
 179         branchTest();
 180         groupCurlyNotFoundSuppTest();
 181         groupCurlyBackoffTest();
 182         patternAsPredicate();
 183         patternAsMatchPredicate();
 184         invalidFlags();
 185         embeddedFlags();
 186         grapheme();
 187         expoBacktracking();
 188         invalidGroupName();
 189         illegalRepetitionRange();
 190         surrogatePairWithCanonEq();
 191         lineBreakWithQuantifier();
 192 
 193         if (failure) {
 194             throw new
 195                 RuntimeException("RegExTest failed, 1st failure: " +
 196                                  firstFailure);
 197         } else {
 198             System.err.println("OKAY: All tests passed.");
 199         }
 200     }
 201 
 202     // Utility functions
 203 
 204     private static String getRandomAlphaString(int length) {
 205         StringBuffer buf = new StringBuffer(length);
 206         for (int i=0; i<length; i++) {
 207             char randChar = (char)(97 + generator.nextInt(26));
 208             buf.append(randChar);
 209         }
 210         return buf.toString();
 211     }
 212 
 213     private static void check(Matcher m, String expected) {
 214         m.find();
 215         if (!m.group().equals(expected))
 216             failCount++;
 217     }
 218 
 219     private static void check(Matcher m, String result, boolean expected) {
 220         m.find();
 221         if (m.group().equals(result) != expected)
 222             failCount++;
 223     }
 224 
 225     private static void check(Pattern p, String s, boolean expected) {
 226         if (p.matcher(s).find() != expected)
 227             failCount++;
 228     }
 229 
 230     private static void check(String p, String s, boolean expected) {
 231         Matcher matcher = Pattern.compile(p).matcher(s);
 232         if (matcher.find() != expected)
 233             failCount++;
 234     }
 235 
 236     private static void check(String p, char c, boolean expected) {
 237         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 238         Pattern pattern = Pattern.compile(propertyPattern);
 239         char[] ca = new char[1]; ca[0] = c;
 240         Matcher matcher = pattern.matcher(new String(ca));
 241         if (!matcher.find())
 242             failCount++;
 243     }
 244 
 245     private static void check(String p, int codePoint, boolean expected) {
 246         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 247         Pattern pattern = Pattern.compile(propertyPattern);
 248         char[] ca = Character.toChars(codePoint);
 249         Matcher matcher = pattern.matcher(new String(ca));
 250         if (!matcher.find())
 251             failCount++;
 252     }
 253 
 254     private static void check(String p, int flag, String input, String s,
 255                               boolean expected)
 256     {
 257         Pattern pattern = Pattern.compile(p, flag);
 258         Matcher matcher = pattern.matcher(input);
 259         if (expected)
 260             check(matcher, s, expected);
 261         else
 262             check(pattern, input, false);
 263     }
 264 
 265     private static void report(String testName) {
 266         int spacesToAdd = 30 - testName.length();
 267         StringBuffer paddedNameBuffer = new StringBuffer(testName);
 268         for (int i=0; i<spacesToAdd; i++)
 269             paddedNameBuffer.append(" ");
 270         String paddedName = paddedNameBuffer.toString();
 271         System.err.println(paddedName + ": " +
 272                            (failCount==0 ? "Passed":"Failed("+failCount+")"));
 273         if (failCount > 0) {
 274             failure = true;
 275 
 276             if (firstFailure == null) {
 277                 firstFailure = testName;
 278             }
 279         }
 280 
 281         failCount = 0;
 282     }
 283 
 284     /**
 285      * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
 286      * supplementary characters. This method does NOT fully take care
 287      * of the regex syntax.
 288      */
 289     private static String toSupplementaries(String s) {
 290         int length = s.length();
 291         StringBuffer sb = new StringBuffer(length * 2);
 292 
 293         for (int i = 0; i < length; ) {
 294             char c = s.charAt(i++);
 295             if (c == '\\') {
 296                 sb.append(c);
 297                 if (i < length) {
 298                     c = s.charAt(i++);
 299                     sb.append(c);
 300                     if (c == 'u') {
 301                         // assume no syntax error
 302                         sb.append(s.charAt(i++));
 303                         sb.append(s.charAt(i++));
 304                         sb.append(s.charAt(i++));
 305                         sb.append(s.charAt(i++));
 306                     }
 307                 }
 308             } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
 309                 sb.append('\ud800').append((char)('\udc00'+c));
 310             } else {
 311                 sb.append(c);
 312             }
 313         }
 314         return sb.toString();
 315     }
 316 
 317     // Regular expression tests
 318 
 319     // This is for bug 6178785
 320     // Test if an expected NPE gets thrown when passing in a null argument
 321     private static boolean check(Runnable test) {
 322         try {
 323             test.run();
 324             failCount++;
 325             return false;
 326         } catch (NullPointerException npe) {
 327             return true;
 328         }
 329     }
 330 
 331     private static void nullArgumentTest() {
 332         check(() -> Pattern.compile(null));
 333         check(() -> Pattern.matches(null, null));
 334         check(() -> Pattern.matches("xyz", null));
 335         check(() -> Pattern.quote(null));
 336         check(() -> Pattern.compile("xyz").split(null));
 337         check(() -> Pattern.compile("xyz").matcher(null));
 338 
 339         final Matcher m = Pattern.compile("xyz").matcher("xyz");
 340         m.matches();
 341         check(() -> m.appendTail((StringBuffer) null));
 342         check(() -> m.appendTail((StringBuilder)null));
 343         check(() -> m.replaceAll((String) null));
 344         check(() -> m.replaceAll((Function<MatchResult, String>)null));
 345         check(() -> m.replaceFirst((String)null));
 346         check(() -> m.replaceFirst((Function<MatchResult, String>) null));
 347         check(() -> m.appendReplacement((StringBuffer)null, null));
 348         check(() -> m.appendReplacement((StringBuilder)null, null));
 349         check(() -> m.reset(null));
 350         check(() -> Matcher.quoteReplacement(null));
 351         //check(() -> m.usePattern(null));
 352 
 353         report("Null Argument");
 354     }
 355 
 356     // This is for bug6635133
 357     // Test if surrogate pair in Unicode escapes can be handled correctly.
 358     private static void surrogatesInClassTest() throws Exception {
 359         Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
 360         Matcher matcher = pattern.matcher("\ud834\udd22");
 361         if (!matcher.find())
 362             failCount++;
 363 
 364         report("Surrogate pair in Unicode escape");
 365     }
 366 
 367     // This is for bug6990617
 368     // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
 369     // char encoding is only 2 or 3 digits instead of 4 and the first quoted
 370     // char is an octal digit.
 371     private static void removeQEQuotingTest() throws Exception {
 372         Pattern pattern =
 373             Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
 374         Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
 375         if (!matcher.find())
 376             failCount++;
 377 
 378         report("Remove Q/E Quoting");
 379     }
 380 
 381     // This is for bug 4988891
 382     // Test toMatchResult to see that it is a copy of the Matcher
 383     // that is not affected by subsequent operations on the original
 384     private static void toMatchResultTest() throws Exception {
 385         Pattern pattern = Pattern.compile("squid");
 386         Matcher matcher = pattern.matcher(
 387             "agiantsquidofdestinyasmallsquidoffate");
 388         matcher.find();
 389         int matcherStart1 = matcher.start();
 390         MatchResult mr = matcher.toMatchResult();
 391         if (mr == matcher)
 392             failCount++;
 393         int resultStart1 = mr.start();
 394         if (matcherStart1 != resultStart1)
 395             failCount++;
 396         matcher.find();
 397         int matcherStart2 = matcher.start();
 398         int resultStart2 = mr.start();
 399         if (matcherStart2 == resultStart2)
 400             failCount++;
 401         if (resultStart1 != resultStart2)
 402             failCount++;
 403         MatchResult mr2 = matcher.toMatchResult();
 404         if (mr == mr2)
 405             failCount++;
 406         if (mr2.start() != matcherStart2)
 407             failCount++;
 408         report("toMatchResult is a copy");
 409     }
 410 
 411     private static void checkExpectedISE(Runnable test) {
 412         try {
 413             test.run();
 414             failCount++;
 415         } catch (IllegalStateException x) {
 416         } catch (IndexOutOfBoundsException xx) {
 417             failCount++;
 418         }
 419     }
 420 
 421     private static void checkExpectedIOOE(Runnable test) {
 422         try {
 423             test.run();
 424             failCount++;
 425         } catch (IndexOutOfBoundsException x) {}
 426     }
 427 
 428     // This is for bug 8074678
 429     // Test the result of toMatchResult throws ISE if no match is available
 430     private static void toMatchResultTest2() throws Exception {
 431         Matcher matcher = Pattern.compile("nomatch").matcher("hello world");
 432         matcher.find();
 433         MatchResult mr = matcher.toMatchResult();
 434 
 435         checkExpectedISE(() -> mr.start());
 436         checkExpectedISE(() -> mr.start(2));
 437         checkExpectedISE(() -> mr.end());
 438         checkExpectedISE(() -> mr.end(2));
 439         checkExpectedISE(() -> mr.group());
 440         checkExpectedISE(() -> mr.group(2));
 441 
 442         matcher = Pattern.compile("(match)").matcher("there is a match");
 443         matcher.find();
 444         MatchResult mr2 = matcher.toMatchResult();
 445         checkExpectedIOOE(() -> mr2.start(2));
 446         checkExpectedIOOE(() -> mr2.end(2));
 447         checkExpectedIOOE(() -> mr2.group(2));
 448 
 449         report("toMatchResult2 appropriate exceptions");
 450     }
 451 
 452     // This is for bug 5013885
 453     // Must test a slice to see if it reports hitEnd correctly
 454     private static void hitEndTest() throws Exception {
 455         // Basic test of Slice node
 456         Pattern p = Pattern.compile("^squidattack");
 457         Matcher m = p.matcher("squack");
 458         m.find();
 459         if (m.hitEnd())
 460             failCount++;
 461         m.reset("squid");
 462         m.find();
 463         if (!m.hitEnd())
 464             failCount++;
 465 
 466         // Test Slice, SliceA and SliceU nodes
 467         for (int i=0; i<3; i++) {
 468             int flags = 0;
 469             if (i==1) flags = Pattern.CASE_INSENSITIVE;
 470             if (i==2) flags = Pattern.UNICODE_CASE;
 471             p = Pattern.compile("^abc", flags);
 472             m = p.matcher("ad");
 473             m.find();
 474             if (m.hitEnd())
 475                 failCount++;
 476             m.reset("ab");
 477             m.find();
 478             if (!m.hitEnd())
 479                 failCount++;
 480         }
 481 
 482         // Test Boyer-Moore node
 483         p = Pattern.compile("catattack");
 484         m = p.matcher("attack");
 485         m.find();
 486         if (!m.hitEnd())
 487             failCount++;
 488 
 489         p = Pattern.compile("catattack");
 490         m = p.matcher("attackattackattackcatatta");
 491         m.find();
 492         if (!m.hitEnd())
 493             failCount++;
 494 
 495         // 8184706: Matching u+0d at EOL against \R should hit-end
 496         p = Pattern.compile("...\\R");
 497         m = p.matcher("cat" + (char)0x0a);
 498         m.find();
 499         if (m.hitEnd())
 500             failCount++;
 501 
 502         m = p.matcher("cat" + (char)0x0d);
 503         m.find();
 504         if (!m.hitEnd())
 505             failCount++;
 506 
 507         m = p.matcher("cat" + (char)0x0d + (char)0x0a);
 508         m.find();
 509         if (m.hitEnd())
 510             failCount++;
 511 
 512         report("hitEnd");
 513     }
 514 
 515     // This is for bug 4997476
 516     // It is weird code submitted by customer demonstrating a regression
 517     private static void wordSearchTest() throws Exception {
 518         String testString = new String("word1 word2 word3");
 519         Pattern p = Pattern.compile("\\b");
 520         Matcher m = p.matcher(testString);
 521         int position = 0;
 522         int start = 0;
 523         while (m.find(position)) {
 524             start = m.start();
 525             if (start == testString.length())
 526                 break;
 527             if (m.find(start+1)) {
 528                 position = m.start();
 529             } else {
 530                 position = testString.length();
 531             }
 532             if (testString.substring(start, position).equals(" "))
 533                 continue;
 534             if (!testString.substring(start, position-1).startsWith("word"))
 535                 failCount++;
 536         }
 537         report("Customer word search");
 538     }
 539 
 540     // This is for bug 4994840
 541     private static void caretAtEndTest() throws Exception {
 542         // Problem only occurs with multiline patterns
 543         // containing a beginning-of-line caret "^" followed
 544         // by an expression that also matches the empty string.
 545         Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
 546         Matcher matcher = pattern.matcher("\r");
 547         matcher.find();
 548         matcher.find();
 549         report("Caret at end");
 550     }
 551 
 552     // This test is for 4979006
 553     // Check to see if word boundary construct properly handles unicode
 554     // non spacing marks
 555     private static void unicodeWordBoundsTest() throws Exception {
 556         String spaces = "  ";
 557         String wordChar = "a";
 558         String nsm = "\u030a";
 559 
 560         assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
 561 
 562         Pattern pattern = Pattern.compile("\\b");
 563         Matcher matcher = pattern.matcher("");
 564         // S=other B=word character N=non spacing mark .=word boundary
 565         // SS.BB.SS
 566         String input = spaces + wordChar + wordChar + spaces;
 567         twoFindIndexes(input, matcher, 2, 4);
 568         // SS.BBN.SS
 569         input = spaces + wordChar +wordChar + nsm + spaces;
 570         twoFindIndexes(input, matcher, 2, 5);
 571         // SS.BN.SS
 572         input = spaces + wordChar + nsm + spaces;
 573         twoFindIndexes(input, matcher, 2, 4);
 574         // SS.BNN.SS
 575         input = spaces + wordChar + nsm + nsm + spaces;
 576         twoFindIndexes(input, matcher, 2, 5);
 577         // SSN.BB.SS
 578         input = spaces + nsm + wordChar + wordChar + spaces;
 579         twoFindIndexes(input, matcher, 3, 5);
 580         // SS.BNB.SS
 581         input = spaces + wordChar + nsm + wordChar + spaces;
 582         twoFindIndexes(input, matcher, 2, 5);
 583         // SSNNSS
 584         input = spaces + nsm + nsm + spaces;
 585         matcher.reset(input);
 586         if (matcher.find())
 587             failCount++;
 588         // SSN.BBN.SS
 589         input = spaces + nsm + wordChar + wordChar + nsm + spaces;
 590         twoFindIndexes(input, matcher, 3, 6);
 591 
 592         report("Unicode word boundary");
 593     }
 594 
 595     private static void twoFindIndexes(String input, Matcher matcher, int a,
 596                                        int b) throws Exception
 597     {
 598         matcher.reset(input);
 599         matcher.find();
 600         if (matcher.start() != a)
 601             failCount++;
 602         matcher.find();
 603         if (matcher.start() != b)
 604             failCount++;
 605     }
 606 
 607     // This test is for 6284152
 608     static void check(String regex, String input, String[] expected) {
 609         List<String> result = new ArrayList<String>();
 610         Pattern p = Pattern.compile(regex);
 611         Matcher m = p.matcher(input);
 612         while (m.find()) {
 613             result.add(m.group());
 614         }
 615         if (!Arrays.asList(expected).equals(result))
 616             failCount++;
 617     }
 618 
 619     private static void lookbehindTest() throws Exception {
 620         //Positive
 621         check("(?<=%.{0,5})foo\\d",
 622               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 623               new String[]{"foo1", "foo2", "foo3"});
 624 
 625         //boundary at end of the lookbehind sub-regex should work consistently
 626         //with the boundary just after the lookbehind sub-regex
 627         check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
 628         check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
 629         check("(?<!abc )\\bfoo", "abc foo", new String[0]);
 630         check("(?<!abc \\b)foo", "abc foo", new String[0]);
 631 
 632         //Negative
 633         check("(?<!%.{0,5})foo\\d",
 634               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 635               new String[] {"foo4", "foo5"});
 636 
 637         //Positive greedy
 638         check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
 639 
 640         //Positive reluctant
 641         check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
 642 
 643         //supplementary
 644         check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 645               new String[] {"fo\ud800\udc00o"});
 646         check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 647               new String[] {"fo\ud800\udc00o"});
 648         check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
 649               new String[] {"fo\ud800\udc00o"});
 650         check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
 651               new String[] {"fo\ud800\udc00o"});
 652         report("Lookbehind");
 653     }
 654 
 655     // This test is for 4938995
 656     // Check to see if weak region boundaries are transparent to
 657     // lookahead and lookbehind constructs
 658     private static void boundsTest() throws Exception {
 659         String fullMessage = "catdogcat";
 660         Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
 661         Matcher matcher = pattern.matcher("catdogca");
 662         matcher.useTransparentBounds(true);
 663         if (matcher.find())
 664             failCount++;
 665         matcher.reset("atdogcat");
 666         if (matcher.find())
 667             failCount++;
 668         matcher.reset(fullMessage);
 669         if (!matcher.find())
 670             failCount++;
 671         matcher.reset(fullMessage);
 672         matcher.region(0,9);
 673         if (!matcher.find())
 674             failCount++;
 675         matcher.reset(fullMessage);
 676         matcher.region(0,6);
 677         if (!matcher.find())
 678             failCount++;
 679         matcher.reset(fullMessage);
 680         matcher.region(3,6);
 681         if (!matcher.find())
 682             failCount++;
 683         matcher.useTransparentBounds(false);
 684         if (matcher.find())
 685             failCount++;
 686 
 687         // Negative lookahead/lookbehind
 688         pattern = Pattern.compile("(?<!cat)dog(?!cat)");
 689         matcher = pattern.matcher("dogcat");
 690         matcher.useTransparentBounds(true);
 691         matcher.region(0,3);
 692         if (matcher.find())
 693             failCount++;
 694         matcher.reset("catdog");
 695         matcher.region(3,6);
 696         if (matcher.find())
 697             failCount++;
 698         matcher.useTransparentBounds(false);
 699         matcher.reset("dogcat");
 700         matcher.region(0,3);
 701         if (!matcher.find())
 702             failCount++;
 703         matcher.reset("catdog");
 704         matcher.region(3,6);
 705         if (!matcher.find())
 706             failCount++;
 707 
 708         report("Region bounds transparency");
 709     }
 710 
 711     // This test is for 4945394
 712     private static void findFromTest() throws Exception {
 713         String message = "This is 40 $0 message.";
 714         Pattern pat = Pattern.compile("\\$0");
 715         Matcher match = pat.matcher(message);
 716         if (!match.find())
 717             failCount++;
 718         if (match.find())
 719             failCount++;
 720         if (match.find())
 721             failCount++;
 722         report("Check for alternating find");
 723     }
 724 
 725     // This test is for 4872664 and 4892980
 726     private static void negatedCharClassTest() throws Exception {
 727         Pattern pattern = Pattern.compile("[^>]");
 728         Matcher matcher = pattern.matcher("\u203A");
 729         if (!matcher.matches())
 730             failCount++;
 731         pattern = Pattern.compile("[^fr]");
 732         matcher = pattern.matcher("a");
 733         if (!matcher.find())
 734             failCount++;
 735         matcher.reset("\u203A");
 736         if (!matcher.find())
 737             failCount++;
 738         String s = "for";
 739         String result[] = s.split("[^fr]");
 740         if (!result[0].equals("f"))
 741             failCount++;
 742         if (!result[1].equals("r"))
 743             failCount++;
 744         s = "f\u203Ar";
 745         result = s.split("[^fr]");
 746         if (!result[0].equals("f"))
 747             failCount++;
 748         if (!result[1].equals("r"))
 749             failCount++;
 750 
 751         // Test adding to bits, subtracting a node, then adding to bits again
 752         pattern = Pattern.compile("[^f\u203Ar]");
 753         matcher = pattern.matcher("a");
 754         if (!matcher.find())
 755             failCount++;
 756         matcher.reset("f");
 757         if (matcher.find())
 758             failCount++;
 759         matcher.reset("\u203A");
 760         if (matcher.find())
 761             failCount++;
 762         matcher.reset("r");
 763         if (matcher.find())
 764             failCount++;
 765         matcher.reset("\u203B");
 766         if (!matcher.find())
 767             failCount++;
 768 
 769         // Test subtracting a node, adding to bits, subtracting again
 770         pattern = Pattern.compile("[^\u203Ar\u203B]");
 771         matcher = pattern.matcher("a");
 772         if (!matcher.find())
 773             failCount++;
 774         matcher.reset("\u203A");
 775         if (matcher.find())
 776             failCount++;
 777         matcher.reset("r");
 778         if (matcher.find())
 779             failCount++;
 780         matcher.reset("\u203B");
 781         if (matcher.find())
 782             failCount++;
 783         matcher.reset("\u203C");
 784         if (!matcher.find())
 785             failCount++;
 786 
 787         report("Negated Character Class");
 788     }
 789 
 790     // This test is for 4628291
 791     private static void toStringTest() throws Exception {
 792         Pattern pattern = Pattern.compile("b+");
 793         if (pattern.toString() != "b+")
 794             failCount++;
 795         Matcher matcher = pattern.matcher("aaabbbccc");
 796         String matcherString = matcher.toString(); // unspecified
 797         matcher.find();
 798         matcherString = matcher.toString(); // unspecified
 799         matcher.region(0,3);
 800         matcherString = matcher.toString(); // unspecified
 801         matcher.reset();
 802         matcherString = matcher.toString(); // unspecified
 803         report("toString");
 804     }
 805 
 806     // This test is for 4808962
 807     private static void literalPatternTest() throws Exception {
 808         int flags = Pattern.LITERAL;
 809 
 810         Pattern pattern = Pattern.compile("abc\\t$^", flags);
 811         check(pattern, "abc\\t$^", true);
 812 
 813         pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
 814         check(pattern, "abc\\t$^", true);
 815 
 816         pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
 817         check(pattern, "\\Qa^$bcabc\\E", true);
 818         check(pattern, "a^$bcabc", false);
 819 
 820         pattern = Pattern.compile("\\\\Q\\\\E");
 821         check(pattern, "\\Q\\E", true);
 822 
 823         pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
 824         check(pattern, "abcefg\\Q\\Ehij", true);
 825 
 826         pattern = Pattern.compile("\\\\\\Q\\\\E");
 827         check(pattern, "\\\\\\\\", true);
 828 
 829         pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
 830         check(pattern, "\\Qa^$bcabc\\E", true);
 831         check(pattern, "a^$bcabc", false);
 832 
 833         pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
 834         check(pattern, "\\Qabc\\Edef", true);
 835         check(pattern, "abcdef", false);
 836 
 837         pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
 838         check(pattern, "abc\\Edef", true);
 839         check(pattern, "abcdef", false);
 840 
 841         pattern = Pattern.compile(Pattern.quote("\\E"));
 842         check(pattern, "\\E", true);
 843 
 844         pattern = Pattern.compile("((((abc.+?:)", flags);
 845         check(pattern, "((((abc.+?:)", true);
 846 
 847         flags |= Pattern.MULTILINE;
 848 
 849         pattern = Pattern.compile("^cat$", flags);
 850         check(pattern, "abc^cat$def", true);
 851         check(pattern, "cat", false);
 852 
 853         flags |= Pattern.CASE_INSENSITIVE;
 854 
 855         pattern = Pattern.compile("abcdef", flags);
 856         check(pattern, "ABCDEF", true);
 857         check(pattern, "AbCdEf", true);
 858 
 859         flags |= Pattern.DOTALL;
 860 
 861         pattern = Pattern.compile("a...b", flags);
 862         check(pattern, "A...b", true);
 863         check(pattern, "Axxxb", false);
 864 
 865         flags |= Pattern.CANON_EQ;
 866 
 867         Pattern p = Pattern.compile("testa\u030a", flags);
 868         check(pattern, "testa\u030a", false);
 869         check(pattern, "test\u00e5", false);
 870 
 871         // Supplementary character test
 872         flags = Pattern.LITERAL;
 873 
 874         pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
 875         check(pattern, toSupplementaries("abc\\t$^"), true);
 876 
 877         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
 878         check(pattern, toSupplementaries("abc\\t$^"), true);
 879 
 880         pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
 881         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 882         check(pattern, toSupplementaries("a^$bcabc"), false);
 883 
 884         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
 885         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 886         check(pattern, toSupplementaries("a^$bcabc"), false);
 887 
 888         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
 889         check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
 890         check(pattern, toSupplementaries("abcdef"), false);
 891 
 892         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
 893         check(pattern, toSupplementaries("abc\\Edef"), true);
 894         check(pattern, toSupplementaries("abcdef"), false);
 895 
 896         pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
 897         check(pattern, toSupplementaries("((((abc.+?:)"), true);
 898 
 899         flags |= Pattern.MULTILINE;
 900 
 901         pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
 902         check(pattern, toSupplementaries("abc^cat$def"), true);
 903         check(pattern, toSupplementaries("cat"), false);
 904 
 905         flags |= Pattern.DOTALL;
 906 
 907         // note: this is case-sensitive.
 908         pattern = Pattern.compile(toSupplementaries("a...b"), flags);
 909         check(pattern, toSupplementaries("a...b"), true);
 910         check(pattern, toSupplementaries("axxxb"), false);
 911 
 912         flags |= Pattern.CANON_EQ;
 913 
 914         String t = toSupplementaries("test");
 915         p = Pattern.compile(t + "a\u030a", flags);
 916         check(pattern, t + "a\u030a", false);
 917         check(pattern, t + "\u00e5", false);
 918 
 919         report("Literal pattern");
 920     }
 921 
 922     // This test is for 4803179
 923     // This test is also for 4808962, replacement parts
 924     private static void literalReplacementTest() throws Exception {
 925         int flags = Pattern.LITERAL;
 926 
 927         Pattern pattern = Pattern.compile("abc", flags);
 928         Matcher matcher = pattern.matcher("zzzabczzz");
 929         String replaceTest = "$0";
 930         String result = matcher.replaceAll(replaceTest);
 931         if (!result.equals("zzzabczzz"))
 932             failCount++;
 933 
 934         matcher.reset();
 935         String literalReplacement = matcher.quoteReplacement(replaceTest);
 936         result = matcher.replaceAll(literalReplacement);
 937         if (!result.equals("zzz$0zzz"))
 938             failCount++;
 939 
 940         matcher.reset();
 941         replaceTest = "\\t$\\$";
 942         literalReplacement = matcher.quoteReplacement(replaceTest);
 943         result = matcher.replaceAll(literalReplacement);
 944         if (!result.equals("zzz\\t$\\$zzz"))
 945             failCount++;
 946 
 947         // Supplementary character test
 948         pattern = Pattern.compile(toSupplementaries("abc"), flags);
 949         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
 950         replaceTest = "$0";
 951         result = matcher.replaceAll(replaceTest);
 952         if (!result.equals(toSupplementaries("zzzabczzz")))
 953             failCount++;
 954 
 955         matcher.reset();
 956         literalReplacement = matcher.quoteReplacement(replaceTest);
 957         result = matcher.replaceAll(literalReplacement);
 958         if (!result.equals(toSupplementaries("zzz$0zzz")))
 959             failCount++;
 960 
 961         matcher.reset();
 962         replaceTest = "\\t$\\$";
 963         literalReplacement = matcher.quoteReplacement(replaceTest);
 964         result = matcher.replaceAll(literalReplacement);
 965         if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
 966             failCount++;
 967 
 968         // IAE should be thrown if backslash or '$' is the last character
 969         // in replacement string
 970         try {
 971             "\uac00".replaceAll("\uac00", "$");
 972             failCount++;
 973         } catch (IllegalArgumentException iie) {
 974         } catch (Exception e) {
 975             failCount++;
 976         }
 977         try {
 978             "\uac00".replaceAll("\uac00", "\\");
 979             failCount++;
 980         } catch (IllegalArgumentException iie) {
 981         } catch (Exception e) {
 982             failCount++;
 983         }
 984         report("Literal replacement");
 985     }
 986 
 987     // This test is for 4757029
 988     private static void regionTest() throws Exception {
 989         Pattern pattern = Pattern.compile("abc");
 990         Matcher matcher = pattern.matcher("abcdefabc");
 991 
 992         matcher.region(0,9);
 993         if (!matcher.find())
 994             failCount++;
 995         if (!matcher.find())
 996             failCount++;
 997         matcher.region(0,3);
 998         if (!matcher.find())
 999            failCount++;
1000         matcher.region(3,6);
1001         if (matcher.find())
1002            failCount++;
1003         matcher.region(0,2);
1004         if (matcher.find())
1005            failCount++;
1006 
1007         expectRegionFail(matcher, 1, -1);
1008         expectRegionFail(matcher, -1, -1);
1009         expectRegionFail(matcher, -1, 1);
1010         expectRegionFail(matcher, 5, 3);
1011         expectRegionFail(matcher, 5, 12);
1012         expectRegionFail(matcher, 12, 12);
1013 
1014         pattern = Pattern.compile("^abc$");
1015         matcher = pattern.matcher("zzzabczzz");
1016         matcher.region(0,9);
1017         if (matcher.find())
1018             failCount++;
1019         matcher.region(3,6);
1020         if (!matcher.find())
1021            failCount++;
1022         matcher.region(3,6);
1023         matcher.useAnchoringBounds(false);
1024         if (matcher.find())
1025            failCount++;
1026 
1027         // Supplementary character test
1028         pattern = Pattern.compile(toSupplementaries("abc"));
1029         matcher = pattern.matcher(toSupplementaries("abcdefabc"));
1030         matcher.region(0,9*2);
1031         if (!matcher.find())
1032             failCount++;
1033         if (!matcher.find())
1034             failCount++;
1035         matcher.region(0,3*2);
1036         if (!matcher.find())
1037            failCount++;
1038         matcher.region(1,3*2);
1039         if (matcher.find())
1040            failCount++;
1041         matcher.region(3*2,6*2);
1042         if (matcher.find())
1043            failCount++;
1044         matcher.region(0,2*2);
1045         if (matcher.find())
1046            failCount++;
1047         matcher.region(0,2*2+1);
1048         if (matcher.find())
1049            failCount++;
1050 
1051         expectRegionFail(matcher, 1*2, -1);
1052         expectRegionFail(matcher, -1, -1);
1053         expectRegionFail(matcher, -1, 1*2);
1054         expectRegionFail(matcher, 5*2, 3*2);
1055         expectRegionFail(matcher, 5*2, 12*2);
1056         expectRegionFail(matcher, 12*2, 12*2);
1057 
1058         pattern = Pattern.compile(toSupplementaries("^abc$"));
1059         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
1060         matcher.region(0,9*2);
1061         if (matcher.find())
1062             failCount++;
1063         matcher.region(3*2,6*2);
1064         if (!matcher.find())
1065            failCount++;
1066         matcher.region(3*2+1,6*2);
1067         if (matcher.find())
1068            failCount++;
1069         matcher.region(3*2,6*2-1);
1070         if (matcher.find())
1071            failCount++;
1072         matcher.region(3*2,6*2);
1073         matcher.useAnchoringBounds(false);
1074         if (matcher.find())
1075            failCount++;
1076 
1077         // JDK-8230829
1078         pattern = Pattern.compile("\\ud800\\udc61");
1079         matcher = pattern.matcher("\ud800\udc61");
1080         matcher.region(0, 1);
1081         if (matcher.find()) {
1082             failCount++;
1083             System.out.println("Matched a surrogate pair" +
1084                     " that crosses border of region");
1085         }
1086         if (!matcher.hitEnd()) {
1087             failCount++;
1088             System.out.println("Expected to hit the end when" +
1089                     " matching a surrogate pair crossing region");
1090         }
1091 
1092         report("Regions");
1093     }
1094 
1095     private static void expectRegionFail(Matcher matcher, int index1,
1096                                          int index2)
1097     {
1098         try {
1099             matcher.region(index1, index2);
1100             failCount++;
1101         } catch (IndexOutOfBoundsException ioobe) {
1102             // Correct result
1103         } catch (IllegalStateException ise) {
1104             // Correct result
1105         }
1106     }
1107 
1108     // This test is for 4803197
1109     private static void escapedSegmentTest() throws Exception {
1110 
1111         Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
1112         check(pattern, "dir1\\dir2", true);
1113 
1114         pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
1115         check(pattern, "dir1\\dir2\\", true);
1116 
1117         pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
1118         check(pattern, "dir1\\dir2\\", true);
1119 
1120         // Supplementary character test
1121         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
1122         check(pattern, toSupplementaries("dir1\\dir2"), true);
1123 
1124         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
1125         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1126 
1127         pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1128         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1129 
1130         report("Escaped segment");
1131     }
1132 
1133     // This test is for 4792284
1134     private static void nonCaptureRepetitionTest() throws Exception {
1135         String input = "abcdefgh;";
1136 
1137         String[] patterns = new String[] {
1138             "(?:\\w{4})+;",
1139             "(?:\\w{8})*;",
1140             "(?:\\w{2}){2,4};",
1141             "(?:\\w{4}){2,};",   // only matches the
1142             ".*?(?:\\w{5})+;",   //     specified minimum
1143             ".*?(?:\\w{9})*;",   //     number of reps - OK
1144             "(?:\\w{4})+?;",     // lazy repetition - OK
1145             "(?:\\w{4})++;",     // possessive repetition - OK
1146             "(?:\\w{2,}?)+;",    // non-deterministic - OK
1147             "(\\w{4})+;",        // capturing group - OK
1148         };
1149 
1150         for (int i = 0; i < patterns.length; i++) {
1151             // Check find()
1152             check(patterns[i], 0, input, input, true);
1153             // Check matches()
1154             Pattern p = Pattern.compile(patterns[i]);
1155             Matcher m = p.matcher(input);
1156 
1157             if (m.matches()) {
1158                 if (!m.group(0).equals(input))
1159                     failCount++;
1160             } else {
1161                 failCount++;
1162             }
1163         }
1164 
1165         report("Non capturing repetition");
1166     }
1167 
1168     // This test is for 6358731
1169     private static void notCapturedGroupCurlyMatchTest() throws Exception {
1170         Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1171         Matcher matcher = pattern.matcher("abcd");
1172         if (!matcher.matches() ||
1173              matcher.group(1) != null ||
1174              !matcher.group(2).equals("abcd")) {
1175             failCount++;
1176         }
1177         report("Not captured GroupCurly");
1178     }
1179 
1180     // This test is for 4706545
1181     private static void javaCharClassTest() throws Exception {
1182         for (int i=0; i<1000; i++) {
1183             char c = (char)generator.nextInt();
1184             check("{javaLowerCase}", c, Character.isLowerCase(c));
1185             check("{javaUpperCase}", c, Character.isUpperCase(c));
1186             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1187             check("{javaTitleCase}", c, Character.isTitleCase(c));
1188             check("{javaDigit}", c, Character.isDigit(c));
1189             check("{javaDefined}", c, Character.isDefined(c));
1190             check("{javaLetter}", c, Character.isLetter(c));
1191             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1192             check("{javaJavaIdentifierStart}", c,
1193                   Character.isJavaIdentifierStart(c));
1194             check("{javaJavaIdentifierPart}", c,
1195                   Character.isJavaIdentifierPart(c));
1196             check("{javaUnicodeIdentifierStart}", c,
1197                   Character.isUnicodeIdentifierStart(c));
1198             check("{javaUnicodeIdentifierPart}", c,
1199                   Character.isUnicodeIdentifierPart(c));
1200             check("{javaIdentifierIgnorable}", c,
1201                   Character.isIdentifierIgnorable(c));
1202             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1203             check("{javaWhitespace}", c, Character.isWhitespace(c));
1204             check("{javaISOControl}", c, Character.isISOControl(c));
1205             check("{javaMirrored}", c, Character.isMirrored(c));
1206 
1207         }
1208 
1209         // Supplementary character test
1210         for (int i=0; i<1000; i++) {
1211             int c = generator.nextInt(Character.MAX_CODE_POINT
1212                                       - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1213                         + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1214             check("{javaLowerCase}", c, Character.isLowerCase(c));
1215             check("{javaUpperCase}", c, Character.isUpperCase(c));
1216             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1217             check("{javaTitleCase}", c, Character.isTitleCase(c));
1218             check("{javaDigit}", c, Character.isDigit(c));
1219             check("{javaDefined}", c, Character.isDefined(c));
1220             check("{javaLetter}", c, Character.isLetter(c));
1221             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1222             check("{javaJavaIdentifierStart}", c,
1223                   Character.isJavaIdentifierStart(c));
1224             check("{javaJavaIdentifierPart}", c,
1225                   Character.isJavaIdentifierPart(c));
1226             check("{javaUnicodeIdentifierStart}", c,
1227                   Character.isUnicodeIdentifierStart(c));
1228             check("{javaUnicodeIdentifierPart}", c,
1229                   Character.isUnicodeIdentifierPart(c));
1230             check("{javaIdentifierIgnorable}", c,
1231                   Character.isIdentifierIgnorable(c));
1232             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1233             check("{javaWhitespace}", c, Character.isWhitespace(c));
1234             check("{javaISOControl}", c, Character.isISOControl(c));
1235             check("{javaMirrored}", c, Character.isMirrored(c));
1236         }
1237 
1238         report("Java character classes");
1239     }
1240 
1241     // This test is for 4523620
1242     /*
1243     private static void numOccurrencesTest() throws Exception {
1244         Pattern pattern = Pattern.compile("aaa");
1245 
1246         if (pattern.numOccurrences("aaaaaa", false) != 2)
1247             failCount++;
1248         if (pattern.numOccurrences("aaaaaa", true) != 4)
1249             failCount++;
1250 
1251         pattern = Pattern.compile("^");
1252         if (pattern.numOccurrences("aaaaaa", false) != 1)
1253             failCount++;
1254         if (pattern.numOccurrences("aaaaaa", true) != 1)
1255             failCount++;
1256 
1257         report("Number of Occurrences");
1258     }
1259     */
1260 
1261     // This test is for 4776374
1262     private static void caretBetweenTerminatorsTest() throws Exception {
1263         int flags1 = Pattern.DOTALL;
1264         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1265         int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1266         int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1267 
1268         check("^....", flags1, "test\ntest", "test", true);
1269         check(".....^", flags1, "test\ntest", "test", false);
1270         check(".....^", flags1, "test\n", "test", false);
1271         check("....^", flags1, "test\r\n", "test", false);
1272 
1273         check("^....", flags2, "test\ntest", "test", true);
1274         check("....^", flags2, "test\ntest", "test", false);
1275         check(".....^", flags2, "test\n", "test", false);
1276         check("....^", flags2, "test\r\n", "test", false);
1277 
1278         check("^....", flags3, "test\ntest", "test", true);
1279         check(".....^", flags3, "test\ntest", "test\n", true);
1280         check(".....^", flags3, "test\u0085test", "test\u0085", false);
1281         check(".....^", flags3, "test\n", "test", false);
1282         check(".....^", flags3, "test\r\n", "test", false);
1283         check("......^", flags3, "test\r\ntest", "test\r\n", true);
1284 
1285         check("^....", flags4, "test\ntest", "test", true);
1286         check(".....^", flags3, "test\ntest", "test\n", true);
1287         check(".....^", flags4, "test\u0085test", "test\u0085", true);
1288         check(".....^", flags4, "test\n", "test\n", false);
1289         check(".....^", flags4, "test\r\n", "test\r", false);
1290 
1291         // Supplementary character test
1292         String t = toSupplementaries("test");
1293         check("^....", flags1, t+"\n"+t, t, true);
1294         check(".....^", flags1, t+"\n"+t, t, false);
1295         check(".....^", flags1, t+"\n", t, false);
1296         check("....^", flags1, t+"\r\n", t, false);
1297 
1298         check("^....", flags2, t+"\n"+t, t, true);
1299         check("....^", flags2, t+"\n"+t, t, false);
1300         check(".....^", flags2, t+"\n", t, false);
1301         check("....^", flags2, t+"\r\n", t, false);
1302 
1303         check("^....", flags3, t+"\n"+t, t, true);
1304         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1305         check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1306         check(".....^", flags3, t+"\n", t, false);
1307         check(".....^", flags3, t+"\r\n", t, false);
1308         check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1309 
1310         check("^....", flags4, t+"\n"+t, t, true);
1311         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1312         check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1313         check(".....^", flags4, t+"\n", t+"\n", false);
1314         check(".....^", flags4, t+"\r\n", t+"\r", false);
1315 
1316         report("Caret between terminators");
1317     }
1318 
1319     // This test is for 4727935
1320     private static void dollarAtEndTest() throws Exception {
1321         int flags1 = Pattern.DOTALL;
1322         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1323         int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1324 
1325         check("....$", flags1, "test\n", "test", true);
1326         check("....$", flags1, "test\r\n", "test", true);
1327         check(".....$", flags1, "test\n", "test\n", true);
1328         check(".....$", flags1, "test\u0085", "test\u0085", true);
1329         check("....$", flags1, "test\u0085", "test", true);
1330 
1331         check("....$", flags2, "test\n", "test", true);
1332         check(".....$", flags2, "test\n", "test\n", true);
1333         check(".....$", flags2, "test\u0085", "test\u0085", true);
1334         check("....$", flags2, "test\u0085", "est\u0085", true);
1335 
1336         check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1337         check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1338         check("....$blah", flags3, "test\nblah", "!!!!", false);
1339         check(".....$blah", flags3, "test\nblah", "!!!!", false);
1340 
1341         // Supplementary character test
1342         String t = toSupplementaries("test");
1343         String b = toSupplementaries("blah");
1344         check("....$", flags1, t+"\n", t, true);
1345         check("....$", flags1, t+"\r\n", t, true);
1346         check(".....$", flags1, t+"\n", t+"\n", true);
1347         check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1348         check("....$", flags1, t+"\u0085", t, true);
1349 
1350         check("....$", flags2, t+"\n", t, true);
1351         check(".....$", flags2, t+"\n", t+"\n", true);
1352         check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1353         check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1354 
1355         check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1356         check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1357         check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1358         check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1359 
1360         report("Dollar at End");
1361     }
1362 
1363     // This test is for 4711773
1364     private static void multilineDollarTest() throws Exception {
1365         Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1366         Matcher matcher = findCR.matcher("first bit\nsecond bit");
1367         matcher.find();
1368         if (matcher.start(0) != 9)
1369             failCount++;
1370         matcher.find();
1371         if (matcher.start(0) != 20)
1372             failCount++;
1373 
1374         // Supplementary character test
1375         matcher = findCR.matcher(toSupplementaries("first  bit\n second  bit")); // double BMP chars
1376         matcher.find();
1377         if (matcher.start(0) != 9*2)
1378             failCount++;
1379         matcher.find();
1380         if (matcher.start(0) != 20*2)
1381             failCount++;
1382 
1383         report("Multiline Dollar");
1384     }
1385 
1386     private static void reluctantRepetitionTest() throws Exception {
1387         Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1388         check(p, "1 word word word 2", true);
1389         check(p, "1 wor wo w 2", true);
1390         check(p, "1 word word 2", true);
1391         check(p, "1 word 2", true);
1392         check(p, "1 wo w w 2", true);
1393         check(p, "1 wo w 2", true);
1394         check(p, "1 wor w 2", true);
1395 
1396         p = Pattern.compile("([a-z])+?c");
1397         Matcher m = p.matcher("ababcdefdec");
1398         check(m, "ababc");
1399 
1400         // Supplementary character test
1401         p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1402         m = p.matcher(toSupplementaries("ababcdefdec"));
1403         check(m, toSupplementaries("ababc"));
1404 
1405         report("Reluctant Repetition");
1406     }
1407 
1408     private static Pattern serializedPattern(Pattern p) throws Exception {
1409         ByteArrayOutputStream baos = new ByteArrayOutputStream();
1410         ObjectOutputStream oos = new ObjectOutputStream(baos);
1411         oos.writeObject(p);
1412         oos.close();
1413         try (ObjectInputStream ois = new ObjectInputStream(
1414                 new ByteArrayInputStream(baos.toByteArray()))) {
1415             return (Pattern)ois.readObject();
1416         }
1417     }
1418 
1419     private static void serializeTest() throws Exception {
1420         String patternStr = "(b)";
1421         String matchStr = "b";
1422         Pattern pattern = Pattern.compile(patternStr);
1423         Pattern serializedPattern = serializedPattern(pattern);
1424         Matcher matcher = serializedPattern.matcher(matchStr);
1425         if (!matcher.matches())
1426             failCount++;
1427         if (matcher.groupCount() != 1)
1428             failCount++;
1429 
1430         pattern = Pattern.compile("a(?-i)b", Pattern.CASE_INSENSITIVE);
1431         serializedPattern = serializedPattern(pattern);
1432         if (!serializedPattern.matcher("Ab").matches())
1433             failCount++;
1434         if (serializedPattern.matcher("AB").matches())
1435             failCount++;
1436 
1437         report("Serialization");
1438     }
1439 
1440     private static void gTest() {
1441         Pattern pattern = Pattern.compile("\\G\\w");
1442         Matcher matcher = pattern.matcher("abc#x#x");
1443         matcher.find();
1444         matcher.find();
1445         matcher.find();
1446         if (matcher.find())
1447             failCount++;
1448 
1449         pattern = Pattern.compile("\\GA*");
1450         matcher = pattern.matcher("1A2AA3");
1451         matcher.find();
1452         if (matcher.find())
1453             failCount++;
1454 
1455         pattern = Pattern.compile("\\GA*");
1456         matcher = pattern.matcher("1A2AA3");
1457         if (!matcher.find(1))
1458             failCount++;
1459         matcher.find();
1460         if (matcher.find())
1461             failCount++;
1462 
1463         report("\\G");
1464     }
1465 
1466     private static void zTest() {
1467         Pattern pattern = Pattern.compile("foo\\Z");
1468         // Positives
1469         check(pattern, "foo\u0085", true);
1470         check(pattern, "foo\u2028", true);
1471         check(pattern, "foo\u2029", true);
1472         check(pattern, "foo\n", true);
1473         check(pattern, "foo\r", true);
1474         check(pattern, "foo\r\n", true);
1475         // Negatives
1476         check(pattern, "fooo", false);
1477         check(pattern, "foo\n\r", false);
1478 
1479         pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1480         // Positives
1481         check(pattern, "foo", true);
1482         check(pattern, "foo\n", true);
1483         // Negatives
1484         check(pattern, "foo\r", false);
1485         check(pattern, "foo\u0085", false);
1486         check(pattern, "foo\u2028", false);
1487         check(pattern, "foo\u2029", false);
1488 
1489         report("\\Z");
1490     }
1491 
1492     private static void replaceFirstTest() {
1493         Pattern pattern = Pattern.compile("(ab)(c*)");
1494         Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1495         if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1496             failCount++;
1497 
1498         matcher.reset("zzzabccczzzabcczzzabccczzz");
1499         if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1500             failCount++;
1501 
1502         matcher.reset("zzzabccczzzabcczzzabccczzz");
1503         String result = matcher.replaceFirst("$1");
1504         if (!result.equals("zzzabzzzabcczzzabccczzz"))
1505             failCount++;
1506 
1507         matcher.reset("zzzabccczzzabcczzzabccczzz");
1508         result = matcher.replaceFirst("$2");
1509         if (!result.equals("zzzccczzzabcczzzabccczzz"))
1510             failCount++;
1511 
1512         pattern = Pattern.compile("a*");
1513         matcher = pattern.matcher("aaaaaaaaaa");
1514         if (!matcher.replaceFirst("test").equals("test"))
1515             failCount++;
1516 
1517         pattern = Pattern.compile("a+");
1518         matcher = pattern.matcher("zzzaaaaaaaaaa");
1519         if (!matcher.replaceFirst("test").equals("zzztest"))
1520             failCount++;
1521 
1522         // Supplementary character test
1523         pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1524         matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1525         if (!matcher.replaceFirst(toSupplementaries("test"))
1526                 .equals(toSupplementaries("testzzzabcczzzabccc")))
1527             failCount++;
1528 
1529         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1530         if (!matcher.replaceFirst(toSupplementaries("test")).
1531             equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1532             failCount++;
1533 
1534         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1535         result = matcher.replaceFirst("$1");
1536         if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1537             failCount++;
1538 
1539         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1540         result = matcher.replaceFirst("$2");
1541         if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1542             failCount++;
1543 
1544         pattern = Pattern.compile(toSupplementaries("a*"));
1545         matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1546         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1547             failCount++;
1548 
1549         pattern = Pattern.compile(toSupplementaries("a+"));
1550         matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1551         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1552             failCount++;
1553 
1554         report("Replace First");
1555     }
1556 
1557     private static void unixLinesTest() {
1558         Pattern pattern = Pattern.compile(".*");
1559         Matcher matcher = pattern.matcher("aa\u2028blah");
1560         matcher.find();
1561         if (!matcher.group(0).equals("aa"))
1562             failCount++;
1563 
1564         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1565         matcher = pattern.matcher("aa\u2028blah");
1566         matcher.find();
1567         if (!matcher.group(0).equals("aa\u2028blah"))
1568             failCount++;
1569 
1570         pattern = Pattern.compile("[az]$",
1571                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1572         matcher = pattern.matcher("aa\u2028zz");
1573         check(matcher, "a\u2028", false);
1574 
1575         // Supplementary character test
1576         pattern = Pattern.compile(".*");
1577         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1578         matcher.find();
1579         if (!matcher.group(0).equals(toSupplementaries("aa")))
1580             failCount++;
1581 
1582         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1583         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1584         matcher.find();
1585         if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1586             failCount++;
1587 
1588         pattern = Pattern.compile(toSupplementaries("[az]$"),
1589                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1590         matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1591         check(matcher, toSupplementaries("a\u2028"), false);
1592 
1593         report("Unix Lines");
1594     }
1595 
1596     private static void commentsTest() {
1597         int flags = Pattern.COMMENTS;
1598 
1599         Pattern pattern = Pattern.compile("aa \\# aa", flags);
1600         Matcher matcher = pattern.matcher("aa#aa");
1601         if (!matcher.matches())
1602             failCount++;
1603 
1604         pattern = Pattern.compile("aa  # blah", flags);
1605         matcher = pattern.matcher("aa");
1606         if (!matcher.matches())
1607             failCount++;
1608 
1609         pattern = Pattern.compile("aa blah", flags);
1610         matcher = pattern.matcher("aablah");
1611         if (!matcher.matches())
1612              failCount++;
1613 
1614         pattern = Pattern.compile("aa  # blah blech  ", flags);
1615         matcher = pattern.matcher("aa");
1616         if (!matcher.matches())
1617             failCount++;
1618 
1619         pattern = Pattern.compile("aa  # blah\n  ", flags);
1620         matcher = pattern.matcher("aa");
1621         if (!matcher.matches())
1622             failCount++;
1623 
1624         pattern = Pattern.compile("aa  # blah\nbc # blech", flags);
1625         matcher = pattern.matcher("aabc");
1626         if (!matcher.matches())
1627              failCount++;
1628 
1629         pattern = Pattern.compile("aa  # blah\nbc# blech", flags);
1630         matcher = pattern.matcher("aabc");
1631         if (!matcher.matches())
1632              failCount++;
1633 
1634         pattern = Pattern.compile("aa  # blah\nbc\\# blech", flags);
1635         matcher = pattern.matcher("aabc#blech");
1636         if (!matcher.matches())
1637              failCount++;
1638 
1639         // Supplementary character test
1640         pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1641         matcher = pattern.matcher(toSupplementaries("aa#aa"));
1642         if (!matcher.matches())
1643             failCount++;
1644 
1645         pattern = Pattern.compile(toSupplementaries("aa  # blah"), flags);
1646         matcher = pattern.matcher(toSupplementaries("aa"));
1647         if (!matcher.matches())
1648             failCount++;
1649 
1650         pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1651         matcher = pattern.matcher(toSupplementaries("aablah"));
1652         if (!matcher.matches())
1653              failCount++;
1654 
1655         pattern = Pattern.compile(toSupplementaries("aa  # blah blech  "), flags);
1656         matcher = pattern.matcher(toSupplementaries("aa"));
1657         if (!matcher.matches())
1658             failCount++;
1659 
1660         pattern = Pattern.compile(toSupplementaries("aa  # blah\n  "), flags);
1661         matcher = pattern.matcher(toSupplementaries("aa"));
1662         if (!matcher.matches())
1663             failCount++;
1664 
1665         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc # blech"), flags);
1666         matcher = pattern.matcher(toSupplementaries("aabc"));
1667         if (!matcher.matches())
1668              failCount++;
1669 
1670         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc# blech"), flags);
1671         matcher = pattern.matcher(toSupplementaries("aabc"));
1672         if (!matcher.matches())
1673              failCount++;
1674 
1675         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc\\# blech"), flags);
1676         matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1677         if (!matcher.matches())
1678              failCount++;
1679 
1680         report("Comments");
1681     }
1682 
1683     private static void caseFoldingTest() { // bug 4504687
1684         int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1685         Pattern pattern = Pattern.compile("aa", flags);
1686         Matcher matcher = pattern.matcher("ab");
1687         if (matcher.matches())
1688             failCount++;
1689 
1690         pattern = Pattern.compile("aA", flags);
1691         matcher = pattern.matcher("ab");
1692         if (matcher.matches())
1693             failCount++;
1694 
1695         pattern = Pattern.compile("aa", flags);
1696         matcher = pattern.matcher("aB");
1697         if (matcher.matches())
1698             failCount++;
1699         matcher = pattern.matcher("Ab");
1700         if (matcher.matches())
1701             failCount++;
1702 
1703         // ASCII               "a"
1704         // Latin-1 Supplement  "a" + grave
1705         // Cyrillic            "a"
1706         String[] patterns = new String[] {
1707             //single
1708             "a", "\u00e0", "\u0430",
1709             //slice
1710             "ab", "\u00e0\u00e1", "\u0430\u0431",
1711             //class single
1712             "[a]", "[\u00e0]", "[\u0430]",
1713             //class range
1714             "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1715             //back reference
1716             "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1717         };
1718 
1719         String[] texts = new String[] {
1720             "A", "\u00c0", "\u0410",
1721             "AB", "\u00c0\u00c1", "\u0410\u0411",
1722             "A", "\u00c0", "\u0410",
1723             "B", "\u00c2", "\u0411",
1724             "aA", "\u00e0\u00c0", "\u0430\u0410"
1725         };
1726 
1727         boolean[] expected = new boolean[] {
1728             true, false, false,
1729             true, false, false,
1730             true, false, false,
1731             true, false, false,
1732             true, false, false
1733         };
1734 
1735         flags = Pattern.CASE_INSENSITIVE;
1736         for (int i = 0; i < patterns.length; i++) {
1737             pattern = Pattern.compile(patterns[i], flags);
1738             matcher = pattern.matcher(texts[i]);
1739             if (matcher.matches() != expected[i]) {
1740                 System.out.println("<1> Failed at " + i);
1741                 failCount++;
1742             }
1743         }
1744 
1745         flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1746         for (int i = 0; i < patterns.length; i++) {
1747             pattern = Pattern.compile(patterns[i], flags);
1748             matcher = pattern.matcher(texts[i]);
1749             if (!matcher.matches()) {
1750                 System.out.println("<2> Failed at " + i);
1751                 failCount++;
1752             }
1753         }
1754         // flag unicode_case alone should do nothing
1755         flags = Pattern.UNICODE_CASE;
1756         for (int i = 0; i < patterns.length; i++) {
1757             pattern = Pattern.compile(patterns[i], flags);
1758             matcher = pattern.matcher(texts[i]);
1759             if (matcher.matches()) {
1760                 System.out.println("<3> Failed at " + i);
1761                 failCount++;
1762             }
1763         }
1764 
1765         // Special cases: i, I, u+0131 and u+0130
1766         flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1767         pattern = Pattern.compile("[h-j]+", flags);
1768         if (!pattern.matcher("\u0131\u0130").matches())
1769             failCount++;
1770         report("Case Folding");
1771     }
1772 
1773     private static void appendTest() {
1774         Pattern pattern = Pattern.compile("(ab)(cd)");
1775         Matcher matcher = pattern.matcher("abcd");
1776         String result = matcher.replaceAll("$2$1");
1777         if (!result.equals("cdab"))
1778             failCount++;
1779 
1780         String  s1 = "Swap all: first = 123, second = 456";
1781         String  s2 = "Swap one: first = 123, second = 456";
1782         String  r  = "$3$2$1";
1783         pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1784         matcher = pattern.matcher(s1);
1785 
1786         result = matcher.replaceAll(r);
1787         if (!result.equals("Swap all: 123 = first, 456 = second"))
1788             failCount++;
1789 
1790         matcher = pattern.matcher(s2);
1791 
1792         if (matcher.find()) {
1793             StringBuffer sb = new StringBuffer();
1794             matcher.appendReplacement(sb, r);
1795             matcher.appendTail(sb);
1796             result = sb.toString();
1797             if (!result.equals("Swap one: 123 = first, second = 456"))
1798                 failCount++;
1799         }
1800 
1801         // Supplementary character test
1802         pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1803         matcher = pattern.matcher(toSupplementaries("abcd"));
1804         result = matcher.replaceAll("$2$1");
1805         if (!result.equals(toSupplementaries("cdab")))
1806             failCount++;
1807 
1808         s1 = toSupplementaries("Swap all: first = 123, second = 456");
1809         s2 = toSupplementaries("Swap one: first = 123, second = 456");
1810         r  = toSupplementaries("$3$2$1");
1811         pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1812         matcher = pattern.matcher(s1);
1813 
1814         result = matcher.replaceAll(r);
1815         if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1816             failCount++;
1817 
1818         matcher = pattern.matcher(s2);
1819 
1820         if (matcher.find()) {
1821             StringBuffer sb = new StringBuffer();
1822             matcher.appendReplacement(sb, r);
1823             matcher.appendTail(sb);
1824             result = sb.toString();
1825             if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1826                 failCount++;
1827         }
1828         report("Append");
1829     }
1830 
1831     private static void splitTest() {
1832         Pattern pattern = Pattern.compile(":");
1833         String[] result = pattern.split("foo:and:boo", 2);
1834         if (!result[0].equals("foo"))
1835             failCount++;
1836         if (!result[1].equals("and:boo"))
1837             failCount++;
1838         // Supplementary character test
1839         Pattern patternX = Pattern.compile(toSupplementaries("X"));
1840         result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1841         if (!result[0].equals(toSupplementaries("foo")))
1842             failCount++;
1843         if (!result[1].equals(toSupplementaries("andXboo")))
1844             failCount++;
1845 
1846         CharBuffer cb = CharBuffer.allocate(100);
1847         cb.put("foo:and:boo");
1848         cb.flip();
1849         result = pattern.split(cb);
1850         if (!result[0].equals("foo"))
1851             failCount++;
1852         if (!result[1].equals("and"))
1853             failCount++;
1854         if (!result[2].equals("boo"))
1855             failCount++;
1856 
1857         // Supplementary character test
1858         CharBuffer cbs = CharBuffer.allocate(100);
1859         cbs.put(toSupplementaries("fooXandXboo"));
1860         cbs.flip();
1861         result = patternX.split(cbs);
1862         if (!result[0].equals(toSupplementaries("foo")))
1863             failCount++;
1864         if (!result[1].equals(toSupplementaries("and")))
1865             failCount++;
1866         if (!result[2].equals(toSupplementaries("boo")))
1867             failCount++;
1868 
1869         String source = "0123456789";
1870         for (int limit=-2; limit<3; limit++) {
1871             for (int x=0; x<10; x++) {
1872                 result = source.split(Integer.toString(x), limit);
1873                 int expectedLength = limit < 1 ? 2 : limit;
1874 
1875                 if ((limit == 0) && (x == 9)) {
1876                     // expected dropping of ""
1877                     if (result.length != 1)
1878                         failCount++;
1879                     if (!result[0].equals("012345678")) {
1880                         failCount++;
1881                     }
1882                 } else {
1883                     if (result.length != expectedLength) {
1884                         failCount++;
1885                     }
1886                     if (!result[0].equals(source.substring(0,x))) {
1887                         if (limit != 1) {
1888                             failCount++;
1889                         } else {
1890                             if (!result[0].equals(source.substring(0,10))) {
1891                                 failCount++;
1892                             }
1893                         }
1894                     }
1895                     if (expectedLength > 1) { // Check segment 2
1896                         if (!result[1].equals(source.substring(x+1,10)))
1897                             failCount++;
1898                     }
1899                 }
1900             }
1901         }
1902         // Check the case for no match found
1903         for (int limit=-2; limit<3; limit++) {
1904             result = source.split("e", limit);
1905             if (result.length != 1)
1906                 failCount++;
1907             if (!result[0].equals(source))
1908                 failCount++;
1909         }
1910         // Check the case for limit == 0, source = "";
1911         // split() now returns 0-length for empty source "" see #6559590
1912         source = "";
1913         result = source.split("e", 0);
1914         if (result.length != 1)
1915             failCount++;
1916         if (!result[0].equals(source))
1917             failCount++;
1918 
1919         // Check both split() and splitAsStraem(), especially for zero-lenth
1920         // input and zero-lenth match cases
1921         String[][] input = new String[][] {
1922             { " ",           "Abc Efg Hij" },   // normal non-zero-match
1923             { " ",           " Abc Efg Hij" },  // leading empty str for non-zero-match
1924             { " ",           "Abc  Efg Hij" },  // non-zero-match in the middle
1925             { "(?=\\p{Lu})", "AbcEfgHij" },     // no leading empty str for zero-match
1926             { "(?=\\p{Lu})", "AbcEfg" },
1927             { "(?=\\p{Lu})", "Abc" },
1928             { " ",           "" },              // zero-length input
1929             { ".*",          "" },
1930 
1931             // some tests from PatternStreamTest.java
1932             { "4",       "awgqwefg1fefw4vssv1vvv1" },
1933             { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" },
1934             { "1",       "awgqwefg1fefw4vssv1vvv1" },
1935             { "1",       "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" },
1936             { "\u56da",  "1\u56da23\u56da456\u56da7890" },
1937             { "\u56da",  "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" },
1938             { "\u56da",  "" },
1939             { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs
1940             { "o",       "boo:and:foo" },
1941             { "o",       "booooo:and:fooooo" },
1942             { "o",       "fooooo:" },
1943         };
1944 
1945         String[][] expected = new String[][] {
1946             { "Abc", "Efg", "Hij" },
1947             { "", "Abc", "Efg", "Hij" },
1948             { "Abc", "", "Efg", "Hij" },
1949             { "Abc", "Efg", "Hij" },
1950             { "Abc", "Efg" },
1951             { "Abc" },
1952             { "" },
1953             { "" },
1954 
1955             { "awgqwefg1fefw", "vssv1vvv1" },
1956             { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" },
1957             { "awgqwefg", "fefw4vssv", "vvv" },
1958             { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" },
1959             { "1", "23", "456", "7890" },
1960             { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" },
1961             { "" },
1962             { "This", "is", "testing", "", "with", "different", "separators" },
1963             { "b", "", ":and:f" },
1964             { "b", "", "", "", "", ":and:f" },
1965             { "f", "", "", "", "", ":" },
1966         };
1967         for (int i = 0; i < input.length; i++) {
1968             pattern = Pattern.compile(input[i][0]);
1969             if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) {
1970                 failCount++;
1971             }
1972             if (input[i][1].length() > 0 &&  // splitAsStream() return empty resulting
1973                                              // array for zero-length input for now
1974                 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(),
1975                                expected[i])) {
1976                 failCount++;
1977             }
1978         }
1979         report("Split");
1980     }
1981 
1982     private static void negationTest() {
1983         Pattern pattern = Pattern.compile("[\\[@^]+");
1984         Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1985         if (!matcher.find())
1986             failCount++;
1987         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1988             failCount++;
1989         pattern = Pattern.compile("[@\\[^]+");
1990         matcher = pattern.matcher("@@@@[[[[^^^^");
1991         if (!matcher.find())
1992             failCount++;
1993         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1994             failCount++;
1995         pattern = Pattern.compile("[@\\[^@]+");
1996         matcher = pattern.matcher("@@@@[[[[^^^^");
1997         if (!matcher.find())
1998             failCount++;
1999         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
2000             failCount++;
2001 
2002         pattern = Pattern.compile("\\)");
2003         matcher = pattern.matcher("xxx)xxx");
2004         if (!matcher.find())
2005             failCount++;
2006 
2007         report("Negation");
2008     }
2009 
2010     private static void ampersandTest() {
2011         Pattern pattern = Pattern.compile("[&@]+");
2012         check(pattern, "@@@@&&&&", true);
2013 
2014         pattern = Pattern.compile("[@&]+");
2015         check(pattern, "@@@@&&&&", true);
2016 
2017         pattern = Pattern.compile("[@\\&]+");
2018         check(pattern, "@@@@&&&&", true);
2019 
2020         report("Ampersand");
2021     }
2022 
2023     private static void octalTest() throws Exception {
2024         Pattern pattern = Pattern.compile("\\u0007");
2025         Matcher matcher = pattern.matcher("\u0007");
2026         if (!matcher.matches())
2027             failCount++;
2028         pattern = Pattern.compile("\\07");
2029         matcher = pattern.matcher("\u0007");
2030         if (!matcher.matches())
2031             failCount++;
2032         pattern = Pattern.compile("\\007");
2033         matcher = pattern.matcher("\u0007");
2034         if (!matcher.matches())
2035             failCount++;
2036         pattern = Pattern.compile("\\0007");
2037         matcher = pattern.matcher("\u0007");
2038         if (!matcher.matches())
2039             failCount++;
2040         pattern = Pattern.compile("\\040");
2041         matcher = pattern.matcher("\u0020");
2042         if (!matcher.matches())
2043             failCount++;
2044         pattern = Pattern.compile("\\0403");
2045         matcher = pattern.matcher("\u00203");
2046         if (!matcher.matches())
2047             failCount++;
2048         pattern = Pattern.compile("\\0103");
2049         matcher = pattern.matcher("\u0043");
2050         if (!matcher.matches())
2051             failCount++;
2052 
2053         report("Octal");
2054     }
2055 
2056     private static void longPatternTest() throws Exception {
2057         try {
2058             Pattern pattern = Pattern.compile(
2059                 "a 32-character-long pattern xxxx");
2060             pattern = Pattern.compile("a 33-character-long pattern xxxxx");
2061             pattern = Pattern.compile("a thirty four character long regex");
2062             StringBuffer patternToBe = new StringBuffer(101);
2063             for (int i=0; i<100; i++)
2064                 patternToBe.append((char)(97 + i%26));
2065             pattern = Pattern.compile(patternToBe.toString());
2066         } catch (PatternSyntaxException e) {
2067             failCount++;
2068         }
2069 
2070         // Supplementary character test
2071         try {
2072             Pattern pattern = Pattern.compile(
2073                 toSupplementaries("a 32-character-long pattern xxxx"));
2074             pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
2075             pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
2076             StringBuffer patternToBe = new StringBuffer(101*2);
2077             for (int i=0; i<100; i++)
2078                 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
2079                                                      + 97 + i%26));
2080             pattern = Pattern.compile(patternToBe.toString());
2081         } catch (PatternSyntaxException e) {
2082             failCount++;
2083         }
2084         report("LongPattern");
2085     }
2086 
2087     private static void group0Test() throws Exception {
2088         Pattern pattern = Pattern.compile("(tes)ting");
2089         Matcher matcher = pattern.matcher("testing");
2090         check(matcher, "testing");
2091 
2092         matcher.reset("testing");
2093         if (matcher.lookingAt()) {
2094             if (!matcher.group(0).equals("testing"))
2095                 failCount++;
2096         } else {
2097             failCount++;
2098         }
2099 
2100         matcher.reset("testing");
2101         if (matcher.matches()) {
2102             if (!matcher.group(0).equals("testing"))
2103                 failCount++;
2104         } else {
2105             failCount++;
2106         }
2107 
2108         pattern = Pattern.compile("(tes)ting");
2109         matcher = pattern.matcher("testing");
2110         if (matcher.lookingAt()) {
2111             if (!matcher.group(0).equals("testing"))
2112                 failCount++;
2113         } else {
2114             failCount++;
2115         }
2116 
2117         pattern = Pattern.compile("^(tes)ting");
2118         matcher = pattern.matcher("testing");
2119         if (matcher.matches()) {
2120             if (!matcher.group(0).equals("testing"))
2121                 failCount++;
2122         } else {
2123             failCount++;
2124         }
2125 
2126         // Supplementary character test
2127         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2128         matcher = pattern.matcher(toSupplementaries("testing"));
2129         check(matcher, toSupplementaries("testing"));
2130 
2131         matcher.reset(toSupplementaries("testing"));
2132         if (matcher.lookingAt()) {
2133             if (!matcher.group(0).equals(toSupplementaries("testing")))
2134                 failCount++;
2135         } else {
2136             failCount++;
2137         }
2138 
2139         matcher.reset(toSupplementaries("testing"));
2140         if (matcher.matches()) {
2141             if (!matcher.group(0).equals(toSupplementaries("testing")))
2142                 failCount++;
2143         } else {
2144             failCount++;
2145         }
2146 
2147         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2148         matcher = pattern.matcher(toSupplementaries("testing"));
2149         if (matcher.lookingAt()) {
2150             if (!matcher.group(0).equals(toSupplementaries("testing")))
2151                 failCount++;
2152         } else {
2153             failCount++;
2154         }
2155 
2156         pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
2157         matcher = pattern.matcher(toSupplementaries("testing"));
2158         if (matcher.matches()) {
2159             if (!matcher.group(0).equals(toSupplementaries("testing")))
2160                 failCount++;
2161         } else {
2162             failCount++;
2163         }
2164 
2165         report("Group0");
2166     }
2167 
2168     private static void findIntTest() throws Exception {
2169         Pattern p = Pattern.compile("blah");
2170         Matcher m = p.matcher("zzzzblahzzzzzblah");
2171         boolean result = m.find(2);
2172         if (!result)
2173             failCount++;
2174 
2175         p = Pattern.compile("$");
2176         m = p.matcher("1234567890");
2177         result = m.find(10);
2178         if (!result)
2179             failCount++;
2180         try {
2181             result = m.find(11);
2182             failCount++;
2183         } catch (IndexOutOfBoundsException e) {
2184             // correct result
2185         }
2186 
2187         // Supplementary character test
2188         p = Pattern.compile(toSupplementaries("blah"));
2189         m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
2190         result = m.find(2);
2191         if (!result)
2192             failCount++;
2193 
2194         report("FindInt");
2195     }
2196 
2197     private static void emptyPatternTest() throws Exception {
2198         Pattern p = Pattern.compile("");
2199         Matcher m = p.matcher("foo");
2200 
2201         // Should find empty pattern at beginning of input
2202         boolean result = m.find();
2203         if (result != true)
2204             failCount++;
2205         if (m.start() != 0)
2206             failCount++;
2207 
2208         // Should not match entire input if input is not empty
2209         m.reset();
2210         result = m.matches();
2211         if (result == true)
2212             failCount++;
2213 
2214         try {
2215             m.start(0);
2216             failCount++;
2217         } catch (IllegalStateException e) {
2218             // Correct result
2219         }
2220 
2221         // Should match entire input if input is empty
2222         m.reset("");
2223         result = m.matches();
2224         if (result != true)
2225             failCount++;
2226 
2227         result = Pattern.matches("", "");
2228         if (result != true)
2229             failCount++;
2230 
2231         result = Pattern.matches("", "foo");
2232         if (result == true)
2233             failCount++;
2234         report("EmptyPattern");
2235     }
2236 
2237     private static void charClassTest() throws Exception {
2238         Pattern pattern = Pattern.compile("blah[ab]]blech");
2239         check(pattern, "blahb]blech", true);
2240 
2241         pattern = Pattern.compile("[abc[def]]");
2242         check(pattern, "b", true);
2243 
2244         // Supplementary character tests
2245         pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2246         check(pattern, toSupplementaries("blahb]blech"), true);
2247 
2248         pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2249         check(pattern, toSupplementaries("b"), true);
2250 
2251         try {
2252             // u00ff when UNICODE_CASE
2253             pattern = Pattern.compile("[ab\u00ffcd]",
2254                                       Pattern.CASE_INSENSITIVE|
2255                                       Pattern.UNICODE_CASE);
2256             check(pattern, "ab\u00ffcd", true);
2257             check(pattern, "Ab\u0178Cd", true);
2258 
2259             // u00b5 when UNICODE_CASE
2260             pattern = Pattern.compile("[ab\u00b5cd]",
2261                                       Pattern.CASE_INSENSITIVE|
2262                                       Pattern.UNICODE_CASE);
2263             check(pattern, "ab\u00b5cd", true);
2264             check(pattern, "Ab\u039cCd", true);
2265         } catch (Exception e) { failCount++; }
2266 
2267         /* Special cases
2268            (1)LatinSmallLetterLongS u+017f
2269            (2)LatinSmallLetterDotlessI u+0131
2270            (3)LatineCapitalLetterIWithDotAbove u+0130
2271            (4)KelvinSign u+212a
2272            (5)AngstromSign u+212b
2273         */
2274         int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2275         pattern = Pattern.compile("[sik\u00c5]+", flags);
2276         if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2277             failCount++;
2278 
2279         report("CharClass");
2280     }
2281 
2282     private static void caretTest() throws Exception {
2283         Pattern pattern = Pattern.compile("\\w*");
2284         Matcher matcher = pattern.matcher("a#bc#def##g");
2285         check(matcher, "a");
2286         check(matcher, "");
2287         check(matcher, "bc");
2288         check(matcher, "");
2289         check(matcher, "def");
2290         check(matcher, "");
2291         check(matcher, "");
2292         check(matcher, "g");
2293         check(matcher, "");
2294         if (matcher.find())
2295             failCount++;
2296 
2297         pattern = Pattern.compile("^\\w*");
2298         matcher = pattern.matcher("a#bc#def##g");
2299         check(matcher, "a");
2300         if (matcher.find())
2301             failCount++;
2302 
2303         pattern = Pattern.compile("\\w");
2304         matcher = pattern.matcher("abc##x");
2305         check(matcher, "a");
2306         check(matcher, "b");
2307         check(matcher, "c");
2308         check(matcher, "x");
2309         if (matcher.find())
2310             failCount++;
2311 
2312         pattern = Pattern.compile("^\\w");
2313         matcher = pattern.matcher("abc##x");
2314         check(matcher, "a");
2315         if (matcher.find())
2316             failCount++;
2317 
2318         pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2319         matcher = pattern.matcher("abcdef-ghi\njklmno");
2320         check(matcher, "abc");
2321         if (matcher.find())
2322             failCount++;
2323 
2324         pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2325         matcher = pattern.matcher("abcdef-ghi\njklmno");
2326         check(matcher, "abc");
2327         check(matcher, "jkl");
2328         if (matcher.find())
2329             failCount++;
2330 
2331         pattern = Pattern.compile("^", Pattern.MULTILINE);
2332         matcher = pattern.matcher("this is some text");
2333         String result = matcher.replaceAll("X");
2334         if (!result.equals("Xthis is some text"))
2335             failCount++;
2336 
2337         pattern = Pattern.compile("^");
2338         matcher = pattern.matcher("this is some text");
2339         result = matcher.replaceAll("X");
2340         if (!result.equals("Xthis is some text"))
2341             failCount++;
2342 
2343         pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2344         matcher = pattern.matcher("this is some text\n");
2345         result = matcher.replaceAll("X");
2346         if (!result.equals("Xthis is some text\n"))
2347             failCount++;
2348 
2349         report("Caret");
2350     }
2351 
2352     /**
2353      * Checks that neither an independent group {@code (?>...)} nor a pure
2354      * group {@code (?:...)} is numbered as a capturing group: after a
2355      * find(), asking for group 1 has to throw IndexOutOfBoundsException.
2356      * Every pattern is tried as written and again after both pattern and
2357      * input have gone through toSupplementaries().
2358      */
2359     private static void groupCaptureTest() throws Exception {
2360         // Independent group first, pure group second
2361         String[] regexes = { "x+(?>y+)z+", "x+(?:y+)z+" };
2362         String input = "xxxyyyzzz";
2363 
2364         // Pass 0: plain text; pass 1: supplementary character tests
2365         for (int pass = 0; pass < 2; pass++) {
2366             for (String regex : regexes) {
2367                 Pattern pattern = Pattern.compile(
2368                         pass == 0 ? regex : toSupplementaries(regex));
2369                 Matcher matcher = pattern.matcher(
2370                         pass == 0 ? input : toSupplementaries(input));
2371                 matcher.find();
2372                 try {
2373                     matcher.group(1);
2374                     // Reaching this line means group 1 exists
2375                     failCount++;
2376                 } catch (IndexOutOfBoundsException expected) {
2377                     // Good result
2378                 }
2379             }
2380         }
2381 
2382         report("GroupCapture");
2383     }
2398 
2399     /**
2400      * Exercises numbered back references: references to an existing group,
2401      * references to a group the pattern does not define, and the two-digit
2402      * reference 11 in patterns with one, ten and eleven groups. Most cases
2403      * are repeated on toSupplementaries() versions of pattern and input.
2404      */
2405     private static void backRefTest() throws Exception {
2406         // A single group, referenced right after the literal "bc"
2407         check(Pattern.compile("(a*)bc\\1"), "zzzaabcazzz", true);
2408         check(Pattern.compile("(a*)bc\\1"), "zzzaabcaazzz", true);
2409 
2410         // Two groups: group 1 can be referenced, group 3 is not defined
2411         check(Pattern.compile("(abc)(def)\\1"), "abcdefabc", true);
2412         check(Pattern.compile("(abc)(def)\\3"), "abcdefabc", false);
2413 
2414         try {
2415             for (int group = 1; group <= 9; group++) {
2416                 // Make sure backref 1-9 are always accepted
2417                 Pattern undefinedRef = Pattern.compile("abcdef\\" + group);
2418                 // and fail to match if the target group does not exist
2419                 check(undefinedRef, "abcdef", false);
2420             }
2421         } catch (PatternSyntaxException rejected) {
2422             failCount++;
2423         }
2424 
2425         // Ten groups: a match is expected only when the input continues
2426         // with a literal '1' after the repeated text of group 1
2427         Pattern tenGroups =
2428             Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2429         check(tenGroups, "abcdefghija", false);
2430         check(tenGroups, "abcdefghija1", true);
2431 
2432         // Eleven groups: the input ends with the text of group 11 repeated
2433         check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"),
2434               "abcdefghijkk", true);
2435 
2436         // One group: same expectation as for the ten-group pattern
2437         check(Pattern.compile("(a)bcdefghij\\11"), "abcdefghija1", true);
2438 
2439         // Supplementary character tests
2440         check(Pattern.compile(toSupplementaries("(a*)bc\\1")),
2441               toSupplementaries("zzzaabcazzz"), true);
2442         check(Pattern.compile(toSupplementaries("(a*)bc\\1")),
2443               toSupplementaries("zzzaabcaazzz"), true);
2444         check(Pattern.compile(toSupplementaries("(abc)(def)\\1")),
2445               toSupplementaries("abcdefabc"), true);
2446         check(Pattern.compile(toSupplementaries("(abc)(def)\\3")),
2447               toSupplementaries("abcdefabc"), false);
2448 
2449         tenGroups = Pattern.compile(
2450             toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2451         check(tenGroups, toSupplementaries("abcdefghija"), false);
2452         check(tenGroups, toSupplementaries("abcdefghija1"), true);
2453 
2454         check(Pattern.compile(
2455                   toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11")),
2456               toSupplementaries("abcdefghijkk"), true);
2457 
2458         report("BackRef");
2459     }
2455 
2456     /**
2457      * Line anchor tests. Follows Unicode Technical Report #18, section 2.6
2458      * End of Line: there is no empty line to be matched in the sequence
2459      * CR LF (U+000D U+000A), but there is an empty line in the sequence
2460      * LF CR (U+000A U+000D). Each case runs on plain input first and on
2461      * toSupplementaries() input afterwards.
2462      */
2463     private static void anchorTest() throws Exception {
2464         Pattern pattern = Pattern.compile("^.*$", Pattern.MULTILINE);
2465         Matcher matcher = pattern.matcher("blah1\r\nblah2");
2466         // CR LF: the second line is expected from the second find()
2467         matcher.find();
2468         matcher.find();
2469         if (!"blah2".equals(matcher.group())) {
2470             failCount++;
2471         }
2472 
2473         // LF CR: a third find() is needed to reach the second line
2474         matcher.reset("blah1\n\rblah2");
2475         matcher.find();
2476         matcher.find();
2477         matcher.find();
2478         if (!"blah2".equals(matcher.group())) {
2479             failCount++;
2480         }
2481 
2482         // Test behavior of $ with CR LF at end of input
2483         pattern = Pattern.compile(".+$");
2484         matcher = pattern.matcher("blah1\r\n");
2485         if (!matcher.find()) {
2486             failCount++;
2487         }
2488         if (!"blah1".equals(matcher.group())) {
2489             failCount++;
2490         }
2491         if (matcher.find()) {
2492             failCount++;
2493         }
2494 
2495         // Test behavior of $ with CR LF at end of input in multiline
2496         pattern = Pattern.compile(".+$", Pattern.MULTILINE);
2497         matcher = pattern.matcher("blah1\r\n");
2498         if (!matcher.find()) {
2499             failCount++;
2500         }
2501         if (matcher.find()) {
2502             failCount++;
2503         }
2504 
2505         // Test for $ recognition of U+0085 for bug 4527731
2506         pattern = Pattern.compile(".+$", Pattern.MULTILINE);
2507         matcher = pattern.matcher("blah1\u0085");
2508         if (!matcher.find()) {
2509             failCount++;
2510         }
2511 
2512         // Supplementary character test
2513         pattern = Pattern.compile("^.*$", Pattern.MULTILINE);
2514         matcher = pattern.matcher(toSupplementaries("blah1\r\nblah2"));
2515         matcher.find();
2516         matcher.find();
2517         String line = matcher.group();
2518         if (!line.equals(toSupplementaries("blah2"))) {
2519             failCount++;
2520         }
2521 
2522         matcher.reset(toSupplementaries("blah1\n\rblah2"));
2523         matcher.find();
2524         matcher.find();
2525         matcher.find();
2526         line = matcher.group();
2527         if (!line.equals(toSupplementaries("blah2"))) {
2528             failCount++;
2529         }
2530 
2531         // Test behavior of $ with CR LF at end of input
2532         pattern = Pattern.compile(".+$");
2533         matcher = pattern.matcher(toSupplementaries("blah1\r\n"));
2534         if (!matcher.find()) {
2535             failCount++;
2536         }
2537         line = matcher.group();
2538         if (!line.equals(toSupplementaries("blah1"))) {
2539             failCount++;
2540         }
2541         if (matcher.find()) {
2542             failCount++;
2543         }
2544 
2545         // Test behavior of $ with CR LF at end of input in multiline
2546         pattern = Pattern.compile(".+$", Pattern.MULTILINE);
2547         matcher = pattern.matcher(toSupplementaries("blah1\r\n"));
2548         if (!matcher.find()) {
2549             failCount++;
2550         }
2551         if (matcher.find()) {
2552             failCount++;
2553         }
2554 
2555         // Test for $ recognition of U+0085 for bug 4527731
2556         pattern = Pattern.compile(".+$", Pattern.MULTILINE);
2557         matcher = pattern.matcher(toSupplementaries("blah1\u0085"));
2558         if (!matcher.find()) {
2559             failCount++;
2560         }
2561 
2562         report("Anchors");
2563     }
2541 
2542     /**
2543      * Sanity check for Matcher.lookingAt(): it is expected to succeed when
2544      * the input begins with a match and to fail when matches only start later.
2545      */
2546     private static void lookingAtTest() throws Exception {
2547         Pattern pattern = Pattern.compile("(ab)(c*)");
2548 
2549         // The input begins with a match
2550         Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
2551         if (!matcher.lookingAt()) {
2552             failCount++;
2553         }
2554 
2555         // group() and group(0) are expected to return equal strings
2556         String whole = matcher.group();
2557         if (!whole.equals(matcher.group(0))) {
2558             failCount++;
2559         }
2560 
2561         // Matches exist, but none at the beginning of the input
2562         matcher = pattern.matcher("zzzabccczzzabcczzzabccczzz");
2563         if (matcher.lookingAt()) {
2564             failCount++;
2565         }
2566 
2567         // Supplementary character test
2568         pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
2569         matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2570         if (!matcher.lookingAt()) {
2571             failCount++;
2572         }
2573 
2574         whole = matcher.group();
2575         if (!whole.equals(matcher.group(0))) {
2576             failCount++;
2577         }
2578 
2579         matcher = pattern.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2580         if (matcher.lookingAt()) {
2581             failCount++;
2582         }
2583 
2584         report("Looking At");
2585     }
2575 
2576     /**
2577      * Sanity check for Matcher.matches(): the whole input has to match, so
2578      * input that only find() or lookingAt() would accept must be rejected.
2579      */
2580     private static void matchesTest() throws Exception {
2581         Pattern pattern = Pattern.compile("ulb(c*)");
2582 
2583         // matches()
2584         Matcher matcher = pattern.matcher("ulbcccccc");
2585         if (!matcher.matches()) {
2586             failCount++;
2587         }
2588 
2589         // find() but not matches()
2590         matcher.reset("zzzulbcccccc");
2591         if (matcher.matches()) {
2592             failCount++;
2593         }
2594 
2595         // lookingAt() but not matches()
2596         matcher.reset("ulbccccccdef");
2597         if (matcher.matches()) {
2598             failCount++;
2599         }
2600 
2601         // matches(): only the second alternative covers the whole input
2602         pattern = Pattern.compile("a|ad");
2603         matcher = pattern.matcher("ad");
2604         if (!matcher.matches()) {
2605             failCount++;
2606         }
2607 
2608         // Supplementary character test
2609         pattern = Pattern.compile(toSupplementaries("ulb(c*)"));
2610 
2611         // matches()
2612         matcher = pattern.matcher(toSupplementaries("ulbcccccc"));
2613         if (!matcher.matches()) {
2614             failCount++;
2615         }
2616 
2617         // find() but not matches()
2618         matcher.reset(toSupplementaries("zzzulbcccccc"));
2619         if (matcher.matches()) {
2620             failCount++;
2621         }
2622 
2623         // lookingAt() but not matches()
2624         matcher.reset(toSupplementaries("ulbccccccdef"));
2625         if (matcher.matches()) {
2626             failCount++;
2627         }
2628 
2629         // matches(): only the second alternative covers the whole input
2630         pattern = Pattern.compile(toSupplementaries("a|ad"));
2631         matcher = pattern.matcher(toSupplementaries("ad"));
2632         if (!matcher.matches()) {
2633             failCount++;
2634         }
2635 
2636         report("Matches");
2637     }
2627 
2628     /**
2629      * A basic sanity test of Pattern.matches(), run on plain strings first
2630      * and on their toSupplementaries() versions afterwards.
2631      */
2632     private static void patternMatchesTest() throws Exception {
2633         // matches()
2634         if (!Pattern.matches("ulb(c*)", "ulbcccccc"))
2635             failCount++;
2636 
2637         // find() but not matches()
2638         if (Pattern.matches("ulb(c*)", "zzzulbcccccc"))
2639             failCount++;
2640 
2641         // lookingAt() but not matches()
2642         if (Pattern.matches("ulb(c*)", "ulbccccccdef"))
2643             failCount++;
2644 
2645         // Supplementary character test
2646         // matches()
2647         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2648                              toSupplementaries("ulbcccccc")))
2649             failCount++;
2650 
2651         // find() but not matches()
2652         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2653                             toSupplementaries("zzzulbcccccc")))
2654             failCount++;
2655 
2656         // lookingAt() but not matches()
2657         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2658                             toSupplementaries("ulbccccccdef")))
2659             failCount++;
2660 
2661         report("Pattern Matches");
2662     }
2665 
2666     /**
2667      * Canonical equivalence testing. Tests the ability of the engine
2668      * to match sequences that are not explicitly specified in the
2669      * pattern when they are considered equivalent by the Unicode Standard.
2670      * All patterns are compiled with Pattern.CANON_EQ. A set of explicit
2671      * checks is followed by a table of regression cases; every mismatch,
2672      * in either part, is added to the class-level failCount before
2673      * report() is called.
2674      */
2675     private static void ceTest() throws Exception {
2676         // Decomposed char outside char classes
2677         Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2678         Matcher m = p.matcher("test\u00e5");
2679         if (!m.matches())
2680             failCount++;
2681 
2682         m.reset("testa\u030a");
2683         if (!m.matches())
2684             failCount++;
2685 
2686         // Composed char outside char classes
2687         p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2688         m = p.matcher("test\u00e5");
2689         if (!m.matches())
2690             failCount++;
2691 
2692         m.reset("testa\u030a");
2693         if (!m.find())
2694             failCount++;
2695 
2696         // Decomposed char inside a char class
2697         p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2698         m = p.matcher("test\u00e5");
2699         if (!m.find())
2700             failCount++;
2701 
2702         m.reset("testa\u030a");
2703         if (!m.find())
2704             failCount++;
2705 
2706         // Composed char inside a char class
2707         p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2708         m = p.matcher("test\u00e5");
2709         if (!m.find())
2710             failCount++;
2711 
2712         m.reset("testa\u0300");
2713         if (!m.find())
2714             failCount++;
2715 
2716         m.reset("testa\u030a");
2717         if (!m.find())
2718             failCount++;
2719 
2720         // Marks that cannot legally change order and be equivalent
2721         p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2722         check(p, "testa\u0308\u0300", true);
2723         check(p, "testa\u0300\u0308", false);
2724 
2725         // Marks that can legally change order and be equivalent
2726         p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2727         check(p, "testa\u0308\u0323", true);
2728         check(p, "testa\u0323\u0308", true);
2729 
2730         // Test all equivalences of the sequence a\u0308\u0323\u0300
2731         p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2732         check(p, "testa\u0308\u0323\u0300", true);
2733         check(p, "testa\u0323\u0308\u0300", true);
2734         check(p, "testa\u0308\u0300\u0323", true);
2735         check(p, "test\u00e4\u0323\u0300", true);
2736         check(p, "test\u00e4\u0300\u0323", true);
2737 
2738         // Each row: pattern, input, "f" to use find() or "m" to use
2739         // matches(), expected result
2740         Object[][] data = new Object[][] {
2741 
2742         // JDK-4867170
2743         { "[\u1f80-\u1f82]", "ab\u1f80cd",             "f", true },
2744         { "[\u1f80-\u1f82]", "ab\u1f81cd",             "f", true },
2745         { "[\u1f80-\u1f82]", "ab\u1f82cd",             "f", true },
2746         { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true },
2747         { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true },
2748         { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd",       "f", true },
2749         { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd",       "f", true },
2750 
2751         { "\\p{IsGreek}",    "ab\u1f80cd",             "f", true },
2752         { "\\p{IsGreek}",    "ab\u1f81cd",             "f", true },
2753         { "\\p{IsGreek}",    "ab\u1f82cd",             "f", true },
2754         { "\\p{IsGreek}",    "ab\u03b1\u0314\u0345cd", "f", true },
2755         { "\\p{IsGreek}",    "ab\u1f01\u0345cd",       "f", true },
2756 
2757         // backtracking, force to match "\u1f80", instead of "\u1f82"
2758         { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true },
2759 
2760         { "[\\p{IsGreek}]",  "\u03b1\u0314\u0345",     "m", true },
2761         { "\\p{IsGreek}",    "\u03b1\u0314\u0345",     "m", true },
2762 
2763         { "[^\u1f80-\u1f82]","\u1f81",                 "m", false },
2764         { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345",     "m", false },
2765         { "[^\u1f01\u0345]", "\u1f81",                 "f", false },
2766 
2767         { "[^\u1f81]+",      "\u1f80\u1f82",           "f", true },
2768         { "[\u1f80]",        "ab\u1f80cd",             "f", true },
2769         { "\u1f80",          "ab\u1f80cd",             "f", true },
2770         { "\u1f00\u0345\u0300",  "\u1f82", "m", true },
2771         { "\u1f80",          "-\u1f00\u0345\u0300-",   "f", true },
2772         { "\u1f82",          "\u1f00\u0345\u0300",     "m", true },
2773         { "\u1f82",          "\u1f80\u0300",           "m", true },
2774 
2775         // JDK-7080302       # compile failed
2776         { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true},
2777 
2778         // JDK-6728861, same cause as above one
2779         { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true},
2780 
2781         // JDK-6995635
2782         { "(\u00e9)", "e\u0301", "m", true },
2783 
2784         // JDK-6736245
2785         // interesting special case: nfc(u2add+u0338) -> u2add+u0338, NOT u2adc
2786         { "\u2ADC", "\u2ADC", "m", true},          // NFC
2787         { "\u2ADC", "\u2ADD\u0338", "m", true},    // NFD
2788 
2789         //  4916384.
2790         // Decomposed hangul (jamos) works inside clazz
2791         { "[\u1100\u1161]", "\u1100\u1161", "m", true},
2792         { "[\u1100\u1161]", "\uac00", "m", true},
2793 
2794         { "[\uac00]", "\u1100\u1161", "m", true},
2795         { "[\uac00]", "\uac00", "m", true},
2796 
2797         // Decomposed hangul (jamos)
2798         { "\u1100\u1161", "\u1100\u1161", "m", true},
2799         { "\u1100\u1161", "\uac00", "m", true},
2800 
2801         // Composed hangul
2802         { "\uac00",  "\u1100\u1161", "m", true },
2803         { "\uac00",  "\uac00", "m", true },
2804 
2805         /* Need a NFDSlice to nfd the source to solve this issue
2806            u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f>  -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2807            u+1d1bc -> nfd: <u+1d1ba><u+1d165>           -> nfc: <u+1d1ba><u+1d165>
2808            <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2809 
2810         // Decomposed supplementary outside char classes
2811         // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true },
2812         // Composed supplementary outside char classes
2813         // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true },
2814         */
2815         { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true },
2816         { "test\ud834\uddc0",             "test\ud834\uddbc\ud834\udd6f", "m", true },
2817 
2818         { "test\ud834\uddc0",             "test\ud834\uddc0",             "m", true },
2819         { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0",             "m", true },
2820         };
2821 
2822         // Count table mismatches in the class-level failCount; a local
2823         // counter of the same name would hide them from report()
2824         for (Object[] row : data) {
2825             String regex = (String) row[0];
2826             String input = (String) row[1];
2827             boolean useFind = "f".equals(row[2]);
2828             boolean expected = (boolean) row[3];
2829             Matcher matcher = Pattern.compile(regex, Pattern.CANON_EQ).matcher(input);
2830             boolean actual = useFind ? matcher.find() : matcher.matches();
2831             if (actual != expected) {
2832                 failCount++;
2833             }
2834         }
2835         report("Canonical Equivalence");
2836     }
2831 
2832     /**
2833      * Sanity check for Matcher.replaceAll() with a literal replacement and
2834      * with a group reference, on plain and on toSupplementaries() input.
2835      */
2836     private static void globalSubstitute() throws Exception {
2837         Pattern pattern = Pattern.compile("(ab)(c*)");
2838 
2839         // Global substitution with a literal
2840         Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
2841         String replaced = matcher.replaceAll("test");
2842         if (!"testzzztestzzztest".equals(replaced)) {
2843             failCount++;
2844         }
2845 
2846         matcher.reset("zzzabccczzzabcczzzabccczzz");
2847         replaced = matcher.replaceAll("test");
2848         if (!"zzztestzzztestzzztestzzz".equals(replaced)) {
2849             failCount++;
2850         }
2851 
2852         // Global substitution with groups
2853         matcher.reset("zzzabccczzzabcczzzabccczzz");
2854         replaced = matcher.replaceAll("$1");
2855         if (!"zzzabzzzabzzzabzzz".equals(replaced)) {
2856             failCount++;
2857         }
2858 
2859         // Supplementary character test
2860         // Global substitution with a literal
2861         pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
2862         matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2863         replaced = matcher.replaceAll(toSupplementaries("test"));
2864         if (!replaced.equals(toSupplementaries("testzzztestzzztest"))) {
2865             failCount++;
2866         }
2867 
2868         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2869         replaced = matcher.replaceAll(toSupplementaries("test"));
2870         if (!replaced.equals(toSupplementaries("zzztestzzztestzzztestzzz"))) {
2871             failCount++;
2872         }
2873 
2874         // Global substitution with groups
2875         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2876         replaced = matcher.replaceAll("$1");
2877         if (!replaced.equals(toSupplementaries("zzzabzzzabzzzabzzz"))) {
2878             failCount++;
2879         }
2880 
2881         report("Global Substitution");
2882     }
2873 
2874     /**
2875      * Tests Matcher.appendReplacement() and Matcher.appendTail() with a
2876      * StringBuffer as the output, using literal and group substitutions.
2877      * Covers the IllegalStateException raised before any match, an escaped
2878      * '$', references to missing groups and two-digit group references,
2879      * on plain input and on input converted by toSupplementaries().
2880      */
2881     private static void stringbufferSubstitute() throws Exception {
2882         // SB substitution with literal
2883         String text = "zzzblahzzz";
2884         Pattern pattern = Pattern.compile("blah");
2885         Matcher matcher = pattern.matcher(text);
2886         StringBuffer buffer = new StringBuffer();
2887         try {
2888             // No match has been attempted yet, so this has to throw
2889             matcher.appendReplacement(buffer, "blech");
2890             failCount++;
2891         } catch (IllegalStateException expected) {
2892             // Correct result
2893         }
2894         matcher.find();
2895         matcher.appendReplacement(buffer, "blech");
2896         if (!"zzzblech".contentEquals(buffer)) {
2897             failCount++;
2898         }
2899 
2900         matcher.appendTail(buffer);
2901         if (!"zzzblechzzz".contentEquals(buffer)) {
2902             failCount++;
2903         }
2904 
2905         // SB substitution with groups
2906         text = "zzzabcdzzz";
2907         pattern = Pattern.compile("(ab)(cd)*");
2908         matcher = pattern.matcher(text);
2909         buffer = new StringBuffer();
2910         try {
2911             matcher.appendReplacement(buffer, "$1");
2912             failCount++;
2913         } catch (IllegalStateException expected) {
2914             // Correct result
2915         }
2916         matcher.find();
2917         matcher.appendReplacement(buffer, "$1");
2918         if (!"zzzab".contentEquals(buffer)) {
2919             failCount++;
2920         }
2921 
2922         matcher.appendTail(buffer);
2923         if (!"zzzabzzz".contentEquals(buffer)) {
2924             failCount++;
2925         }
2926 
2927         // SB substitution with 3 groups
2928         text = "zzzabcdcdefzzz";
2929         pattern = Pattern.compile("(ab)(cd)*(ef)");
2930         matcher = pattern.matcher(text);
2931         buffer = new StringBuffer();
2932         try {
2933             matcher.appendReplacement(buffer, "$1w$2w$3");
2934             failCount++;
2935         } catch (IllegalStateException expected) {
2936             // Correct result
2937         }
2938         matcher.find();
2939         matcher.appendReplacement(buffer, "$1w$2w$3");
2940         if (!"zzzabwcdwef".contentEquals(buffer)) {
2941             failCount++;
2942         }
2943 
2944         matcher.appendTail(buffer);
2945         if (!"zzzabwcdwefzzz".contentEquals(buffer)) {
2946             failCount++;
2947         }
2948 
2949         // SB substitution with groups and three matches
2950         // skipping middle match
2951         text = "zzzabcdzzzabcddzzzabcdzzz";
2952         pattern = Pattern.compile("(ab)(cd*)");
2953         matcher = pattern.matcher(text);
2954         buffer = new StringBuffer();
2955         try {
2956             matcher.appendReplacement(buffer, "$1");
2957             failCount++;
2958         } catch (IllegalStateException expected) {
2959             // Correct result
2960         }
2961         matcher.find();
2962         matcher.appendReplacement(buffer, "$1");
2963         if (!"zzzab".contentEquals(buffer)) {
2964             failCount++;
2965         }
2966 
2967         matcher.find();
2968         matcher.find();
2969         matcher.appendReplacement(buffer, "$2");
2970         if (!"zzzabzzzabcddzzzcd".contentEquals(buffer)) {
2971             failCount++;
2972         }
2973 
2974         matcher.appendTail(buffer);
2975         if (!"zzzabzzzabcddzzzcdzzz".contentEquals(buffer)) {
2976             failCount++;
2977         }
2978 
2979         // Check to make sure escaped $ is ignored
2980         text = "zzzabcdcdefzzz";
2981         pattern = Pattern.compile("(ab)(cd)*(ef)");
2982         matcher = pattern.matcher(text);
2983         buffer = new StringBuffer();
2984         matcher.find();
2985         matcher.appendReplacement(buffer, "$1w\\$2w$3");
2986         if (!"zzzabw$2wef".contentEquals(buffer)) {
2987             failCount++;
2988         }
2989 
2990         matcher.appendTail(buffer);
2991         if (!"zzzabw$2wefzzz".contentEquals(buffer)) {
2992             failCount++;
2993         }
2994 
2995         // Check to make sure a reference to nonexistent group causes error
2996         text = "zzzabcdcdefzzz";
2997         pattern = Pattern.compile("(ab)(cd)*(ef)");
2998         matcher = pattern.matcher(text);
2999         buffer = new StringBuffer();
3000         matcher.find();
3001         try {
3002             matcher.appendReplacement(buffer, "$1w$5w$3");
3003             failCount++;
3004         } catch (IndexOutOfBoundsException expected) {
3005             // Correct result
3006         }
3007 
3008         // Check double digit group references
3009         text = "zzz123456789101112zzz";
3010         pattern = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3011         matcher = pattern.matcher(text);
3012         buffer = new StringBuffer();
3013         matcher.find();
3014         matcher.appendReplacement(buffer, "$1w$11w$3");
3015         if (!"zzz1w11w3".contentEquals(buffer)) {
3016             failCount++;
3017         }
3018 
3019         // Check to make sure it backs off $15 to $1 if only three groups
3020         text = "zzzabcdcdefzzz";
3021         pattern = Pattern.compile("(ab)(cd)*(ef)");
3022         matcher = pattern.matcher(text);
3023         buffer = new StringBuffer();
3024         matcher.find();
3025         matcher.appendReplacement(buffer, "$1w$15w$3");
3026         if (!"zzzabwab5wef".contentEquals(buffer)) {
3027             failCount++;
3028         }
3029 
3030         // Supplementary character test
3031         // SB substitution with literal
3032         text = toSupplementaries("zzzblahzzz");
3033         pattern = Pattern.compile(toSupplementaries("blah"));
3034         matcher = pattern.matcher(text);
3035         buffer = new StringBuffer();
3036         try {
3037             matcher.appendReplacement(buffer, toSupplementaries("blech"));
3038             failCount++;
3039         } catch (IllegalStateException expected) {
3040             // Correct result
3041         }
3042         matcher.find();
3043         matcher.appendReplacement(buffer, toSupplementaries("blech"));
3044         if (!toSupplementaries("zzzblech").contentEquals(buffer)) {
3045             failCount++;
3046         }
3047 
3048         matcher.appendTail(buffer);
3049         if (!toSupplementaries("zzzblechzzz").contentEquals(buffer)) {
3050             failCount++;
3051         }
3052 
3053         // SB substitution with groups
3054         text = toSupplementaries("zzzabcdzzz");
3055         pattern = Pattern.compile(toSupplementaries("(ab)(cd)*"));
3056         matcher = pattern.matcher(text);
3057         buffer = new StringBuffer();
3058         try {
3059             matcher.appendReplacement(buffer, "$1");
3060             failCount++;
3061         } catch (IllegalStateException expected) {
3062             // Correct result
3063         }
3064         matcher.find();
3065         matcher.appendReplacement(buffer, "$1");
3066         if (!toSupplementaries("zzzab").contentEquals(buffer)) {
3067             failCount++;
3068         }
3069 
3070         matcher.appendTail(buffer);
3071         if (!toSupplementaries("zzzabzzz").contentEquals(buffer)) {
3072             failCount++;
3073         }
3074 
3075         // SB substitution with 3 groups
3076         text = toSupplementaries("zzzabcdcdefzzz");
3077         pattern = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3078         matcher = pattern.matcher(text);
3079         buffer = new StringBuffer();
3080         try {
3081             matcher.appendReplacement(buffer, toSupplementaries("$1w$2w$3"));
3082             failCount++;
3083         } catch (IllegalStateException expected) {
3084             // Correct result
3085         }
3086         matcher.find();
3087         matcher.appendReplacement(buffer, toSupplementaries("$1w$2w$3"));
3088         if (!toSupplementaries("zzzabwcdwef").contentEquals(buffer)) {
3089             failCount++;
3090         }
3091 
3092         matcher.appendTail(buffer);
3093         if (!toSupplementaries("zzzabwcdwefzzz").contentEquals(buffer)) {
3094             failCount++;
3095         }
3096 
3097         // SB substitution with groups and three matches
3098         // skipping middle match
3099         text = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
3100         pattern = Pattern.compile(toSupplementaries("(ab)(cd*)"));
3101         matcher = pattern.matcher(text);
3102         buffer = new StringBuffer();
3103         try {
3104             matcher.appendReplacement(buffer, "$1");
3105             failCount++;
3106         } catch (IllegalStateException expected) {
3107             // Correct result
3108         }
3109         matcher.find();
3110         matcher.appendReplacement(buffer, "$1");
3111         if (!toSupplementaries("zzzab").contentEquals(buffer)) {
3112             failCount++;
3113         }
3114 
3115         matcher.find();
3116         matcher.find();
3117         matcher.appendReplacement(buffer, "$2");
3118         if (!toSupplementaries("zzzabzzzabcddzzzcd").contentEquals(buffer)) {
3119             failCount++;
3120         }
3121 
3122         matcher.appendTail(buffer);
3123         if (!toSupplementaries("zzzabzzzabcddzzzcdzzz").contentEquals(buffer)) {
3124             failCount++;
3125         }
3126 
3127         // Check to make sure escaped $ is ignored
3128         text = toSupplementaries("zzzabcdcdefzzz");
3129         pattern = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3130         matcher = pattern.matcher(text);
3131         buffer = new StringBuffer();
3132         matcher.find();
3133         matcher.appendReplacement(buffer, toSupplementaries("$1w\\$2w$3"));
3134         if (!toSupplementaries("zzzabw$2wef").contentEquals(buffer)) {
3135             failCount++;
3136         }
3137 
3138         matcher.appendTail(buffer);
3139         if (!toSupplementaries("zzzabw$2wefzzz").contentEquals(buffer)) {
3140             failCount++;
3141         }
3142 
3143         // Check to make sure a reference to nonexistent group causes error
3144         text = toSupplementaries("zzzabcdcdefzzz");
3145         pattern = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3146         matcher = pattern.matcher(text);
3147         buffer = new StringBuffer();
3148         matcher.find();
3149         try {
3150             matcher.appendReplacement(buffer, toSupplementaries("$1w$5w$3"));
3151             failCount++;
3152         } catch (IndexOutOfBoundsException expected) {
3153             // Correct result
3154         }
3155 
3156         // Check double digit group references
3157         text = toSupplementaries("zzz123456789101112zzz");
3158         pattern = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3159         matcher = pattern.matcher(text);
3160         buffer = new StringBuffer();
3161         matcher.find();
3162         matcher.appendReplacement(buffer, toSupplementaries("$1w$11w$3"));
3163         if (!toSupplementaries("zzz1w11w3").contentEquals(buffer)) {
3164             failCount++;
3165         }
3166 
3167         // Check to make sure it backs off $15 to $1 if only three groups
3168         text = toSupplementaries("zzzabcdcdefzzz");
3169         pattern = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3170         matcher = pattern.matcher(text);
3171         buffer = new StringBuffer();
3172         matcher.find();
3173         matcher.appendReplacement(buffer, toSupplementaries("$1w$15w$3"));
3174         if (!toSupplementaries("zzzabwab5wef").contentEquals(buffer)) {
3175             failCount++;
3176         }
3177 
3178         // Check nothing has been appended into the output buffer if
3179         // the replacement string triggers IllegalArgumentException.
3180         pattern = Pattern.compile("(abc)");
3181         matcher = pattern.matcher("abcd");
3182         buffer = new StringBuffer();
3183         matcher.find();
3184         try {
3185             matcher.appendReplacement(buffer, "xyz$g");
3186             failCount++;
3187         } catch (IllegalArgumentException expected) {
3188             if (buffer.length() != 0) {
3189                 failCount++;
3190             }
3191         }
3192 
3193         report("SB Substitution");
3194     }
3157 
3158     /**
3159      * Tests the usage of Matcher.appendReplacement() with literal
3160      * and group substitutions.
3161      */
3162     private static void stringbuilderSubstitute() throws Exception {
3163         // SB substitution with literal
3164         String blah = "zzzblahzzz";
3165         Pattern p = Pattern.compile("blah");
3166         Matcher m = p.matcher(blah);
3167         StringBuilder result = new StringBuilder();
3168         try {
3169             m.appendReplacement(result, "blech");
3170             failCount++;
3171         } catch (IllegalStateException e) {
3172         }
3173         m.find();
3174         m.appendReplacement(result, "blech");
3175         if (!result.toString().equals("zzzblech"))
3176             failCount++;
3177 
3178         m.appendTail(result);
3179         if (!result.toString().equals("zzzblechzzz"))
3180             failCount++;
3181 
3182         // SB substitution with groups
3183         blah = "zzzabcdzzz";
3184         p = Pattern.compile("(ab)(cd)*");
3185         m = p.matcher(blah);
3186         result = new StringBuilder();
3187         try {
3188             m.appendReplacement(result, "$1");
3189             failCount++;
3190         } catch (IllegalStateException e) {
3191         }
3192         m.find();
3193         m.appendReplacement(result, "$1");
3194         if (!result.toString().equals("zzzab"))
3195             failCount++;
3196 
3197         m.appendTail(result);
3198         if (!result.toString().equals("zzzabzzz"))
3199             failCount++;
3200 
3201         // SB substitution with 3 groups
3202         blah = "zzzabcdcdefzzz";
3203         p = Pattern.compile("(ab)(cd)*(ef)");
3204         m = p.matcher(blah);
3205         result = new StringBuilder();
3206         try {
3207             m.appendReplacement(result, "$1w$2w$3");
3208             failCount++;
3209         } catch (IllegalStateException e) {
3210         }
3211         m.find();
3212         m.appendReplacement(result, "$1w$2w$3");
3213         if (!result.toString().equals("zzzabwcdwef"))
3214             failCount++;
3215 
3216         m.appendTail(result);
3217         if (!result.toString().equals("zzzabwcdwefzzz"))
3218             failCount++;
3219 
3220         // SB substitution with groups and three matches
3221         // skipping middle match
3222         blah = "zzzabcdzzzabcddzzzabcdzzz";
3223         p = Pattern.compile("(ab)(cd*)");
3224         m = p.matcher(blah);
3225         result = new StringBuilder();
3226         try {
3227             m.appendReplacement(result, "$1");
3228             failCount++;
3229         } catch (IllegalStateException e) {
3230         }
3231         m.find();
3232         m.appendReplacement(result, "$1");
3233         if (!result.toString().equals("zzzab"))
3234             failCount++;
3235 
3236         m.find();
3237         m.find();
3238         m.appendReplacement(result, "$2");
3239         if (!result.toString().equals("zzzabzzzabcddzzzcd"))
3240             failCount++;
3241 
3242         m.appendTail(result);
3243         if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
3244             failCount++;
3245 
3246         // Check to make sure escaped $ is ignored
3247         blah = "zzzabcdcdefzzz";
3248         p = Pattern.compile("(ab)(cd)*(ef)");
3249         m = p.matcher(blah);
3250         result = new StringBuilder();
3251         m.find();
3252         m.appendReplacement(result, "$1w\\$2w$3");
3253         if (!result.toString().equals("zzzabw$2wef"))
3254             failCount++;
3255 
3256         m.appendTail(result);
3257         if (!result.toString().equals("zzzabw$2wefzzz"))
3258             failCount++;
3259 
3260         // Check to make sure a reference to nonexistent group causes error
3261         blah = "zzzabcdcdefzzz";
3262         p = Pattern.compile("(ab)(cd)*(ef)");
3263         m = p.matcher(blah);
3264         result = new StringBuilder();
3265         m.find();
3266         try {
3267             m.appendReplacement(result, "$1w$5w$3");
3268             failCount++;
3269         } catch (IndexOutOfBoundsException ioobe) {
3270             // Correct result
3271         }
3272 
3273         // Check double digit group references
3274         blah = "zzz123456789101112zzz";
3275         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3276         m = p.matcher(blah);
3277         result = new StringBuilder();
3278         m.find();
3279         m.appendReplacement(result, "$1w$11w$3");
3280         if (!result.toString().equals("zzz1w11w3"))
3281             failCount++;
3282 
3283         // Check to make sure it backs off $15 to $1 if only three groups
3284         blah = "zzzabcdcdefzzz";
3285         p = Pattern.compile("(ab)(cd)*(ef)");
3286         m = p.matcher(blah);
3287         result = new StringBuilder();
3288         m.find();
3289         m.appendReplacement(result, "$1w$15w$3");
3290         if (!result.toString().equals("zzzabwab5wef"))
3291             failCount++;
3292 
3293 
3294         // Supplementary character test
3295         // SB substitution with literal
3296         blah = toSupplementaries("zzzblahzzz");
3297         p = Pattern.compile(toSupplementaries("blah"));
3298         m = p.matcher(blah);
3299         result = new StringBuilder();
3300         try {
3301             m.appendReplacement(result, toSupplementaries("blech"));
3302             failCount++;
3303         } catch (IllegalStateException e) {
3304         }
3305         m.find();
3306         m.appendReplacement(result, toSupplementaries("blech"));
3307         if (!result.toString().equals(toSupplementaries("zzzblech")))
3308             failCount++;
3309         m.appendTail(result);
3310         if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
3311             failCount++;
3312 
3313         // SB substitution with groups
3314         blah = toSupplementaries("zzzabcdzzz");
3315         p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
3316         m = p.matcher(blah);
3317         result = new StringBuilder();
3318         try {
3319             m.appendReplacement(result, "$1");
3320             failCount++;
3321         } catch (IllegalStateException e) {
3322         }
3323         m.find();
3324         m.appendReplacement(result, "$1");
3325         if (!result.toString().equals(toSupplementaries("zzzab")))
3326             failCount++;
3327 
3328         m.appendTail(result);
3329         if (!result.toString().equals(toSupplementaries("zzzabzzz")))
3330             failCount++;
3331 
3332         // SB substitution with 3 groups
3333         blah = toSupplementaries("zzzabcdcdefzzz");
3334         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3335         m = p.matcher(blah);
3336         result = new StringBuilder();
3337         try {
3338             m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3339             failCount++;
3340         } catch (IllegalStateException e) {
3341         }
3342         m.find();
3343         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3344         if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
3345             failCount++;
3346 
3347         m.appendTail(result);
3348         if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
3349             failCount++;
3350 
3351         // SB substitution with groups and three matches
3352         // skipping middle match
3353         blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
3354         p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
3355         m = p.matcher(blah);
3356         result = new StringBuilder();
3357         try {
3358             m.appendReplacement(result, "$1");
3359             failCount++;
3360         } catch (IllegalStateException e) {
3361         }
3362         m.find();
3363         m.appendReplacement(result, "$1");
3364         if (!result.toString().equals(toSupplementaries("zzzab")))
3365             failCount++;
3366 
3367         m.find();
3368         m.find();
3369         m.appendReplacement(result, "$2");
3370         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
3371             failCount++;
3372 
3373         m.appendTail(result);
3374         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
3375             failCount++;
3376 
3377         // Check to make sure escaped $ is ignored
3378         blah = toSupplementaries("zzzabcdcdefzzz");
3379         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3380         m = p.matcher(blah);
3381         result = new StringBuilder();
3382         m.find();
3383         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
3384         if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
3385             failCount++;
3386 
3387         m.appendTail(result);
3388         if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
3389             failCount++;
3390 
3391         // Check to make sure a reference to nonexistent group causes error
3392         blah = toSupplementaries("zzzabcdcdefzzz");
3393         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3394         m = p.matcher(blah);
3395         result = new StringBuilder();
3396         m.find();
3397         try {
3398             m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
3399             failCount++;
3400         } catch (IndexOutOfBoundsException ioobe) {
3401             // Correct result
3402         }
3403         // Check double digit group references
3404         blah = toSupplementaries("zzz123456789101112zzz");
3405         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3406         m = p.matcher(blah);
3407         result = new StringBuilder();
3408         m.find();
3409         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
3410         if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
3411             failCount++;
3412 
3413         // Check to make sure it backs off $15 to $1 if only three groups
3414         blah = toSupplementaries("zzzabcdcdefzzz");
3415         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3416         m = p.matcher(blah);
3417         result = new StringBuilder();
3418         m.find();
3419         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3420         if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
3421             failCount++;
3422         // Check nothing has been appended into the output buffer if
3423         // the replacement string triggers IllegalArgumentException.
3424         p = Pattern.compile("(abc)");
3425         m = p.matcher("abcd");
3426         result = new StringBuilder();
3427         m.find();
3428         try {
3429             m.appendReplacement(result, ("xyz$g"));
3430             failCount++;
3431         } catch (IllegalArgumentException iae) {
3432             if (result.length() != 0)
3433                 failCount++;
3434         }
3435         report("SB Substitution 2");
3436     }
3437 
3438     /*
3439      * 5 groups of characters are created to make a substitution string.
3440      * A base string will be created including random lead chars, the
3441      * substitution string, and random trailing chars.
3442      * A pattern containing the 5 groups is searched for and replaced with:
3443      * random group + random string + random group.
3444      * The results are checked for correctness.
3445      */
3446     private static void substitutionBasher() {
3447         for (int runs = 0; runs<1000; runs++) {
3448             // Create a base string to work in
3449             int leadingChars = generator.nextInt(10);
3450             StringBuffer baseBuffer = new StringBuffer(100);
3451             String leadingString = getRandomAlphaString(leadingChars);
3452             baseBuffer.append(leadingString);
3453 
3454             // Create 5 groups of random number of random chars
3455             // Create the string to substitute
3456             // Create the pattern string to search for
3457             StringBuffer bufferToSub = new StringBuffer(25);
3458             StringBuffer bufferToPat = new StringBuffer(50);
3459             String[] groups = new String[5];
3460             for(int i=0; i<5; i++) {
3461                 int aGroupSize = generator.nextInt(5)+1;
3462                 groups[i] = getRandomAlphaString(aGroupSize);
3463                 bufferToSub.append(groups[i]);
3464                 bufferToPat.append('(');
3465                 bufferToPat.append(groups[i]);
3466                 bufferToPat.append(')');
3467             }
3468             String stringToSub = bufferToSub.toString();
3469             String pattern = bufferToPat.toString();
3470 
3471             // Place sub string into working string at random index
3472             baseBuffer.append(stringToSub);
3473 
3474             // Append random chars to end
3475             int trailingChars = generator.nextInt(10);
3476             String trailingString = getRandomAlphaString(trailingChars);
3477             baseBuffer.append(trailingString);
3478             String baseString = baseBuffer.toString();
3479 
3480             // Create test pattern and matcher
3481             Pattern p = Pattern.compile(pattern);
3482             Matcher m = p.matcher(baseString);
3483 
3484             // Reject candidate if pattern happens to start early
3485             m.find();
3486             if (m.start() < leadingChars)
3487                 continue;
3488 
3489             // Reject candidate if more than one match
3490             if (m.find())
3491                 continue;
3492 
3493             // Construct a replacement string with :
3494             // random group + random string + random group
3495             StringBuffer bufferToRep = new StringBuffer();
3496             int groupIndex1 = generator.nextInt(5);
3497             bufferToRep.append("$" + (groupIndex1 + 1));
3498             String randomMidString = getRandomAlphaString(5);
3499             bufferToRep.append(randomMidString);
3500             int groupIndex2 = generator.nextInt(5);
3501             bufferToRep.append("$" + (groupIndex2 + 1));
3502             String replacement = bufferToRep.toString();
3503 
3504             // Do the replacement
3505             String result = m.replaceAll(replacement);
3506 
3507             // Construct expected result
3508             StringBuffer bufferToRes = new StringBuffer();
3509             bufferToRes.append(leadingString);
3510             bufferToRes.append(groups[groupIndex1]);
3511             bufferToRes.append(randomMidString);
3512             bufferToRes.append(groups[groupIndex2]);
3513             bufferToRes.append(trailingString);
3514             String expectedResult = bufferToRes.toString();
3515 
3516             // Check results
3517             if (!result.equals(expectedResult))
3518                 failCount++;
3519         }
3520 
3521         report("Substitution Basher");
3522     }
3523 
3524     /*
3525      * 5 groups of characters are created to make a substitution string.
3526      * A base string will be created including random lead chars, the
3527      * substitution string, and random trailing chars.
3528      * A pattern containing the 5 groups is searched for and replaced with:
3529      * random group + random string + random group.
3530      * The results are checked for correctness.
3531      */
3532     private static void substitutionBasher2() {
3533         for (int runs = 0; runs<1000; runs++) {
3534             // Create a base string to work in
3535             int leadingChars = generator.nextInt(10);
3536             StringBuilder baseBuffer = new StringBuilder(100);
3537             String leadingString = getRandomAlphaString(leadingChars);
3538             baseBuffer.append(leadingString);
3539 
3540             // Create 5 groups of random number of random chars
3541             // Create the string to substitute
3542             // Create the pattern string to search for
3543             StringBuilder bufferToSub = new StringBuilder(25);
3544             StringBuilder bufferToPat = new StringBuilder(50);
3545             String[] groups = new String[5];
3546             for(int i=0; i<5; i++) {
3547                 int aGroupSize = generator.nextInt(5)+1;
3548                 groups[i] = getRandomAlphaString(aGroupSize);
3549                 bufferToSub.append(groups[i]);
3550                 bufferToPat.append('(');
3551                 bufferToPat.append(groups[i]);
3552                 bufferToPat.append(')');
3553             }
3554             String stringToSub = bufferToSub.toString();
3555             String pattern = bufferToPat.toString();
3556 
3557             // Place sub string into working string at random index
3558             baseBuffer.append(stringToSub);
3559 
3560             // Append random chars to end
3561             int trailingChars = generator.nextInt(10);
3562             String trailingString = getRandomAlphaString(trailingChars);
3563             baseBuffer.append(trailingString);
3564             String baseString = baseBuffer.toString();
3565 
3566             // Create test pattern and matcher
3567             Pattern p = Pattern.compile(pattern);
3568             Matcher m = p.matcher(baseString);
3569 
3570             // Reject candidate if pattern happens to start early
3571             m.find();
3572             if (m.start() < leadingChars)
3573                 continue;
3574 
3575             // Reject candidate if more than one match
3576             if (m.find())
3577                 continue;
3578 
3579             // Construct a replacement string with :
3580             // random group + random string + random group
3581             StringBuilder bufferToRep = new StringBuilder();
3582             int groupIndex1 = generator.nextInt(5);
3583             bufferToRep.append("$" + (groupIndex1 + 1));
3584             String randomMidString = getRandomAlphaString(5);
3585             bufferToRep.append(randomMidString);
3586             int groupIndex2 = generator.nextInt(5);
3587             bufferToRep.append("$" + (groupIndex2 + 1));
3588             String replacement = bufferToRep.toString();
3589 
3590             // Do the replacement
3591             String result = m.replaceAll(replacement);
3592 
3593             // Construct expected result
3594             StringBuilder bufferToRes = new StringBuilder();
3595             bufferToRes.append(leadingString);
3596             bufferToRes.append(groups[groupIndex1]);
3597             bufferToRes.append(randomMidString);
3598             bufferToRes.append(groups[groupIndex2]);
3599             bufferToRes.append(trailingString);
3600             String expectedResult = bufferToRes.toString();
3601 
3602             // Check results
3603             if (!result.equals(expectedResult)) {
3604                 failCount++;
3605             }
3606         }
3607 
3608         report("Substitution Basher 2");
3609     }
3610 
3611     /**
3612      * Checks the handling of some escape sequences that the Pattern
3613      * class should process instead of the java compiler. These are
3614      * not in the file because the escapes should be be processed
3615      * by the Pattern class when the regex is compiled.
3616      */
3617     private static void escapes() throws Exception {
3618         Pattern p = Pattern.compile("\\043");
3619         Matcher m = p.matcher("#");
3620         if (!m.find())
3621             failCount++;
3622 
3623         p = Pattern.compile("\\x23");
3624         m = p.matcher("#");
3625         if (!m.find())
3626             failCount++;
3627 
3628         p = Pattern.compile("\\u0023");
3629         m = p.matcher("#");
3630         if (!m.find())
3631             failCount++;
3632 
3633         report("Escape sequences");
3634     }
3635 
3636     /**
3637      * Checks the handling of blank input situations. These
3638      * tests are incompatible with my test file format.
3639      */
3640     private static void blankInput() throws Exception {
3641         Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3642         Matcher m = p.matcher("");
3643         if (m.find())
3644             failCount++;
3645 
3646         p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3647         m = p.matcher("");
3648         if (!m.find())
3649             failCount++;
3650 
3651         p = Pattern.compile("abc");
3652         m = p.matcher("");
3653         if (m.find())
3654             failCount++;
3655 
3656         p = Pattern.compile("a*");
3657         m = p.matcher("");
3658         if (!m.find())
3659             failCount++;
3660 
3661         report("Blank input");
3662     }
3663 
3664     /**
3665      * Tests the Boyer-Moore pattern matching of a character sequence
3666      * on randomly generated patterns.
3667      */
3668     private static void bm() throws Exception {
3669         doBnM('a');
3670         report("Boyer Moore (ASCII)");
3671 
3672         doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3673         report("Boyer Moore (Supplementary)");
3674     }
3675 
3676     private static void doBnM(int baseCharacter) throws Exception {
3677         int achar=0;
3678 
3679         for (int i=0; i<100; i++) {
3680             // Create a short pattern to search for
3681             int patternLength = generator.nextInt(7) + 4;
3682             StringBuffer patternBuffer = new StringBuffer(patternLength);
3683             String pattern;
3684             retry: for (;;) {
3685                 for (int x=0; x<patternLength; x++) {
3686                     int ch = baseCharacter + generator.nextInt(26);
3687                     if (Character.isSupplementaryCodePoint(ch)) {
3688                         patternBuffer.append(Character.toChars(ch));
3689                     } else {
3690                         patternBuffer.append((char)ch);
3691                     }
3692                 }
3693                 pattern = patternBuffer.toString();
3694 
3695                 // Avoid patterns that start and end with the same substring
3696                 // See JDK-6854417
3697                 for (int x=1; x < pattern.length(); x++) {
3698                     if (pattern.startsWith(pattern.substring(x)))
3699                         continue retry;
3700                 }
3701                 break;
3702             }
3703             Pattern p = Pattern.compile(pattern);
3704 
3705             // Create a buffer with random ASCII chars that does
3706             // not match the sample
3707             String toSearch = null;
3708             StringBuffer s = null;
3709             Matcher m = p.matcher("");
3710             do {
3711                 s = new StringBuffer(100);
3712                 for (int x=0; x<100; x++) {
3713                     int ch = baseCharacter + generator.nextInt(26);
3714                     if (Character.isSupplementaryCodePoint(ch)) {
3715                         s.append(Character.toChars(ch));
3716                     } else {
3717                         s.append((char)ch);
3718                     }
3719                 }
3720                 toSearch = s.toString();
3721                 m.reset(toSearch);
3722             } while (m.find());
3723 
3724             // Insert the pattern at a random spot
3725             int insertIndex = generator.nextInt(99);
3726             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3727                 insertIndex++;
3728             s = s.insert(insertIndex, pattern);
3729             toSearch = s.toString();
3730 
3731             // Make sure that the pattern is found
3732             m.reset(toSearch);
3733             if (!m.find())
3734                 failCount++;
3735 
3736             // Make sure that the match text is the pattern
3737             if (!m.group().equals(pattern))
3738                 failCount++;
3739 
3740             // Make sure match occured at insertion point
3741             if (m.start() != insertIndex)
3742                 failCount++;
3743         }
3744     }
3745 
3746     /**
3747      * Tests the matching of slices on randomly generated patterns.
3748      * The Boyer-Moore optimization is not done on these patterns
3749      * because it uses unicode case folding.
3750      */
3751     private static void slice() throws Exception {
3752         doSlice(Character.MAX_VALUE);
3753         report("Slice");
3754 
3755         doSlice(Character.MAX_CODE_POINT);
3756         report("Slice (Supplementary)");
3757     }
3758 
3759     private static void doSlice(int maxCharacter) throws Exception {
3760         Random generator = new Random();
3761         int achar=0;
3762 
3763         for (int i=0; i<100; i++) {
3764             // Create a short pattern to search for
3765             int patternLength = generator.nextInt(7) + 4;
3766             StringBuffer patternBuffer = new StringBuffer(patternLength);
3767             for (int x=0; x<patternLength; x++) {
3768                 int randomChar = 0;
3769                 while (!Character.isLetterOrDigit(randomChar))
3770                     randomChar = generator.nextInt(maxCharacter);
3771                 if (Character.isSupplementaryCodePoint(randomChar)) {
3772                     patternBuffer.append(Character.toChars(randomChar));
3773                 } else {
3774                     patternBuffer.append((char) randomChar);
3775                 }
3776             }
3777             String pattern =  patternBuffer.toString();
3778             Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3779 
3780             // Create a buffer with random chars that does not match the sample
3781             String toSearch = null;
3782             StringBuffer s = null;
3783             Matcher m = p.matcher("");
3784             do {
3785                 s = new StringBuffer(100);
3786                 for (int x=0; x<100; x++) {
3787                     int randomChar = 0;
3788                     while (!Character.isLetterOrDigit(randomChar))
3789                         randomChar = generator.nextInt(maxCharacter);
3790                     if (Character.isSupplementaryCodePoint(randomChar)) {
3791                         s.append(Character.toChars(randomChar));
3792                     } else {
3793                         s.append((char) randomChar);
3794                     }
3795                 }
3796                 toSearch = s.toString();
3797                 m.reset(toSearch);
3798             } while (m.find());
3799 
3800             // Insert the pattern at a random spot
3801             int insertIndex = generator.nextInt(99);
3802             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3803                 insertIndex++;
3804             s = s.insert(insertIndex, pattern);
3805             toSearch = s.toString();
3806 
3807             // Make sure that the pattern is found
3808             m.reset(toSearch);
3809             if (!m.find())
3810                 failCount++;
3811 
3812             // Make sure that the match text is the pattern
3813             if (!m.group().equals(pattern))
3814                 failCount++;
3815 
3816             // Make sure match occured at insertion point
3817             if (m.start() != insertIndex)
3818                 failCount++;
3819         }
3820     }
3821 
3822     private static void explainFailure(String pattern, String data,
3823                                        String expected, String actual) {
3824         System.err.println("----------------------------------------");
3825         System.err.println("Pattern = "+pattern);
3826         System.err.println("Data = "+data);
3827         System.err.println("Expected = " + expected);
3828         System.err.println("Actual   = " + actual);
3829     }
3830 
3831     private static void explainFailure(String pattern, String data,
3832                                        Throwable t) {
3833         System.err.println("----------------------------------------");
3834         System.err.println("Pattern = "+pattern);
3835         System.err.println("Data = "+data);
3836         t.printStackTrace(System.err);
3837     }
3838 
3839     // Testing examples from a file
3840 
3841     /**
3842      * Goes through the file "TestCases.txt" and creates many patterns
3843      * described in the file, matching the patterns against input lines in
3844      * the file, and comparing the results against the correct results
3845      * also found in the file. The file format is described in comments
3846      * at the head of the file.
3847      */
3848     private static void processFile(String fileName) throws Exception {
3849         File testCases = new File(System.getProperty("test.src", "."),
3850                                   fileName);
3851         FileInputStream in = new FileInputStream(testCases);
3852         BufferedReader r = new BufferedReader(new InputStreamReader(in));
3853 
3854         // Process next test case.
3855         String aLine;
3856         while((aLine = r.readLine()) != null) {
3857             // Read a line for pattern
3858             String patternString = grabLine(r);
3859             Pattern p = null;
3860             try {
3861                 p = compileTestPattern(patternString);
3862             } catch (PatternSyntaxException e) {
3863                 String dataString = grabLine(r);
3864                 String expectedResult = grabLine(r);
3865                 if (expectedResult.startsWith("error"))
3866                     continue;
3867                 explainFailure(patternString, dataString, e);
3868                 failCount++;
3869                 continue;
3870             }
3871 
3872             // Read a line for input string
3873             String dataString = grabLine(r);
3874             Matcher m = p.matcher(dataString);
3875             StringBuffer result = new StringBuffer();
3876 
3877             // Check for IllegalStateExceptions before a match
3878             failCount += preMatchInvariants(m);
3879 
3880             boolean found = m.find();
3881 
3882             if (found)
3883                 failCount += postTrueMatchInvariants(m);
3884             else
3885                 failCount += postFalseMatchInvariants(m);
3886 
3887             if (found) {
3888                 result.append("true ");
3889                 result.append(m.group(0) + " ");
3890             } else {
3891                 result.append("false ");
3892             }
3893 
3894             result.append(m.groupCount());
3895 
3896             if (found) {
3897                 for (int i=1; i<m.groupCount()+1; i++)
3898                     if (m.group(i) != null)
3899                         result.append(" " +m.group(i));
3900             }
3901 
3902             // Read a line for the expected result
3903             String expectedResult = grabLine(r);
3904 
3905             if (!result.toString().equals(expectedResult)) {
3906                 explainFailure(patternString, dataString, expectedResult, result.toString());
3907                 failCount++;
3908             }
3909         }
3910 
3911         report(fileName);
3912     }
3913 
3914     private static int preMatchInvariants(Matcher m) {
3915         int failCount = 0;
3916         try {
3917             m.start();
3918             failCount++;
3919         } catch (IllegalStateException ise) {}
3920         try {
3921             m.end();
3922             failCount++;
3923         } catch (IllegalStateException ise) {}
3924         try {
3925             m.group();
3926             failCount++;
3927         } catch (IllegalStateException ise) {}
3928         return failCount;
3929     }
3930 
3931     private static int postFalseMatchInvariants(Matcher m) {
3932         int failCount = 0;
3933         try {
3934             m.group();
3935             failCount++;
3936         } catch (IllegalStateException ise) {}
3937         try {
3938             m.start();
3939             failCount++;
3940         } catch (IllegalStateException ise) {}
3941         try {
3942             m.end();
3943             failCount++;
3944         } catch (IllegalStateException ise) {}
3945         return failCount;
3946     }
3947 
3948     private static int postTrueMatchInvariants(Matcher m) {
3949         int failCount = 0;
3950         //assert(m.start() = m.start(0);
3951         if (m.start() != m.start(0))
3952             failCount++;
3953         //assert(m.end() = m.end(0);
3954         if (m.start() != m.start(0))
3955             failCount++;
3956         //assert(m.group() = m.group(0);
3957         if (!m.group().equals(m.group(0)))
3958             failCount++;
3959         try {
3960             m.group(50);
3961             failCount++;
3962         } catch (IndexOutOfBoundsException ise) {}
3963 
3964         return failCount;
3965     }
3966 
3967     private static Pattern compileTestPattern(String patternString) {
3968         if (!patternString.startsWith("'")) {
3969             return Pattern.compile(patternString);
3970         }
3971         int break1 = patternString.lastIndexOf("'");
3972         String flagString = patternString.substring(
3973                                           break1+1, patternString.length());
3974         patternString = patternString.substring(1, break1);
3975 
3976         if (flagString.equals("i"))
3977             return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3978 
3979         if (flagString.equals("m"))
3980             return Pattern.compile(patternString, Pattern.MULTILINE);
3981 
3982         return Pattern.compile(patternString);
3983     }
3984 
3985     /**
3986      * Reads a line from the input file. Keeps reading lines until a non
3987      * empty non comment line is read. If the line contains a \n then
3988      * these two characters are replaced by a newline char. If a \\uxxxx
3989      * sequence is read then the sequence is replaced by the unicode char.
3990      */
3991     private static String grabLine(BufferedReader r) throws Exception {
3992         int index = 0;
3993         String line = r.readLine();
3994         while (line.startsWith("//") || line.length() < 1)
3995             line = r.readLine();
3996         while ((index = line.indexOf("\\n")) != -1) {
3997             StringBuffer temp = new StringBuffer(line);
3998             temp.replace(index, index+2, "\n");
3999             line = temp.toString();
4000         }
4001         while ((index = line.indexOf("\\u")) != -1) {
4002             StringBuffer temp = new StringBuffer(line);
4003             String value = temp.substring(index+2, index+6);
4004             char aChar = (char)Integer.parseInt(value, 16);
4005             String unicodeChar = "" + aChar;
4006             temp.replace(index, index+6, unicodeChar);
4007             line = temp.toString();
4008         }
4009 
4010         return line;
4011     }
4012 
4013     private static void check(Pattern p, String s, String g, String expected) {
4014         Matcher m = p.matcher(s);
4015         m.find();
4016         if (!m.group(g).equals(expected) ||
4017             s.charAt(m.start(g)) != expected.charAt(0) ||
4018             s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1))
4019             failCount++;
4020     }
4021 
4022     private static void checkReplaceFirst(String p, String s, String r, String expected)
4023     {
4024         if (!expected.equals(Pattern.compile(p)
4025                                     .matcher(s)
4026                                     .replaceFirst(r)))
4027             failCount++;
4028     }
4029 
4030     private static void checkReplaceAll(String p, String s, String r, String expected)
4031     {
4032         if (!expected.equals(Pattern.compile(p)
4033                                     .matcher(s)
4034                                     .replaceAll(r)))
4035             failCount++;
4036     }
4037 
4038     private static void checkExpectedFail(String p) {
4039         try {
4040             Pattern.compile(p);
4041         } catch (PatternSyntaxException pse) {
4042             //pse.printStackTrace();
4043             return;
4044         }
4045         failCount++;
4046     }
4047 
4048     private static void checkExpectedIAE(Matcher m, String g) {
4049         m.find();
4050         try {
4051             m.group(g);
4052         } catch (IllegalArgumentException x) {
4053             //iae.printStackTrace();
4054             try {
4055                 m.start(g);
4056             } catch (IllegalArgumentException xx) {
4057                 try {
4058                     m.start(g);
4059                 } catch (IllegalArgumentException xxx) {
4060                     return;
4061                 }
4062             }
4063         }
4064         failCount++;
4065     }
4066 
4067     private static void checkExpectedNPE(Matcher m) {
4068         m.find();
4069         try {
4070             m.group(null);
4071         } catch (NullPointerException x) {
4072             try {
4073                 m.start(null);
4074             } catch (NullPointerException xx) {
4075                 try {
4076                     m.end(null);
4077                 } catch (NullPointerException xxx) {
4078                     return;
4079                 }
4080             }
4081         }
4082         failCount++;
4083     }
4084 
4085     private static void namedGroupCaptureTest() throws Exception {
4086         check(Pattern.compile("x+(?<gname>y+)z+"),
4087               "xxxyyyzzz",
4088               "gname",
4089               "yyy");
4090 
4091         check(Pattern.compile("x+(?<gname8>y+)z+"),
4092               "xxxyyyzzz",
4093               "gname8",
4094               "yyy");
4095 
4096         //backref
4097         Pattern pattern = Pattern.compile("(a*)bc\\1");
4098         check(pattern, "zzzaabcazzz", true);  // found "abca"
4099 
4100         check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
4101               "zzzaabcaazzz", true);
4102 
4103         check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
4104               "abcdefabc", true);
4105 
4106         check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
4107               "abcdefghijkk", true);
4108 
4109         // Supplementary character tests
4110         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
4111               toSupplementaries("zzzaabcazzz"), true);
4112 
4113         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
4114               toSupplementaries("zzzaabcaazzz"), true);
4115 
4116         check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
4117               toSupplementaries("abcdefabc"), true);
4118 
4119         check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
4120                               "(?<gname>" +
4121                               toSupplementaries("k)") + "\\k<gname>"),
4122               toSupplementaries("abcdefghijkk"), true);
4123 
4124         check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
4125               "xxxyyyzzzyyy",
4126               "gname",
4127               "yyy");
4128 
4129         //replaceFirst/All
4130         checkReplaceFirst("(?<gn>ab)(c*)",
4131                           "abccczzzabcczzzabccc",
4132                           "${gn}",
4133                           "abzzzabcczzzabccc");
4134 
4135         checkReplaceAll("(?<gn>ab)(c*)",
4136                         "abccczzzabcczzzabccc",
4137                         "${gn}",
4138                         "abzzzabzzzab");
4139 
4140 
4141         checkReplaceFirst("(?<gn>ab)(c*)",
4142                           "zzzabccczzzabcczzzabccczzz",
4143                           "${gn}",
4144                           "zzzabzzzabcczzzabccczzz");
4145 
4146         checkReplaceAll("(?<gn>ab)(c*)",
4147                         "zzzabccczzzabcczzzabccczzz",
4148                         "${gn}",
4149                         "zzzabzzzabzzzabzzz");
4150 
4151         checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
4152                           "zzzabccczzzabcczzzabccczzz",
4153                           "${gn2}",
4154                           "zzzccczzzabcczzzabccczzz");
4155 
4156         checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
4157                         "zzzabccczzzabcczzzabccczzz",
4158                         "${gn2}",
4159                         "zzzccczzzcczzzccczzz");
4160 
4161         //toSupplementaries("(ab)(c*)"));
4162         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4163                            ")(?<gn2>" + toSupplementaries("c") + "*)",
4164                           toSupplementaries("abccczzzabcczzzabccc"),
4165                           "${gn1}",
4166                           toSupplementaries("abzzzabcczzzabccc"));
4167 
4168 
4169         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4170                         ")(?<gn2>" + toSupplementaries("c") + "*)",
4171                         toSupplementaries("abccczzzabcczzzabccc"),
4172                         "${gn1}",
4173                         toSupplementaries("abzzzabzzzab"));
4174 
4175         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4176                            ")(?<gn2>" + toSupplementaries("c") + "*)",
4177                           toSupplementaries("abccczzzabcczzzabccc"),
4178                           "${gn2}",
4179                           toSupplementaries("ccczzzabcczzzabccc"));
4180 
4181 
4182         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4183                         ")(?<gn2>" + toSupplementaries("c") + "*)",
4184                         toSupplementaries("abccczzzabcczzzabccc"),
4185                         "${gn2}",
4186                         toSupplementaries("ccczzzcczzzccc"));
4187 
4188         checkReplaceFirst("(?<dog>Dog)AndCat",
4189                           "zzzDogAndCatzzzDogAndCatzzz",
4190                           "${dog}",
4191                           "zzzDogzzzDogAndCatzzz");
4192 
4193 
4194         checkReplaceAll("(?<dog>Dog)AndCat",
4195                           "zzzDogAndCatzzzDogAndCatzzz",
4196                           "${dog}",
4197                           "zzzDogzzzDogzzz");
4198 
4199         // backref in Matcher & String
4200         if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
4201             !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
4202             failCount++;
4203 
4204         // negative
4205         checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
4206         checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
4207         checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
4208         checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
4209         checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
4210         checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
4211                          "gnameX");
4212         checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
4213         report("NamedGroupCapture");
4214     }
4215 
4216     // This is for bug 6919132
4217     private static void nonBmpClassComplementTest() throws Exception {
4218         Pattern p = Pattern.compile("\\P{Lu}");
4219         Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4220 
4221         if (m.find() && m.start() == 1)
4222             failCount++;
4223 
4224         // from a unicode category
4225         p = Pattern.compile("\\P{Lu}");
4226         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4227         if (m.find())
4228             failCount++;
4229         if (!m.hitEnd())
4230             failCount++;
4231 
4232         // block
4233         p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
4234         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4235         if (m.find() && m.start() == 1)
4236             failCount++;
4237 
4238         p = Pattern.compile("\\P{sc=GRANTHA}");
4239         m = p.matcher(new String(new int[] {0x11350}, 0, 1));
4240         if (m.find() && m.start() == 1)
4241             failCount++;
4242 
4243         report("NonBmpClassComplement");
4244     }
4245 
4246     private static void unicodePropertiesTest() throws Exception {
4247         // different forms
4248         if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
4249             !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
4250             !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
4251             !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
4252             !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
4253             !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
4254             !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
4255             !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
4256             !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
4257             !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
4258             failCount++;
4259 
4260         Matcher common  = Pattern.compile("\\p{script=Common}").matcher("");
4261         Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
4262         Matcher lastSM  = common;
4263         Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
4264 
4265         Matcher latin  = Pattern.compile("\\p{block=basic_latin}").matcher("");
4266         Matcher greek  = Pattern.compile("\\p{InGreek}").matcher("");
4267         Matcher lastBM = latin;
4268         Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
4269 
4270         for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
4271             if (cp >= 0x30000 && (cp & 0x70) == 0){
4272                 continue;  // only pick couple code points, they are the same
4273             }
4274 
4275             // Unicode Script
4276             Character.UnicodeScript script = Character.UnicodeScript.of(cp);
4277             Matcher m;
4278             String str = new String(Character.toChars(cp));
4279             if (script == lastScript) {
4280                  m = lastSM;
4281                  m.reset(str);
4282             } else {
4283                  m  = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
4284             }
4285             if (!m.matches()) {
4286                 failCount++;
4287             }
4288             Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
4289             other.reset(str);
4290             if (other.matches()) {
4291                 failCount++;
4292             }
4293             lastSM = m;
4294             lastScript = script;
4295 
4296             // Unicode Block
4297             Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
4298             if (block == null) {
4299                 //System.out.printf("Not a Block: cp=%x%n", cp);
4300                 continue;
4301             }
4302             if (block == lastBlock) {
4303                  m = lastBM;
4304                  m.reset(str);
4305             } else {
4306                  m  = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
4307             }
4308             if (!m.matches()) {
4309                 failCount++;
4310             }
4311             other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
4312             other.reset(str);
4313             if (other.matches()) {
4314                 failCount++;
4315             }
4316             lastBM = m;
4317             lastBlock = block;
4318         }
4319         report("unicodeProperties");
4320     }
4321 
4322     private static void unicodeHexNotationTest() throws Exception {
4323 
4324         // negative
4325         checkExpectedFail("\\x{-23}");
4326         checkExpectedFail("\\x{110000}");
4327         checkExpectedFail("\\x{}");
4328         checkExpectedFail("\\x{AB[ef]");
4329 
4330         // codepoint
4331         check("^\\x{1033c}$",              "\uD800\uDF3C", true);
4332         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
4333         check("^\\x{D800}\\x{DF3c}+$",     "\uD800\uDF3C", false);
4334         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
4335 
4336         // in class
4337         check("^[\\x{D800}\\x{DF3c}]+$",   "\uD800\uDF3C", false);
4338         check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
4339         check("^[\\x{D800}\\x{DF3C}]+$",   "\uD800\uDF3C", false);
4340         check("^[\\x{DF3C}\\x{D800}]+$",   "\uD800\uDF3C", false);
4341         check("^[\\x{D800}\\x{DF3C}]+$",   "\uDF3C\uD800", true);
4342         check("^[\\x{DF3C}\\x{D800}]+$",   "\uDF3C\uD800", true);
4343 
4344         for (int cp = 0; cp <= 0x10FFFF; cp++) {
4345              String s = "A" + new String(Character.toChars(cp)) + "B";
4346              String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
4347                                              : String.format("\\u%04x\\u%04x",
4348                                                (int) Character.toChars(cp)[0],
4349                                                (int) Character.toChars(cp)[1]);
4350              String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
4351              if (!Pattern.matches("A" + hexUTF16 + "B", s))
4352                  failCount++;
4353              if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
4354                  failCount++;
4355              if (!Pattern.matches("A" + hexCodePoint + "B", s))
4356                  failCount++;
4357              if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
4358                  failCount++;
4359          }
4360          report("unicodeHexNotation");
4361     }
4362 
4363     private static void unicodeClassesTest() throws Exception {
4364 
4365         Matcher lower  = Pattern.compile("\\p{Lower}").matcher("");
4366         Matcher upper  = Pattern.compile("\\p{Upper}").matcher("");
4367         Matcher ASCII  = Pattern.compile("\\p{ASCII}").matcher("");
4368         Matcher alpha  = Pattern.compile("\\p{Alpha}").matcher("");
4369         Matcher digit  = Pattern.compile("\\p{Digit}").matcher("");
4370         Matcher alnum  = Pattern.compile("\\p{Alnum}").matcher("");
4371         Matcher punct  = Pattern.compile("\\p{Punct}").matcher("");
4372         Matcher graph  = Pattern.compile("\\p{Graph}").matcher("");
4373         Matcher print  = Pattern.compile("\\p{Print}").matcher("");
4374         Matcher blank  = Pattern.compile("\\p{Blank}").matcher("");
4375         Matcher cntrl  = Pattern.compile("\\p{Cntrl}").matcher("");
4376         Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
4377         Matcher space  = Pattern.compile("\\p{Space}").matcher("");
4378         Matcher bound  = Pattern.compile("\\b").matcher("");
4379         Matcher word   = Pattern.compile("\\w++").matcher("");
4380         // UNICODE_CHARACTER_CLASS
4381         Matcher lowerU  = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4382         Matcher upperU  = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4383         Matcher ASCIIU  = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4384         Matcher alphaU  = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4385         Matcher digitU  = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4386         Matcher alnumU  = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4387         Matcher punctU  = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4388         Matcher graphU  = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4389         Matcher printU  = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4390         Matcher blankU  = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4391         Matcher cntrlU  = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4392         Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4393         Matcher spaceU  = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4394         Matcher boundU  = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4395         Matcher wordU   = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4396         // embedded flag (?U)
4397         Matcher lowerEU  = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4398         Matcher graphEU  = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4399         Matcher wordEU   = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4400 
4401         Matcher bwb    = Pattern.compile("\\b\\w\\b").matcher("");
4402         Matcher bwbU   = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4403         Matcher bwbEU  = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4404         // properties
4405         Matcher lowerP  = Pattern.compile("\\p{IsLowerCase}").matcher("");
4406         Matcher upperP  = Pattern.compile("\\p{IsUpperCase}").matcher("");
4407         Matcher titleP  = Pattern.compile("\\p{IsTitleCase}").matcher("");
4408         Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
4409         Matcher alphaP  = Pattern.compile("\\p{IsAlphabetic}").matcher("");
4410         Matcher ideogP  = Pattern.compile("\\p{IsIdeographic}").matcher("");
4411         Matcher cntrlP  = Pattern.compile("\\p{IsControl}").matcher("");
4412         Matcher spaceP  = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
4413         Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
4414         Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
4415         Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
4416         // javaMethod
4417         Matcher lowerJ  = Pattern.compile("\\p{javaLowerCase}").matcher("");
4418         Matcher upperJ  = Pattern.compile("\\p{javaUpperCase}").matcher("");
4419         Matcher alphaJ  = Pattern.compile("\\p{javaAlphabetic}").matcher("");
4420         Matcher ideogJ  = Pattern.compile("\\p{javaIdeographic}").matcher("");
4421         // GC/C
4422         Matcher gcC  = Pattern.compile("\\p{C}").matcher("");
4423 
4424         for (int cp = 1; cp < 0x30000; cp++) {
4425             String str = new String(Character.toChars(cp));
4426             int type = Character.getType(cp);
4427             if (// lower
4428                 POSIX_ASCII.isLower(cp)   != lower.reset(str).matches()  ||
4429                 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
4430                 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
4431                 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
4432                 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
4433                 // upper
4434                 POSIX_ASCII.isUpper(cp)   != upper.reset(str).matches()  ||
4435                 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
4436                 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
4437                 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
4438                 // alpha
4439                 POSIX_ASCII.isAlpha(cp)   != alpha.reset(str).matches()  ||
4440                 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
4441                 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
4442                 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
4443                 // digit
4444                 POSIX_ASCII.isDigit(cp)   != digit.reset(str).matches()  ||
4445                 Character.isDigit(cp)     != digitU.reset(str).matches() ||
4446                 // alnum
4447                 POSIX_ASCII.isAlnum(cp)   != alnum.reset(str).matches()  ||
4448                 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
4449                 // punct
4450                 POSIX_ASCII.isPunct(cp)   != punct.reset(str).matches()  ||
4451                 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
4452                 // graph
4453                 POSIX_ASCII.isGraph(cp)   != graph.reset(str).matches()  ||
4454                 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
4455                 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
4456                 // blank
4457                 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
4458                                           != blank.reset(str).matches()  ||
4459                 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
4460                 // print
4461                 POSIX_ASCII.isPrint(cp)   != print.reset(str).matches()  ||
4462                 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
4463                 // cntrl
4464                 POSIX_ASCII.isCntrl(cp)   != cntrl.reset(str).matches()  ||
4465                 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
4466                 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
4467                 // hexdigit
4468                 POSIX_ASCII.isHexDigit(cp)   != xdigit.reset(str).matches()  ||
4469                 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
4470                 // space
4471                 POSIX_ASCII.isSpace(cp)   != space.reset(str).matches()  ||
4472                 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
4473                 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
4474                 // word
4475                 POSIX_ASCII.isWord(cp)   != word.reset(str).matches()  ||
4476                 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
4477                 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
4478                 // bwordb
4479                 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
4480                 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
4481                 // properties
4482                 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
4483                 Character.isLetter(cp)    != letterP.reset(str).matches()||
4484                 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
4485                 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
4486                 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
4487                 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
4488                 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() ||
4489                 // gc_C
4490                 (Character.CONTROL == type || Character.FORMAT == type ||
4491                  Character.PRIVATE_USE == type || Character.SURROGATE == type ||
4492                  Character.UNASSIGNED == type)
4493                 != gcC.reset(str).matches()) {
4494                 failCount++;
4495             }
4496         }
4497 
4498         // bounds/word align
4499         twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
4500         if (!bwbU.reset("\u0180sherman\u0400").matches())
4501             failCount++;
4502         twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
4503         if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
4504             failCount++;
4505         twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
4506         if (!bwbU.reset("\u0724\u0739\u0724").matches())
4507             failCount++;
4508         if (!bwbEU.reset("\u0724\u0739\u0724").matches())
4509             failCount++;
4510         report("unicodePredefinedClasses");
4511     }
4512 
4513     private static void unicodeCharacterNameTest() throws Exception {
4514 
4515         for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) {
4516             if (!Character.isValidCodePoint(cp) ||
4517                 Character.getType(cp) == Character.UNASSIGNED)
4518                 continue;
4519             String str = new String(Character.toChars(cp));
4520             // single
4521             String p = "\\N{" + Character.getName(cp) + "}";
4522             if (!Pattern.compile(p).matcher(str).matches()) {
4523                 failCount++;
4524             }
4525             // class[c]
4526             p = "[\\N{" + Character.getName(cp) + "}]";
4527             if (!Pattern.compile(p).matcher(str).matches()) {
4528                 failCount++;
4529             }
4530         }
4531 
4532         // range
4533         for (int i = 0; i < 10; i++) {
4534             int start = generator.nextInt(20);
4535             int end = start + generator.nextInt(200);
4536             String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]";
4537             String str;
4538             for (int cp = start; cp < end; cp++) {
4539                 str = new String(Character.toChars(cp));
4540                 if (!Pattern.compile(p).matcher(str).matches()) {
4541                     failCount++;
4542                 }
4543             }
4544             str = new String(Character.toChars(end + 10));
4545             if (Pattern.compile(p).matcher(str).matches()) {
4546                 failCount++;
4547             }
4548         }
4549 
4550         // slice
4551         for (int i = 0; i < 10; i++) {
4552             int n = generator.nextInt(256);
4553             int[] buf = new int[n];
4554             StringBuffer sb = new StringBuffer(1024);
4555             for (int j = 0; j < n; j++) {
4556                 int cp = generator.nextInt(1000);
4557                 if (!Character.isValidCodePoint(cp) ||
4558                     Character.getType(cp) == Character.UNASSIGNED)
4559                     cp = 0x4e00;    // just use 4e00
4560                 sb.append("\\N{" + Character.getName(cp) + "}");
4561                 buf[j] = cp;
4562             }
4563             String p = sb.toString();
4564             String str = new String(buf, 0, buf.length);
4565             if (!Pattern.compile(p).matcher(str).matches()) {
4566                 failCount++;
4567             }
4568         }
4569         report("unicodeCharacterName");
4570     }
4571 
4572     private static void horizontalAndVerticalWSTest() throws Exception {
4573         String hws = new String (new char[] {
4574                                      0x09, 0x20, 0xa0, 0x1680, 0x180e,
4575                                      0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
4576                                      0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
4577                                      0x202f, 0x205f, 0x3000 });
4578         String vws = new String (new char[] {
4579                                      0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
4580         if (!Pattern.compile("\\h+").matcher(hws).matches() ||
4581             !Pattern.compile("[\\h]+").matcher(hws).matches())
4582             failCount++;
4583         if (Pattern.compile("\\H").matcher(hws).find() ||
4584             Pattern.compile("[\\H]").matcher(hws).find())
4585             failCount++;
4586         if (!Pattern.compile("\\v+").matcher(vws).matches() ||
4587             !Pattern.compile("[\\v]+").matcher(vws).matches())
4588             failCount++;
4589         if (Pattern.compile("\\V").matcher(vws).find() ||
4590             Pattern.compile("[\\V]").matcher(vws).find())
4591             failCount++;
4592         String prefix = "abcd";
4593         String suffix = "efgh";
4594         String ng = "A";
4595         for (int i = 0; i < hws.length(); i++) {
4596             String c = String.valueOf(hws.charAt(i));
4597             Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
4598             if (!m.find() || !c.equals(m.group()))
4599                 failCount++;
4600             m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
4601             if (!m.find() || !c.equals(m.group()))
4602                 failCount++;
4603 
4604             m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
4605             if (!m.find() || !ng.equals(m.group()))
4606                 failCount++;
4607             m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
4608             if (!m.find() || !ng.equals(m.group()))
4609                 failCount++;
4610         }
4611         for (int i = 0; i < vws.length(); i++) {
4612             String c = String.valueOf(vws.charAt(i));
4613             Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
4614             if (!m.find() || !c.equals(m.group()))
4615                 failCount++;
4616             m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
4617             if (!m.find() || !c.equals(m.group()))
4618                 failCount++;
4619 
4620             m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
4621             if (!m.find() || !ng.equals(m.group()))
4622                 failCount++;
4623             m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
4624             if (!m.find() || !ng.equals(m.group()))
4625                 failCount++;
4626         }
4627         // \v in range is interpreted as 0x0B. This is the undocumented behavior
4628         if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
4629             failCount++;
4630         report("horizontalAndVerticalWSTest");
4631     }
4632 
4633     private static void linebreakTest() throws Exception {
4634         String linebreaks = new String (new char[] {
4635             0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
4636         String crnl = "\r\n";
4637         if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() &&
4638               Pattern.compile("\\R").matcher(crnl).matches() &&
4639               Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() &&
4640               Pattern.compile("\\Rabc").matcher("\rabc").matches() &&
4641               Pattern.compile("\\R\\R").matcher(crnl).matches() &&  // backtracking
4642               Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking
4643               !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029
4644             failCount++;
4645         }
4646         report("linebreakTest");
4647     }
4648 
4649     // #7189363
4650     private static void branchTest() throws Exception {
4651         if (!Pattern.compile("(a)?bc|d").matcher("d").find() ||     // greedy
4652             !Pattern.compile("(a)+bc|d").matcher("d").find() ||
4653             !Pattern.compile("(a)*bc|d").matcher("d").find() ||
4654             !Pattern.compile("(a)??bc|d").matcher("d").find() ||    // reluctant
4655             !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
4656             !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
4657             !Pattern.compile("(a)?+bc|d").matcher("d").find() ||    // possessive
4658             !Pattern.compile("(a)++bc|d").matcher("d").find() ||
4659             !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
4660             !Pattern.compile("(a)?bc|d").matcher("d").matches() ||  // greedy
4661             !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
4662             !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
4663             !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
4664             !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
4665             !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
4666             !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
4667             !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
4668             !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
4669             !Pattern.compile("(a)?bc|de").matcher("de").find() ||   // others
4670             !Pattern.compile("(a)??bc|de").matcher("de").find() ||
4671             !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
4672             !Pattern.compile("(a)??bc|de").matcher("de").matches())
4673             failCount++;
4674         report("branchTest");
4675     }
4676 
4677     // This test is for 8007395
4678     private static void groupCurlyNotFoundSuppTest() throws Exception {
4679         String input = "test this as \ud83d\ude0d";
4680         for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
4681                                           "test(.)*(@[a-zA-Z.]+)",
4682                                           "test([^B])+(@[a-zA-Z.]+)",
4683                                           "test([^B])*(@[a-zA-Z.]+)",
4684                                           "test(\\P{IsControl})+(@[a-zA-Z.]+)",
4685                                           "test(\\P{IsControl})*(@[a-zA-Z.]+)",
4686                                         }) {
4687             Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
4688                                .matcher(input);
4689             try {
4690                 if (m.find()) {
4691                     failCount++;
4692                 }
4693             } catch (Exception x) {
4694                 failCount++;
4695             }
4696         }
4697         report("GroupCurly NotFoundSupp");
4698     }
4699 
4700     // This test is for 8023647
4701     private static void groupCurlyBackoffTest() throws Exception {
4702         if (!"abc1c".matches("(\\w)+1\\1") ||
4703             "abc11".matches("(\\w)+1\\1")) {
4704             failCount++;
4705         }
4706         report("GroupCurly backoff");
4707     }
4708 
4709     // This test is for 8012646
4710     private static void patternAsPredicate() throws Exception {
4711         Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
4712 
4713         if (p.test("")) {
4714             failCount++;
4715         }
4716         if (!p.test("word")) {
4717             failCount++;
4718         }
4719         if (p.test("1234")) {
4720             failCount++;
4721         }
4722         if (!p.test("word1234")) {
4723             failCount++;
4724         }
4725         report("Pattern.asPredicate");
4726     }
4727 
4728     // This test is for 8184692
4729     private static void patternAsMatchPredicate() throws Exception {
4730         Predicate<String> p = Pattern.compile("[a-z]+").asMatchPredicate();
4731 
4732         if (p.test("")) {
4733             failCount++;
4734         }
4735         if (!p.test("word")) {
4736             failCount++;
4737         }
4738         if (p.test("1234word")) {
4739             failCount++;
4740         }
4741         if (p.test("1234")) {
4742             failCount++;
4743         }
4744         report("Pattern.asMatchPredicate");
4745     }
4746 
4747 
4748     // This test is for 8035975
4749     private static void invalidFlags() throws Exception {
4750         for (int flag = 1; flag != 0; flag <<= 1) {
4751             switch (flag) {
4752             case Pattern.CASE_INSENSITIVE:
4753             case Pattern.MULTILINE:
4754             case Pattern.DOTALL:
4755             case Pattern.UNICODE_CASE:
4756             case Pattern.CANON_EQ:
4757             case Pattern.UNIX_LINES:
4758             case Pattern.LITERAL:
4759             case Pattern.UNICODE_CHARACTER_CLASS:
4760             case Pattern.COMMENTS:
4761                 // valid flag, continue
4762                 break;
4763             default:
4764                 try {
4765                     Pattern.compile(".", flag);
4766                     failCount++;
4767                 } catch (IllegalArgumentException expected) {
4768                 }
4769             }
4770         }
4771         report("Invalid compile flags");
4772     }
4773 
4774     // This test is for 8158482
4775     private static void embeddedFlags() throws Exception {
4776         try {
4777             Pattern.compile("(?i).(?-i).");
4778             Pattern.compile("(?m).(?-m).");
4779             Pattern.compile("(?s).(?-s).");
4780             Pattern.compile("(?d).(?-d).");
4781             Pattern.compile("(?u).(?-u).");
4782             Pattern.compile("(?c).(?-c).");
4783             Pattern.compile("(?x).(?-x).");
4784             Pattern.compile("(?U).(?-U).");
4785             Pattern.compile("(?imsducxU).(?-imsducxU).");
4786         } catch (PatternSyntaxException x) {
4787             failCount++;
4788         }
4789         report("Embedded flags");
4790     }
4791 
4792     private static void grapheme() throws Exception {
4793         Files.lines(UCDFiles.GRAPHEME_BREAK_TEST)
4794             .filter( ln -> ln.length() != 0 && !ln.startsWith("#") )
4795             .forEach( ln -> {
4796                     ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", "");
4797                     // System.out.println(str);
4798                     String[] strs = ln.split("\u00f7|\u00d7");
4799                     StringBuilder src = new StringBuilder();
4800                     ArrayList<String> graphemes = new ArrayList<>();
4801                     StringBuilder buf = new StringBuilder();
4802                     int offBk = 0;
4803                     for (String str : strs) {
4804                         if (str.length() == 0)  // first empty str
4805                             continue;
4806                         int cp = Integer.parseInt(str, 16);
4807                         src.appendCodePoint(cp);
4808                         buf.appendCodePoint(cp);
4809                         offBk += (str.length() + 1);
4810                         if (ln.charAt(offBk) == '\u00f7') {    // DIV
4811                             graphemes.add(buf.toString());
4812                             buf = new StringBuilder();
4813                         }
4814                     }
4815                     Pattern p = Pattern.compile("\\X");
4816                     Matcher m = p.matcher(src.toString());
4817                     Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}");
4818                     for (String g : graphemes) {
4819                         // System.out.printf("     grapheme:=[%s]%n", g);
4820                         // (1) test \\X directly
4821                         if (!m.find() || !m.group().equals(g)) {
4822                             System.out.println("Failed \\X [" + ln + "] : " + g);
4823                             failCount++;
4824                         }
4825                         // (2) test \\b{g} + \\X  via Scanner
4826                         boolean hasNext = s.hasNext(p);
4827                         // if (!s.hasNext() || !s.next().equals(next)) {
4828                         if (!s.hasNext(p) || !s.next(p).equals(g)) {
4829                             System.out.println("Failed b{g} [" + ln + "] : " + g);
4830                             failCount++;
4831                         }
4832                     }
4833                 });
4834         // some sanity checks
4835         if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() ||
4836             !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() ||
4837             !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches())
4838             failCount++;
4839         // make sure "\b{n}" still works
4840         if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches())
4841             failCount++;
4842         report("Unicode extended grapheme cluster");
4843     }
4844 
4845     // hangup/timeout if go into exponential backtracking
4846     private static void expoBacktracking() throws Exception {
4847 
4848         Object[][] patternMatchers = {
4849             // 6328855
4850             { "(.*\n*)*",
4851               "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)",
4852               false },
4853             // 6192895
4854             { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4855               "Hello World this is a test this is a test this is a test A",
4856               true },
4857             { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4858               "Hello World this is a test this is a test this is a test \u4e00 ",
4859               false },
4860             { " *([a-z0-9]+ *)+",
4861               "hello world this is a test this is a test this is a test A",
4862               false },
4863             // 4771934 [FIXED] #5013651?
4864             { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4865               "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com",
4866               true },
4867             // 4866249 [FIXED]
4868             { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>",
4869               "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">",
4870               true },
4871             { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4872               "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com",
4873               false },
4874             // 6345469
4875             { "((<[^>]+>)?(((\\s)?)*(\\&nbsp;)?)*((\\s)?)*)+",
4876               "&nbsp;&nbsp; < br/> &nbsp; < / p> <p> <html> <adfasfdasdf>&nbsp; </p>",
4877               true }, // --> matched
4878             { "((<[^>]+>)?(((\\s)?)*(\\&nbsp;)?)*((\\s)?)*)+",
4879               "&nbsp;&nbsp; < br/> &nbsp; < / p> <p> <html> <adfasfdasdf>&nbsp; p </p>",
4880               false },
4881             // 5026912
4882             { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$",
4883               "156580451111112225588087755221111111566969655555555",
4884               false},
4885             // 6988218
4886             { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')",
4887               "'%)) order by ANGEBOT.ID",
4888               false},    // find
4889             // 6693451
4890             { "^(\\s*foo\\s*)*$",
4891               "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo",
4892               true },
4893             { "^(\\s*foo\\s*)*$",
4894               "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo",
4895               false
4896             },
4897             // 7006761
4898             { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true},
4899             { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false},
4900             // 8140212
4901             { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)",
4902               "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()",
4903               false
4904             },
4905             { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true},
4906             { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false},
4907 
4908             { "(x+)*y",  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true },
4909             { "(x+)*y",  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4910 
4911             { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true},
4912             { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4913 
4914             { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false},
4915 
4916             /* not fixed
4917             //8132141   --->    second level exponential backtracking
4918             { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*",
4919               "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" },
4920             */
4921         };
4922 
4923         for (Object[] pm : patternMatchers) {
4924             String p = (String)pm[0];
4925             String s = (String)pm[1];
4926             boolean r = (Boolean)pm[2];
4927             if (r != Pattern.compile(p).matcher(s).matches()) {
4928                 failCount++;
4929             }
4930         }
4931     }
4932 
4933     private static void invalidGroupName() {
4934         // Invalid start of a group name
4935         for (String groupName : List.of("", ".", "0", "\u0040", "\u005b",
4936                 "\u0060", "\u007b", "\u0416")) {
4937             for (String pat : List.of("(?<" + groupName + ">)",
4938                     "\\k<" + groupName + ">")) {
4939                 try {
4940                     Pattern.compile(pat);
4941                     failCount++;
4942                 } catch (PatternSyntaxException e) {
4943                     if (!e.getMessage().startsWith(
4944                             "capturing group name does not start with a"
4945                             + " Latin letter")) {
4946                         failCount++;
4947                     }
4948                 }
4949             }
4950         }
4951         // Invalid char in a group name
4952         for (String groupName : List.of("a.", "b\u0040", "c\u005b",
4953                 "d\u0060", "e\u007b", "f\u0416")) {
4954             for (String pat : List.of("(?<" + groupName + ">)",
4955                     "\\k<" + groupName + ">")) {
4956                 try {
4957                     Pattern.compile(pat);
4958                     failCount++;
4959                 } catch (PatternSyntaxException e) {
4960                     if (!e.getMessage().startsWith(
4961                             "named capturing group is missing trailing '>'")) {
4962                         failCount++;
4963                     }
4964                 }
4965             }
4966         }
4967         report("Invalid capturing group names");
4968     }
4969 
4970     private static void illegalRepetitionRange() {
4971         // huge integers > (2^31 - 1)
4972         String n = BigInteger.valueOf(1L << 32)
4973             .toString();
4974         String m = BigInteger.valueOf(1L << 31)
4975             .add(new BigInteger(80, generator))
4976             .toString();
4977         for (String rep : List.of("", "x", ".", ",", "-1", "2,1",
4978                 n, n + ",", "0," + n, n + "," + m, m, m + ",", "0," + m)) {
4979             String pat = ".{" + rep + "}";
4980             try {
4981                 Pattern.compile(pat);
4982                 failCount++;
4983                 System.out.println("Expected to fail. Pattern: " + pat);
4984             } catch (PatternSyntaxException e) {
4985                 if (!e.getMessage().startsWith("Illegal repetition")) {
4986                     failCount++;
4987                     System.out.println("Unexpected error message: " + e.getMessage());
4988                 }
4989             } catch (Throwable t) {
4990                 failCount++;
4991                 System.out.println("Unexpected exception: " + t);
4992             }
4993         }
4994         report("illegalRepetitionRange");
4995     }
4996 
4997     private static void surrogatePairWithCanonEq() {
4998         try {
4999             Pattern.compile("\ud834\udd21", Pattern.CANON_EQ);
5000         } catch (Throwable t) {
5001             failCount++;
5002             System.out.println("Unexpected exception: " + t);
5003         }
5004         report("surrogatePairWithCanonEq");
5005     }
5006 
5007     // This test is for 8235812
5008     private static void lineBreakWithQuantifier() {
5009         // key:    pattern
5010         // value:  lengths of input that must match the pattern
5011         Map<String, List<Integer>> cases = Map.ofEntries(
5012             Map.entry("\\R?",      List.of(0, 1)),
5013             Map.entry("\\R*",      List.of(0, 1, 2, 3)),
5014             Map.entry("\\R+",      List.of(1, 2, 3)),
5015             Map.entry("\\R{0}",    List.of(0)),
5016             Map.entry("\\R{1}",    List.of(1)),
5017             Map.entry("\\R{2}",    List.of(2)),
5018             Map.entry("\\R{3}",    List.of(3)),
5019             Map.entry("\\R{0,}",   List.of(0, 1, 2, 3)),
5020             Map.entry("\\R{1,}",   List.of(1, 2, 3)),
5021             Map.entry("\\R{2,}",   List.of(2, 3)),
5022             Map.entry("\\R{3,}",   List.of(3)),
5023             Map.entry("\\R{0,0}",  List.of(0)),
5024             Map.entry("\\R{0,1}",  List.of(0, 1)),
5025             Map.entry("\\R{0,2}",  List.of(0, 1, 2)),
5026             Map.entry("\\R{0,3}",  List.of(0, 1, 2, 3)),
5027             Map.entry("\\R{1,1}",  List.of(1)),
5028             Map.entry("\\R{1,2}",  List.of(1, 2)),
5029             Map.entry("\\R{1,3}",  List.of(1, 2, 3)),
5030             Map.entry("\\R{2,2}",  List.of(2)),
5031             Map.entry("\\R{2,3}",  List.of(2, 3)),
5032             Map.entry("\\R{3,3}",  List.of(3)),
5033             Map.entry("\\R",       List.of(1)),
5034             Map.entry("\\R\\R",    List.of(2)),
5035             Map.entry("\\R\\R\\R", List.of(3))
5036         );
5037 
5038         // key:    length of input
5039         // value:  all possible inputs of given length
5040         Map<Integer, List<String>> inputs = new HashMap<>();
5041         String[] Rs = { "\r\n", "\r", "\n",
5042                         "\u000B", "\u000C", "\u0085", "\u2028", "\u2029" };
5043         StringBuilder sb = new StringBuilder();
5044         for (int len = 0; len <= 3; ++len) {
5045             int[] idx = new int[len + 1];
5046             do {
5047                 sb.setLength(0);
5048                 for (int j = 0; j < len; ++j)
5049                     sb.append(Rs[idx[j]]);
5050                 inputs.computeIfAbsent(len, ArrayList::new).add(sb.toString());
5051                 idx[0]++;
5052                 for (int j = 0; j < len; ++j) {
5053                     if (idx[j] < Rs.length)
5054                         break;
5055                     idx[j] = 0;
5056                     idx[j+1]++;
5057                 }
5058             } while (idx[len] == 0);
5059         }
5060 
5061         // exhaustive testing
5062         for (String patStr : cases.keySet()) {
5063             Pattern[] pats = patStr.endsWith("R")
5064                 ? new Pattern[] { Pattern.compile(patStr) }  // no quantifiers
5065                 : new Pattern[] { Pattern.compile(patStr),          // greedy
5066                                   Pattern.compile(patStr + "?") };  // reluctant
5067             Matcher m = pats[0].matcher("");
5068             for (Pattern p : pats) {
5069                 m.usePattern(p);
5070                 for (int len : cases.get(patStr)) {
5071                     for (String in : inputs.get(len)) {
5072                         if (!m.reset(in).matches()) {
5073                             failCount++;
5074                             System.out.println("Expected to match '" +
5075                                     in + "' =~ /" + p + "/");
5076                         }
5077                     }
5078                 }
5079             }
5080         }
5081         report("lineBreakWithQuantifier");
5082     }
5083 }
--- EOF ---