1 /*
   2  * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /**
  25  * @test
  26  * @summary tests RegExp framework (use -Dseed=X to set PRNG seed)
  27  * @author Mike McCloskey
  28  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
  29  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
  30  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  31  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  32  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  33  * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
  34  * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
  35  * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
  36  * 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
  37  * 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
  38  * 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812
  39  * 8216332 8214245
  40  *
  41  * @library /test/lib
  42  * @library /lib/testlibrary/java/lang
  43  * @build jdk.test.lib.RandomFactory
  44  * @run main RegExTest
  45  * @key randomness
  46  */
  47 
  48 import java.io.BufferedReader;
  49 import java.io.ByteArrayInputStream;
  50 import java.io.ByteArrayOutputStream;
  51 import java.io.File;
  52 import java.io.FileInputStream;
  53 import java.io.InputStreamReader;
  54 import java.io.ObjectInputStream;
  55 import java.io.ObjectOutputStream;
  56 import java.math.BigInteger;
  57 import java.nio.CharBuffer;
  58 import java.nio.file.Files;
  59 import java.nio.file.Path;
  60 import java.nio.file.Paths;
  61 import java.util.ArrayList;
  62 import java.util.Arrays;
  63 import java.util.HashMap;
  64 import java.util.List;
  65 import java.util.Map;
  66 import java.util.Random;
  67 import java.util.Scanner;
  68 import java.util.function.Function;
  69 import java.util.function.Predicate;
  70 import java.util.regex.Matcher;
  71 import java.util.regex.MatchResult;
  72 import java.util.regex.Pattern;
  73 import java.util.regex.PatternSyntaxException;
  74 import java.util.stream.Stream;
  75 
  76 import jdk.test.lib.RandomFactory;
  77 
  78 /**
  79  * This is a test class created to check the operation of
  80  * the Pattern and Matcher classes.
  81  */
  82 public class RegExTest {
  83 
    // Shared PRNG used by the randomized helpers; obtained from the test
    // library's RandomFactory.
    private static Random generator = RandomFactory.getRandom();
    // Set to true by report() as soon as any test has recorded a failure;
    // main() checks it after all tests have run.
    private static boolean failure = false;
    // Number of failed checks recorded by the test currently running;
    // report() prints it and resets it to 0.
    private static int failCount = 0;
    // Name given to report() by the first test that failed, or null if no
    // test has failed so far.
    private static String firstFailure = null;
  88 
    /**
     * Runs the file-driven test cases and then every individual test method
     * in turn. The command-line arguments are not read.
     *
     * @throws RuntimeException naming the first failed test if any test
     *         recorded a failure
     */
    public static void main(String[] args) throws Exception {
        // Most of the tests are in a file
        processFile("TestCases.txt");
        //processFile("PerlCases.txt");
        processFile("BMPTestCases.txt");
        processFile("SupplementaryTestCases.txt");

        // These test many randomly generated char patterns
        bm();
        slice();

        // These are hard to put into the file
        escapes();
        blankInput();

        // Substitution tests on randomly generated sequences
        globalSubstitute();
        stringbufferSubstitute();
        stringbuilderSubstitute();

        substitutionBasher();
        substitutionBasher2();

        // Canonical Equivalence
        ceTest();

        // Anchors
        anchorTest();

        // boolean match calls
        matchesTest();
        lookingAtTest();

        // Pattern API
        patternMatchesTest();

        // Misc
        lookbehindTest();
        nullArgumentTest();
        backRefTest();
        groupCaptureTest();
        caretTest();
        charClassTest();
        emptyPatternTest();
        findIntTest();
        group0Test();
        longPatternTest();
        octalTest();
        ampersandTest();
        negationTest();
        splitTest();
        appendTest();
        caseFoldingTest();
        commentsTest();
        unixLinesTest();
        replaceFirstTest();
        gTest();
        zTest();
        serializeTest();
        reluctantRepetitionTest();
        multilineDollarTest();
        dollarAtEndTest();
        caretBetweenTerminatorsTest();
        // This RFE rejected in Tiger numOccurrencesTest();
        javaCharClassTest();
        nonCaptureRepetitionTest();
        notCapturedGroupCurlyMatchTest();
        escapedSegmentTest();
        literalPatternTest();
        literalReplacementTest();
        regionTest();
        toStringTest();
        negatedCharClassTest();
        findFromTest();
        boundsTest();
        unicodeWordBoundsTest();
        caretAtEndTest();
        wordSearchTest();
        hitEndTest();
        toMatchResultTest();
        toMatchResultTest2();
        surrogatesInClassTest();
        removeQEQuotingTest();
        namedGroupCaptureTest();
        nonBmpClassComplementTest();
        unicodePropertiesTest();
        unicodeHexNotationTest();
        unicodeClassesTest();
        unicodeCharacterNameTest();
        horizontalAndVerticalWSTest();
        linebreakTest();
        branchTest();
        groupCurlyNotFoundSuppTest();
        groupCurlyBackoffTest();
        patternAsPredicate();
        patternAsMatchPredicate();
        invalidFlags();
        embeddedFlags();
        grapheme();
        expoBacktracking();
        invalidGroupName();
        illegalRepetitionRange();
        surrogatePairWithCanonEq();
        lineBreakWithQuantifier();
        caseInsensitivePMatch();

        // 'failure' is set by report() whenever a test finished with a
        // non-zero failCount; 'firstFailure' holds that test's name.
        if (failure) {
            throw new
                RuntimeException("RegExTest failed, 1st failure: " +
                                 firstFailure);
        } else {
            System.err.println("OKAY: All tests passed.");
        }
    }
 207 
 208     // Utility functions
 209 
 210     private static String getRandomAlphaString(int length) {
 211         StringBuffer buf = new StringBuffer(length);
 212         for (int i=0; i<length; i++) {
 213             char randChar = (char)(97 + generator.nextInt(26));
 214             buf.append(randChar);
 215         }
 216         return buf.toString();
 217     }
 218 
 219     private static void check(Matcher m, String expected) {
 220         m.find();
 221         if (!m.group().equals(expected))
 222             failCount++;
 223     }
 224 
 225     private static void check(Matcher m, String result, boolean expected) {
 226         m.find();
 227         if (m.group().equals(result) != expected)
 228             failCount++;
 229     }
 230 
 231     private static void check(Pattern p, String s, boolean expected) {
 232         if (p.matcher(s).find() != expected)
 233             failCount++;
 234     }
 235 
 236     private static void check(String p, String s, boolean expected) {
 237         Matcher matcher = Pattern.compile(p).matcher(s);
 238         if (matcher.find() != expected)
 239             failCount++;
 240     }
 241 
 242     private static void check(String p, char c, boolean expected) {
 243         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 244         Pattern pattern = Pattern.compile(propertyPattern);
 245         char[] ca = new char[1]; ca[0] = c;
 246         Matcher matcher = pattern.matcher(new String(ca));
 247         if (!matcher.find())
 248             failCount++;
 249     }
 250 
 251     private static void check(String p, int codePoint, boolean expected) {
 252         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 253         Pattern pattern = Pattern.compile(propertyPattern);
 254         char[] ca = Character.toChars(codePoint);
 255         Matcher matcher = pattern.matcher(new String(ca));
 256         if (!matcher.find())
 257             failCount++;
 258     }
 259 
 260     private static void check(String p, int flag, String input, String s,
 261                               boolean expected)
 262     {
 263         Pattern pattern = Pattern.compile(p, flag);
 264         Matcher matcher = pattern.matcher(input);
 265         if (expected)
 266             check(matcher, s, expected);
 267         else
 268             check(pattern, input, false);
 269     }
 270 
 271     private static void report(String testName) {
 272         int spacesToAdd = 30 - testName.length();
 273         StringBuffer paddedNameBuffer = new StringBuffer(testName);
 274         for (int i=0; i<spacesToAdd; i++)
 275             paddedNameBuffer.append(" ");
 276         String paddedName = paddedNameBuffer.toString();
 277         System.err.println(paddedName + ": " +
 278                            (failCount==0 ? "Passed":"Failed("+failCount+")"));
 279         if (failCount > 0) {
 280             failure = true;
 281 
 282             if (firstFailure == null) {
 283                 firstFailure = testName;
 284             }
 285         }
 286 
 287         failCount = 0;
 288     }
 289 
 290     /**
 291      * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
 292      * supplementary characters. This method does NOT fully take care
 293      * of the regex syntax.
 294      */
 295     private static String toSupplementaries(String s) {
 296         int length = s.length();
 297         StringBuffer sb = new StringBuffer(length * 2);
 298 
 299         for (int i = 0; i < length; ) {
 300             char c = s.charAt(i++);
 301             if (c == '\\') {
 302                 sb.append(c);
 303                 if (i < length) {
 304                     c = s.charAt(i++);
 305                     sb.append(c);
 306                     if (c == 'u') {
 307                         // assume no syntax error
 308                         sb.append(s.charAt(i++));
 309                         sb.append(s.charAt(i++));
 310                         sb.append(s.charAt(i++));
 311                         sb.append(s.charAt(i++));
 312                     }
 313                 }
 314             } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
 315                 sb.append('\ud800').append((char)('\udc00'+c));
 316             } else {
 317                 sb.append(c);
 318             }
 319         }
 320         return sb.toString();
 321     }
 322 
 323     // Regular expression tests
 324 
 325     // This is for bug 6178785
 326     // Test if an expected NPE gets thrown when passing in a null argument
 327     private static boolean check(Runnable test) {
 328         try {
 329             test.run();
 330             failCount++;
 331             return false;
 332         } catch (NullPointerException npe) {
 333             return true;
 334         }
 335     }
 336 
 337     private static void nullArgumentTest() {
 338         check(() -> Pattern.compile(null));
 339         check(() -> Pattern.matches(null, null));
 340         check(() -> Pattern.matches("xyz", null));
 341         check(() -> Pattern.quote(null));
 342         check(() -> Pattern.compile("xyz").split(null));
 343         check(() -> Pattern.compile("xyz").matcher(null));
 344 
 345         final Matcher m = Pattern.compile("xyz").matcher("xyz");
 346         m.matches();
 347         check(() -> m.appendTail((StringBuffer) null));
 348         check(() -> m.appendTail((StringBuilder)null));
 349         check(() -> m.replaceAll((String) null));
 350         check(() -> m.replaceAll((Function<MatchResult, String>)null));
 351         check(() -> m.replaceFirst((String)null));
 352         check(() -> m.replaceFirst((Function<MatchResult, String>) null));
 353         check(() -> m.appendReplacement((StringBuffer)null, null));
 354         check(() -> m.appendReplacement((StringBuilder)null, null));
 355         check(() -> m.reset(null));
 356         check(() -> Matcher.quoteReplacement(null));
 357         //check(() -> m.usePattern(null));
 358 
 359         report("Null Argument");
 360     }
 361 
 362     // This is for bug6635133
 363     // Test if surrogate pair in Unicode escapes can be handled correctly.
 364     private static void surrogatesInClassTest() throws Exception {
 365         Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
 366         Matcher matcher = pattern.matcher("\ud834\udd22");
 367         if (!matcher.find())
 368             failCount++;
 369 
 370         report("Surrogate pair in Unicode escape");
 371     }
 372 
 373     // This is for bug6990617
 374     // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
 375     // char encoding is only 2 or 3 digits instead of 4 and the first quoted
 376     // char is an octal digit.
 377     private static void removeQEQuotingTest() throws Exception {
 378         Pattern pattern =
 379             Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
 380         Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
 381         if (!matcher.find())
 382             failCount++;
 383 
 384         report("Remove Q/E Quoting");
 385     }
 386 
 387     // This is for bug 4988891
 388     // Test toMatchResult to see that it is a copy of the Matcher
 389     // that is not affected by subsequent operations on the original
 390     private static void toMatchResultTest() throws Exception {
 391         Pattern pattern = Pattern.compile("squid");
 392         Matcher matcher = pattern.matcher(
 393             "agiantsquidofdestinyasmallsquidoffate");
 394         matcher.find();
 395         int matcherStart1 = matcher.start();
 396         MatchResult mr = matcher.toMatchResult();
 397         if (mr == matcher)
 398             failCount++;
 399         int resultStart1 = mr.start();
 400         if (matcherStart1 != resultStart1)
 401             failCount++;
 402         matcher.find();
 403         int matcherStart2 = matcher.start();
 404         int resultStart2 = mr.start();
 405         if (matcherStart2 == resultStart2)
 406             failCount++;
 407         if (resultStart1 != resultStart2)
 408             failCount++;
 409         MatchResult mr2 = matcher.toMatchResult();
 410         if (mr == mr2)
 411             failCount++;
 412         if (mr2.start() != matcherStart2)
 413             failCount++;
 414         report("toMatchResult is a copy");
 415     }
 416 
 417     private static void checkExpectedISE(Runnable test) {
 418         try {
 419             test.run();
 420             failCount++;
 421         } catch (IllegalStateException x) {
 422         } catch (IndexOutOfBoundsException xx) {
 423             failCount++;
 424         }
 425     }
 426 
 427     private static void checkExpectedIOOE(Runnable test) {
 428         try {
 429             test.run();
 430             failCount++;
 431         } catch (IndexOutOfBoundsException x) {}
 432     }
 433 
 434     // This is for bug 8074678
 435     // Test the result of toMatchResult throws ISE if no match is availble
 436     private static void toMatchResultTest2() throws Exception {
 437         Matcher matcher = Pattern.compile("nomatch").matcher("hello world");
 438         matcher.find();
 439         MatchResult mr = matcher.toMatchResult();
 440 
 441         checkExpectedISE(() -> mr.start());
 442         checkExpectedISE(() -> mr.start(2));
 443         checkExpectedISE(() -> mr.end());
 444         checkExpectedISE(() -> mr.end(2));
 445         checkExpectedISE(() -> mr.group());
 446         checkExpectedISE(() -> mr.group(2));
 447 
 448         matcher = Pattern.compile("(match)").matcher("there is a match");
 449         matcher.find();
 450         MatchResult mr2 = matcher.toMatchResult();
 451         checkExpectedIOOE(() -> mr2.start(2));
 452         checkExpectedIOOE(() -> mr2.end(2));
 453         checkExpectedIOOE(() -> mr2.group(2));
 454 
 455         report("toMatchResult2 appropriate exceptions");
 456     }
 457 
 458     // This is for bug 5013885
 459     // Must test a slice to see if it reports hitEnd correctly
 460     private static void hitEndTest() throws Exception {
 461         // Basic test of Slice node
 462         Pattern p = Pattern.compile("^squidattack");
 463         Matcher m = p.matcher("squack");
 464         m.find();
 465         if (m.hitEnd())
 466             failCount++;
 467         m.reset("squid");
 468         m.find();
 469         if (!m.hitEnd())
 470             failCount++;
 471 
 472         // Test Slice, SliceA and SliceU nodes
 473         for (int i=0; i<3; i++) {
 474             int flags = 0;
 475             if (i==1) flags = Pattern.CASE_INSENSITIVE;
 476             if (i==2) flags = Pattern.UNICODE_CASE;
 477             p = Pattern.compile("^abc", flags);
 478             m = p.matcher("ad");
 479             m.find();
 480             if (m.hitEnd())
 481                 failCount++;
 482             m.reset("ab");
 483             m.find();
 484             if (!m.hitEnd())
 485                 failCount++;
 486         }
 487 
 488         // Test Boyer-Moore node
 489         p = Pattern.compile("catattack");
 490         m = p.matcher("attack");
 491         m.find();
 492         if (!m.hitEnd())
 493             failCount++;
 494 
 495         p = Pattern.compile("catattack");
 496         m = p.matcher("attackattackattackcatatta");
 497         m.find();
 498         if (!m.hitEnd())
 499             failCount++;
 500 
 501         // 8184706: Matching u+0d at EOL against \R should hit-end
 502         p = Pattern.compile("...\\R");
 503         m = p.matcher("cat" + (char)0x0a);
 504         m.find();
 505         if (m.hitEnd())
 506             failCount++;
 507 
 508         m = p.matcher("cat" + (char)0x0d);
 509         m.find();
 510         if (!m.hitEnd())
 511             failCount++;
 512 
 513         m = p.matcher("cat" + (char)0x0d + (char)0x0a);
 514         m.find();
 515         if (m.hitEnd())
 516             failCount++;
 517 
 518         report("hitEnd");
 519     }
 520 
 521     // This is for bug 4997476
 522     // It is weird code submitted by customer demonstrating a regression
 523     private static void wordSearchTest() throws Exception {
 524         String testString = new String("word1 word2 word3");
 525         Pattern p = Pattern.compile("\\b");
 526         Matcher m = p.matcher(testString);
 527         int position = 0;
 528         int start = 0;
 529         while (m.find(position)) {
 530             start = m.start();
 531             if (start == testString.length())
 532                 break;
 533             if (m.find(start+1)) {
 534                 position = m.start();
 535             } else {
 536                 position = testString.length();
 537             }
 538             if (testString.substring(start, position).equals(" "))
 539                 continue;
 540             if (!testString.substring(start, position-1).startsWith("word"))
 541                 failCount++;
 542         }
 543         report("Customer word search");
 544     }
 545 
 546     // This is for bug 4994840
 547     private static void caretAtEndTest() throws Exception {
 548         // Problem only occurs with multiline patterns
 549         // containing a beginning-of-line caret "^" followed
 550         // by an expression that also matches the empty string.
 551         Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
 552         Matcher matcher = pattern.matcher("\r");
 553         matcher.find();
 554         matcher.find();
 555         report("Caret at end");
 556     }
 557 
 558     // This test is for 4979006
 559     // Check to see if word boundary construct properly handles unicode
 560     // non spacing marks
 561     private static void unicodeWordBoundsTest() throws Exception {
 562         String spaces = "  ";
 563         String wordChar = "a";
 564         String nsm = "\u030a";
 565 
 566         assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
 567 
 568         Pattern pattern = Pattern.compile("\\b");
 569         Matcher matcher = pattern.matcher("");
 570         // S=other B=word character N=non spacing mark .=word boundary
 571         // SS.BB.SS
 572         String input = spaces + wordChar + wordChar + spaces;
 573         twoFindIndexes(input, matcher, 2, 4);
 574         // SS.BBN.SS
 575         input = spaces + wordChar +wordChar + nsm + spaces;
 576         twoFindIndexes(input, matcher, 2, 5);
 577         // SS.BN.SS
 578         input = spaces + wordChar + nsm + spaces;
 579         twoFindIndexes(input, matcher, 2, 4);
 580         // SS.BNN.SS
 581         input = spaces + wordChar + nsm + nsm + spaces;
 582         twoFindIndexes(input, matcher, 2, 5);
 583         // SSN.BB.SS
 584         input = spaces + nsm + wordChar + wordChar + spaces;
 585         twoFindIndexes(input, matcher, 3, 5);
 586         // SS.BNB.SS
 587         input = spaces + wordChar + nsm + wordChar + spaces;
 588         twoFindIndexes(input, matcher, 2, 5);
 589         // SSNNSS
 590         input = spaces + nsm + nsm + spaces;
 591         matcher.reset(input);
 592         if (matcher.find())
 593             failCount++;
 594         // SSN.BBN.SS
 595         input = spaces + nsm + wordChar + wordChar + nsm + spaces;
 596         twoFindIndexes(input, matcher, 3, 6);
 597 
 598         report("Unicode word boundary");
 599     }
 600 
 601     private static void twoFindIndexes(String input, Matcher matcher, int a,
 602                                        int b) throws Exception
 603     {
 604         matcher.reset(input);
 605         matcher.find();
 606         if (matcher.start() != a)
 607             failCount++;
 608         matcher.find();
 609         if (matcher.start() != b)
 610             failCount++;
 611     }
 612 
 613     // This test is for 6284152
 614     static void check(String regex, String input, String[] expected) {
 615         List<String> result = new ArrayList<String>();
 616         Pattern p = Pattern.compile(regex);
 617         Matcher m = p.matcher(input);
 618         while (m.find()) {
 619             result.add(m.group());
 620         }
 621         if (!Arrays.asList(expected).equals(result))
 622             failCount++;
 623     }
 624 
 625     private static void lookbehindTest() throws Exception {
 626         //Positive
 627         check("(?<=%.{0,5})foo\\d",
 628               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 629               new String[]{"foo1", "foo2", "foo3"});
 630 
 631         //boundary at end of the lookbehind sub-regex should work consistently
 632         //with the boundary just after the lookbehind sub-regex
 633         check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
 634         check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
 635         check("(?<!abc )\\bfoo", "abc foo", new String[0]);
 636         check("(?<!abc \\b)foo", "abc foo", new String[0]);
 637 
 638         //Negative
 639         check("(?<!%.{0,5})foo\\d",
 640               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 641               new String[] {"foo4", "foo5"});
 642 
 643         //Positive greedy
 644         check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
 645 
 646         //Positive reluctant
 647         check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
 648 
 649         //supplementary
 650         check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 651               new String[] {"fo\ud800\udc00o"});
 652         check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 653               new String[] {"fo\ud800\udc00o"});
 654         check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
 655               new String[] {"fo\ud800\udc00o"});
 656         check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
 657               new String[] {"fo\ud800\udc00o"});
 658         report("Lookbehind");
 659     }
 660 
 661     // This test is for 4938995
 662     // Check to see if weak region boundaries are transparent to
 663     // lookahead and lookbehind constructs
 664     private static void boundsTest() throws Exception {
 665         String fullMessage = "catdogcat";
 666         Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
 667         Matcher matcher = pattern.matcher("catdogca");
 668         matcher.useTransparentBounds(true);
 669         if (matcher.find())
 670             failCount++;
 671         matcher.reset("atdogcat");
 672         if (matcher.find())
 673             failCount++;
 674         matcher.reset(fullMessage);
 675         if (!matcher.find())
 676             failCount++;
 677         matcher.reset(fullMessage);
 678         matcher.region(0,9);
 679         if (!matcher.find())
 680             failCount++;
 681         matcher.reset(fullMessage);
 682         matcher.region(0,6);
 683         if (!matcher.find())
 684             failCount++;
 685         matcher.reset(fullMessage);
 686         matcher.region(3,6);
 687         if (!matcher.find())
 688             failCount++;
 689         matcher.useTransparentBounds(false);
 690         if (matcher.find())
 691             failCount++;
 692 
 693         // Negative lookahead/lookbehind
 694         pattern = Pattern.compile("(?<!cat)dog(?!cat)");
 695         matcher = pattern.matcher("dogcat");
 696         matcher.useTransparentBounds(true);
 697         matcher.region(0,3);
 698         if (matcher.find())
 699             failCount++;
 700         matcher.reset("catdog");
 701         matcher.region(3,6);
 702         if (matcher.find())
 703             failCount++;
 704         matcher.useTransparentBounds(false);
 705         matcher.reset("dogcat");
 706         matcher.region(0,3);
 707         if (!matcher.find())
 708             failCount++;
 709         matcher.reset("catdog");
 710         matcher.region(3,6);
 711         if (!matcher.find())
 712             failCount++;
 713 
 714         report("Region bounds transparency");
 715     }
 716 
 717     // This test is for 4945394
 718     private static void findFromTest() throws Exception {
 719         String message = "This is 40 $0 message.";
 720         Pattern pat = Pattern.compile("\\$0");
 721         Matcher match = pat.matcher(message);
 722         if (!match.find())
 723             failCount++;
 724         if (match.find())
 725             failCount++;
 726         if (match.find())
 727             failCount++;
 728         report("Check for alternating find");
 729     }
 730 
 731     // This test is for 4872664 and 4892980
 732     private static void negatedCharClassTest() throws Exception {
 733         Pattern pattern = Pattern.compile("[^>]");
 734         Matcher matcher = pattern.matcher("\u203A");
 735         if (!matcher.matches())
 736             failCount++;
 737         pattern = Pattern.compile("[^fr]");
 738         matcher = pattern.matcher("a");
 739         if (!matcher.find())
 740             failCount++;
 741         matcher.reset("\u203A");
 742         if (!matcher.find())
 743             failCount++;
 744         String s = "for";
 745         String result[] = s.split("[^fr]");
 746         if (!result[0].equals("f"))
 747             failCount++;
 748         if (!result[1].equals("r"))
 749             failCount++;
 750         s = "f\u203Ar";
 751         result = s.split("[^fr]");
 752         if (!result[0].equals("f"))
 753             failCount++;
 754         if (!result[1].equals("r"))
 755             failCount++;
 756 
 757         // Test adding to bits, subtracting a node, then adding to bits again
 758         pattern = Pattern.compile("[^f\u203Ar]");
 759         matcher = pattern.matcher("a");
 760         if (!matcher.find())
 761             failCount++;
 762         matcher.reset("f");
 763         if (matcher.find())
 764             failCount++;
 765         matcher.reset("\u203A");
 766         if (matcher.find())
 767             failCount++;
 768         matcher.reset("r");
 769         if (matcher.find())
 770             failCount++;
 771         matcher.reset("\u203B");
 772         if (!matcher.find())
 773             failCount++;
 774 
 775         // Test subtracting a node, adding to bits, subtracting again
 776         pattern = Pattern.compile("[^\u203Ar\u203B]");
 777         matcher = pattern.matcher("a");
 778         if (!matcher.find())
 779             failCount++;
 780         matcher.reset("\u203A");
 781         if (matcher.find())
 782             failCount++;
 783         matcher.reset("r");
 784         if (matcher.find())
 785             failCount++;
 786         matcher.reset("\u203B");
 787         if (matcher.find())
 788             failCount++;
 789         matcher.reset("\u203C");
 790         if (!matcher.find())
 791             failCount++;
 792 
 793         report("Negated Character Class");
 794     }
 795 
 796     // This test is for 4628291
 797     private static void toStringTest() throws Exception {
 798         Pattern pattern = Pattern.compile("b+");
 799         if (pattern.toString() != "b+")
 800             failCount++;
 801         Matcher matcher = pattern.matcher("aaabbbccc");
 802         String matcherString = matcher.toString(); // unspecified
 803         matcher.find();
 804         matcherString = matcher.toString(); // unspecified
 805         matcher.region(0,3);
 806         matcherString = matcher.toString(); // unspecified
 807         matcher.reset();
 808         matcherString = matcher.toString(); // unspecified
 809         report("toString");
 810     }
 811 
 812     // This test is for 4808962
 813     private static void literalPatternTest() throws Exception {
 814         int flags = Pattern.LITERAL;
 815 
 816         Pattern pattern = Pattern.compile("abc\\t$^", flags);
 817         check(pattern, "abc\\t$^", true);
 818 
 819         pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
 820         check(pattern, "abc\\t$^", true);
 821 
 822         pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
 823         check(pattern, "\\Qa^$bcabc\\E", true);
 824         check(pattern, "a^$bcabc", false);
 825 
 826         pattern = Pattern.compile("\\\\Q\\\\E");
 827         check(pattern, "\\Q\\E", true);
 828 
 829         pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
 830         check(pattern, "abcefg\\Q\\Ehij", true);
 831 
 832         pattern = Pattern.compile("\\\\\\Q\\\\E");
 833         check(pattern, "\\\\\\\\", true);
 834 
 835         pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
 836         check(pattern, "\\Qa^$bcabc\\E", true);
 837         check(pattern, "a^$bcabc", false);
 838 
 839         pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
 840         check(pattern, "\\Qabc\\Edef", true);
 841         check(pattern, "abcdef", false);
 842 
 843         pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
 844         check(pattern, "abc\\Edef", true);
 845         check(pattern, "abcdef", false);
 846 
 847         pattern = Pattern.compile(Pattern.quote("\\E"));
 848         check(pattern, "\\E", true);
 849 
 850         pattern = Pattern.compile("((((abc.+?:)", flags);
 851         check(pattern, "((((abc.+?:)", true);
 852 
 853         flags |= Pattern.MULTILINE;
 854 
 855         pattern = Pattern.compile("^cat$", flags);
 856         check(pattern, "abc^cat$def", true);
 857         check(pattern, "cat", false);
 858 
 859         flags |= Pattern.CASE_INSENSITIVE;
 860 
 861         pattern = Pattern.compile("abcdef", flags);
 862         check(pattern, "ABCDEF", true);
 863         check(pattern, "AbCdEf", true);
 864 
 865         flags |= Pattern.DOTALL;
 866 
 867         pattern = Pattern.compile("a...b", flags);
 868         check(pattern, "A...b", true);
 869         check(pattern, "Axxxb", false);
 870 
 871         flags |= Pattern.CANON_EQ;
 872 
 873         Pattern p = Pattern.compile("testa\u030a", flags);
 874         check(pattern, "testa\u030a", false);
 875         check(pattern, "test\u00e5", false);
 876 
 877         // Supplementary character test
 878         flags = Pattern.LITERAL;
 879 
 880         pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
 881         check(pattern, toSupplementaries("abc\\t$^"), true);
 882 
 883         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
 884         check(pattern, toSupplementaries("abc\\t$^"), true);
 885 
 886         pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
 887         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 888         check(pattern, toSupplementaries("a^$bcabc"), false);
 889 
 890         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
 891         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 892         check(pattern, toSupplementaries("a^$bcabc"), false);
 893 
 894         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
 895         check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
 896         check(pattern, toSupplementaries("abcdef"), false);
 897 
 898         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
 899         check(pattern, toSupplementaries("abc\\Edef"), true);
 900         check(pattern, toSupplementaries("abcdef"), false);
 901 
 902         pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
 903         check(pattern, toSupplementaries("((((abc.+?:)"), true);
 904 
 905         flags |= Pattern.MULTILINE;
 906 
 907         pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
 908         check(pattern, toSupplementaries("abc^cat$def"), true);
 909         check(pattern, toSupplementaries("cat"), false);
 910 
 911         flags |= Pattern.DOTALL;
 912 
 913         // note: this is case-sensitive.
 914         pattern = Pattern.compile(toSupplementaries("a...b"), flags);
 915         check(pattern, toSupplementaries("a...b"), true);
 916         check(pattern, toSupplementaries("axxxb"), false);
 917 
 918         flags |= Pattern.CANON_EQ;
 919 
 920         String t = toSupplementaries("test");
 921         p = Pattern.compile(t + "a\u030a", flags);
 922         check(pattern, t + "a\u030a", false);
 923         check(pattern, t + "\u00e5", false);
 924 
 925         report("Literal pattern");
 926     }
 927 
 928     // This test is for 4803179
 929     // This test is also for 4808962, replacement parts
 930     private static void literalReplacementTest() throws Exception {
 931         int flags = Pattern.LITERAL;
 932 
 933         Pattern pattern = Pattern.compile("abc", flags);
 934         Matcher matcher = pattern.matcher("zzzabczzz");
 935         String replaceTest = "$0";
 936         String result = matcher.replaceAll(replaceTest);
 937         if (!result.equals("zzzabczzz"))
 938             failCount++;
 939 
 940         matcher.reset();
 941         String literalReplacement = matcher.quoteReplacement(replaceTest);
 942         result = matcher.replaceAll(literalReplacement);
 943         if (!result.equals("zzz$0zzz"))
 944             failCount++;
 945 
 946         matcher.reset();
 947         replaceTest = "\\t$\\$";
 948         literalReplacement = matcher.quoteReplacement(replaceTest);
 949         result = matcher.replaceAll(literalReplacement);
 950         if (!result.equals("zzz\\t$\\$zzz"))
 951             failCount++;
 952 
 953         // Supplementary character test
 954         pattern = Pattern.compile(toSupplementaries("abc"), flags);
 955         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
 956         replaceTest = "$0";
 957         result = matcher.replaceAll(replaceTest);
 958         if (!result.equals(toSupplementaries("zzzabczzz")))
 959             failCount++;
 960 
 961         matcher.reset();
 962         literalReplacement = matcher.quoteReplacement(replaceTest);
 963         result = matcher.replaceAll(literalReplacement);
 964         if (!result.equals(toSupplementaries("zzz$0zzz")))
 965             failCount++;
 966 
 967         matcher.reset();
 968         replaceTest = "\\t$\\$";
 969         literalReplacement = matcher.quoteReplacement(replaceTest);
 970         result = matcher.replaceAll(literalReplacement);
 971         if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
 972             failCount++;
 973 
 974         // IAE should be thrown if backslash or '$' is the last character
 975         // in replacement string
 976         try {
 977             "\uac00".replaceAll("\uac00", "$");
 978             failCount++;
 979         } catch (IllegalArgumentException iie) {
 980         } catch (Exception e) {
 981             failCount++;
 982         }
 983         try {
 984             "\uac00".replaceAll("\uac00", "\\");
 985             failCount++;
 986         } catch (IllegalArgumentException iie) {
 987         } catch (Exception e) {
 988             failCount++;
 989         }
 990         report("Literal replacement");
 991     }
 992 
 993     // This test is for 4757029
 994     private static void regionTest() throws Exception {
 995         Pattern pattern = Pattern.compile("abc");
 996         Matcher matcher = pattern.matcher("abcdefabc");
 997 
 998         matcher.region(0,9);
 999         if (!matcher.find())
1000             failCount++;
1001         if (!matcher.find())
1002             failCount++;
1003         matcher.region(0,3);
1004         if (!matcher.find())
1005            failCount++;
1006         matcher.region(3,6);
1007         if (matcher.find())
1008            failCount++;
1009         matcher.region(0,2);
1010         if (matcher.find())
1011            failCount++;
1012 
1013         expectRegionFail(matcher, 1, -1);
1014         expectRegionFail(matcher, -1, -1);
1015         expectRegionFail(matcher, -1, 1);
1016         expectRegionFail(matcher, 5, 3);
1017         expectRegionFail(matcher, 5, 12);
1018         expectRegionFail(matcher, 12, 12);
1019 
1020         pattern = Pattern.compile("^abc$");
1021         matcher = pattern.matcher("zzzabczzz");
1022         matcher.region(0,9);
1023         if (matcher.find())
1024             failCount++;
1025         matcher.region(3,6);
1026         if (!matcher.find())
1027            failCount++;
1028         matcher.region(3,6);
1029         matcher.useAnchoringBounds(false);
1030         if (matcher.find())
1031            failCount++;
1032 
1033         // Supplementary character test
1034         pattern = Pattern.compile(toSupplementaries("abc"));
1035         matcher = pattern.matcher(toSupplementaries("abcdefabc"));
1036         matcher.region(0,9*2);
1037         if (!matcher.find())
1038             failCount++;
1039         if (!matcher.find())
1040             failCount++;
1041         matcher.region(0,3*2);
1042         if (!matcher.find())
1043            failCount++;
1044         matcher.region(1,3*2);
1045         if (matcher.find())
1046            failCount++;
1047         matcher.region(3*2,6*2);
1048         if (matcher.find())
1049            failCount++;
1050         matcher.region(0,2*2);
1051         if (matcher.find())
1052            failCount++;
1053         matcher.region(0,2*2+1);
1054         if (matcher.find())
1055            failCount++;
1056 
1057         expectRegionFail(matcher, 1*2, -1);
1058         expectRegionFail(matcher, -1, -1);
1059         expectRegionFail(matcher, -1, 1*2);
1060         expectRegionFail(matcher, 5*2, 3*2);
1061         expectRegionFail(matcher, 5*2, 12*2);
1062         expectRegionFail(matcher, 12*2, 12*2);
1063 
1064         pattern = Pattern.compile(toSupplementaries("^abc$"));
1065         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
1066         matcher.region(0,9*2);
1067         if (matcher.find())
1068             failCount++;
1069         matcher.region(3*2,6*2);
1070         if (!matcher.find())
1071            failCount++;
1072         matcher.region(3*2+1,6*2);
1073         if (matcher.find())
1074            failCount++;
1075         matcher.region(3*2,6*2-1);
1076         if (matcher.find())
1077            failCount++;
1078         matcher.region(3*2,6*2);
1079         matcher.useAnchoringBounds(false);
1080         if (matcher.find())
1081            failCount++;
1082 
1083         // JDK-8230829
1084         pattern = Pattern.compile("\\ud800\\udc61");
1085         matcher = pattern.matcher("\ud800\udc61");
1086         matcher.region(0, 1);
1087         if (matcher.find()) {
1088             failCount++;
1089             System.out.println("Matched a surrogate pair" +
1090                     " that crosses border of region");
1091         }
1092         if (!matcher.hitEnd()) {
1093             failCount++;
1094             System.out.println("Expected to hit the end when" +
1095                     " matching a surrogate pair crossing region");
1096         }
1097 
1098         report("Regions");
1099     }
1100 
1101     private static void expectRegionFail(Matcher matcher, int index1,
1102                                          int index2)
1103     {
1104         try {
1105             matcher.region(index1, index2);
1106             failCount++;
1107         } catch (IndexOutOfBoundsException ioobe) {
1108             // Correct result
1109         } catch (IllegalStateException ise) {
1110             // Correct result
1111         }
1112     }
1113 
1114     // This test is for 4803197
1115     private static void escapedSegmentTest() throws Exception {
1116 
1117         Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
1118         check(pattern, "dir1\\dir2", true);
1119 
1120         pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
1121         check(pattern, "dir1\\dir2\\", true);
1122 
1123         pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
1124         check(pattern, "dir1\\dir2\\", true);
1125 
1126         // Supplementary character test
1127         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
1128         check(pattern, toSupplementaries("dir1\\dir2"), true);
1129 
1130         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
1131         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1132 
1133         pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1134         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1135 
1136         report("Escaped segment");
1137     }
1138 
1139     // This test is for 4792284
1140     private static void nonCaptureRepetitionTest() throws Exception {
1141         String input = "abcdefgh;";
1142 
1143         String[] patterns = new String[] {
1144             "(?:\\w{4})+;",
1145             "(?:\\w{8})*;",
1146             "(?:\\w{2}){2,4};",
1147             "(?:\\w{4}){2,};",   // only matches the
1148             ".*?(?:\\w{5})+;",   //     specified minimum
1149             ".*?(?:\\w{9})*;",   //     number of reps - OK
1150             "(?:\\w{4})+?;",     // lazy repetition - OK
1151             "(?:\\w{4})++;",     // possessive repetition - OK
1152             "(?:\\w{2,}?)+;",    // non-deterministic - OK
1153             "(\\w{4})+;",        // capturing group - OK
1154         };
1155 
1156         for (int i = 0; i < patterns.length; i++) {
1157             // Check find()
1158             check(patterns[i], 0, input, input, true);
1159             // Check matches()
1160             Pattern p = Pattern.compile(patterns[i]);
1161             Matcher m = p.matcher(input);
1162 
1163             if (m.matches()) {
1164                 if (!m.group(0).equals(input))
1165                     failCount++;
1166             } else {
1167                 failCount++;
1168             }
1169         }
1170 
1171         report("Non capturing repetition");
1172     }
1173 
1174     // This test is for 6358731
1175     private static void notCapturedGroupCurlyMatchTest() throws Exception {
1176         Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1177         Matcher matcher = pattern.matcher("abcd");
1178         if (!matcher.matches() ||
1179              matcher.group(1) != null ||
1180              !matcher.group(2).equals("abcd")) {
1181             failCount++;
1182         }
1183         report("Not captured GroupCurly");
1184     }
1185 
1186     // This test is for 4706545
1187     private static void javaCharClassTest() throws Exception {
1188         for (int i=0; i<1000; i++) {
1189             char c = (char)generator.nextInt();
1190             check("{javaLowerCase}", c, Character.isLowerCase(c));
1191             check("{javaUpperCase}", c, Character.isUpperCase(c));
1192             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1193             check("{javaTitleCase}", c, Character.isTitleCase(c));
1194             check("{javaDigit}", c, Character.isDigit(c));
1195             check("{javaDefined}", c, Character.isDefined(c));
1196             check("{javaLetter}", c, Character.isLetter(c));
1197             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1198             check("{javaJavaIdentifierStart}", c,
1199                   Character.isJavaIdentifierStart(c));
1200             check("{javaJavaIdentifierPart}", c,
1201                   Character.isJavaIdentifierPart(c));
1202             check("{javaUnicodeIdentifierStart}", c,
1203                   Character.isUnicodeIdentifierStart(c));
1204             check("{javaUnicodeIdentifierPart}", c,
1205                   Character.isUnicodeIdentifierPart(c));
1206             check("{javaIdentifierIgnorable}", c,
1207                   Character.isIdentifierIgnorable(c));
1208             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1209             check("{javaWhitespace}", c, Character.isWhitespace(c));
1210             check("{javaISOControl}", c, Character.isISOControl(c));
1211             check("{javaMirrored}", c, Character.isMirrored(c));
1212 
1213         }
1214 
1215         // Supplementary character test
1216         for (int i=0; i<1000; i++) {
1217             int c = generator.nextInt(Character.MAX_CODE_POINT
1218                                       - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1219                         + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1220             check("{javaLowerCase}", c, Character.isLowerCase(c));
1221             check("{javaUpperCase}", c, Character.isUpperCase(c));
1222             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1223             check("{javaTitleCase}", c, Character.isTitleCase(c));
1224             check("{javaDigit}", c, Character.isDigit(c));
1225             check("{javaDefined}", c, Character.isDefined(c));
1226             check("{javaLetter}", c, Character.isLetter(c));
1227             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1228             check("{javaJavaIdentifierStart}", c,
1229                   Character.isJavaIdentifierStart(c));
1230             check("{javaJavaIdentifierPart}", c,
1231                   Character.isJavaIdentifierPart(c));
1232             check("{javaUnicodeIdentifierStart}", c,
1233                   Character.isUnicodeIdentifierStart(c));
1234             check("{javaUnicodeIdentifierPart}", c,
1235                   Character.isUnicodeIdentifierPart(c));
1236             check("{javaIdentifierIgnorable}", c,
1237                   Character.isIdentifierIgnorable(c));
1238             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1239             check("{javaWhitespace}", c, Character.isWhitespace(c));
1240             check("{javaISOControl}", c, Character.isISOControl(c));
1241             check("{javaMirrored}", c, Character.isMirrored(c));
1242         }
1243 
1244         report("Java character classes");
1245     }
1246 
1247     // This test is for 4523620
1248     /*
1249     private static void numOccurrencesTest() throws Exception {
1250         Pattern pattern = Pattern.compile("aaa");
1251 
1252         if (pattern.numOccurrences("aaaaaa", false) != 2)
1253             failCount++;
1254         if (pattern.numOccurrences("aaaaaa", true) != 4)
1255             failCount++;
1256 
1257         pattern = Pattern.compile("^");
1258         if (pattern.numOccurrences("aaaaaa", false) != 1)
1259             failCount++;
1260         if (pattern.numOccurrences("aaaaaa", true) != 1)
1261             failCount++;
1262 
1263         report("Number of Occurrences");
1264     }
1265     */
1266 
1267     // This test is for 4776374
1268     private static void caretBetweenTerminatorsTest() throws Exception {
1269         int flags1 = Pattern.DOTALL;
1270         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1271         int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1272         int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1273 
1274         check("^....", flags1, "test\ntest", "test", true);
1275         check(".....^", flags1, "test\ntest", "test", false);
1276         check(".....^", flags1, "test\n", "test", false);
1277         check("....^", flags1, "test\r\n", "test", false);
1278 
1279         check("^....", flags2, "test\ntest", "test", true);
1280         check("....^", flags2, "test\ntest", "test", false);
1281         check(".....^", flags2, "test\n", "test", false);
1282         check("....^", flags2, "test\r\n", "test", false);
1283 
1284         check("^....", flags3, "test\ntest", "test", true);
1285         check(".....^", flags3, "test\ntest", "test\n", true);
1286         check(".....^", flags3, "test\u0085test", "test\u0085", false);
1287         check(".....^", flags3, "test\n", "test", false);
1288         check(".....^", flags3, "test\r\n", "test", false);
1289         check("......^", flags3, "test\r\ntest", "test\r\n", true);
1290 
1291         check("^....", flags4, "test\ntest", "test", true);
1292         check(".....^", flags3, "test\ntest", "test\n", true);
1293         check(".....^", flags4, "test\u0085test", "test\u0085", true);
1294         check(".....^", flags4, "test\n", "test\n", false);
1295         check(".....^", flags4, "test\r\n", "test\r", false);
1296 
1297         // Supplementary character test
1298         String t = toSupplementaries("test");
1299         check("^....", flags1, t+"\n"+t, t, true);
1300         check(".....^", flags1, t+"\n"+t, t, false);
1301         check(".....^", flags1, t+"\n", t, false);
1302         check("....^", flags1, t+"\r\n", t, false);
1303 
1304         check("^....", flags2, t+"\n"+t, t, true);
1305         check("....^", flags2, t+"\n"+t, t, false);
1306         check(".....^", flags2, t+"\n", t, false);
1307         check("....^", flags2, t+"\r\n", t, false);
1308 
1309         check("^....", flags3, t+"\n"+t, t, true);
1310         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1311         check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1312         check(".....^", flags3, t+"\n", t, false);
1313         check(".....^", flags3, t+"\r\n", t, false);
1314         check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1315 
1316         check("^....", flags4, t+"\n"+t, t, true);
1317         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1318         check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1319         check(".....^", flags4, t+"\n", t+"\n", false);
1320         check(".....^", flags4, t+"\r\n", t+"\r", false);
1321 
1322         report("Caret between terminators");
1323     }
1324 
1325     // This test is for 4727935
1326     private static void dollarAtEndTest() throws Exception {
1327         int flags1 = Pattern.DOTALL;
1328         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1329         int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1330 
1331         check("....$", flags1, "test\n", "test", true);
1332         check("....$", flags1, "test\r\n", "test", true);
1333         check(".....$", flags1, "test\n", "test\n", true);
1334         check(".....$", flags1, "test\u0085", "test\u0085", true);
1335         check("....$", flags1, "test\u0085", "test", true);
1336 
1337         check("....$", flags2, "test\n", "test", true);
1338         check(".....$", flags2, "test\n", "test\n", true);
1339         check(".....$", flags2, "test\u0085", "test\u0085", true);
1340         check("....$", flags2, "test\u0085", "est\u0085", true);
1341 
1342         check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1343         check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1344         check("....$blah", flags3, "test\nblah", "!!!!", false);
1345         check(".....$blah", flags3, "test\nblah", "!!!!", false);
1346 
1347         // Supplementary character test
1348         String t = toSupplementaries("test");
1349         String b = toSupplementaries("blah");
1350         check("....$", flags1, t+"\n", t, true);
1351         check("....$", flags1, t+"\r\n", t, true);
1352         check(".....$", flags1, t+"\n", t+"\n", true);
1353         check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1354         check("....$", flags1, t+"\u0085", t, true);
1355 
1356         check("....$", flags2, t+"\n", t, true);
1357         check(".....$", flags2, t+"\n", t+"\n", true);
1358         check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1359         check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1360 
1361         check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1362         check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1363         check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1364         check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1365 
1366         report("Dollar at End");
1367     }
1368 
1369     // This test is for 4711773
1370     private static void multilineDollarTest() throws Exception {
1371         Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1372         Matcher matcher = findCR.matcher("first bit\nsecond bit");
1373         matcher.find();
1374         if (matcher.start(0) != 9)
1375             failCount++;
1376         matcher.find();
1377         if (matcher.start(0) != 20)
1378             failCount++;
1379 
1380         // Supplementary character test
1381         matcher = findCR.matcher(toSupplementaries("first  bit\n second  bit")); // double BMP chars
1382         matcher.find();
1383         if (matcher.start(0) != 9*2)
1384             failCount++;
1385         matcher.find();
1386         if (matcher.start(0) != 20*2)
1387             failCount++;
1388 
1389         report("Multiline Dollar");
1390     }
1391 
1392     private static void reluctantRepetitionTest() throws Exception {
1393         Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1394         check(p, "1 word word word 2", true);
1395         check(p, "1 wor wo w 2", true);
1396         check(p, "1 word word 2", true);
1397         check(p, "1 word 2", true);
1398         check(p, "1 wo w w 2", true);
1399         check(p, "1 wo w 2", true);
1400         check(p, "1 wor w 2", true);
1401 
1402         p = Pattern.compile("([a-z])+?c");
1403         Matcher m = p.matcher("ababcdefdec");
1404         check(m, "ababc");
1405 
1406         // Supplementary character test
1407         p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1408         m = p.matcher(toSupplementaries("ababcdefdec"));
1409         check(m, toSupplementaries("ababc"));
1410 
1411         report("Reluctant Repetition");
1412     }
1413 
1414     private static Pattern serializedPattern(Pattern p) throws Exception {
1415         ByteArrayOutputStream baos = new ByteArrayOutputStream();
1416         ObjectOutputStream oos = new ObjectOutputStream(baos);
1417         oos.writeObject(p);
1418         oos.close();
1419         try (ObjectInputStream ois = new ObjectInputStream(
1420                 new ByteArrayInputStream(baos.toByteArray()))) {
1421             return (Pattern)ois.readObject();
1422         }
1423     }
1424 
1425     private static void serializeTest() throws Exception {
1426         String patternStr = "(b)";
1427         String matchStr = "b";
1428         Pattern pattern = Pattern.compile(patternStr);
1429         Pattern serializedPattern = serializedPattern(pattern);
1430         Matcher matcher = serializedPattern.matcher(matchStr);
1431         if (!matcher.matches())
1432             failCount++;
1433         if (matcher.groupCount() != 1)
1434             failCount++;
1435 
1436         pattern = Pattern.compile("a(?-i)b", Pattern.CASE_INSENSITIVE);
1437         serializedPattern = serializedPattern(pattern);
1438         if (!serializedPattern.matcher("Ab").matches())
1439             failCount++;
1440         if (serializedPattern.matcher("AB").matches())
1441             failCount++;
1442 
1443         report("Serialization");
1444     }
1445 
1446     private static void gTest() {
1447         Pattern pattern = Pattern.compile("\\G\\w");
1448         Matcher matcher = pattern.matcher("abc#x#x");
1449         matcher.find();
1450         matcher.find();
1451         matcher.find();
1452         if (matcher.find())
1453             failCount++;
1454 
1455         pattern = Pattern.compile("\\GA*");
1456         matcher = pattern.matcher("1A2AA3");
1457         matcher.find();
1458         if (matcher.find())
1459             failCount++;
1460 
1461         pattern = Pattern.compile("\\GA*");
1462         matcher = pattern.matcher("1A2AA3");
1463         if (!matcher.find(1))
1464             failCount++;
1465         matcher.find();
1466         if (matcher.find())
1467             failCount++;
1468 
1469         report("\\G");
1470     }
1471 
1472     private static void zTest() {
1473         Pattern pattern = Pattern.compile("foo\\Z");
1474         // Positives
1475         check(pattern, "foo\u0085", true);
1476         check(pattern, "foo\u2028", true);
1477         check(pattern, "foo\u2029", true);
1478         check(pattern, "foo\n", true);
1479         check(pattern, "foo\r", true);
1480         check(pattern, "foo\r\n", true);
1481         // Negatives
1482         check(pattern, "fooo", false);
1483         check(pattern, "foo\n\r", false);
1484 
1485         pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1486         // Positives
1487         check(pattern, "foo", true);
1488         check(pattern, "foo\n", true);
1489         // Negatives
1490         check(pattern, "foo\r", false);
1491         check(pattern, "foo\u0085", false);
1492         check(pattern, "foo\u2028", false);
1493         check(pattern, "foo\u2029", false);
1494 
1495         report("\\Z");
1496     }
1497 
1498     private static void replaceFirstTest() {
1499         Pattern pattern = Pattern.compile("(ab)(c*)");
1500         Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1501         if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1502             failCount++;
1503 
1504         matcher.reset("zzzabccczzzabcczzzabccczzz");
1505         if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1506             failCount++;
1507 
1508         matcher.reset("zzzabccczzzabcczzzabccczzz");
1509         String result = matcher.replaceFirst("$1");
1510         if (!result.equals("zzzabzzzabcczzzabccczzz"))
1511             failCount++;
1512 
1513         matcher.reset("zzzabccczzzabcczzzabccczzz");
1514         result = matcher.replaceFirst("$2");
1515         if (!result.equals("zzzccczzzabcczzzabccczzz"))
1516             failCount++;
1517 
1518         pattern = Pattern.compile("a*");
1519         matcher = pattern.matcher("aaaaaaaaaa");
1520         if (!matcher.replaceFirst("test").equals("test"))
1521             failCount++;
1522 
1523         pattern = Pattern.compile("a+");
1524         matcher = pattern.matcher("zzzaaaaaaaaaa");
1525         if (!matcher.replaceFirst("test").equals("zzztest"))
1526             failCount++;
1527 
1528         // Supplementary character test
1529         pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1530         matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1531         if (!matcher.replaceFirst(toSupplementaries("test"))
1532                 .equals(toSupplementaries("testzzzabcczzzabccc")))
1533             failCount++;
1534 
1535         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1536         if (!matcher.replaceFirst(toSupplementaries("test")).
1537             equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1538             failCount++;
1539 
1540         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1541         result = matcher.replaceFirst("$1");
1542         if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1543             failCount++;
1544 
1545         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1546         result = matcher.replaceFirst("$2");
1547         if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1548             failCount++;
1549 
1550         pattern = Pattern.compile(toSupplementaries("a*"));
1551         matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1552         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1553             failCount++;
1554 
1555         pattern = Pattern.compile(toSupplementaries("a+"));
1556         matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1557         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1558             failCount++;
1559 
1560         report("Replace First");
1561     }
1562 
1563     private static void unixLinesTest() {
             // Checks how '.' and '$' treat U+2028 (LINE SEPARATOR) with and
             // without Pattern.UNIX_LINES. Every mismatch increments failCount;
             // report("Unix Lines") is called once at the end.
1564         Pattern pattern = Pattern.compile(".*");
1565         Matcher matcher = pattern.matcher("aa\u2028blah");
1566         matcher.find();
             // Default mode: the first ".*" match is expected to stop before U+2028.
1567         if (!matcher.group(0).equals("aa"))
1568             failCount++;
1569 
1570         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1571         matcher = pattern.matcher("aa\u2028blah");
1572         matcher.find();
             // UNIX_LINES: U+2028 is expected to be matched by '.', so the first
             // match covers the whole input.
1573         if (!matcher.group(0).equals("aa\u2028blah"))
1574             failCount++;
1575 
1576         pattern = Pattern.compile("[az]$",
1577                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1578         matcher = pattern.matcher("aa\u2028zz");
             // check(Matcher, String, boolean) is defined elsewhere in this file;
             // presumably 'false' means 'a' + U+2028 must not be what is found
             // -- confirm against the helper.
1579         check(matcher, "a\u2028", false);
1580 
1581         // Supplementary character test
             // Same three cases with pattern and/or input passed through
             // toSupplementaries() (helper defined elsewhere in this file).
1582         pattern = Pattern.compile(".*");
1583         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1584         matcher.find();
1585         if (!matcher.group(0).equals(toSupplementaries("aa")))
1586             failCount++;
1587 
1588         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1589         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1590         matcher.find();
1591         if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1592             failCount++;
1593 
1594         pattern = Pattern.compile(toSupplementaries("[az]$"),
1595                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1596         matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1597         check(matcher, toSupplementaries("a\u2028"), false);
1598 
1599         report("Unix Lines");
1600     }
1601 
1602     private static void commentsTest() {
             // Exercises Pattern.COMMENTS: every pattern below is compiled with
             // that flag and is expected to match its input completely
             // (Matcher.matches()). A non-match increments failCount.
1603         int flags = Pattern.COMMENTS;
1604 
             // An escaped '#' is expected to be a literal '#', not a comment start.
1605         Pattern pattern = Pattern.compile("aa \\# aa", flags);
1606         Matcher matcher = pattern.matcher("aa#aa");
1607         if (!matcher.matches())
1608             failCount++;
1609 
             // Unescaped '#' starts a comment; only "aa" remains of the pattern.
1610         pattern = Pattern.compile("aa  # blah", flags);
1611         matcher = pattern.matcher("aa");
1612         if (!matcher.matches())
1613             failCount++;
1614 
             // Whitespace inside the pattern is expected to be ignored.
1615         pattern = Pattern.compile("aa blah", flags);
1616         matcher = pattern.matcher("aablah");
1617         if (!matcher.matches())
1618              failCount++;
1619 
1620         pattern = Pattern.compile("aa  # blah blech  ", flags);
1621         matcher = pattern.matcher("aa");
1622         if (!matcher.matches())
1623             failCount++;
1624 
             // A newline ends the comment; trailing whitespace is still ignored.
1625         pattern = Pattern.compile("aa  # blah\n  ", flags);
1626         matcher = pattern.matcher("aa");
1627         if (!matcher.matches())
1628             failCount++;
1629 
             // Pattern text after the newline ("bc") is active again.
1630         pattern = Pattern.compile("aa  # blah\nbc # blech", flags);
1631         matcher = pattern.matcher("aabc");
1632         if (!matcher.matches())
1633              failCount++;
1634 
             // '#' directly after "bc" (no space) still starts a comment.
1635         pattern = Pattern.compile("aa  # blah\nbc# blech", flags);
1636         matcher = pattern.matcher("aabc");
1637         if (!matcher.matches())
1638              failCount++;
1639 
             // Escaped '#' on the second line is literal, so "blech" is matched too.
1640         pattern = Pattern.compile("aa  # blah\nbc\\# blech", flags);
1641         matcher = pattern.matcher("aabc#blech");
1642         if (!matcher.matches())
1643              failCount++;
1644 
1645         // Supplementary character test
             // Same eight cases with pattern and input passed through
             // toSupplementaries() (helper defined elsewhere in this file).
1646         pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1647         matcher = pattern.matcher(toSupplementaries("aa#aa"));
1648         if (!matcher.matches())
1649             failCount++;
1650 
1651         pattern = Pattern.compile(toSupplementaries("aa  # blah"), flags);
1652         matcher = pattern.matcher(toSupplementaries("aa"));
1653         if (!matcher.matches())
1654             failCount++;
1655 
1656         pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1657         matcher = pattern.matcher(toSupplementaries("aablah"));
1658         if (!matcher.matches())
1659              failCount++;
1660 
1661         pattern = Pattern.compile(toSupplementaries("aa  # blah blech  "), flags);
1662         matcher = pattern.matcher(toSupplementaries("aa"));
1663         if (!matcher.matches())
1664             failCount++;
1665 
1666         pattern = Pattern.compile(toSupplementaries("aa  # blah\n  "), flags);
1667         matcher = pattern.matcher(toSupplementaries("aa"));
1668         if (!matcher.matches())
1669             failCount++;
1670 
1671         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc # blech"), flags);
1672         matcher = pattern.matcher(toSupplementaries("aabc"));
1673         if (!matcher.matches())
1674              failCount++;
1675 
1676         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc# blech"), flags);
1677         matcher = pattern.matcher(toSupplementaries("aabc"));
1678         if (!matcher.matches())
1679              failCount++;
1680 
1681         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc\\# blech"), flags);
1682         matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1683         if (!matcher.matches())
1684              failCount++;
1685 
1686         report("Comments");
1687     }
1688 
1689     private static void caseFoldingTest() { // bug 4504687
             // Exercises CASE_INSENSITIVE and UNICODE_CASE, alone and combined,
             // over ASCII, Latin-1 Supplement and Cyrillic letters. Each
             // unexpected result increments failCount.
1690         int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
             // Sanity checks: case-insensitivity must not make different
             // letters ('a' vs 'b') match.
1691         Pattern pattern = Pattern.compile("aa", flags);
1692         Matcher matcher = pattern.matcher("ab");
1693         if (matcher.matches())
1694             failCount++;
1695 
1696         pattern = Pattern.compile("aA", flags);
1697         matcher = pattern.matcher("ab");
1698         if (matcher.matches())
1699             failCount++;
1700 
1701         pattern = Pattern.compile("aa", flags);
1702         matcher = pattern.matcher("aB");
1703         if (matcher.matches())
1704             failCount++;
1705         matcher = pattern.matcher("Ab");
1706         if (matcher.matches())
1707             failCount++;
1708 
             // patterns[], texts[] and expected[] are parallel arrays: entry i of
             // each belongs to the same case. Every row of three holds the
             // ASCII, Latin-1 and Cyrillic variant of one pattern construct.
1709         // ASCII               "a"
1710         // Latin-1 Supplement  "a" + grave
1711         // Cyrillic            "a"
1712         String[] patterns = new String[] {
1713             //single
1714             "a", "\u00e0", "\u0430",
1715             //slice
1716             "ab", "\u00e0\u00e1", "\u0430\u0431",
1717             //class single
1718             "[a]", "[\u00e0]", "[\u0430]",
1719             //class range
1720             "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1721             //back reference
1722             "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1723         };
1724 
             // Inputs that differ from the corresponding pattern only by case.
1725         String[] texts = new String[] {
1726             "A", "\u00c0", "\u0410",
1727             "AB", "\u00c0\u00c1", "\u0410\u0411",
1728             "A", "\u00c0", "\u0410",
1729             "B", "\u00c2", "\u0411",
1730             "aA", "\u00e0\u00c0", "\u0430\u0410"
1731         };
1732 
             // Expected outcome with CASE_INSENSITIVE only (first loop below):
             // the ASCII column matches, the two non-ASCII columns do not.
1733         boolean[] expected = new boolean[] {
1734             true, false, false,
1735             true, false, false,
1736             true, false, false,
1737             true, false, false,
1738             true, false, false
1739         };
1740 
1741         flags = Pattern.CASE_INSENSITIVE;
1742         for (int i = 0; i < patterns.length; i++) {
1743             pattern = Pattern.compile(patterns[i], flags);
1744             matcher = pattern.matcher(texts[i]);
1745             if (matcher.matches() != expected[i]) {
1746                 System.out.println("<1> Failed at " + i);
1747                 failCount++;
1748             }
1749         }
1750 
             // With both flags every pair is expected to match.
1751         flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1752         for (int i = 0; i < patterns.length; i++) {
1753             pattern = Pattern.compile(patterns[i], flags);
1754             matcher = pattern.matcher(texts[i]);
1755             if (!matcher.matches()) {
1756                 System.out.println("<2> Failed at " + i);
1757                 failCount++;
1758             }
1759         }
1760         // flag unicode_case alone should do nothing
1761         flags = Pattern.UNICODE_CASE;
1762         for (int i = 0; i < patterns.length; i++) {
1763             pattern = Pattern.compile(patterns[i], flags);
1764             matcher = pattern.matcher(texts[i]);
1765             if (matcher.matches()) {
1766                 System.out.println("<3> Failed at " + i);
1767                 failCount++;
1768             }
1769         }
1770 
1771         // Special cases: i, I, u+0131 and u+0130
             // The range h-j contains 'i'; with both flags it is expected to
             // match U+0131 followed by U+0130 as well.
1772         flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1773         pattern = Pattern.compile("[h-j]+", flags);
1774         if (!pattern.matcher("\u0131\u0130").matches())
1775             failCount++;
1776         report("Case Folding");
1777     }
1778 
1779     private static void appendTest() {
             // Exercises group references ($n) in replacement strings through
             // Matcher.replaceAll() and through appendReplacement()/appendTail(),
             // first on plain text and then on toSupplementaries() text.
1780         Pattern pattern = Pattern.compile("(ab)(cd)");
1781         Matcher matcher = pattern.matcher("abcd");
1782         String result = matcher.replaceAll("$2$1");
1783         if (!result.equals("cdab"))
1784             failCount++;
1785 
1786         String  s1 = "Swap all: first = 123, second = 456";
1787         String  s2 = "Swap one: first = 123, second = 456";
             // Replacement that emits the three captured groups in reverse order.
1788         String  r  = "$3$2$1";
1789         pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1790         matcher = pattern.matcher(s1);
1791 
1792         result = matcher.replaceAll(r);
1793         if (!result.equals("Swap all: 123 = first, 456 = second"))
1794             failCount++;
1795 
1796         matcher = pattern.matcher(s2);
1797 
             // Only the first match is replaced here; appendTail() copies the
             // rest of the input unchanged.
             // NOTE(review): if find() returns false this check is skipped
             // without incrementing failCount.
1798         if (matcher.find()) {
1799             StringBuffer sb = new StringBuffer();
1800             matcher.appendReplacement(sb, r);
1801             matcher.appendTail(sb);
1802             result = sb.toString();
1803             if (!result.equals("Swap one: 123 = first, second = 456"))
1804                 failCount++;
1805         }
1806 
1807         // Supplementary character test
1808         pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1809         matcher = pattern.matcher(toSupplementaries("abcd"));
1810         result = matcher.replaceAll("$2$1");
1811         if (!result.equals(toSupplementaries("cdab")))
1812             failCount++;
1813 
1814         s1 = toSupplementaries("Swap all: first = 123, second = 456");
1815         s2 = toSupplementaries("Swap one: first = 123, second = 456");
             // The replacement string itself also goes through toSupplementaries();
             // the expectations below rely on "$3$2$1" still working as group
             // references afterwards -- helper is defined elsewhere, confirm there.
1816         r  = toSupplementaries("$3$2$1");
1817         pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1818         matcher = pattern.matcher(s1);
1819 
1820         result = matcher.replaceAll(r);
1821         if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1822             failCount++;
1823 
1824         matcher = pattern.matcher(s2);
1825 
             // NOTE(review): as above, a failed find() is not counted as a failure.
1826         if (matcher.find()) {
1827             StringBuffer sb = new StringBuffer();
1828             matcher.appendReplacement(sb, r);
1829             matcher.appendTail(sb);
1830             result = sb.toString();
1831             if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1832                 failCount++;
1833         }
1834         report("Append");
1835     }
1836 
1837     private static void splitTest() {
             // Exercises Pattern.split() (String and CharBuffer input, with and
             // without limit), String.split() over a range of limits, and
             // Pattern.splitAsStream() against a table of expected results.
             // Each mismatch increments failCount.
1838         Pattern pattern = Pattern.compile(":");
             // limit 2: at most two pieces, the second keeps the remaining ':'.
1839         String[] result = pattern.split("foo:and:boo", 2);
1840         if (!result[0].equals("foo"))
1841             failCount++;
1842         if (!result[1].equals("and:boo"))
1843             failCount++;
1844         // Supplementary character test
1845         Pattern patternX = Pattern.compile(toSupplementaries("X"));
1846         result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1847         if (!result[0].equals(toSupplementaries("foo")))
1848             failCount++;
1849         if (!result[1].equals(toSupplementaries("andXboo")))
1850             failCount++;
1851 
             // Same split on a CharSequence that is not a String (CharBuffer).
1852         CharBuffer cb = CharBuffer.allocate(100);
1853         cb.put("foo:and:boo");
1854         cb.flip();
1855         result = pattern.split(cb);
1856         if (!result[0].equals("foo"))
1857             failCount++;
1858         if (!result[1].equals("and"))
1859             failCount++;
1860         if (!result[2].equals("boo"))
1861             failCount++;
1862 
1863         // Supplementary character test
1864         CharBuffer cbs = CharBuffer.allocate(100);
1865         cbs.put(toSupplementaries("fooXandXboo"));
1866         cbs.flip();
1867         result = patternX.split(cbs);
1868         if (!result[0].equals(toSupplementaries("foo")))
1869             failCount++;
1870         if (!result[1].equals(toSupplementaries("and")))
1871             failCount++;
1872         if (!result[2].equals(toSupplementaries("boo")))
1873             failCount++;
1874 
             // Split "0123456789" on each single digit x for limits -2..2.
             // Every digit occurs exactly once, so two pieces are expected
             // unless a positive limit caps the result (limit 1 -> one piece).
1875         String source = "0123456789";
1876         for (int limit=-2; limit<3; limit++) {
1877             for (int x=0; x<10; x++) {
1878                 result = source.split(Integer.toString(x), limit);
1879                 int expectedLength = limit < 1 ? 2 : limit;
1880 
1881                 if ((limit == 0) && (x == 9)) {
1882                     // expected dropping of ""
1883                     if (result.length != 1)
1884                         failCount++;
1885                     if (!result[0].equals("012345678")) {
1886                         failCount++;
1887                     }
1888                 } else {
1889                     if (result.length != expectedLength) {
1890                         failCount++;
1891                     }
                         // With limit 1 no split happens, so the first (only)
                         // piece is the whole source rather than the prefix.
1892                     if (!result[0].equals(source.substring(0,x))) {
1893                         if (limit != 1) {
1894                             failCount++;
1895                         } else {
1896                             if (!result[0].equals(source.substring(0,10))) {
1897                                 failCount++;
1898                             }
1899                         }
1900                     }
1901                     if (expectedLength > 1) { // Check segment 2
1902                         if (!result[1].equals(source.substring(x+1,10)))
1903                             failCount++;
1904                     }
1905                 }
1906             }
1907         }
1908         // Check the case for no match found
1909         for (int limit=-2; limit<3; limit++) {
1910             result = source.split("e", limit);
1911             if (result.length != 1)
1912                 failCount++;
1913             if (!result[0].equals(source))
1914                 failCount++;
1915         }
1916         // Check the case for limit == 0, source = "";
1917         // split() returns a one-element array holding "" for empty source, see #6559590
1918         source = "";
1919         result = source.split("e", 0);
1920         if (result.length != 1)
1921             failCount++;
1922         if (!result[0].equals(source))
1923             failCount++;
1924 
1925         // Check both split() and splitAsStream(), especially for zero-length
1926         // input and zero-length match cases
             // Each row is { regex, input }; row i pairs with expected[i] below.
1927         String[][] input = new String[][] {
1928             { " ",           "Abc Efg Hij" },   // normal non-zero-match
1929             { " ",           " Abc Efg Hij" },  // leading empty str for non-zero-match
1930             { " ",           "Abc  Efg Hij" },  // non-zero-match in the middle
1931             { "(?=\\p{Lu})", "AbcEfgHij" },     // no leading empty str for zero-match
1932             { "(?=\\p{Lu})", "AbcEfg" },
1933             { "(?=\\p{Lu})", "Abc" },
1934             { " ",           "" },              // zero-length input
1935             { ".*",          "" },
1936 
1937             // some tests from PatternStreamTest.java
1938             { "4",       "awgqwefg1fefw4vssv1vvv1" },
1939             { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" },
1940             { "1",       "awgqwefg1fefw4vssv1vvv1" },
1941             { "1",       "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" },
1942             { "\u56da",  "1\u56da23\u56da456\u56da7890" },
1943             { "\u56da",  "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" },
1944             { "\u56da",  "" },
1945             { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs
1946             { "o",       "boo:and:foo" },
1947             { "o",       "booooo:and:fooooo" },
1948             { "o",       "fooooo:" },
1949         };
1950 
             // Expected pieces for the corresponding input row.
1951         String[][] expected = new String[][] {
1952             { "Abc", "Efg", "Hij" },
1953             { "", "Abc", "Efg", "Hij" },
1954             { "Abc", "", "Efg", "Hij" },
1955             { "Abc", "Efg", "Hij" },
1956             { "Abc", "Efg" },
1957             { "Abc" },
1958             { "" },
1959             { "" },
1960 
1961             { "awgqwefg1fefw", "vssv1vvv1" },
1962             { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" },
1963             { "awgqwefg", "fefw4vssv", "vvv" },
1964             { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" },
1965             { "1", "23", "456", "7890" },
1966             { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" },
1967             { "" },
1968             { "This", "is", "testing", "", "with", "different", "separators" },
1969             { "b", "", ":and:f" },
1970             { "b", "", "", "", "", ":and:f" },
1971             { "f", "", "", "", "", ":" },
1972         };
1973         for (int i = 0; i < input.length; i++) {
1974             pattern = Pattern.compile(input[i][0]);
1975             if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) {
1976                 failCount++;
1977             }
                 // The stream variant is only compared for non-empty input.
1978             if (input[i][1].length() > 0 &&  // splitAsStream() returns an empty resulting
1979                                              // array for zero-length input for now
1980                 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(),
1981                                expected[i])) {
1982                 failCount++;
1983             }
1984         }
1985         report("Split");
1986     }
1987 
1988     private static void negationTest() {
             // Checks that '^' is treated as a literal when it is not the first
             // character of a character class: each class below is expected to
             // match the whole run of '@', '[' and '^' characters.
1989         Pattern pattern = Pattern.compile("[\\[@^]+");
1990         Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1991         if (!matcher.find())
1992             failCount++;
1993         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1994             failCount++;
1995         pattern = Pattern.compile("[@\\[^]+");
1996         matcher = pattern.matcher("@@@@[[[[^^^^");
1997         if (!matcher.find())
1998             failCount++;
1999         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
2000             failCount++;
             // Same class with '@' listed twice, '^' in the middle.
2001         pattern = Pattern.compile("[@\\[^@]+");
2002         matcher = pattern.matcher("@@@@[[[[^^^^");
2003         if (!matcher.find())
2004             failCount++;
2005         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
2006             failCount++;
2007 
             // An escaped ')' outside any group must be found as a literal.
2008         pattern = Pattern.compile("\\)");
2009         matcher = pattern.matcher("xxx)xxx");
2010         if (!matcher.find())
2011             failCount++;
2012 
2013         report("Negation");
2014     }
2015 
2016     private static void ampersandTest() {
             // Checks a single '&' inside a character class (first, last, and
             // escaped) against input made of '@' and '&'.
             // check(Pattern, String, boolean) is defined elsewhere in this file;
             // presumably 'true' means a match is expected -- confirm there.
2017         Pattern pattern = Pattern.compile("[&@]+");
2018         check(pattern, "@@@@&&&&", true);
2019 
2020         pattern = Pattern.compile("[@&]+");
2021         check(pattern, "@@@@&&&&", true);
2022 
2023         pattern = Pattern.compile("[@\\&]+");
2024         check(pattern, "@@@@&&&&", true);
2025 
2026         report("Ampersand");
2027     }
2028 
2029     private static void octalTest() throws Exception {
             // Checks regex escapes for characters given by code: a four-digit
             // hex escape first, then octal escapes (backslash-zero followed by
             // one to three octal digits). Each pattern must match its input
             // completely, otherwise failCount is incremented.
2030         Pattern pattern = Pattern.compile("\\u0007");
2031         Matcher matcher = pattern.matcher("\u0007");
2032         if (!matcher.matches())
2033             failCount++;
2034         pattern = Pattern.compile("\\07");
2035         matcher = pattern.matcher("\u0007");
2036         if (!matcher.matches())
2037             failCount++;
2038         pattern = Pattern.compile("\\007");
2039         matcher = pattern.matcher("\u0007");
2040         if (!matcher.matches())
2041             failCount++;
2042         pattern = Pattern.compile("\\0007");
2043         matcher = pattern.matcher("\u0007");
2044         if (!matcher.matches())
2045             failCount++;
             // Octal 040 is the space character U+0020.
2046         pattern = Pattern.compile("\\040");
2047         matcher = pattern.matcher("\u0020");
2048         if (!matcher.matches())
2049             failCount++;
             // Expected input is U+0020 followed by '3': the octal escape is
             // expected to end after "040" and the trailing 3 to be a literal.
2050         pattern = Pattern.compile("\\0403");
2051         matcher = pattern.matcher("\u00203");
2052         if (!matcher.matches())
2053             failCount++;
             // Here all three digits belong to the escape: octal 103 is U+0043.
2054         pattern = Pattern.compile("\\0103");
2055         matcher = pattern.matcher("\u0043");
2056         if (!matcher.matches())
2057             failCount++;
2058 
2059         report("Octal");
2060     }
2061 
2062     private static void longPatternTest() throws Exception {
             // Compiles several long literal patterns (the 32-, 33- and
             // 34-character literals below plus a generated 100-letter one)
             // and counts a failure if any compile throws
             // PatternSyntaxException. The compiled patterns are not used.
2063         try {
2064             Pattern pattern = Pattern.compile(
2065                 "a 32-character-long pattern xxxx");
2066             pattern = Pattern.compile("a 33-character-long pattern xxxxx");
2067             pattern = Pattern.compile("a thirty four character long regex");
2068             StringBuffer patternToBe = new StringBuffer(101);
                 // 100 characters cycling through 'a'..'z' (97 is 'a').
2069             for (int i=0; i<100; i++)
2070                 patternToBe.append((char)(97 + i%26));
2071             pattern = Pattern.compile(patternToBe.toString());
2072         } catch (PatternSyntaxException e) {
2073             failCount++;
2074         }
2075 
2076         // Supplementary character test
2077         try {
2078             Pattern pattern = Pattern.compile(
2079                 toSupplementaries("a 32-character-long pattern xxxx"));
2080             pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
2081             pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
2082             StringBuffer patternToBe = new StringBuffer(101*2);
                 // 100 supplementary code points, each appended as the char[]
                 // returned by Character.toChars().
2083             for (int i=0; i<100; i++)
2084                 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
2085                                                      + 97 + i%26));
2086             pattern = Pattern.compile(patternToBe.toString());
2087         } catch (PatternSyntaxException e) {
2088             failCount++;
2089         }
2090         report("LongPattern");
2091     }
2092 
2093     private static void group0Test() throws Exception {
             // Checks that group(0) is the whole matched text ("testing") after
             // lookingAt() and matches(), on a reset matcher and on a fresh
             // one; the first case goes through the check() helper instead.
             // A failed match attempt is itself counted as a failure.
2094         Pattern pattern = Pattern.compile("(tes)ting");
2095         Matcher matcher = pattern.matcher("testing");
             // check(Matcher, String) is defined elsewhere in this file;
             // presumably it finds the next match and compares it -- confirm.
2096         check(matcher, "testing");
2097 
2098         matcher.reset("testing");
2099         if (matcher.lookingAt()) {
2100             if (!matcher.group(0).equals("testing"))
2101                 failCount++;
2102         } else {
2103             failCount++;
2104         }
2105 
2106         matcher.reset("testing");
2107         if (matcher.matches()) {
2108             if (!matcher.group(0).equals("testing"))
2109                 failCount++;
2110         } else {
2111             failCount++;
2112         }
2113 
             // Same checks on freshly created Pattern/Matcher objects.
2114         pattern = Pattern.compile("(tes)ting");
2115         matcher = pattern.matcher("testing");
2116         if (matcher.lookingAt()) {
2117             if (!matcher.group(0).equals("testing"))
2118                 failCount++;
2119         } else {
2120             failCount++;
2121         }
2122 
             // Variant with a leading '^' anchor.
2123         pattern = Pattern.compile("^(tes)ting");
2124         matcher = pattern.matcher("testing");
2125         if (matcher.matches()) {
2126             if (!matcher.group(0).equals("testing"))
2127                 failCount++;
2128         } else {
2129             failCount++;
2130         }
2131 
2132         // Supplementary character test
2133         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2134         matcher = pattern.matcher(toSupplementaries("testing"));
2135         check(matcher, toSupplementaries("testing"));
2136 
2137         matcher.reset(toSupplementaries("testing"));
2138         if (matcher.lookingAt()) {
2139             if (!matcher.group(0).equals(toSupplementaries("testing")))
2140                 failCount++;
2141         } else {
2142             failCount++;
2143         }
2144 
2145         matcher.reset(toSupplementaries("testing"));
2146         if (matcher.matches()) {
2147             if (!matcher.group(0).equals(toSupplementaries("testing")))
2148                 failCount++;
2149         } else {
2150             failCount++;
2151         }
2152 
2153         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2154         matcher = pattern.matcher(toSupplementaries("testing"));
2155         if (matcher.lookingAt()) {
2156             if (!matcher.group(0).equals(toSupplementaries("testing")))
2157                 failCount++;
2158         } else {
2159             failCount++;
2160         }
2161 
2162         pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
2163         matcher = pattern.matcher(toSupplementaries("testing"));
2164         if (matcher.matches()) {
2165             if (!matcher.group(0).equals(toSupplementaries("testing")))
2166                 failCount++;
2167         } else {
2168             failCount++;
2169         }
2170 
2171         report("Group0");
2172     }
2173 
2174     private static void findIntTest() throws Exception {
             // Exercises Matcher.find(int): searching from a given index,
             // searching from index == input length, and rejecting an index
             // past the end of the input.
2175         Pattern p = Pattern.compile("blah");
2176         Matcher m = p.matcher("zzzzblahzzzzzblah");
2177         boolean result = m.find(2);
2178         if (!result)
2179             failCount++;
2180 
             // "$" must be found when the search starts at index 10, which is
             // the length of the input.
2181         p = Pattern.compile("$");
2182         m = p.matcher("1234567890");
2183         result = m.find(10);
2184         if (!result)
2185             failCount++;
             // Index 11 is beyond the end: reaching the line after find(11)
             // means no exception was thrown, which counts as a failure.
2186         try {
2187             result = m.find(11);
2188             failCount++;
2189         } catch (IndexOutOfBoundsException e) {
2190             // correct result
2191         }
2192 
2193         // Supplementary character test
2194         p = Pattern.compile(toSupplementaries("blah"));
2195         m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
2196         result = m.find(2);
2197         if (!result)
2198             failCount++;
2199 
2200         report("FindInt");
2201     }
2202 
2203     private static void emptyPatternTest() throws Exception {
             // Checks the behaviour of the empty pattern "" with find(),
             // matches() and the static Pattern.matches() on empty and
             // non-empty input.
2204         Pattern p = Pattern.compile("");
2205         Matcher m = p.matcher("foo");
2206 
2207         // Should find empty pattern at beginning of input
2208         boolean result = m.find();
2209         if (result != true)
2210             failCount++;
2211         if (m.start() != 0)
2212             failCount++;
2213 
2214         // Should not match entire input if input is not empty
2215         m.reset();
2216         result = m.matches();
2217         if (result == true)
2218             failCount++;
2219 
             // After the failed matches() above there is no current match, so
             // start(0) is expected to throw; returning normally is a failure.
2220         try {
2221             m.start(0);
2222             failCount++;
2223         } catch (IllegalStateException e) {
2224             // Correct result
2225         }
2226 
2227         // Should match entire input if input is empty
2228         m.reset("");
2229         result = m.matches();
2230         if (result != true)
2231             failCount++;
2232 
             // Same two expectations through the static convenience method.
2233         result = Pattern.matches("", "");
2234         if (result != true)
2235             failCount++;
2236 
2237         result = Pattern.matches("", "foo");
2238         if (result == true)
2239             failCount++;
2240         report("EmptyPattern");
2241     }
2242 
2243     private static void charClassTest() throws Exception {
             // Exercises character classes: a literal ']' after a closed class,
             // a nested class, and case-insensitive Unicode matching of
             // characters whose case counterparts lie in other blocks.
             // check(Pattern, String, boolean) is defined elsewhere in this file;
             // presumably 'true' means a match is expected -- confirm there.
2244         Pattern pattern = Pattern.compile("blah[ab]]blech");
2245         check(pattern, "blahb]blech", true);
2246 
2247         pattern = Pattern.compile("[abc[def]]");
2248         check(pattern, "b", true);
2249 
2250         // Supplementary character tests
2251         pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2252         check(pattern, toSupplementaries("blahb]blech"), true);
2253 
2254         pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2255         check(pattern, toSupplementaries("b"), true);
2256 
             // Any exception thrown while compiling or checking these patterns
             // is counted as a single failure.
2257         try {
2258             // u00ff when UNICODE_CASE
2259             pattern = Pattern.compile("[ab\u00ffcd]",
2260                                       Pattern.CASE_INSENSITIVE|
2261                                       Pattern.UNICODE_CASE);
2262             check(pattern, "ab\u00ffcd", true);
2263             check(pattern, "Ab\u0178Cd", true);
2264 
2265             // u00b5 when UNICODE_CASE
2266             pattern = Pattern.compile("[ab\u00b5cd]",
2267                                       Pattern.CASE_INSENSITIVE|
2268                                       Pattern.UNICODE_CASE);
2269             check(pattern, "ab\u00b5cd", true);
2270             check(pattern, "Ab\u039cCd", true);
2271         } catch (Exception e) { failCount++; }
2272 
2273         /* Special cases
2274            (1)LatinSmallLetterLongS u+017f
2275            (2)LatinSmallLetterDotlessI u+0131
2276            (3)LatinCapitalLetterIWithDotAbove u+0130
2277            (4)KelvinSign u+212a
2278            (5)AngstromSign u+212b
2279         */
             // The class lists s, i, k and U+00C5; with both flags it is
             // expected to match the five special characters listed above.
2280         int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2281         pattern = Pattern.compile("[sik\u00c5]+", flags);
2282         if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2283             failCount++;
2284 
2285         report("CharClass");
2286     }
2287 
2288     private static void caretTest() throws Exception {
             // Compares unanchored patterns with their '^' / \A anchored forms,
             // with and without MULTILINE, and checks replaceAll() on a bare '^'.
             // check(Matcher, String) is defined elsewhere in this file;
             // presumably it advances to the next match and compares it with
             // the given text -- confirm there.
2289         Pattern pattern = Pattern.compile("\\w*");
2290         Matcher matcher = pattern.matcher("a#bc#def##g");
             // Unanchored "\w*": word runs alternate with empty matches, and
             // after the last expected match find() must return false.
2291         check(matcher, "a");
2292         check(matcher, "");
2293         check(matcher, "bc");
2294         check(matcher, "");
2295         check(matcher, "def");
2296         check(matcher, "");
2297         check(matcher, "");
2298         check(matcher, "g");
2299         check(matcher, "");
2300         if (matcher.find())
2301             failCount++;
2302 
             // Anchored: only the match at the start of input is expected.
2303         pattern = Pattern.compile("^\\w*");
2304         matcher = pattern.matcher("a#bc#def##g");
2305         check(matcher, "a");
2306         if (matcher.find())
2307             failCount++;
2308 
2309         pattern = Pattern.compile("\\w");
2310         matcher = pattern.matcher("abc##x");
2311         check(matcher, "a");
2312         check(matcher, "b");
2313         check(matcher, "c");
2314         check(matcher, "x");
2315         if (matcher.find())
2316             failCount++;
2317 
2318         pattern = Pattern.compile("^\\w");
2319         matcher = pattern.matcher("abc##x");
2320         check(matcher, "a");
2321         if (matcher.find())
2322             failCount++;
2323 
             // \A: only one match is expected even though the input has a
             // second line.
2324         pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2325         matcher = pattern.matcher("abcdef-ghi\njklmno");
2326         check(matcher, "abc");
2327         if (matcher.find())
2328             failCount++;
2329 
             // '^' with MULTILINE: a match is expected at the start of each line.
2330         pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2331         matcher = pattern.matcher("abcdef-ghi\njklmno");
2332         check(matcher, "abc");
2333         check(matcher, "jkl");
2334         if (matcher.find())
2335             failCount++;
2336 
             // Replacing a bare '^' inserts "X" once, at the start of the input.
2337         pattern = Pattern.compile("^", Pattern.MULTILINE);
2338         matcher = pattern.matcher("this is some text");
2339         String result = matcher.replaceAll("X");
2340         if (!result.equals("Xthis is some text"))
2341             failCount++;
2342 
2343         pattern = Pattern.compile("^");
2344         matcher = pattern.matcher("this is some text");
2345         result = matcher.replaceAll("X");
2346         if (!result.equals("Xthis is some text"))
2347             failCount++;
2348 
             // Trailing newline: no "X" is expected after the final '\n'.
2349         pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2350         matcher = pattern.matcher("this is some text\n");
2351         result = matcher.replaceAll("X");
2352         if (!result.equals("Xthis is some text\n"))
2353             failCount++;
2354 
2355         report("Caret");
2356     }
2357 
2358     private static void groupCaptureTest() throws Exception {
             // Checks that independent (?>...) and non-capturing (?:...) groups
             // do not create a capturing group: group(1) must throw
             // IndexOutOfBoundsException. Returning normally counts as a failure.
             // NOTE(review): the local 'blah' is never read; it only receives
             // the result of the call that is expected to throw.
2359         // Independent group
2360         Pattern pattern = Pattern.compile("x+(?>y+)z+");
2361         Matcher matcher = pattern.matcher("xxxyyyzzz");
2362         matcher.find();
2363         try {
2364             String blah = matcher.group(1);
2365             failCount++;
2366         } catch (IndexOutOfBoundsException ioobe) {
2367             // Good result
2368         }
2369         // Pure group
2370         pattern = Pattern.compile("x+(?:y+)z+");
2371         matcher = pattern.matcher("xxxyyyzzz");
2372         matcher.find();
2373         try {
2374             String blah = matcher.group(1);
2375             failCount++;
2376         } catch (IndexOutOfBoundsException ioobe) {
2377             // Good result
2378         }
2379 
2380         // Supplementary character tests
2381         // Independent group
2382         pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2383         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2384         matcher.find();
2385         try {
2386             String blah = matcher.group(1);
2387             failCount++;
2388         } catch (IndexOutOfBoundsException ioobe) {
2389             // Good result
2390         }
2391         // Pure group
2392         pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2393         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2394         matcher.find();
2395         try {
2396             String blah = matcher.group(1);
2397             failCount++;
2398         } catch (IndexOutOfBoundsException ioobe) {
2399             // Good result
2400         }
2401 
2402         report("GroupCapture");
2403     }
2404 
2405     private static void backRefTest() throws Exception {
2406         Pattern pattern = Pattern.compile("(a*)bc\\1");
2407         check(pattern, "zzzaabcazzz", true);
2408 
2409         pattern = Pattern.compile("(a*)bc\\1");
2410         check(pattern, "zzzaabcaazzz", true);
2411 
2412         pattern = Pattern.compile("(abc)(def)\\1");
2413         check(pattern, "abcdefabc", true);
2414 
2415         pattern = Pattern.compile("(abc)(def)\\3");
2416         check(pattern, "abcdefabc", false);
2417 
2418         try {
2419             for (int i = 1; i < 10; i++) {
2420                 // Make sure backref 1-9 are always accepted
2421                 pattern = Pattern.compile("abcdef\\" + i);
2422                 // and fail to match if the target group does not exit
2423                 check(pattern, "abcdef", false);
2424             }
2425         } catch(PatternSyntaxException e) {
2426             failCount++;
2427         }
2428 
2429         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2430         check(pattern, "abcdefghija", false);
2431         check(pattern, "abcdefghija1", true);
2432 
2433         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2434         check(pattern, "abcdefghijkk", true);
2435 
2436         pattern = Pattern.compile("(a)bcdefghij\\11");
2437         check(pattern, "abcdefghija1", true);
2438 
2439         // Supplementary character tests
2440         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2441         check(pattern, toSupplementaries("zzzaabcazzz"), true);
2442 
2443         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2444         check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2445 
2446         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2447         check(pattern, toSupplementaries("abcdefabc"), true);
2448 
2449         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2450         check(pattern, toSupplementaries("abcdefabc"), false);
2451 
2452         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2453         check(pattern, toSupplementaries("abcdefghija"), false);
2454         check(pattern, toSupplementaries("abcdefghija1"), true);
2455 
2456         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2457         check(pattern, toSupplementaries("abcdefghijkk"), true);
2458 
2459         report("BackRef");
2460     }
2461 
2462     /**
2463      * Unicode Technical Report #18, section 2.6 End of Line
2464      * There is no empty line to be matched in the sequence \u000D\u000A
2465      * but there is an empty line in the sequence \u000A\u000D.
2466      */
2467     private static void anchorTest() throws Exception {
2468         Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2469         Matcher m = p.matcher("blah1\r\nblah2");
2470         m.find();
2471         m.find();
2472         if (!m.group().equals("blah2"))
2473             failCount++;
2474 
2475         m.reset("blah1\n\rblah2");
2476         m.find();
2477         m.find();
2478         m.find();
2479         if (!m.group().equals("blah2"))
2480             failCount++;
2481 
2482         // Test behavior of $ with \r\n at end of input
2483         p = Pattern.compile(".+$");
2484         m = p.matcher("blah1\r\n");
2485         if (!m.find())
2486             failCount++;
2487        if (!m.group().equals("blah1"))
2488             failCount++;
2489         if (m.find())
2490             failCount++;
2491 
2492         // Test behavior of $ with \r\n at end of input in multiline
2493         p = Pattern.compile(".+$", Pattern.MULTILINE);
2494         m = p.matcher("blah1\r\n");
2495         if (!m.find())
2496             failCount++;
2497         if (m.find())
2498             failCount++;
2499 
2500         // Test for $ recognition of \u0085 for bug 4527731
2501         p = Pattern.compile(".+$", Pattern.MULTILINE);
2502         m = p.matcher("blah1\u0085");
2503         if (!m.find())
2504             failCount++;
2505 
2506         // Supplementary character test
2507         p = Pattern.compile("^.*$", Pattern.MULTILINE);
2508         m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2509         m.find();
2510         m.find();
2511         if (!m.group().equals(toSupplementaries("blah2")))
2512             failCount++;
2513 
2514         m.reset(toSupplementaries("blah1\n\rblah2"));
2515         m.find();
2516         m.find();
2517         m.find();
2518         if (!m.group().equals(toSupplementaries("blah2")))
2519             failCount++;
2520 
2521         // Test behavior of $ with \r\n at end of input
2522         p = Pattern.compile(".+$");
2523         m = p.matcher(toSupplementaries("blah1\r\n"));
2524         if (!m.find())
2525             failCount++;
2526         if (!m.group().equals(toSupplementaries("blah1")))
2527             failCount++;
2528         if (m.find())
2529             failCount++;
2530 
2531         // Test behavior of $ with \r\n at end of input in multiline
2532         p = Pattern.compile(".+$", Pattern.MULTILINE);
2533         m = p.matcher(toSupplementaries("blah1\r\n"));
2534         if (!m.find())
2535             failCount++;
2536         if (m.find())
2537             failCount++;
2538 
2539         // Test for $ recognition of \u0085 for bug 4527731
2540         p = Pattern.compile(".+$", Pattern.MULTILINE);
2541         m = p.matcher(toSupplementaries("blah1\u0085"));
2542         if (!m.find())
2543             failCount++;
2544 
2545         report("Anchors");
2546     }
2547 
2548     /**
2549      * A basic sanity test of Matcher.lookingAt().
2550      */
2551     private static void lookingAtTest() throws Exception {
2552         Pattern p = Pattern.compile("(ab)(c*)");
2553         Matcher m = p.matcher("abccczzzabcczzzabccc");
2554 
2555         if (!m.lookingAt())
2556             failCount++;
2557 
2558         if (!m.group().equals(m.group(0)))
2559             failCount++;
2560 
2561         m = p.matcher("zzzabccczzzabcczzzabccczzz");
2562         if (m.lookingAt())
2563             failCount++;
2564 
2565         // Supplementary character test
2566         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2567         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2568 
2569         if (!m.lookingAt())
2570             failCount++;
2571 
2572         if (!m.group().equals(m.group(0)))
2573             failCount++;
2574 
2575         m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2576         if (m.lookingAt())
2577             failCount++;
2578 
2579         report("Looking At");
2580     }
2581 
2582     /**
2583      * A basic sanity test of Matcher.matches().
2584      */
2585     private static void matchesTest() throws Exception {
2586         // matches()
2587         Pattern p = Pattern.compile("ulb(c*)");
2588         Matcher m = p.matcher("ulbcccccc");
2589         if (!m.matches())
2590             failCount++;
2591 
2592         // find() but not matches()
2593         m.reset("zzzulbcccccc");
2594         if (m.matches())
2595             failCount++;
2596 
2597         // lookingAt() but not matches()
2598         m.reset("ulbccccccdef");
2599         if (m.matches())
2600             failCount++;
2601 
2602         // matches()
2603         p = Pattern.compile("a|ad");
2604         m = p.matcher("ad");
2605         if (!m.matches())
2606             failCount++;
2607 
2608         // Supplementary character test
2609         // matches()
2610         p = Pattern.compile(toSupplementaries("ulb(c*)"));
2611         m = p.matcher(toSupplementaries("ulbcccccc"));
2612         if (!m.matches())
2613             failCount++;
2614 
2615         // find() but not matches()
2616         m.reset(toSupplementaries("zzzulbcccccc"));
2617         if (m.matches())
2618             failCount++;
2619 
2620         // lookingAt() but not matches()
2621         m.reset(toSupplementaries("ulbccccccdef"));
2622         if (m.matches())
2623             failCount++;
2624 
2625         // matches()
2626         p = Pattern.compile(toSupplementaries("a|ad"));
2627         m = p.matcher(toSupplementaries("ad"));
2628         if (!m.matches())
2629             failCount++;
2630 
2631         report("Matches");
2632     }
2633 
2634     /**
2635      * A basic sanity test of Pattern.matches().
2636      */
2637     private static void patternMatchesTest() throws Exception {
2638         // matches()
2639         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2640                              toSupplementaries("ulbcccccc")))
2641             failCount++;
2642 
2643         // find() but not matches()
2644         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2645                             toSupplementaries("zzzulbcccccc")))
2646             failCount++;
2647 
2648         // lookingAt() but not matches()
2649         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2650                             toSupplementaries("ulbccccccdef")))
2651             failCount++;
2652 
2653         // Supplementary character test
2654         // matches()
2655         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2656                              toSupplementaries("ulbcccccc")))
2657             failCount++;
2658 
2659         // find() but not matches()
2660         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2661                             toSupplementaries("zzzulbcccccc")))
2662             failCount++;
2663 
2664         // lookingAt() but not matches()
2665         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2666                             toSupplementaries("ulbccccccdef")))
2667             failCount++;
2668 
2669         report("Pattern Matches");
2670     }
2671 
2672     /**
2673      * Canonical equivalence testing. Tests the ability of the engine
2674      * to match sequences that are not explicitly specified in the
2675      * pattern when they are considered equivalent by the Unicode Standard.
2676      */
2677     private static void ceTest() throws Exception {
2678         // Decomposed char outside char classes
2679         Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2680         Matcher m = p.matcher("test\u00e5");
2681         if (!m.matches())
2682             failCount++;
2683 
2684         m.reset("testa\u030a");
2685         if (!m.matches())
2686             failCount++;
2687 
2688         // Composed char outside char classes
2689         p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2690         m = p.matcher("test\u00e5");
2691         if (!m.matches())
2692             failCount++;
2693 
2694         m.reset("testa\u030a");
2695         if (!m.find())
2696             failCount++;
2697 
2698         // Decomposed char inside a char class
2699         p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2700         m = p.matcher("test\u00e5");
2701         if (!m.find())
2702             failCount++;
2703 
2704         m.reset("testa\u030a");
2705         if (!m.find())
2706             failCount++;
2707 
2708         // Composed char inside a char class
2709         p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2710         m = p.matcher("test\u00e5");
2711         if (!m.find())
2712             failCount++;
2713 
2714         m.reset("testa\u0300");
2715         if (!m.find())
2716             failCount++;
2717 
2718         m.reset("testa\u030a");
2719         if (!m.find())
2720             failCount++;
2721 
2722         // Marks that cannot legally change order and be equivalent
2723         p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2724         check(p, "testa\u0308\u0300", true);
2725         check(p, "testa\u0300\u0308", false);
2726 
2727         // Marks that can legally change order and be equivalent
2728         p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2729         check(p, "testa\u0308\u0323", true);
2730         check(p, "testa\u0323\u0308", true);
2731 
2732         // Test all equivalences of the sequence a\u0308\u0323\u0300
2733         p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2734         check(p, "testa\u0308\u0323\u0300", true);
2735         check(p, "testa\u0323\u0308\u0300", true);
2736         check(p, "testa\u0308\u0300\u0323", true);
2737         check(p, "test\u00e4\u0323\u0300", true);
2738         check(p, "test\u00e4\u0300\u0323", true);
2739 
2740         Object[][] data = new Object[][] {
2741 
2742         // JDK-4867170
2743         { "[\u1f80-\u1f82]", "ab\u1f80cd",             "f", true },
2744         { "[\u1f80-\u1f82]", "ab\u1f81cd",             "f", true },
2745         { "[\u1f80-\u1f82]", "ab\u1f82cd",             "f", true },
2746         { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true },
2747         { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true },
2748         { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd",       "f", true },
2749         { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd",       "f", true },
2750 
2751         { "\\p{IsGreek}",    "ab\u1f80cd",             "f", true },
2752         { "\\p{IsGreek}",    "ab\u1f81cd",             "f", true },
2753         { "\\p{IsGreek}",    "ab\u1f82cd",             "f", true },
2754         { "\\p{IsGreek}",    "ab\u03b1\u0314\u0345cd", "f", true },
2755         { "\\p{IsGreek}",    "ab\u1f01\u0345cd",       "f", true },
2756 
2757         // backtracking, force to match "\u1f80", instead of \u1f82"
2758         { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true },
2759 
2760         { "[\\p{IsGreek}]",  "\u03b1\u0314\u0345",     "m", true },
2761         { "\\p{IsGreek}",    "\u03b1\u0314\u0345",     "m", true },
2762 
2763         { "[^\u1f80-\u1f82]","\u1f81",                 "m", false },
2764         { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345",     "m", false },
2765         { "[^\u1f01\u0345]", "\u1f81",                 "f", false },
2766 
2767         { "[^\u1f81]+",      "\u1f80\u1f82",           "f", true },
2768         { "[\u1f80]",        "ab\u1f80cd",             "f", true },
2769         { "\u1f80",          "ab\u1f80cd",             "f", true },
2770         { "\u1f00\u0345\u0300",  "\u1f82", "m", true },
2771         { "\u1f80",          "-\u1f00\u0345\u0300-",   "f", true },
2772         { "\u1f82",          "\u1f00\u0345\u0300",     "m", true },
2773         { "\u1f82",          "\u1f80\u0300",           "m", true },
2774 
2775         // JDK-7080302       # compile failed
2776         { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true},
2777 
2778         // JDK-6728861, same cause as above one
2779         { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true},
2780 
2781         // JDK-6995635
2782         { "(\u00e9)", "e\u0301", "m", true },
2783 
2784         // JDK-6736245
2785         // intereting special case, nfc(u2add+u0338) -> u2add+u0338) NOT u2adc
2786         { "\u2ADC", "\u2ADC", "m", true},          // NFC
2787         { "\u2ADC", "\u2ADD\u0338", "m", true},    // NFD
2788 
2789         //  4916384.
2790         // Decomposed hangul (jamos) works inside clazz
2791         { "[\u1100\u1161]", "\u1100\u1161", "m", true},
2792         { "[\u1100\u1161]", "\uac00", "m", true},
2793 
2794         { "[\uac00]", "\u1100\u1161", "m", true},
2795         { "[\uac00]", "\uac00", "m", true},
2796 
2797         // Decomposed hangul (jamos)
2798         { "\u1100\u1161", "\u1100\u1161", "m", true},
2799         { "\u1100\u1161", "\uac00", "m", true},
2800 
2801         // Composed hangul
2802         { "\uac00",  "\u1100\u1161", "m", true },
2803         { "\uac00",  "\uac00", "m", true },
2804 
2805         /* Need a NFDSlice to nfd the source to solve this issue
2806            u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f>  -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2807            u+1d1bc -> nfd: <u+1d1ba><u+1d165>           -> nfc: <u+1d1ba><u+1d165>
2808            <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2809 
2810         // Decomposed supplementary outside char classes
2811         // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true },
2812         // Composed supplementary outside char classes
2813         // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true },
2814         */
2815         { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true },
2816         { "test\ud834\uddc0",             "test\ud834\uddbc\ud834\udd6f", "m", true },
2817 
2818         { "test\ud834\uddc0",             "test\ud834\uddc0",             "m", true },
2819         { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0",             "m", true },
2820         };
2821 
2822         int failCount = 0;
2823         for (Object[] d : data) {
2824             String pn = (String)d[0];
2825             String tt = (String)d[1];
2826             boolean isFind = "f".equals(((String)d[2]));
2827             boolean expected = (boolean)d[3];
2828             boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find()
2829                                  : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches();
2830             if (ret != expected) {
2831                 failCount++;
2832                 continue;
2833             }
2834         }
2835         report("Canonical Equivalence");
2836     }
2837 
2838     /**
2839      * A basic sanity test of Matcher.replaceAll().
2840      */
2841     private static void globalSubstitute() throws Exception {
2842         // Global substitution with a literal
2843         Pattern p = Pattern.compile("(ab)(c*)");
2844         Matcher m = p.matcher("abccczzzabcczzzabccc");
2845         if (!m.replaceAll("test").equals("testzzztestzzztest"))
2846             failCount++;
2847 
2848         m.reset("zzzabccczzzabcczzzabccczzz");
2849         if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2850             failCount++;
2851 
2852         // Global substitution with groups
2853         m.reset("zzzabccczzzabcczzzabccczzz");
2854         String result = m.replaceAll("$1");
2855         if (!result.equals("zzzabzzzabzzzabzzz"))
2856             failCount++;
2857 
2858         // Supplementary character test
2859         // Global substitution with a literal
2860         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2861         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2862         if (!m.replaceAll(toSupplementaries("test")).
2863             equals(toSupplementaries("testzzztestzzztest")))
2864             failCount++;
2865 
2866         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2867         if (!m.replaceAll(toSupplementaries("test")).
2868             equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2869             failCount++;
2870 
2871         // Global substitution with groups
2872         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2873         result = m.replaceAll("$1");
2874         if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2875             failCount++;
2876 
2877         report("Global Substitution");
2878     }
2879 
2880     /**
2881      * Tests the usage of Matcher.appendReplacement() with literal
2882      * and group substitutions.
2883      */
2884     private static void stringbufferSubstitute() throws Exception {
2885         // SB substitution with literal
2886         String blah = "zzzblahzzz";
2887         Pattern p = Pattern.compile("blah");
2888         Matcher m = p.matcher(blah);
2889         StringBuffer result = new StringBuffer();
2890         try {
2891             m.appendReplacement(result, "blech");
2892             failCount++;
2893         } catch (IllegalStateException e) {
2894         }
2895         m.find();
2896         m.appendReplacement(result, "blech");
2897         if (!result.toString().equals("zzzblech"))
2898             failCount++;
2899 
2900         m.appendTail(result);
2901         if (!result.toString().equals("zzzblechzzz"))
2902             failCount++;
2903 
2904         // SB substitution with groups
2905         blah = "zzzabcdzzz";
2906         p = Pattern.compile("(ab)(cd)*");
2907         m = p.matcher(blah);
2908         result = new StringBuffer();
2909         try {
2910             m.appendReplacement(result, "$1");
2911             failCount++;
2912         } catch (IllegalStateException e) {
2913         }
2914         m.find();
2915         m.appendReplacement(result, "$1");
2916         if (!result.toString().equals("zzzab"))
2917             failCount++;
2918 
2919         m.appendTail(result);
2920         if (!result.toString().equals("zzzabzzz"))
2921             failCount++;
2922 
2923         // SB substitution with 3 groups
2924         blah = "zzzabcdcdefzzz";
2925         p = Pattern.compile("(ab)(cd)*(ef)");
2926         m = p.matcher(blah);
2927         result = new StringBuffer();
2928         try {
2929             m.appendReplacement(result, "$1w$2w$3");
2930             failCount++;
2931         } catch (IllegalStateException e) {
2932         }
2933         m.find();
2934         m.appendReplacement(result, "$1w$2w$3");
2935         if (!result.toString().equals("zzzabwcdwef"))
2936             failCount++;
2937 
2938         m.appendTail(result);
2939         if (!result.toString().equals("zzzabwcdwefzzz"))
2940             failCount++;
2941 
2942         // SB substitution with groups and three matches
2943         // skipping middle match
2944         blah = "zzzabcdzzzabcddzzzabcdzzz";
2945         p = Pattern.compile("(ab)(cd*)");
2946         m = p.matcher(blah);
2947         result = new StringBuffer();
2948         try {
2949             m.appendReplacement(result, "$1");
2950             failCount++;
2951         } catch (IllegalStateException e) {
2952         }
2953         m.find();
2954         m.appendReplacement(result, "$1");
2955         if (!result.toString().equals("zzzab"))
2956             failCount++;
2957 
2958         m.find();
2959         m.find();
2960         m.appendReplacement(result, "$2");
2961         if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2962             failCount++;
2963 
2964         m.appendTail(result);
2965         if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2966             failCount++;
2967 
2968         // Check to make sure escaped $ is ignored
2969         blah = "zzzabcdcdefzzz";
2970         p = Pattern.compile("(ab)(cd)*(ef)");
2971         m = p.matcher(blah);
2972         result = new StringBuffer();
2973         m.find();
2974         m.appendReplacement(result, "$1w\\$2w$3");
2975         if (!result.toString().equals("zzzabw$2wef"))
2976             failCount++;
2977 
2978         m.appendTail(result);
2979         if (!result.toString().equals("zzzabw$2wefzzz"))
2980             failCount++;
2981 
2982         // Check to make sure a reference to nonexistent group causes error
2983         blah = "zzzabcdcdefzzz";
2984         p = Pattern.compile("(ab)(cd)*(ef)");
2985         m = p.matcher(blah);
2986         result = new StringBuffer();
2987         m.find();
2988         try {
2989             m.appendReplacement(result, "$1w$5w$3");
2990             failCount++;
2991         } catch (IndexOutOfBoundsException ioobe) {
2992             // Correct result
2993         }
2994 
2995         // Check double digit group references
2996         blah = "zzz123456789101112zzz";
2997         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2998         m = p.matcher(blah);
2999         result = new StringBuffer();
3000         m.find();
3001         m.appendReplacement(result, "$1w$11w$3");
3002         if (!result.toString().equals("zzz1w11w3"))
3003             failCount++;
3004 
3005         // Check to make sure it backs off $15 to $1 if only three groups
3006         blah = "zzzabcdcdefzzz";
3007         p = Pattern.compile("(ab)(cd)*(ef)");
3008         m = p.matcher(blah);
3009         result = new StringBuffer();
3010         m.find();
3011         m.appendReplacement(result, "$1w$15w$3");
3012         if (!result.toString().equals("zzzabwab5wef"))
3013             failCount++;
3014 
3015 
3016         // Supplementary character test
3017         // SB substitution with literal
3018         blah = toSupplementaries("zzzblahzzz");
3019         p = Pattern.compile(toSupplementaries("blah"));
3020         m = p.matcher(blah);
3021         result = new StringBuffer();
3022         try {
3023             m.appendReplacement(result, toSupplementaries("blech"));
3024             failCount++;
3025         } catch (IllegalStateException e) {
3026         }
3027         m.find();
3028         m.appendReplacement(result, toSupplementaries("blech"));
3029         if (!result.toString().equals(toSupplementaries("zzzblech")))
3030             failCount++;
3031 
3032         m.appendTail(result);
3033         if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
3034             failCount++;
3035 
3036         // SB substitution with groups
3037         blah = toSupplementaries("zzzabcdzzz");
3038         p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
3039         m = p.matcher(blah);
3040         result = new StringBuffer();
3041         try {
3042             m.appendReplacement(result, "$1");
3043             failCount++;
3044         } catch (IllegalStateException e) {
3045         }
3046         m.find();
3047         m.appendReplacement(result, "$1");
3048         if (!result.toString().equals(toSupplementaries("zzzab")))
3049             failCount++;
3050 
3051         m.appendTail(result);
3052         if (!result.toString().equals(toSupplementaries("zzzabzzz")))
3053             failCount++;
3054 
3055         // SB substitution with 3 groups
3056         blah = toSupplementaries("zzzabcdcdefzzz");
3057         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3058         m = p.matcher(blah);
3059         result = new StringBuffer();
3060         try {
3061             m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3062             failCount++;
3063         } catch (IllegalStateException e) {
3064         }
3065         m.find();
3066         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3067         if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
3068             failCount++;
3069 
3070         m.appendTail(result);
3071         if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
3072             failCount++;
3073 
3074         // SB substitution with groups and three matches
3075         // skipping middle match
3076         blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
3077         p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
3078         m = p.matcher(blah);
3079         result = new StringBuffer();
3080         try {
3081             m.appendReplacement(result, "$1");
3082             failCount++;
3083         } catch (IllegalStateException e) {
3084         }
3085         m.find();
3086         m.appendReplacement(result, "$1");
3087         if (!result.toString().equals(toSupplementaries("zzzab")))
3088             failCount++;
3089 
3090         m.find();
3091         m.find();
3092         m.appendReplacement(result, "$2");
3093         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
3094             failCount++;
3095 
3096         m.appendTail(result);
3097         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
3098             failCount++;
3099 
3100         // Check to make sure escaped $ is ignored
3101         blah = toSupplementaries("zzzabcdcdefzzz");
3102         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3103         m = p.matcher(blah);
3104         result = new StringBuffer();
3105         m.find();
3106         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
3107         if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
3108             failCount++;
3109 
3110         m.appendTail(result);
3111         if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
3112             failCount++;
3113 
3114         // Check to make sure a reference to nonexistent group causes error
3115         blah = toSupplementaries("zzzabcdcdefzzz");
3116         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3117         m = p.matcher(blah);
3118         result = new StringBuffer();
3119         m.find();
3120         try {
3121             m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
3122             failCount++;
3123         } catch (IndexOutOfBoundsException ioobe) {
3124             // Correct result
3125         }
3126 
3127         // Check double digit group references
3128         blah = toSupplementaries("zzz123456789101112zzz");
3129         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3130         m = p.matcher(blah);
3131         result = new StringBuffer();
3132         m.find();
3133         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
3134         if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
3135             failCount++;
3136 
3137         // Check to make sure it backs off $15 to $1 if only three groups
3138         blah = toSupplementaries("zzzabcdcdefzzz");
3139         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3140         m = p.matcher(blah);
3141         result = new StringBuffer();
3142         m.find();
3143         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3144         if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
3145             failCount++;
3146 
3147         // Check nothing has been appended into the output buffer if
3148         // the replacement string triggers IllegalArgumentException.
3149         p = Pattern.compile("(abc)");
3150         m = p.matcher("abcd");
3151         result = new StringBuffer();
3152         m.find();
3153         try {
3154             m.appendReplacement(result, ("xyz$g"));
3155             failCount++;
3156         } catch (IllegalArgumentException iae) {
3157             if (result.length() != 0)
3158                 failCount++;
3159         }
3160 
3161         report("SB Substitution");
3162     }
3163 
3164     /**
3165      * Tests the usage of Matcher.appendReplacement() with literal
3166      * and group substitutions.
3167      */
3168     private static void stringbuilderSubstitute() throws Exception {
3169         // SB substitution with literal
3170         String blah = "zzzblahzzz";
3171         Pattern p = Pattern.compile("blah");
3172         Matcher m = p.matcher(blah);
3173         StringBuilder result = new StringBuilder();
3174         try {
3175             m.appendReplacement(result, "blech");
3176             failCount++;
3177         } catch (IllegalStateException e) {
3178         }
3179         m.find();
3180         m.appendReplacement(result, "blech");
3181         if (!result.toString().equals("zzzblech"))
3182             failCount++;
3183 
3184         m.appendTail(result);
3185         if (!result.toString().equals("zzzblechzzz"))
3186             failCount++;
3187 
3188         // SB substitution with groups
3189         blah = "zzzabcdzzz";
3190         p = Pattern.compile("(ab)(cd)*");
3191         m = p.matcher(blah);
3192         result = new StringBuilder();
3193         try {
3194             m.appendReplacement(result, "$1");
3195             failCount++;
3196         } catch (IllegalStateException e) {
3197         }
3198         m.find();
3199         m.appendReplacement(result, "$1");
3200         if (!result.toString().equals("zzzab"))
3201             failCount++;
3202 
3203         m.appendTail(result);
3204         if (!result.toString().equals("zzzabzzz"))
3205             failCount++;
3206 
3207         // SB substitution with 3 groups
3208         blah = "zzzabcdcdefzzz";
3209         p = Pattern.compile("(ab)(cd)*(ef)");
3210         m = p.matcher(blah);
3211         result = new StringBuilder();
3212         try {
3213             m.appendReplacement(result, "$1w$2w$3");
3214             failCount++;
3215         } catch (IllegalStateException e) {
3216         }
3217         m.find();
3218         m.appendReplacement(result, "$1w$2w$3");
3219         if (!result.toString().equals("zzzabwcdwef"))
3220             failCount++;
3221 
3222         m.appendTail(result);
3223         if (!result.toString().equals("zzzabwcdwefzzz"))
3224             failCount++;
3225 
3226         // SB substitution with groups and three matches
3227         // skipping middle match
3228         blah = "zzzabcdzzzabcddzzzabcdzzz";
3229         p = Pattern.compile("(ab)(cd*)");
3230         m = p.matcher(blah);
3231         result = new StringBuilder();
3232         try {
3233             m.appendReplacement(result, "$1");
3234             failCount++;
3235         } catch (IllegalStateException e) {
3236         }
3237         m.find();
3238         m.appendReplacement(result, "$1");
3239         if (!result.toString().equals("zzzab"))
3240             failCount++;
3241 
3242         m.find();
3243         m.find();
3244         m.appendReplacement(result, "$2");
3245         if (!result.toString().equals("zzzabzzzabcddzzzcd"))
3246             failCount++;
3247 
3248         m.appendTail(result);
3249         if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
3250             failCount++;
3251 
3252         // Check to make sure escaped $ is ignored
3253         blah = "zzzabcdcdefzzz";
3254         p = Pattern.compile("(ab)(cd)*(ef)");
3255         m = p.matcher(blah);
3256         result = new StringBuilder();
3257         m.find();
3258         m.appendReplacement(result, "$1w\\$2w$3");
3259         if (!result.toString().equals("zzzabw$2wef"))
3260             failCount++;
3261 
3262         m.appendTail(result);
3263         if (!result.toString().equals("zzzabw$2wefzzz"))
3264             failCount++;
3265 
3266         // Check to make sure a reference to nonexistent group causes error
3267         blah = "zzzabcdcdefzzz";
3268         p = Pattern.compile("(ab)(cd)*(ef)");
3269         m = p.matcher(blah);
3270         result = new StringBuilder();
3271         m.find();
3272         try {
3273             m.appendReplacement(result, "$1w$5w$3");
3274             failCount++;
3275         } catch (IndexOutOfBoundsException ioobe) {
3276             // Correct result
3277         }
3278 
3279         // Check double digit group references
3280         blah = "zzz123456789101112zzz";
3281         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3282         m = p.matcher(blah);
3283         result = new StringBuilder();
3284         m.find();
3285         m.appendReplacement(result, "$1w$11w$3");
3286         if (!result.toString().equals("zzz1w11w3"))
3287             failCount++;
3288 
3289         // Check to make sure it backs off $15 to $1 if only three groups
3290         blah = "zzzabcdcdefzzz";
3291         p = Pattern.compile("(ab)(cd)*(ef)");
3292         m = p.matcher(blah);
3293         result = new StringBuilder();
3294         m.find();
3295         m.appendReplacement(result, "$1w$15w$3");
3296         if (!result.toString().equals("zzzabwab5wef"))
3297             failCount++;
3298 
3299 
3300         // Supplementary character test
3301         // SB substitution with literal
3302         blah = toSupplementaries("zzzblahzzz");
3303         p = Pattern.compile(toSupplementaries("blah"));
3304         m = p.matcher(blah);
3305         result = new StringBuilder();
3306         try {
3307             m.appendReplacement(result, toSupplementaries("blech"));
3308             failCount++;
3309         } catch (IllegalStateException e) {
3310         }
3311         m.find();
3312         m.appendReplacement(result, toSupplementaries("blech"));
3313         if (!result.toString().equals(toSupplementaries("zzzblech")))
3314             failCount++;
3315         m.appendTail(result);
3316         if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
3317             failCount++;
3318 
3319         // SB substitution with groups
3320         blah = toSupplementaries("zzzabcdzzz");
3321         p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
3322         m = p.matcher(blah);
3323         result = new StringBuilder();
3324         try {
3325             m.appendReplacement(result, "$1");
3326             failCount++;
3327         } catch (IllegalStateException e) {
3328         }
3329         m.find();
3330         m.appendReplacement(result, "$1");
3331         if (!result.toString().equals(toSupplementaries("zzzab")))
3332             failCount++;
3333 
3334         m.appendTail(result);
3335         if (!result.toString().equals(toSupplementaries("zzzabzzz")))
3336             failCount++;
3337 
3338         // SB substitution with 3 groups
3339         blah = toSupplementaries("zzzabcdcdefzzz");
3340         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3341         m = p.matcher(blah);
3342         result = new StringBuilder();
3343         try {
3344             m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3345             failCount++;
3346         } catch (IllegalStateException e) {
3347         }
3348         m.find();
3349         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3350         if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
3351             failCount++;
3352 
3353         m.appendTail(result);
3354         if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
3355             failCount++;
3356 
3357         // SB substitution with groups and three matches
3358         // skipping middle match
3359         blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
3360         p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
3361         m = p.matcher(blah);
3362         result = new StringBuilder();
3363         try {
3364             m.appendReplacement(result, "$1");
3365             failCount++;
3366         } catch (IllegalStateException e) {
3367         }
3368         m.find();
3369         m.appendReplacement(result, "$1");
3370         if (!result.toString().equals(toSupplementaries("zzzab")))
3371             failCount++;
3372 
3373         m.find();
3374         m.find();
3375         m.appendReplacement(result, "$2");
3376         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
3377             failCount++;
3378 
3379         m.appendTail(result);
3380         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
3381             failCount++;
3382 
3383         // Check to make sure escaped $ is ignored
3384         blah = toSupplementaries("zzzabcdcdefzzz");
3385         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3386         m = p.matcher(blah);
3387         result = new StringBuilder();
3388         m.find();
3389         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
3390         if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
3391             failCount++;
3392 
3393         m.appendTail(result);
3394         if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
3395             failCount++;
3396 
3397         // Check to make sure a reference to nonexistent group causes error
3398         blah = toSupplementaries("zzzabcdcdefzzz");
3399         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3400         m = p.matcher(blah);
3401         result = new StringBuilder();
3402         m.find();
3403         try {
3404             m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
3405             failCount++;
3406         } catch (IndexOutOfBoundsException ioobe) {
3407             // Correct result
3408         }
3409         // Check double digit group references
3410         blah = toSupplementaries("zzz123456789101112zzz");
3411         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3412         m = p.matcher(blah);
3413         result = new StringBuilder();
3414         m.find();
3415         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
3416         if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
3417             failCount++;
3418 
3419         // Check to make sure it backs off $15 to $1 if only three groups
3420         blah = toSupplementaries("zzzabcdcdefzzz");
3421         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3422         m = p.matcher(blah);
3423         result = new StringBuilder();
3424         m.find();
3425         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3426         if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
3427             failCount++;
3428         // Check nothing has been appended into the output buffer if
3429         // the replacement string triggers IllegalArgumentException.
3430         p = Pattern.compile("(abc)");
3431         m = p.matcher("abcd");
3432         result = new StringBuilder();
3433         m.find();
3434         try {
3435             m.appendReplacement(result, ("xyz$g"));
3436             failCount++;
3437         } catch (IllegalArgumentException iae) {
3438             if (result.length() != 0)
3439                 failCount++;
3440         }
3441         report("SB Substitution 2");
3442     }
3443 
3444     /*
3445      * 5 groups of characters are created to make a substitution string.
3446      * A base string will be created including random lead chars, the
3447      * substitution string, and random trailing chars.
3448      * A pattern containing the 5 groups is searched for and replaced with:
3449      * random group + random string + random group.
3450      * The results are checked for correctness.
3451      */
3452     private static void substitutionBasher() {
3453         for (int runs = 0; runs<1000; runs++) {
3454             // Create a base string to work in
3455             int leadingChars = generator.nextInt(10);
3456             StringBuffer baseBuffer = new StringBuffer(100);
3457             String leadingString = getRandomAlphaString(leadingChars);
3458             baseBuffer.append(leadingString);
3459 
3460             // Create 5 groups of random number of random chars
3461             // Create the string to substitute
3462             // Create the pattern string to search for
3463             StringBuffer bufferToSub = new StringBuffer(25);
3464             StringBuffer bufferToPat = new StringBuffer(50);
3465             String[] groups = new String[5];
3466             for(int i=0; i<5; i++) {
3467                 int aGroupSize = generator.nextInt(5)+1;
3468                 groups[i] = getRandomAlphaString(aGroupSize);
3469                 bufferToSub.append(groups[i]);
3470                 bufferToPat.append('(');
3471                 bufferToPat.append(groups[i]);
3472                 bufferToPat.append(')');
3473             }
3474             String stringToSub = bufferToSub.toString();
3475             String pattern = bufferToPat.toString();
3476 
3477             // Place sub string into working string at random index
3478             baseBuffer.append(stringToSub);
3479 
3480             // Append random chars to end
3481             int trailingChars = generator.nextInt(10);
3482             String trailingString = getRandomAlphaString(trailingChars);
3483             baseBuffer.append(trailingString);
3484             String baseString = baseBuffer.toString();
3485 
3486             // Create test pattern and matcher
3487             Pattern p = Pattern.compile(pattern);
3488             Matcher m = p.matcher(baseString);
3489 
3490             // Reject candidate if pattern happens to start early
3491             m.find();
3492             if (m.start() < leadingChars)
3493                 continue;
3494 
3495             // Reject candidate if more than one match
3496             if (m.find())
3497                 continue;
3498 
3499             // Construct a replacement string with :
3500             // random group + random string + random group
3501             StringBuffer bufferToRep = new StringBuffer();
3502             int groupIndex1 = generator.nextInt(5);
3503             bufferToRep.append("$" + (groupIndex1 + 1));
3504             String randomMidString = getRandomAlphaString(5);
3505             bufferToRep.append(randomMidString);
3506             int groupIndex2 = generator.nextInt(5);
3507             bufferToRep.append("$" + (groupIndex2 + 1));
3508             String replacement = bufferToRep.toString();
3509 
3510             // Do the replacement
3511             String result = m.replaceAll(replacement);
3512 
3513             // Construct expected result
3514             StringBuffer bufferToRes = new StringBuffer();
3515             bufferToRes.append(leadingString);
3516             bufferToRes.append(groups[groupIndex1]);
3517             bufferToRes.append(randomMidString);
3518             bufferToRes.append(groups[groupIndex2]);
3519             bufferToRes.append(trailingString);
3520             String expectedResult = bufferToRes.toString();
3521 
3522             // Check results
3523             if (!result.equals(expectedResult))
3524                 failCount++;
3525         }
3526 
3527         report("Substitution Basher");
3528     }
3529 
3530     /*
3531      * 5 groups of characters are created to make a substitution string.
3532      * A base string will be created including random lead chars, the
3533      * substitution string, and random trailing chars.
3534      * A pattern containing the 5 groups is searched for and replaced with:
3535      * random group + random string + random group.
3536      * The results are checked for correctness.
3537      */
3538     private static void substitutionBasher2() {
3539         for (int runs = 0; runs<1000; runs++) {
3540             // Create a base string to work in
3541             int leadingChars = generator.nextInt(10);
3542             StringBuilder baseBuffer = new StringBuilder(100);
3543             String leadingString = getRandomAlphaString(leadingChars);
3544             baseBuffer.append(leadingString);
3545 
3546             // Create 5 groups of random number of random chars
3547             // Create the string to substitute
3548             // Create the pattern string to search for
3549             StringBuilder bufferToSub = new StringBuilder(25);
3550             StringBuilder bufferToPat = new StringBuilder(50);
3551             String[] groups = new String[5];
3552             for(int i=0; i<5; i++) {
3553                 int aGroupSize = generator.nextInt(5)+1;
3554                 groups[i] = getRandomAlphaString(aGroupSize);
3555                 bufferToSub.append(groups[i]);
3556                 bufferToPat.append('(');
3557                 bufferToPat.append(groups[i]);
3558                 bufferToPat.append(')');
3559             }
3560             String stringToSub = bufferToSub.toString();
3561             String pattern = bufferToPat.toString();
3562 
3563             // Place sub string into working string at random index
3564             baseBuffer.append(stringToSub);
3565 
3566             // Append random chars to end
3567             int trailingChars = generator.nextInt(10);
3568             String trailingString = getRandomAlphaString(trailingChars);
3569             baseBuffer.append(trailingString);
3570             String baseString = baseBuffer.toString();
3571 
3572             // Create test pattern and matcher
3573             Pattern p = Pattern.compile(pattern);
3574             Matcher m = p.matcher(baseString);
3575 
3576             // Reject candidate if pattern happens to start early
3577             m.find();
3578             if (m.start() < leadingChars)
3579                 continue;
3580 
3581             // Reject candidate if more than one match
3582             if (m.find())
3583                 continue;
3584 
3585             // Construct a replacement string with :
3586             // random group + random string + random group
3587             StringBuilder bufferToRep = new StringBuilder();
3588             int groupIndex1 = generator.nextInt(5);
3589             bufferToRep.append("$" + (groupIndex1 + 1));
3590             String randomMidString = getRandomAlphaString(5);
3591             bufferToRep.append(randomMidString);
3592             int groupIndex2 = generator.nextInt(5);
3593             bufferToRep.append("$" + (groupIndex2 + 1));
3594             String replacement = bufferToRep.toString();
3595 
3596             // Do the replacement
3597             String result = m.replaceAll(replacement);
3598 
3599             // Construct expected result
3600             StringBuilder bufferToRes = new StringBuilder();
3601             bufferToRes.append(leadingString);
3602             bufferToRes.append(groups[groupIndex1]);
3603             bufferToRes.append(randomMidString);
3604             bufferToRes.append(groups[groupIndex2]);
3605             bufferToRes.append(trailingString);
3606             String expectedResult = bufferToRes.toString();
3607 
3608             // Check results
3609             if (!result.equals(expectedResult)) {
3610                 failCount++;
3611             }
3612         }
3613 
3614         report("Substitution Basher 2");
3615     }
3616 
3617     /**
3618      * Checks the handling of some escape sequences that the Pattern
3619      * class should process instead of the java compiler. These are
     * not in the file because the escapes should be processed
3621      * by the Pattern class when the regex is compiled.
3622      */
3623     private static void escapes() throws Exception {
3624         Pattern p = Pattern.compile("\\043");
3625         Matcher m = p.matcher("#");
3626         if (!m.find())
3627             failCount++;
3628 
3629         p = Pattern.compile("\\x23");
3630         m = p.matcher("#");
3631         if (!m.find())
3632             failCount++;
3633 
3634         p = Pattern.compile("\\u0023");
3635         m = p.matcher("#");
3636         if (!m.find())
3637             failCount++;
3638 
3639         report("Escape sequences");
3640     }
3641 
3642     /**
3643      * Checks the handling of blank input situations. These
3644      * tests are incompatible with my test file format.
3645      */
3646     private static void blankInput() throws Exception {
3647         Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3648         Matcher m = p.matcher("");
3649         if (m.find())
3650             failCount++;
3651 
3652         p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3653         m = p.matcher("");
3654         if (!m.find())
3655             failCount++;
3656 
3657         p = Pattern.compile("abc");
3658         m = p.matcher("");
3659         if (m.find())
3660             failCount++;
3661 
3662         p = Pattern.compile("a*");
3663         m = p.matcher("");
3664         if (!m.find())
3665             failCount++;
3666 
3667         report("Blank input");
3668     }
3669 
3670     /**
3671      * Tests the Boyer-Moore pattern matching of a character sequence
3672      * on randomly generated patterns.
3673      */
3674     private static void bm() throws Exception {
3675         doBnM('a');
3676         report("Boyer Moore (ASCII)");
3677 
3678         doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3679         report("Boyer Moore (Supplementary)");
3680     }
3681 
3682     private static void doBnM(int baseCharacter) throws Exception {
3683         int achar=0;
3684 
3685         for (int i=0; i<100; i++) {
3686             // Create a short pattern to search for
3687             int patternLength = generator.nextInt(7) + 4;
3688             StringBuffer patternBuffer = new StringBuffer(patternLength);
3689             String pattern;
3690             retry: for (;;) {
3691                 for (int x=0; x<patternLength; x++) {
3692                     int ch = baseCharacter + generator.nextInt(26);
3693                     if (Character.isSupplementaryCodePoint(ch)) {
3694                         patternBuffer.append(Character.toChars(ch));
3695                     } else {
3696                         patternBuffer.append((char)ch);
3697                     }
3698                 }
3699                 pattern = patternBuffer.toString();
3700 
3701                 // Avoid patterns that start and end with the same substring
3702                 // See JDK-6854417
3703                 for (int x=1; x < pattern.length(); x++) {
3704                     if (pattern.startsWith(pattern.substring(x)))
3705                         continue retry;
3706                 }
3707                 break;
3708             }
3709             Pattern p = Pattern.compile(pattern);
3710 
3711             // Create a buffer with random ASCII chars that does
3712             // not match the sample
3713             String toSearch = null;
3714             StringBuffer s = null;
3715             Matcher m = p.matcher("");
3716             do {
3717                 s = new StringBuffer(100);
3718                 for (int x=0; x<100; x++) {
3719                     int ch = baseCharacter + generator.nextInt(26);
3720                     if (Character.isSupplementaryCodePoint(ch)) {
3721                         s.append(Character.toChars(ch));
3722                     } else {
3723                         s.append((char)ch);
3724                     }
3725                 }
3726                 toSearch = s.toString();
3727                 m.reset(toSearch);
3728             } while (m.find());
3729 
3730             // Insert the pattern at a random spot
3731             int insertIndex = generator.nextInt(99);
3732             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3733                 insertIndex++;
3734             s = s.insert(insertIndex, pattern);
3735             toSearch = s.toString();
3736 
3737             // Make sure that the pattern is found
3738             m.reset(toSearch);
3739             if (!m.find())
3740                 failCount++;
3741 
3742             // Make sure that the match text is the pattern
3743             if (!m.group().equals(pattern))
3744                 failCount++;
3745 
3746             // Make sure match occured at insertion point
3747             if (m.start() != insertIndex)
3748                 failCount++;
3749         }
3750     }
3751 
3752     /**
3753      * Tests the matching of slices on randomly generated patterns.
3754      * The Boyer-Moore optimization is not done on these patterns
3755      * because it uses unicode case folding.
3756      */
3757     private static void slice() throws Exception {
3758         doSlice(Character.MAX_VALUE);
3759         report("Slice");
3760 
3761         doSlice(Character.MAX_CODE_POINT);
3762         report("Slice (Supplementary)");
3763     }
3764 
3765     private static void doSlice(int maxCharacter) throws Exception {
3766         Random generator = new Random();
3767         int achar=0;
3768 
3769         for (int i=0; i<100; i++) {
3770             // Create a short pattern to search for
3771             int patternLength = generator.nextInt(7) + 4;
3772             StringBuffer patternBuffer = new StringBuffer(patternLength);
3773             for (int x=0; x<patternLength; x++) {
3774                 int randomChar = 0;
3775                 while (!Character.isLetterOrDigit(randomChar))
3776                     randomChar = generator.nextInt(maxCharacter);
3777                 if (Character.isSupplementaryCodePoint(randomChar)) {
3778                     patternBuffer.append(Character.toChars(randomChar));
3779                 } else {
3780                     patternBuffer.append((char) randomChar);
3781                 }
3782             }
3783             String pattern =  patternBuffer.toString();
3784             Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3785 
3786             // Create a buffer with random chars that does not match the sample
3787             String toSearch = null;
3788             StringBuffer s = null;
3789             Matcher m = p.matcher("");
3790             do {
3791                 s = new StringBuffer(100);
3792                 for (int x=0; x<100; x++) {
3793                     int randomChar = 0;
3794                     while (!Character.isLetterOrDigit(randomChar))
3795                         randomChar = generator.nextInt(maxCharacter);
3796                     if (Character.isSupplementaryCodePoint(randomChar)) {
3797                         s.append(Character.toChars(randomChar));
3798                     } else {
3799                         s.append((char) randomChar);
3800                     }
3801                 }
3802                 toSearch = s.toString();
3803                 m.reset(toSearch);
3804             } while (m.find());
3805 
3806             // Insert the pattern at a random spot
3807             int insertIndex = generator.nextInt(99);
3808             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3809                 insertIndex++;
3810             s = s.insert(insertIndex, pattern);
3811             toSearch = s.toString();
3812 
3813             // Make sure that the pattern is found
3814             m.reset(toSearch);
3815             if (!m.find())
3816                 failCount++;
3817 
3818             // Make sure that the match text is the pattern
3819             if (!m.group().equals(pattern))
3820                 failCount++;
3821 
3822             // Make sure match occured at insertion point
3823             if (m.start() != insertIndex)
3824                 failCount++;
3825         }
3826     }
3827 
3828     private static void explainFailure(String pattern, String data,
3829                                        String expected, String actual) {
3830         System.err.println("----------------------------------------");
3831         System.err.println("Pattern = "+pattern);
3832         System.err.println("Data = "+data);
3833         System.err.println("Expected = " + expected);
3834         System.err.println("Actual   = " + actual);
3835     }
3836 
3837     private static void explainFailure(String pattern, String data,
3838                                        Throwable t) {
3839         System.err.println("----------------------------------------");
3840         System.err.println("Pattern = "+pattern);
3841         System.err.println("Data = "+data);
3842         t.printStackTrace(System.err);
3843     }
3844 
3845     // Testing examples from a file
3846 
    /**
     * Goes through the given test case file (for example "TestCases.txt")
     * and creates the patterns described in it, matching each pattern
     * against its input line in the file, and comparing the results
     * against the correct results also found in the file. The file format
     * is described in comments at the head of the file.
     */
3854     private static void processFile(String fileName) throws Exception {
3855         File testCases = new File(System.getProperty("test.src", "."),
3856                                   fileName);
3857         FileInputStream in = new FileInputStream(testCases);
3858         BufferedReader r = new BufferedReader(new InputStreamReader(in));
3859 
3860         // Process next test case.
3861         String aLine;
3862         while((aLine = r.readLine()) != null) {
3863             // Read a line for pattern
3864             String patternString = grabLine(r);
3865             Pattern p = null;
3866             try {
3867                 p = compileTestPattern(patternString);
3868             } catch (PatternSyntaxException e) {
3869                 String dataString = grabLine(r);
3870                 String expectedResult = grabLine(r);
3871                 if (expectedResult.startsWith("error"))
3872                     continue;
3873                 explainFailure(patternString, dataString, e);
3874                 failCount++;
3875                 continue;
3876             }
3877 
3878             // Read a line for input string
3879             String dataString = grabLine(r);
3880             Matcher m = p.matcher(dataString);
3881             StringBuffer result = new StringBuffer();
3882 
3883             // Check for IllegalStateExceptions before a match
3884             failCount += preMatchInvariants(m);
3885 
3886             boolean found = m.find();
3887 
3888             if (found)
3889                 failCount += postTrueMatchInvariants(m);
3890             else
3891                 failCount += postFalseMatchInvariants(m);
3892 
3893             if (found) {
3894                 result.append("true ");
3895                 result.append(m.group(0) + " ");
3896             } else {
3897                 result.append("false ");
3898             }
3899 
3900             result.append(m.groupCount());
3901 
3902             if (found) {
3903                 for (int i=1; i<m.groupCount()+1; i++)
3904                     if (m.group(i) != null)
3905                         result.append(" " +m.group(i));
3906             }
3907 
3908             // Read a line for the expected result
3909             String expectedResult = grabLine(r);
3910 
3911             if (!result.toString().equals(expectedResult)) {
3912                 explainFailure(patternString, dataString, expectedResult, result.toString());
3913                 failCount++;
3914             }
3915         }
3916 
3917         report(fileName);
3918     }
3919 
3920     private static int preMatchInvariants(Matcher m) {
3921         int failCount = 0;
3922         try {
3923             m.start();
3924             failCount++;
3925         } catch (IllegalStateException ise) {}
3926         try {
3927             m.end();
3928             failCount++;
3929         } catch (IllegalStateException ise) {}
3930         try {
3931             m.group();
3932             failCount++;
3933         } catch (IllegalStateException ise) {}
3934         return failCount;
3935     }
3936 
3937     private static int postFalseMatchInvariants(Matcher m) {
3938         int failCount = 0;
3939         try {
3940             m.group();
3941             failCount++;
3942         } catch (IllegalStateException ise) {}
3943         try {
3944             m.start();
3945             failCount++;
3946         } catch (IllegalStateException ise) {}
3947         try {
3948             m.end();
3949             failCount++;
3950         } catch (IllegalStateException ise) {}
3951         return failCount;
3952     }
3953 
3954     private static int postTrueMatchInvariants(Matcher m) {
3955         int failCount = 0;
3956         //assert(m.start() = m.start(0);
3957         if (m.start() != m.start(0))
3958             failCount++;
3959         //assert(m.end() = m.end(0);
3960         if (m.start() != m.start(0))
3961             failCount++;
3962         //assert(m.group() = m.group(0);
3963         if (!m.group().equals(m.group(0)))
3964             failCount++;
3965         try {
3966             m.group(50);
3967             failCount++;
3968         } catch (IndexOutOfBoundsException ise) {}
3969 
3970         return failCount;
3971     }
3972 
3973     private static Pattern compileTestPattern(String patternString) {
3974         if (!patternString.startsWith("'")) {
3975             return Pattern.compile(patternString);
3976         }
3977         int break1 = patternString.lastIndexOf("'");
3978         String flagString = patternString.substring(
3979                                           break1+1, patternString.length());
3980         patternString = patternString.substring(1, break1);
3981 
3982         if (flagString.equals("i"))
3983             return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3984 
3985         if (flagString.equals("m"))
3986             return Pattern.compile(patternString, Pattern.MULTILINE);
3987 
3988         return Pattern.compile(patternString);
3989     }
3990 
3991     /**
3992      * Reads a line from the input file. Keeps reading lines until a non
3993      * empty non comment line is read. If the line contains a \n then
3994      * these two characters are replaced by a newline char. If a \\uxxxx
3995      * sequence is read then the sequence is replaced by the unicode char.
3996      */
3997     private static String grabLine(BufferedReader r) throws Exception {
3998         int index = 0;
3999         String line = r.readLine();
4000         while (line.startsWith("//") || line.length() < 1)
4001             line = r.readLine();
4002         while ((index = line.indexOf("\\n")) != -1) {
4003             StringBuffer temp = new StringBuffer(line);
4004             temp.replace(index, index+2, "\n");
4005             line = temp.toString();
4006         }
4007         while ((index = line.indexOf("\\u")) != -1) {
4008             StringBuffer temp = new StringBuffer(line);
4009             String value = temp.substring(index+2, index+6);
4010             char aChar = (char)Integer.parseInt(value, 16);
4011             String unicodeChar = "" + aChar;
4012             temp.replace(index, index+6, unicodeChar);
4013             line = temp.toString();
4014         }
4015 
4016         return line;
4017     }
4018 
4019     private static void check(Pattern p, String s, String g, String expected) {
4020         Matcher m = p.matcher(s);
4021         m.find();
4022         if (!m.group(g).equals(expected) ||
4023             s.charAt(m.start(g)) != expected.charAt(0) ||
4024             s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1))
4025             failCount++;
4026     }
4027 
4028     private static void checkReplaceFirst(String p, String s, String r, String expected)
4029     {
4030         if (!expected.equals(Pattern.compile(p)
4031                                     .matcher(s)
4032                                     .replaceFirst(r)))
4033             failCount++;
4034     }
4035 
4036     private static void checkReplaceAll(String p, String s, String r, String expected)
4037     {
4038         if (!expected.equals(Pattern.compile(p)
4039                                     .matcher(s)
4040                                     .replaceAll(r)))
4041             failCount++;
4042     }
4043 
4044     private static void checkExpectedFail(String p) {
4045         try {
4046             Pattern.compile(p);
4047         } catch (PatternSyntaxException pse) {
4048             //pse.printStackTrace();
4049             return;
4050         }
4051         failCount++;
4052     }
4053 
4054     private static void checkExpectedIAE(Matcher m, String g) {
4055         m.find();
4056         try {
4057             m.group(g);
4058         } catch (IllegalArgumentException x) {
4059             //iae.printStackTrace();
4060             try {
4061                 m.start(g);
4062             } catch (IllegalArgumentException xx) {
4063                 try {
4064                     m.start(g);
4065                 } catch (IllegalArgumentException xxx) {
4066                     return;
4067                 }
4068             }
4069         }
4070         failCount++;
4071     }
4072 
4073     private static void checkExpectedNPE(Matcher m) {
4074         m.find();
4075         try {
4076             m.group(null);
4077         } catch (NullPointerException x) {
4078             try {
4079                 m.start(null);
4080             } catch (NullPointerException xx) {
4081                 try {
4082                     m.end(null);
4083                 } catch (NullPointerException xxx) {
4084                     return;
4085                 }
4086             }
4087         }
4088         failCount++;
4089     }
4090 
4091     private static void namedGroupCaptureTest() throws Exception {
4092         check(Pattern.compile("x+(?<gname>y+)z+"),
4093               "xxxyyyzzz",
4094               "gname",
4095               "yyy");
4096 
4097         check(Pattern.compile("x+(?<gname8>y+)z+"),
4098               "xxxyyyzzz",
4099               "gname8",
4100               "yyy");
4101 
4102         //backref
4103         Pattern pattern = Pattern.compile("(a*)bc\\1");
4104         check(pattern, "zzzaabcazzz", true);  // found "abca"
4105 
4106         check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
4107               "zzzaabcaazzz", true);
4108 
4109         check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
4110               "abcdefabc", true);
4111 
4112         check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
4113               "abcdefghijkk", true);
4114 
4115         // Supplementary character tests
4116         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
4117               toSupplementaries("zzzaabcazzz"), true);
4118 
4119         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
4120               toSupplementaries("zzzaabcaazzz"), true);
4121 
4122         check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
4123               toSupplementaries("abcdefabc"), true);
4124 
4125         check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
4126                               "(?<gname>" +
4127                               toSupplementaries("k)") + "\\k<gname>"),
4128               toSupplementaries("abcdefghijkk"), true);
4129 
4130         check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
4131               "xxxyyyzzzyyy",
4132               "gname",
4133               "yyy");
4134 
4135         //replaceFirst/All
4136         checkReplaceFirst("(?<gn>ab)(c*)",
4137                           "abccczzzabcczzzabccc",
4138                           "${gn}",
4139                           "abzzzabcczzzabccc");
4140 
4141         checkReplaceAll("(?<gn>ab)(c*)",
4142                         "abccczzzabcczzzabccc",
4143                         "${gn}",
4144                         "abzzzabzzzab");
4145 
4146 
4147         checkReplaceFirst("(?<gn>ab)(c*)",
4148                           "zzzabccczzzabcczzzabccczzz",
4149                           "${gn}",
4150                           "zzzabzzzabcczzzabccczzz");
4151 
4152         checkReplaceAll("(?<gn>ab)(c*)",
4153                         "zzzabccczzzabcczzzabccczzz",
4154                         "${gn}",
4155                         "zzzabzzzabzzzabzzz");
4156 
4157         checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
4158                           "zzzabccczzzabcczzzabccczzz",
4159                           "${gn2}",
4160                           "zzzccczzzabcczzzabccczzz");
4161 
4162         checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
4163                         "zzzabccczzzabcczzzabccczzz",
4164                         "${gn2}",
4165                         "zzzccczzzcczzzccczzz");
4166 
4167         //toSupplementaries("(ab)(c*)"));
4168         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4169                            ")(?<gn2>" + toSupplementaries("c") + "*)",
4170                           toSupplementaries("abccczzzabcczzzabccc"),
4171                           "${gn1}",
4172                           toSupplementaries("abzzzabcczzzabccc"));
4173 
4174 
4175         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4176                         ")(?<gn2>" + toSupplementaries("c") + "*)",
4177                         toSupplementaries("abccczzzabcczzzabccc"),
4178                         "${gn1}",
4179                         toSupplementaries("abzzzabzzzab"));
4180 
4181         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4182                            ")(?<gn2>" + toSupplementaries("c") + "*)",
4183                           toSupplementaries("abccczzzabcczzzabccc"),
4184                           "${gn2}",
4185                           toSupplementaries("ccczzzabcczzzabccc"));
4186 
4187 
4188         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4189                         ")(?<gn2>" + toSupplementaries("c") + "*)",
4190                         toSupplementaries("abccczzzabcczzzabccc"),
4191                         "${gn2}",
4192                         toSupplementaries("ccczzzcczzzccc"));
4193 
4194         checkReplaceFirst("(?<dog>Dog)AndCat",
4195                           "zzzDogAndCatzzzDogAndCatzzz",
4196                           "${dog}",
4197                           "zzzDogzzzDogAndCatzzz");
4198 
4199 
4200         checkReplaceAll("(?<dog>Dog)AndCat",
4201                           "zzzDogAndCatzzzDogAndCatzzz",
4202                           "${dog}",
4203                           "zzzDogzzzDogzzz");
4204 
4205         // backref in Matcher & String
4206         if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
4207             !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
4208             failCount++;
4209 
4210         // negative
4211         checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
4212         checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
4213         checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
4214         checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
4215         checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
4216         checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
4217                          "gnameX");
4218         checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
4219         report("NamedGroupCapture");
4220     }
4221 
4222     // This is for bug 6919132
4223     private static void nonBmpClassComplementTest() throws Exception {
4224         Pattern p = Pattern.compile("\\P{Lu}");
4225         Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4226 
4227         if (m.find() && m.start() == 1)
4228             failCount++;
4229 
4230         // from a unicode category
4231         p = Pattern.compile("\\P{Lu}");
4232         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4233         if (m.find())
4234             failCount++;
4235         if (!m.hitEnd())
4236             failCount++;
4237 
4238         // block
4239         p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
4240         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4241         if (m.find() && m.start() == 1)
4242             failCount++;
4243 
4244         p = Pattern.compile("\\P{sc=GRANTHA}");
4245         m = p.matcher(new String(new int[] {0x11350}, 0, 1));
4246         if (m.find() && m.start() == 1)
4247             failCount++;
4248 
4249         report("NonBmpClassComplement");
4250     }
4251 
4252     private static void unicodePropertiesTest() throws Exception {
4253         // different forms
4254         if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
4255             !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
4256             !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
4257             !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
4258             !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
4259             !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
4260             !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
4261             !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
4262             !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
4263             !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
4264             failCount++;
4265 
4266         Matcher common  = Pattern.compile("\\p{script=Common}").matcher("");
4267         Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
4268         Matcher lastSM  = common;
4269         Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
4270 
4271         Matcher latin  = Pattern.compile("\\p{block=basic_latin}").matcher("");
4272         Matcher greek  = Pattern.compile("\\p{InGreek}").matcher("");
4273         Matcher lastBM = latin;
4274         Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
4275 
4276         for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
4277             if (cp >= 0x30000 && (cp & 0x70) == 0){
4278                 continue;  // only pick couple code points, they are the same
4279             }
4280 
4281             // Unicode Script
4282             Character.UnicodeScript script = Character.UnicodeScript.of(cp);
4283             Matcher m;
4284             String str = new String(Character.toChars(cp));
4285             if (script == lastScript) {
4286                  m = lastSM;
4287                  m.reset(str);
4288             } else {
4289                  m  = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
4290             }
4291             if (!m.matches()) {
4292                 failCount++;
4293             }
4294             Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
4295             other.reset(str);
4296             if (other.matches()) {
4297                 failCount++;
4298             }
4299             lastSM = m;
4300             lastScript = script;
4301 
4302             // Unicode Block
4303             Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
4304             if (block == null) {
4305                 //System.out.printf("Not a Block: cp=%x%n", cp);
4306                 continue;
4307             }
4308             if (block == lastBlock) {
4309                  m = lastBM;
4310                  m.reset(str);
4311             } else {
4312                  m  = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
4313             }
4314             if (!m.matches()) {
4315                 failCount++;
4316             }
4317             other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
4318             other.reset(str);
4319             if (other.matches()) {
4320                 failCount++;
4321             }
4322             lastBM = m;
4323             lastBlock = block;
4324         }
4325         report("unicodeProperties");
4326     }
4327 
4328     private static void unicodeHexNotationTest() throws Exception {
4329 
4330         // negative
4331         checkExpectedFail("\\x{-23}");
4332         checkExpectedFail("\\x{110000}");
4333         checkExpectedFail("\\x{}");
4334         checkExpectedFail("\\x{AB[ef]");
4335 
4336         // codepoint
4337         check("^\\x{1033c}$",              "\uD800\uDF3C", true);
4338         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
4339         check("^\\x{D800}\\x{DF3c}+$",     "\uD800\uDF3C", false);
4340         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
4341 
4342         // in class
4343         check("^[\\x{D800}\\x{DF3c}]+$",   "\uD800\uDF3C", false);
4344         check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
4345         check("^[\\x{D800}\\x{DF3C}]+$",   "\uD800\uDF3C", false);
4346         check("^[\\x{DF3C}\\x{D800}]+$",   "\uD800\uDF3C", false);
4347         check("^[\\x{D800}\\x{DF3C}]+$",   "\uDF3C\uD800", true);
4348         check("^[\\x{DF3C}\\x{D800}]+$",   "\uDF3C\uD800", true);
4349 
4350         for (int cp = 0; cp <= 0x10FFFF; cp++) {
4351              String s = "A" + new String(Character.toChars(cp)) + "B";
4352              String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
4353                                              : String.format("\\u%04x\\u%04x",
4354                                                (int) Character.toChars(cp)[0],
4355                                                (int) Character.toChars(cp)[1]);
4356              String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
4357              if (!Pattern.matches("A" + hexUTF16 + "B", s))
4358                  failCount++;
4359              if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
4360                  failCount++;
4361              if (!Pattern.matches("A" + hexCodePoint + "B", s))
4362                  failCount++;
4363              if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
4364                  failCount++;
4365          }
4366          report("unicodeHexNotation");
4367     }
4368 
    /**
     * Cross-checks the predefined/POSIX character classes, the Unicode
     * binary properties and the java* method classes against reference
     * predicates (POSIX_ASCII, POSIX_Unicode and java.lang.Character) for
     * every code point from 1 up to, but not including, 0x30000.
     * A code point for which any single comparison disagrees adds exactly
     * one to failCount, however many comparisons disagree. A few word
     * boundary checks follow, and the result is reported under
     * "unicodePredefinedClasses".
     */
    private static void unicodeClassesTest() throws Exception {

        // Default mode (no flags). All matchers start on an empty input and
        // are reset to the string under test inside the loop.
        Matcher lower  = Pattern.compile("\\p{Lower}").matcher("");
        Matcher upper  = Pattern.compile("\\p{Upper}").matcher("");
        Matcher ASCII  = Pattern.compile("\\p{ASCII}").matcher("");
        Matcher alpha  = Pattern.compile("\\p{Alpha}").matcher("");
        Matcher digit  = Pattern.compile("\\p{Digit}").matcher("");
        Matcher alnum  = Pattern.compile("\\p{Alnum}").matcher("");
        Matcher punct  = Pattern.compile("\\p{Punct}").matcher("");
        Matcher graph  = Pattern.compile("\\p{Graph}").matcher("");
        Matcher print  = Pattern.compile("\\p{Print}").matcher("");
        Matcher blank  = Pattern.compile("\\p{Blank}").matcher("");
        Matcher cntrl  = Pattern.compile("\\p{Cntrl}").matcher("");
        Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
        Matcher space  = Pattern.compile("\\p{Space}").matcher("");
        Matcher bound  = Pattern.compile("\\b").matcher("");
        Matcher word   = Pattern.compile("\\w++").matcher("");
        // UNICODE_CHARACTER_CLASS
        Matcher lowerU  = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher upperU  = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher ASCIIU  = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher alphaU  = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher digitU  = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher alnumU  = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher punctU  = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher graphU  = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher printU  = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher blankU  = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher cntrlU  = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher spaceU  = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher boundU  = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher wordU   = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        // embedded flag (?U)
        // NOTE(review): these patterns also pass UNICODE_CHARACTER_CLASS as a
        // compile flag, so the embedded (?U) is never exercised on its own --
        // confirm whether that is intended.
        Matcher lowerEU  = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher graphEU  = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher wordEU   = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");

        // A word between two word boundaries. Note that the default-mode
        // pattern accepts a single word char while the Unicode variants
        // accept a run of them.
        Matcher bwb    = Pattern.compile("\\b\\w\\b").matcher("");
        Matcher bwbU   = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher bwbEU  = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        // properties
        Matcher lowerP  = Pattern.compile("\\p{IsLowerCase}").matcher("");
        Matcher upperP  = Pattern.compile("\\p{IsUpperCase}").matcher("");
        Matcher titleP  = Pattern.compile("\\p{IsTitleCase}").matcher("");
        Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
        Matcher alphaP  = Pattern.compile("\\p{IsAlphabetic}").matcher("");
        Matcher ideogP  = Pattern.compile("\\p{IsIdeographic}").matcher("");
        Matcher cntrlP  = Pattern.compile("\\p{IsControl}").matcher("");
        Matcher spaceP  = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
        Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
        Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
        Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
        // javaMethod
        Matcher lowerJ  = Pattern.compile("\\p{javaLowerCase}").matcher("");
        Matcher upperJ  = Pattern.compile("\\p{javaUpperCase}").matcher("");
        Matcher alphaJ  = Pattern.compile("\\p{javaAlphabetic}").matcher("");
        Matcher ideogJ  = Pattern.compile("\\p{javaIdeographic}").matcher("");
        // GC/C
        Matcher gcC  = Pattern.compile("\\p{C}").matcher("");

        // Code point 0 and everything from 0x30000 upwards are not visited.
        for (int cp = 1; cp < 0x30000; cp++) {
            String str = new String(Character.toChars(cp));
            int type = Character.getType(cp);
            // Each clause compares a reference predicate with the matcher's
            // verdict; the || chain stops at the first disagreement, so the
            // remaining matchers are not consulted for that code point.
            if (// lower
                POSIX_ASCII.isLower(cp)   != lower.reset(str).matches()  ||
                Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
                Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
                Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
                Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
                // upper
                POSIX_ASCII.isUpper(cp)   != upper.reset(str).matches()  ||
                POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
                Character.isUpperCase(cp) != upperP.reset(str).matches() ||
                Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
                // alpha
                POSIX_ASCII.isAlpha(cp)   != alpha.reset(str).matches()  ||
                POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
                Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
                Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
                // digit
                POSIX_ASCII.isDigit(cp)   != digit.reset(str).matches()  ||
                Character.isDigit(cp)     != digitU.reset(str).matches() ||
                // alnum
                POSIX_ASCII.isAlnum(cp)   != alnum.reset(str).matches()  ||
                POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
                // punct
                POSIX_ASCII.isPunct(cp)   != punct.reset(str).matches()  ||
                POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
                // graph
                POSIX_ASCII.isGraph(cp)   != graph.reset(str).matches()  ||
                POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
                POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
                // blank
                POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
                                          != blank.reset(str).matches()  ||
                POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
                // print
                POSIX_ASCII.isPrint(cp)   != print.reset(str).matches()  ||
                POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
                // cntrl
                POSIX_ASCII.isCntrl(cp)   != cntrl.reset(str).matches()  ||
                POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
                (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
                // hexdigit
                POSIX_ASCII.isHexDigit(cp)   != xdigit.reset(str).matches()  ||
                POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
                // space
                POSIX_ASCII.isSpace(cp)   != space.reset(str).matches()  ||
                POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
                POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
                // word
                POSIX_ASCII.isWord(cp)   != word.reset(str).matches()  ||
                POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
                POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
                // bwordb
                POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
                POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
                // properties
                Character.isTitleCase(cp) != titleP.reset(str).matches() ||
                Character.isLetter(cp)    != letterP.reset(str).matches()||
                Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
                Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
                // IsAssigned must be the opposite of "type is UNASSIGNED",
                // hence == rather than != in this clause
                (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
                POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
                POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches() ||
                // gc_C
                (Character.CONTROL == type || Character.FORMAT == type ||
                 Character.PRIVATE_USE == type || Character.SURROGATE == type ||
                 Character.UNASSIGNED == type)
                != gcC.reset(str).matches()) {
                failCount++;
            }
        }

        // bounds/word align
        // Each input is checked twice: padded with spaces through
        // twoFindIndexes with the default-mode \b matcher, and unpadded as a
        // full match of the Unicode-mode word pattern.
        // NOTE(review): the two integers passed to twoFindIndexes are
        // presumably the expected positions of the first two finds -- verify
        // against that helper.
        twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
        if (!bwbU.reset("\u0180sherman\u0400").matches())
            failCount++;
        twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
        if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
            failCount++;
        twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
        if (!bwbU.reset("\u0724\u0739\u0724").matches())
            failCount++;
        if (!bwbEU.reset("\u0724\u0739\u0724").matches())
            failCount++;
        report("unicodePredefinedClasses");
    }
4518 
4519     private static void unicodeCharacterNameTest() throws Exception {
4520 
4521         for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) {
4522             if (!Character.isValidCodePoint(cp) ||
4523                 Character.getType(cp) == Character.UNASSIGNED)
4524                 continue;
4525             String str = new String(Character.toChars(cp));
4526             // single
4527             String p = "\\N{" + Character.getName(cp) + "}";
4528             if (!Pattern.compile(p).matcher(str).matches()) {
4529                 failCount++;
4530             }
4531             // class[c]
4532             p = "[\\N{" + Character.getName(cp) + "}]";
4533             if (!Pattern.compile(p).matcher(str).matches()) {
4534                 failCount++;
4535             }
4536         }
4537 
4538         // range
4539         for (int i = 0; i < 10; i++) {
4540             int start = generator.nextInt(20);
4541             int end = start + generator.nextInt(200);
4542             String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]";
4543             String str;
4544             for (int cp = start; cp < end; cp++) {
4545                 str = new String(Character.toChars(cp));
4546                 if (!Pattern.compile(p).matcher(str).matches()) {
4547                     failCount++;
4548                 }
4549             }
4550             str = new String(Character.toChars(end + 10));
4551             if (Pattern.compile(p).matcher(str).matches()) {
4552                 failCount++;
4553             }
4554         }
4555 
4556         // slice
4557         for (int i = 0; i < 10; i++) {
4558             int n = generator.nextInt(256);
4559             int[] buf = new int[n];
4560             StringBuffer sb = new StringBuffer(1024);
4561             for (int j = 0; j < n; j++) {
4562                 int cp = generator.nextInt(1000);
4563                 if (!Character.isValidCodePoint(cp) ||
4564                     Character.getType(cp) == Character.UNASSIGNED)
4565                     cp = 0x4e00;    // just use 4e00
4566                 sb.append("\\N{" + Character.getName(cp) + "}");
4567                 buf[j] = cp;
4568             }
4569             String p = sb.toString();
4570             String str = new String(buf, 0, buf.length);
4571             if (!Pattern.compile(p).matcher(str).matches()) {
4572                 failCount++;
4573             }
4574         }
4575         report("unicodeCharacterName");
4576     }
4577 
4578     private static void horizontalAndVerticalWSTest() throws Exception {
4579         String hws = new String (new char[] {
4580                                      0x09, 0x20, 0xa0, 0x1680, 0x180e,
4581                                      0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
4582                                      0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
4583                                      0x202f, 0x205f, 0x3000 });
4584         String vws = new String (new char[] {
4585                                      0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
4586         if (!Pattern.compile("\\h+").matcher(hws).matches() ||
4587             !Pattern.compile("[\\h]+").matcher(hws).matches())
4588             failCount++;
4589         if (Pattern.compile("\\H").matcher(hws).find() ||
4590             Pattern.compile("[\\H]").matcher(hws).find())
4591             failCount++;
4592         if (!Pattern.compile("\\v+").matcher(vws).matches() ||
4593             !Pattern.compile("[\\v]+").matcher(vws).matches())
4594             failCount++;
4595         if (Pattern.compile("\\V").matcher(vws).find() ||
4596             Pattern.compile("[\\V]").matcher(vws).find())
4597             failCount++;
4598         String prefix = "abcd";
4599         String suffix = "efgh";
4600         String ng = "A";
4601         for (int i = 0; i < hws.length(); i++) {
4602             String c = String.valueOf(hws.charAt(i));
4603             Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
4604             if (!m.find() || !c.equals(m.group()))
4605                 failCount++;
4606             m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
4607             if (!m.find() || !c.equals(m.group()))
4608                 failCount++;
4609 
4610             m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
4611             if (!m.find() || !ng.equals(m.group()))
4612                 failCount++;
4613             m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
4614             if (!m.find() || !ng.equals(m.group()))
4615                 failCount++;
4616         }
4617         for (int i = 0; i < vws.length(); i++) {
4618             String c = String.valueOf(vws.charAt(i));
4619             Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
4620             if (!m.find() || !c.equals(m.group()))
4621                 failCount++;
4622             m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
4623             if (!m.find() || !c.equals(m.group()))
4624                 failCount++;
4625 
4626             m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
4627             if (!m.find() || !ng.equals(m.group()))
4628                 failCount++;
4629             m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
4630             if (!m.find() || !ng.equals(m.group()))
4631                 failCount++;
4632         }
4633         // \v in range is interpreted as 0x0B. This is the undocumented behavior
4634         if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
4635             failCount++;
4636         report("horizontalAndVerticalWSTest");
4637     }
4638 
4639     private static void linebreakTest() throws Exception {
4640         String linebreaks = new String (new char[] {
4641             0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
4642         String crnl = "\r\n";
4643         if (!(Pattern.compile("\\R+").matcher(linebreaks).matches() &&
4644               Pattern.compile("\\R").matcher(crnl).matches() &&
4645               Pattern.compile("\\Rabc").matcher(crnl + "abc").matches() &&
4646               Pattern.compile("\\Rabc").matcher("\rabc").matches() &&
4647               Pattern.compile("\\R\\R").matcher(crnl).matches() &&  // backtracking
4648               Pattern.compile("\\R\\n").matcher(crnl).matches()) && // backtracking
4649               !Pattern.compile("((?<!\\R)\\s)*").matcher(crnl).matches()) { // #8176029
4650             failCount++;
4651         }
4652         report("linebreakTest");
4653     }
4654 
4655     // #7189363
4656     private static void branchTest() throws Exception {
4657         if (!Pattern.compile("(a)?bc|d").matcher("d").find() ||     // greedy
4658             !Pattern.compile("(a)+bc|d").matcher("d").find() ||
4659             !Pattern.compile("(a)*bc|d").matcher("d").find() ||
4660             !Pattern.compile("(a)??bc|d").matcher("d").find() ||    // reluctant
4661             !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
4662             !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
4663             !Pattern.compile("(a)?+bc|d").matcher("d").find() ||    // possessive
4664             !Pattern.compile("(a)++bc|d").matcher("d").find() ||
4665             !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
4666             !Pattern.compile("(a)?bc|d").matcher("d").matches() ||  // greedy
4667             !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
4668             !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
4669             !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
4670             !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
4671             !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
4672             !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
4673             !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
4674             !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
4675             !Pattern.compile("(a)?bc|de").matcher("de").find() ||   // others
4676             !Pattern.compile("(a)??bc|de").matcher("de").find() ||
4677             !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
4678             !Pattern.compile("(a)??bc|de").matcher("de").matches())
4679             failCount++;
4680         report("branchTest");
4681     }
4682 
4683     // This test is for 8007395
4684     private static void groupCurlyNotFoundSuppTest() throws Exception {
4685         String input = "test this as \ud83d\ude0d";
4686         for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
4687                                           "test(.)*(@[a-zA-Z.]+)",
4688                                           "test([^B])+(@[a-zA-Z.]+)",
4689                                           "test([^B])*(@[a-zA-Z.]+)",
4690                                           "test(\\P{IsControl})+(@[a-zA-Z.]+)",
4691                                           "test(\\P{IsControl})*(@[a-zA-Z.]+)",
4692                                         }) {
4693             Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
4694                                .matcher(input);
4695             try {
4696                 if (m.find()) {
4697                     failCount++;
4698                 }
4699             } catch (Exception x) {
4700                 failCount++;
4701             }
4702         }
4703         report("GroupCurly NotFoundSupp");
4704     }
4705 
4706     // This test is for 8023647
4707     private static void groupCurlyBackoffTest() throws Exception {
4708         if (!"abc1c".matches("(\\w)+1\\1") ||
4709             "abc11".matches("(\\w)+1\\1")) {
4710             failCount++;
4711         }
4712         report("GroupCurly backoff");
4713     }
4714 
4715     // This test is for 8012646
4716     private static void patternAsPredicate() throws Exception {
4717         Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
4718 
4719         if (p.test("")) {
4720             failCount++;
4721         }
4722         if (!p.test("word")) {
4723             failCount++;
4724         }
4725         if (p.test("1234")) {
4726             failCount++;
4727         }
4728         if (!p.test("word1234")) {
4729             failCount++;
4730         }
4731         report("Pattern.asPredicate");
4732     }
4733 
4734     // This test is for 8184692
4735     private static void patternAsMatchPredicate() throws Exception {
4736         Predicate<String> p = Pattern.compile("[a-z]+").asMatchPredicate();
4737 
4738         if (p.test("")) {
4739             failCount++;
4740         }
4741         if (!p.test("word")) {
4742             failCount++;
4743         }
4744         if (p.test("1234word")) {
4745             failCount++;
4746         }
4747         if (p.test("1234")) {
4748             failCount++;
4749         }
4750         report("Pattern.asMatchPredicate");
4751     }
4752 
4753 
4754     // This test is for 8035975
4755     private static void invalidFlags() throws Exception {
4756         for (int flag = 1; flag != 0; flag <<= 1) {
4757             switch (flag) {
4758             case Pattern.CASE_INSENSITIVE:
4759             case Pattern.MULTILINE:
4760             case Pattern.DOTALL:
4761             case Pattern.UNICODE_CASE:
4762             case Pattern.CANON_EQ:
4763             case Pattern.UNIX_LINES:
4764             case Pattern.LITERAL:
4765             case Pattern.UNICODE_CHARACTER_CLASS:
4766             case Pattern.COMMENTS:
4767                 // valid flag, continue
4768                 break;
4769             default:
4770                 try {
4771                     Pattern.compile(".", flag);
4772                     failCount++;
4773                 } catch (IllegalArgumentException expected) {
4774                 }
4775             }
4776         }
4777         report("Invalid compile flags");
4778     }
4779 
4780     // This test is for 8158482
4781     private static void embeddedFlags() throws Exception {
4782         try {
4783             Pattern.compile("(?i).(?-i).");
4784             Pattern.compile("(?m).(?-m).");
4785             Pattern.compile("(?s).(?-s).");
4786             Pattern.compile("(?d).(?-d).");
4787             Pattern.compile("(?u).(?-u).");
4788             Pattern.compile("(?c).(?-c).");
4789             Pattern.compile("(?x).(?-x).");
4790             Pattern.compile("(?U).(?-U).");
4791             Pattern.compile("(?imsducxU).(?-imsducxU).");
4792         } catch (PatternSyntaxException x) {
4793             failCount++;
4794         }
4795         report("Embedded flags");
4796     }
4797 
4798     private static void grapheme() throws Exception {
4799         Stream.concat(Files.lines(UCDFiles.GRAPHEME_BREAK_TEST),
4800                 Files.lines(Paths.get(System.getProperty("test.src", "."), "GraphemeTestCases.txt")))
4801             .filter( ln -> ln.length() != 0 && !ln.startsWith("#") )
4802             .forEach( ln -> {
4803                 ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", "");
4804                 // System.out.println(str);
4805                 String[] strs = ln.split("\u00f7|\u00d7");
4806                 StringBuilder src = new StringBuilder();
4807                 ArrayList<String> graphemes = new ArrayList<>();
4808                 StringBuilder buf = new StringBuilder();
4809                 int offBk = 0;
4810                 for (String str : strs) {
4811                     if (str.length() == 0)  // first empty str
4812                         continue;
4813                     int cp = Integer.parseInt(str, 16);
4814                     src.appendCodePoint(cp);
4815                     buf.appendCodePoint(cp);
4816                     offBk += (str.length() + 1);
4817                     if (ln.charAt(offBk) == '\u00f7') {    // DIV
4818                         graphemes.add(buf.toString());
4819                         buf = new StringBuilder();
4820                     }
4821                 }
4822                 Pattern p = Pattern.compile("\\X");
4823                 Matcher m = p.matcher(src.toString());
4824                 Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}");
4825                 for (String g : graphemes) {
4826                     // System.out.printf("     grapheme:=[%s]%n", g);
4827                     // (1) test \\X directly
4828                     if (!m.find() || !m.group().equals(g)) {
4829                         System.out.println("Failed \\X [" + ln + "] : " + g);
4830                         failCount++;
4831                     }
4832                     // (2) test \\b{g} + \\X  via Scanner
4833                     boolean hasNext = s.hasNext(p);
4834                     // if (!s.hasNext() || !s.next().equals(next)) {
4835                     if (!s.hasNext(p) || !s.next(p).equals(g)) {
4836                         System.out.println("Failed b{g} [" + ln + "] : " + g);
4837                         failCount++;
4838                     }
4839                 }
4840             });
4841         // some sanity checks
4842         if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() ||
4843             !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() ||
4844             !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches())
4845             failCount++;
4846         // make sure "\b{n}" still works
4847         if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches())
4848             failCount++;
4849         report("Unicode extended grapheme cluster");
4850     }
4851 
    // Hangs or times out if matching goes into exponential backtracking
4853     private static void expoBacktracking() throws Exception {
4854 
4855         Object[][] patternMatchers = {
4856             // 6328855
4857             { "(.*\n*)*",
4858               "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)",
4859               false },
4860             // 6192895
4861             { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4862               "Hello World this is a test this is a test this is a test A",
4863               true },
4864             { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4865               "Hello World this is a test this is a test this is a test \u4e00 ",
4866               false },
4867             { " *([a-z0-9]+ *)+",
4868               "hello world this is a test this is a test this is a test A",
4869               false },
4870             // 4771934 [FIXED] #5013651?
4871             { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4872               "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com",
4873               true },
4874             // 4866249 [FIXED]
4875             { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>",
4876               "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">",
4877               true },
4878             { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4879               "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com",
4880               false },
4881             // 6345469
4882             { "((<[^>]+>)?(((\\s)?)*(\\&nbsp;)?)*((\\s)?)*)+",
4883               "&nbsp;&nbsp; < br/> &nbsp; < / p> <p> <html> <adfasfdasdf>&nbsp; </p>",
4884               true }, // --> matched
4885             { "((<[^>]+>)?(((\\s)?)*(\\&nbsp;)?)*((\\s)?)*)+",
4886               "&nbsp;&nbsp; < br/> &nbsp; < / p> <p> <html> <adfasfdasdf>&nbsp; p </p>",
4887               false },
4888             // 5026912
4889             { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$",
4890               "156580451111112225588087755221111111566969655555555",
4891               false},
4892             // 6988218
4893             { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')",
4894               "'%)) order by ANGEBOT.ID",
4895               false},    // find
4896             // 6693451
4897             { "^(\\s*foo\\s*)*$",
4898               "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo",
4899               true },
4900             { "^(\\s*foo\\s*)*$",
4901               "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo",
4902               false
4903             },
4904             // 7006761
4905             { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true},
4906             { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false},
4907             // 8140212
4908             { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)",
4909               "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()",
4910               false
4911             },
4912             { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true},
4913             { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false},
4914 
4915             { "(x+)*y",  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true },
4916             { "(x+)*y",  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4917 
4918             { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true},
4919             { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4920 
4921             { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false},
4922 
4923             /* not fixed
4924             //8132141   --->    second level exponential backtracking
4925             { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*",
4926               "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" },
4927             */
4928         };
4929 
4930         for (Object[] pm : patternMatchers) {
4931             String p = (String)pm[0];
4932             String s = (String)pm[1];
4933             boolean r = (Boolean)pm[2];
4934             if (r != Pattern.compile(p).matcher(s).matches()) {
4935                 failCount++;
4936             }
4937         }
4938     }
4939 
4940     private static void invalidGroupName() {
4941         // Invalid start of a group name
4942         for (String groupName : List.of("", ".", "0", "\u0040", "\u005b",
4943                 "\u0060", "\u007b", "\u0416")) {
4944             for (String pat : List.of("(?<" + groupName + ">)",
4945                     "\\k<" + groupName + ">")) {
4946                 try {
4947                     Pattern.compile(pat);
4948                     failCount++;
4949                 } catch (PatternSyntaxException e) {
4950                     if (!e.getMessage().startsWith(
4951                             "capturing group name does not start with a"
4952                             + " Latin letter")) {
4953                         failCount++;
4954                     }
4955                 }
4956             }
4957         }
4958         // Invalid char in a group name
4959         for (String groupName : List.of("a.", "b\u0040", "c\u005b",
4960                 "d\u0060", "e\u007b", "f\u0416")) {
4961             for (String pat : List.of("(?<" + groupName + ">)",
4962                     "\\k<" + groupName + ">")) {
4963                 try {
4964                     Pattern.compile(pat);
4965                     failCount++;
4966                 } catch (PatternSyntaxException e) {
4967                     if (!e.getMessage().startsWith(
4968                             "named capturing group is missing trailing '>'")) {
4969                         failCount++;
4970                     }
4971                 }
4972             }
4973         }
4974         report("Invalid capturing group names");
4975     }
4976 
4977     private static void illegalRepetitionRange() {
4978         // huge integers > (2^31 - 1)
4979         String n = BigInteger.valueOf(1L << 32)
4980             .toString();
4981         String m = BigInteger.valueOf(1L << 31)
4982             .add(new BigInteger(80, generator))
4983             .toString();
4984         for (String rep : List.of("", "x", ".", ",", "-1", "2,1",
4985                 n, n + ",", "0," + n, n + "," + m, m, m + ",", "0," + m)) {
4986             String pat = ".{" + rep + "}";
4987             try {
4988                 Pattern.compile(pat);
4989                 failCount++;
4990                 System.out.println("Expected to fail. Pattern: " + pat);
4991             } catch (PatternSyntaxException e) {
4992                 if (!e.getMessage().startsWith("Illegal repetition")) {
4993                     failCount++;
4994                     System.out.println("Unexpected error message: " + e.getMessage());
4995                 }
4996             } catch (Throwable t) {
4997                 failCount++;
4998                 System.out.println("Unexpected exception: " + t);
4999             }
5000         }
5001         report("illegalRepetitionRange");
5002     }
5003 
5004     private static void surrogatePairWithCanonEq() {
5005         try {
5006             Pattern.compile("\ud834\udd21", Pattern.CANON_EQ);
5007         } catch (Throwable t) {
5008             failCount++;
5009             System.out.println("Unexpected exception: " + t);
5010         }
5011         report("surrogatePairWithCanonEq");
5012     }
5013 
5014     // This test is for 8235812
5015     private static void lineBreakWithQuantifier() {
5016         // key:    pattern
5017         // value:  lengths of input that must match the pattern
5018         Map<String, List<Integer>> cases = Map.ofEntries(
5019             Map.entry("\\R?",      List.of(0, 1)),
5020             Map.entry("\\R*",      List.of(0, 1, 2, 3)),
5021             Map.entry("\\R+",      List.of(1, 2, 3)),
5022             Map.entry("\\R{0}",    List.of(0)),
5023             Map.entry("\\R{1}",    List.of(1)),
5024             Map.entry("\\R{2}",    List.of(2)),
5025             Map.entry("\\R{3}",    List.of(3)),
5026             Map.entry("\\R{0,}",   List.of(0, 1, 2, 3)),
5027             Map.entry("\\R{1,}",   List.of(1, 2, 3)),
5028             Map.entry("\\R{2,}",   List.of(2, 3)),
5029             Map.entry("\\R{3,}",   List.of(3)),
5030             Map.entry("\\R{0,0}",  List.of(0)),
5031             Map.entry("\\R{0,1}",  List.of(0, 1)),
5032             Map.entry("\\R{0,2}",  List.of(0, 1, 2)),
5033             Map.entry("\\R{0,3}",  List.of(0, 1, 2, 3)),
5034             Map.entry("\\R{1,1}",  List.of(1)),
5035             Map.entry("\\R{1,2}",  List.of(1, 2)),
5036             Map.entry("\\R{1,3}",  List.of(1, 2, 3)),
5037             Map.entry("\\R{2,2}",  List.of(2)),
5038             Map.entry("\\R{2,3}",  List.of(2, 3)),
5039             Map.entry("\\R{3,3}",  List.of(3)),
5040             Map.entry("\\R",       List.of(1)),
5041             Map.entry("\\R\\R",    List.of(2)),
5042             Map.entry("\\R\\R\\R", List.of(3))
5043         );
5044 
5045         // key:    length of input
5046         // value:  all possible inputs of given length
5047         Map<Integer, List<String>> inputs = new HashMap<>();
5048         String[] Rs = { "\r\n", "\r", "\n",
5049                         "\u000B", "\u000C", "\u0085", "\u2028", "\u2029" };
5050         StringBuilder sb = new StringBuilder();
5051         for (int len = 0; len <= 3; ++len) {
5052             int[] idx = new int[len + 1];
5053             do {
5054                 sb.setLength(0);
5055                 for (int j = 0; j < len; ++j)
5056                     sb.append(Rs[idx[j]]);
5057                 inputs.computeIfAbsent(len, ArrayList::new).add(sb.toString());
5058                 idx[0]++;
5059                 for (int j = 0; j < len; ++j) {
5060                     if (idx[j] < Rs.length)
5061                         break;
5062                     idx[j] = 0;
5063                     idx[j+1]++;
5064                 }
5065             } while (idx[len] == 0);
5066         }
5067 
5068         // exhaustive testing
5069         for (String patStr : cases.keySet()) {
5070             Pattern[] pats = patStr.endsWith("R")
5071                 ? new Pattern[] { Pattern.compile(patStr) }  // no quantifiers
5072                 : new Pattern[] { Pattern.compile(patStr),          // greedy
5073                                   Pattern.compile(patStr + "?") };  // reluctant
5074             Matcher m = pats[0].matcher("");
5075             for (Pattern p : pats) {
5076                 m.usePattern(p);
5077                 for (int len : cases.get(patStr)) {
5078                     for (String in : inputs.get(len)) {
5079                         if (!m.reset(in).matches()) {
5080                             failCount++;
5081                             System.err.println("Expected to match '" +
5082                                     in + "' =~ /" + p + "/");
5083                         }
5084                     }
5085                 }
5086             }
5087         }
5088         report("lineBreakWithQuantifier");
5089     }
5090 
5091     // This test is for 8214245
5092     private static void caseInsensitivePMatch() {
5093         for (String input : List.of("abcd", "AbCd", "ABCD")) {
5094             for (String pattern : List.of("abcd", "aBcD", "[a-d]{4}",
5095                     "(?:a|b|c|d){4}", "\\p{Lower}{4}", "\\p{Ll}{4}",
5096                     "\\p{IsLl}{4}", "\\p{gc=Ll}{4}",
5097                     "\\p{general_category=Ll}{4}", "\\p{IsLowercase}{4}",
5098                     "\\p{javaLowerCase}{4}", "\\p{Upper}{4}", "\\p{Lu}{4}",
5099                     "\\p{IsLu}{4}", "\\p{gc=Lu}{4}", "\\p{general_category=Lu}{4}",
5100                     "\\p{IsUppercase}{4}", "\\p{javaUpperCase}{4}",
5101                     "\\p{Lt}{4}", "\\p{IsLt}{4}", "\\p{gc=Lt}{4}",
5102                     "\\p{general_category=Lt}{4}", "\\p{IsTitlecase}{4}",
5103                     "\\p{javaTitleCase}{4}", "[\\p{Lower}]{4}", "[\\p{Ll}]{4}",
5104                     "[\\p{IsLl}]{4}", "[\\p{gc=Ll}]{4}",
5105                     "[\\p{general_category=Ll}]{4}", "[\\p{IsLowercase}]{4}",
5106                     "[\\p{javaLowerCase}]{4}", "[\\p{Upper}]{4}", "[\\p{Lu}]{4}",
5107                     "[\\p{IsLu}]{4}", "[\\p{gc=Lu}]{4}",
5108                     "[\\p{general_category=Lu}]{4}", "[\\p{IsUppercase}]{4}",
5109                     "[\\p{javaUpperCase}]{4}", "[\\p{Lt}]{4}", "[\\p{IsLt}]{4}",
5110                     "[\\p{gc=Lt}]{4}", "[\\p{general_category=Lt}]{4}",
5111                     "[\\p{IsTitlecase}]{4}", "[\\p{javaTitleCase}]{4}"))
5112             {
5113                 if (!Pattern.compile(pattern, Pattern.CASE_INSENSITIVE)
5114                             .matcher(input)
5115                             .matches())
5116                 {
5117                     failCount++;
5118                     System.err.println("Expected to match: " +
5119                                        "'" + input + "' =~ /" + pattern + "/");
5120                 }
5121             }
5122         }
5123 
5124         for (String input : List.of("\u01c7", "\u01c8", "\u01c9")) {
5125             for (String pattern : List.of("\u01c7", "\u01c8", "\u01c9",
5126                     "[\u01c7\u01c8]", "[\u01c7\u01c9]", "[\u01c8\u01c9]",
5127                     "[\u01c7-\u01c8]", "[\u01c8-\u01c9]", "[\u01c7-\u01c9]",
5128                     "\\p{Lower}", "\\p{Ll}", "\\p{IsLl}", "\\p{gc=Ll}",
5129                     "\\p{general_category=Ll}", "\\p{IsLowercase}",
5130                     "\\p{javaLowerCase}", "\\p{Upper}", "\\p{Lu}",
5131                     "\\p{IsLu}", "\\p{gc=Lu}", "\\p{general_category=Lu}",
5132                     "\\p{IsUppercase}", "\\p{javaUpperCase}",
5133                     "\\p{Lt}", "\\p{IsLt}", "\\p{gc=Lt}",
5134                     "\\p{general_category=Lt}", "\\p{IsTitlecase}",
5135                     "\\p{javaTitleCase}", "[\\p{Lower}]", "[\\p{Ll}]",
5136                     "[\\p{IsLl}]", "[\\p{gc=Ll}]",
5137                     "[\\p{general_category=Ll}]", "[\\p{IsLowercase}]",
5138                     "[\\p{javaLowerCase}]", "[\\p{Upper}]", "[\\p{Lu}]",
5139                     "[\\p{IsLu}]", "[\\p{gc=Lu}]",
5140                     "[\\p{general_category=Lu}]", "[\\p{IsUppercase}]",
5141                     "[\\p{javaUpperCase}]", "[\\p{Lt}]", "[\\p{IsLt}]",
5142                     "[\\p{gc=Lt}]", "[\\p{general_category=Lt}]",
5143                     "[\\p{IsTitlecase}]", "[\\p{javaTitleCase}]"))
5144             {
5145                 if (!Pattern.compile(pattern, Pattern.CASE_INSENSITIVE
5146                                             | Pattern.UNICODE_CHARACTER_CLASS)
5147                             .matcher(input)
5148                             .matches())
5149                 {
5150                     failCount++;
5151                     System.err.println("Expected to match: " +
5152                                        "'" + input + "' =~ /" + pattern + "/");
5153                 }
5154             }
5155         }
5156         report("caseInsensitivePMatch");
5157     }
5158 }