1 /*
   2  * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /**
  25  * @test
  26  * @summary tests RegExp framework (use -Dseed=X to set PRNG seed)
  27  * @author Mike McCloskey
  28  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
  29  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
  30  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  31  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  32  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  33  * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
  34  * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
  35  * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
  36  * 8151481
  37  * @library /lib/testlibrary
  38  * @build jdk.testlibrary.*
  39  * @run main RegExTest
  40  * @key randomness
  41  */
  42 
  43 import java.util.function.Function;
  44 import java.util.regex.*;
  45 import java.util.Random;
  46 import java.util.Scanner;
  47 import java.io.*;
  48 import java.nio.file.*;
  49 import java.util.*;
  50 import java.nio.CharBuffer;
  51 import java.util.function.Predicate;
  52 import jdk.testlibrary.RandomFactory;
  53 
  54 /**
  55  * This is a test class created to check the operation of
  56  * the Pattern and Matcher classes.
  57  */
  58 public class RegExTest {
  59 
    // Source of randomness for helpers such as getRandomAlphaString();
    // obtained from the test library's RandomFactory.
    private static Random generator = RandomFactory.getRandom();
    // Set to true by report() as soon as a test reports a non-zero failCount.
    private static boolean failure = false;
    // Failures counted since the last report(); report() resets it to zero.
    private static int failCount = 0;
    // Name of the first test for which report() saw failures, or null so far.
    private static String firstFailure = null;
  64 
    /**
     * Runs the file-driven cases and then each individual test method in
     * a fixed order. The command-line arguments are not read.
     *
     * After all tests have run, a RuntimeException naming the first
     * failing test is thrown if the failure flag has been set.
     */
    public static void main(String[] args) throws Exception {
        // Most of the tests are in a file
        processFile("TestCases.txt");
        //processFile("PerlCases.txt");
        processFile("BMPTestCases.txt");
        processFile("SupplementaryTestCases.txt");

        // These test many randomly generated char patterns
        bm();
        slice();

        // These are hard to put into the file
        escapes();
        blankInput();

        // Substitution tests on randomly generated sequences
        globalSubstitute();
        stringbufferSubstitute();
        stringbuilderSubstitute();

        substitutionBasher();
        substitutionBasher2();

        // Canonical Equivalence
        ceTest();

        // Anchors
        anchorTest();

        // boolean match calls
        matchesTest();
        lookingAtTest();

        // Pattern API
        patternMatchesTest();

        // Misc
        lookbehindTest();
        nullArgumentTest();
        backRefTest();
        groupCaptureTest();
        caretTest();
        charClassTest();
        emptyPatternTest();
        findIntTest();
        group0Test();
        longPatternTest();
        octalTest();
        ampersandTest();
        negationTest();
        splitTest();
        appendTest();
        caseFoldingTest();
        commentsTest();
        unixLinesTest();
        replaceFirstTest();
        gTest();
        zTest();
        serializeTest();
        reluctantRepetitionTest();
        multilineDollarTest();
        dollarAtEndTest();
        caretBetweenTerminatorsTest();
        // This RFE rejected in Tiger numOccurrencesTest();
        javaCharClassTest();
        nonCaptureRepetitionTest();
        notCapturedGroupCurlyMatchTest();
        escapedSegmentTest();
        literalPatternTest();
        literalReplacementTest();
        regionTest();
        toStringTest();
        negatedCharClassTest();
        findFromTest();
        boundsTest();
        unicodeWordBoundsTest();
        caretAtEndTest();
        wordSearchTest();
        hitEndTest();
        toMatchResultTest();
        toMatchResultTest2();
        surrogatesInClassTest();
        removeQEQuotingTest();
        namedGroupCaptureTest();
        nonBmpClassComplementTest();
        unicodePropertiesTest();
        unicodeHexNotationTest();
        unicodeClassesTest();
        unicodeCharacterNameTest();
        horizontalAndVerticalWSTest();
        linebreakTest();
        branchTest();
        groupCurlyNotFoundSuppTest();
        groupCurlyBackoffTest();
        patternAsPredicate();
        invalidFlags();
        grapheme();

        // The failure flag is only inspected here, after every test above
        // has been invoked, so one failing test does not stop the others.
        if (failure) {
            throw new
                RuntimeException("RegExTest failed, 1st failure: " +
                                 firstFailure);
        } else {
            System.err.println("OKAY: All tests passed.");
        }
    }
 175 
 176     // Utility functions
 177 
 178     private static String getRandomAlphaString(int length) {
 179         StringBuffer buf = new StringBuffer(length);
 180         for (int i=0; i<length; i++) {
 181             char randChar = (char)(97 + generator.nextInt(26));
 182             buf.append(randChar);
 183         }
 184         return buf.toString();
 185     }
 186 
 187     private static void check(Matcher m, String expected) {
 188         m.find();
 189         if (!m.group().equals(expected))
 190             failCount++;
 191     }
 192 
 193     private static void check(Matcher m, String result, boolean expected) {
 194         m.find();
 195         if (m.group().equals(result) != expected)
 196             failCount++;
 197     }
 198 
 199     private static void check(Pattern p, String s, boolean expected) {
 200         if (p.matcher(s).find() != expected)
 201             failCount++;
 202     }
 203 
 204     private static void check(String p, String s, boolean expected) {
 205         Matcher matcher = Pattern.compile(p).matcher(s);
 206         if (matcher.find() != expected)
 207             failCount++;
 208     }
 209 
 210     private static void check(String p, char c, boolean expected) {
 211         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 212         Pattern pattern = Pattern.compile(propertyPattern);
 213         char[] ca = new char[1]; ca[0] = c;
 214         Matcher matcher = pattern.matcher(new String(ca));
 215         if (!matcher.find())
 216             failCount++;
 217     }
 218 
 219     private static void check(String p, int codePoint, boolean expected) {
 220         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 221         Pattern pattern = Pattern.compile(propertyPattern);
 222         char[] ca = Character.toChars(codePoint);
 223         Matcher matcher = pattern.matcher(new String(ca));
 224         if (!matcher.find())
 225             failCount++;
 226     }
 227 
 228     private static void check(String p, int flag, String input, String s,
 229                               boolean expected)
 230     {
 231         Pattern pattern = Pattern.compile(p, flag);
 232         Matcher matcher = pattern.matcher(input);
 233         if (expected)
 234             check(matcher, s, expected);
 235         else
 236             check(pattern, input, false);
 237     }
 238 
 239     private static void report(String testName) {
 240         int spacesToAdd = 30 - testName.length();
 241         StringBuffer paddedNameBuffer = new StringBuffer(testName);
 242         for (int i=0; i<spacesToAdd; i++)
 243             paddedNameBuffer.append(" ");
 244         String paddedName = paddedNameBuffer.toString();
 245         System.err.println(paddedName + ": " +
 246                            (failCount==0 ? "Passed":"Failed("+failCount+")"));
 247         if (failCount > 0) {
 248             failure = true;
 249 
 250             if (firstFailure == null) {
 251                 firstFailure = testName;
 252             }
 253         }
 254 
 255         failCount = 0;
 256     }
 257 
 258     /**
 259      * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
 260      * supplementary characters. This method does NOT fully take care
 261      * of the regex syntax.
 262      */
 263     private static String toSupplementaries(String s) {
 264         int length = s.length();
 265         StringBuffer sb = new StringBuffer(length * 2);
 266 
 267         for (int i = 0; i < length; ) {
 268             char c = s.charAt(i++);
 269             if (c == '\\') {
 270                 sb.append(c);
 271                 if (i < length) {
 272                     c = s.charAt(i++);
 273                     sb.append(c);
 274                     if (c == 'u') {
 275                         // assume no syntax error
 276                         sb.append(s.charAt(i++));
 277                         sb.append(s.charAt(i++));
 278                         sb.append(s.charAt(i++));
 279                         sb.append(s.charAt(i++));
 280                     }
 281                 }
 282             } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
 283                 sb.append('\ud800').append((char)('\udc00'+c));
 284             } else {
 285                 sb.append(c);
 286             }
 287         }
 288         return sb.toString();
 289     }
 290 
 291     // Regular expression tests
 292 
 293     // This is for bug 6178785
 294     // Test if an expected NPE gets thrown when passing in a null argument
 295     private static boolean check(Runnable test) {
 296         try {
 297             test.run();
 298             failCount++;
 299             return false;
 300         } catch (NullPointerException npe) {
 301             return true;
 302         }
 303     }
 304 
 305     private static void nullArgumentTest() {
 306         check(() -> Pattern.compile(null));
 307         check(() -> Pattern.matches(null, null));
 308         check(() -> Pattern.matches("xyz", null));
 309         check(() -> Pattern.quote(null));
 310         check(() -> Pattern.compile("xyz").split(null));
 311         check(() -> Pattern.compile("xyz").matcher(null));
 312 
 313         final Matcher m = Pattern.compile("xyz").matcher("xyz");
 314         m.matches();
 315         check(() -> m.appendTail((StringBuffer) null));
 316         check(() -> m.appendTail((StringBuilder)null));
 317         check(() -> m.replaceAll((String) null));
 318         check(() -> m.replaceAll((Function<MatchResult, String>)null));
 319         check(() -> m.replaceFirst((String)null));
 320         check(() -> m.replaceFirst((Function<MatchResult, String>) null));
 321         check(() -> m.appendReplacement((StringBuffer)null, null));
 322         check(() -> m.appendReplacement((StringBuilder)null, null));
 323         check(() -> m.reset(null));
 324         check(() -> Matcher.quoteReplacement(null));
 325         //check(() -> m.usePattern(null));
 326 
 327         report("Null Argument");
 328     }
 329 
 330     // This is for bug6635133
 331     // Test if surrogate pair in Unicode escapes can be handled correctly.
 332     private static void surrogatesInClassTest() throws Exception {
 333         Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
 334         Matcher matcher = pattern.matcher("\ud834\udd22");
 335         if (!matcher.find())
 336             failCount++;
 337 
 338         report("Surrogate pair in Unicode escape");
 339     }
 340 
 341     // This is for bug6990617
 342     // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
 343     // char encoding is only 2 or 3 digits instead of 4 and the first quoted
 344     // char is an octal digit.
 345     private static void removeQEQuotingTest() throws Exception {
 346         Pattern pattern =
 347             Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
 348         Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
 349         if (!matcher.find())
 350             failCount++;
 351 
 352         report("Remove Q/E Quoting");
 353     }
 354 
 355     // This is for bug 4988891
 356     // Test toMatchResult to see that it is a copy of the Matcher
 357     // that is not affected by subsequent operations on the original
 358     private static void toMatchResultTest() throws Exception {
 359         Pattern pattern = Pattern.compile("squid");
 360         Matcher matcher = pattern.matcher(
 361             "agiantsquidofdestinyasmallsquidoffate");
 362         matcher.find();
 363         int matcherStart1 = matcher.start();
 364         MatchResult mr = matcher.toMatchResult();
 365         if (mr == matcher)
 366             failCount++;
 367         int resultStart1 = mr.start();
 368         if (matcherStart1 != resultStart1)
 369             failCount++;
 370         matcher.find();
 371         int matcherStart2 = matcher.start();
 372         int resultStart2 = mr.start();
 373         if (matcherStart2 == resultStart2)
 374             failCount++;
 375         if (resultStart1 != resultStart2)
 376             failCount++;
 377         MatchResult mr2 = matcher.toMatchResult();
 378         if (mr == mr2)
 379             failCount++;
 380         if (mr2.start() != matcherStart2)
 381             failCount++;
 382         report("toMatchResult is a copy");
 383     }
 384 
 385     private static void checkExpectedISE(Runnable test) {
 386         try {
 387             test.run();
 388             failCount++;
 389         } catch (IllegalStateException x) {
 390         } catch (IndexOutOfBoundsException xx) {
 391             failCount++;
 392         }
 393     }
 394 
 395     private static void checkExpectedIOOE(Runnable test) {
 396         try {
 397             test.run();
 398             failCount++;
 399         } catch (IndexOutOfBoundsException x) {}
 400     }
 401 
 402     // This is for bug 8074678
 403     // Test the result of toMatchResult throws ISE if no match is availble
 404     private static void toMatchResultTest2() throws Exception {
 405         Matcher matcher = Pattern.compile("nomatch").matcher("hello world");
 406         matcher.find();
 407         MatchResult mr = matcher.toMatchResult();
 408 
 409         checkExpectedISE(() -> mr.start());
 410         checkExpectedISE(() -> mr.start(2));
 411         checkExpectedISE(() -> mr.end());
 412         checkExpectedISE(() -> mr.end(2));
 413         checkExpectedISE(() -> mr.group());
 414         checkExpectedISE(() -> mr.group(2));
 415 
 416         matcher = Pattern.compile("(match)").matcher("there is a match");
 417         matcher.find();
 418         MatchResult mr2 = matcher.toMatchResult();
 419         checkExpectedIOOE(() -> mr2.start(2));
 420         checkExpectedIOOE(() -> mr2.end(2));
 421         checkExpectedIOOE(() -> mr2.group(2));
 422 
 423         report("toMatchResult2 appropriate exceptions");
 424     }
 425 
 426     // This is for bug 5013885
 427     // Must test a slice to see if it reports hitEnd correctly
 428     private static void hitEndTest() throws Exception {
 429         // Basic test of Slice node
 430         Pattern p = Pattern.compile("^squidattack");
 431         Matcher m = p.matcher("squack");
 432         m.find();
 433         if (m.hitEnd())
 434             failCount++;
 435         m.reset("squid");
 436         m.find();
 437         if (!m.hitEnd())
 438             failCount++;
 439 
 440         // Test Slice, SliceA and SliceU nodes
 441         for (int i=0; i<3; i++) {
 442             int flags = 0;
 443             if (i==1) flags = Pattern.CASE_INSENSITIVE;
 444             if (i==2) flags = Pattern.UNICODE_CASE;
 445             p = Pattern.compile("^abc", flags);
 446             m = p.matcher("ad");
 447             m.find();
 448             if (m.hitEnd())
 449                 failCount++;
 450             m.reset("ab");
 451             m.find();
 452             if (!m.hitEnd())
 453                 failCount++;
 454         }
 455 
 456         // Test Boyer-Moore node
 457         p = Pattern.compile("catattack");
 458         m = p.matcher("attack");
 459         m.find();
 460         if (!m.hitEnd())
 461             failCount++;
 462 
 463         p = Pattern.compile("catattack");
 464         m = p.matcher("attackattackattackcatatta");
 465         m.find();
 466         if (!m.hitEnd())
 467             failCount++;
 468         report("hitEnd from a Slice");
 469     }
 470 
 471     // This is for bug 4997476
 472     // It is weird code submitted by customer demonstrating a regression
 473     private static void wordSearchTest() throws Exception {
 474         String testString = new String("word1 word2 word3");
 475         Pattern p = Pattern.compile("\\b");
 476         Matcher m = p.matcher(testString);
 477         int position = 0;
 478         int start = 0;
 479         while (m.find(position)) {
 480             start = m.start();
 481             if (start == testString.length())
 482                 break;
 483             if (m.find(start+1)) {
 484                 position = m.start();
 485             } else {
 486                 position = testString.length();
 487             }
 488             if (testString.substring(start, position).equals(" "))
 489                 continue;
 490             if (!testString.substring(start, position-1).startsWith("word"))
 491                 failCount++;
 492         }
 493         report("Customer word search");
 494     }
 495 
 496     // This is for bug 4994840
 497     private static void caretAtEndTest() throws Exception {
 498         // Problem only occurs with multiline patterns
 499         // containing a beginning-of-line caret "^" followed
 500         // by an expression that also matches the empty string.
 501         Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
 502         Matcher matcher = pattern.matcher("\r");
 503         matcher.find();
 504         matcher.find();
 505         report("Caret at end");
 506     }
 507 
 508     // This test is for 4979006
 509     // Check to see if word boundary construct properly handles unicode
 510     // non spacing marks
 511     private static void unicodeWordBoundsTest() throws Exception {
 512         String spaces = "  ";
 513         String wordChar = "a";
 514         String nsm = "\u030a";
 515 
 516         assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
 517 
 518         Pattern pattern = Pattern.compile("\\b");
 519         Matcher matcher = pattern.matcher("");
 520         // S=other B=word character N=non spacing mark .=word boundary
 521         // SS.BB.SS
 522         String input = spaces + wordChar + wordChar + spaces;
 523         twoFindIndexes(input, matcher, 2, 4);
 524         // SS.BBN.SS
 525         input = spaces + wordChar +wordChar + nsm + spaces;
 526         twoFindIndexes(input, matcher, 2, 5);
 527         // SS.BN.SS
 528         input = spaces + wordChar + nsm + spaces;
 529         twoFindIndexes(input, matcher, 2, 4);
 530         // SS.BNN.SS
 531         input = spaces + wordChar + nsm + nsm + spaces;
 532         twoFindIndexes(input, matcher, 2, 5);
 533         // SSN.BB.SS
 534         input = spaces + nsm + wordChar + wordChar + spaces;
 535         twoFindIndexes(input, matcher, 3, 5);
 536         // SS.BNB.SS
 537         input = spaces + wordChar + nsm + wordChar + spaces;
 538         twoFindIndexes(input, matcher, 2, 5);
 539         // SSNNSS
 540         input = spaces + nsm + nsm + spaces;
 541         matcher.reset(input);
 542         if (matcher.find())
 543             failCount++;
 544         // SSN.BBN.SS
 545         input = spaces + nsm + wordChar + wordChar + nsm + spaces;
 546         twoFindIndexes(input, matcher, 3, 6);
 547 
 548         report("Unicode word boundary");
 549     }
 550 
 551     private static void twoFindIndexes(String input, Matcher matcher, int a,
 552                                        int b) throws Exception
 553     {
 554         matcher.reset(input);
 555         matcher.find();
 556         if (matcher.start() != a)
 557             failCount++;
 558         matcher.find();
 559         if (matcher.start() != b)
 560             failCount++;
 561     }
 562 
 563     // This test is for 6284152
 564     static void check(String regex, String input, String[] expected) {
 565         List<String> result = new ArrayList<String>();
 566         Pattern p = Pattern.compile(regex);
 567         Matcher m = p.matcher(input);
 568         while (m.find()) {
 569             result.add(m.group());
 570         }
 571         if (!Arrays.asList(expected).equals(result))
 572             failCount++;
 573     }
 574 
 575     private static void lookbehindTest() throws Exception {
 576         //Positive
 577         check("(?<=%.{0,5})foo\\d",
 578               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 579               new String[]{"foo1", "foo2", "foo3"});
 580 
 581         //boundary at end of the lookbehind sub-regex should work consistently
 582         //with the boundary just after the lookbehind sub-regex
 583         check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
 584         check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
 585         check("(?<!abc )\\bfoo", "abc foo", new String[0]);
 586         check("(?<!abc \\b)foo", "abc foo", new String[0]);
 587 
 588         //Negative
 589         check("(?<!%.{0,5})foo\\d",
 590               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 591               new String[] {"foo4", "foo5"});
 592 
 593         //Positive greedy
 594         check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
 595 
 596         //Positive reluctant
 597         check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
 598 
 599         //supplementary
 600         check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 601               new String[] {"fo\ud800\udc00o"});
 602         check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 603               new String[] {"fo\ud800\udc00o"});
 604         check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
 605               new String[] {"fo\ud800\udc00o"});
 606         check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
 607               new String[] {"fo\ud800\udc00o"});
 608         report("Lookbehind");
 609     }
 610 
 611     // This test is for 4938995
 612     // Check to see if weak region boundaries are transparent to
 613     // lookahead and lookbehind constructs
 614     private static void boundsTest() throws Exception {
 615         String fullMessage = "catdogcat";
 616         Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
 617         Matcher matcher = pattern.matcher("catdogca");
 618         matcher.useTransparentBounds(true);
 619         if (matcher.find())
 620             failCount++;
 621         matcher.reset("atdogcat");
 622         if (matcher.find())
 623             failCount++;
 624         matcher.reset(fullMessage);
 625         if (!matcher.find())
 626             failCount++;
 627         matcher.reset(fullMessage);
 628         matcher.region(0,9);
 629         if (!matcher.find())
 630             failCount++;
 631         matcher.reset(fullMessage);
 632         matcher.region(0,6);
 633         if (!matcher.find())
 634             failCount++;
 635         matcher.reset(fullMessage);
 636         matcher.region(3,6);
 637         if (!matcher.find())
 638             failCount++;
 639         matcher.useTransparentBounds(false);
 640         if (matcher.find())
 641             failCount++;
 642 
 643         // Negative lookahead/lookbehind
 644         pattern = Pattern.compile("(?<!cat)dog(?!cat)");
 645         matcher = pattern.matcher("dogcat");
 646         matcher.useTransparentBounds(true);
 647         matcher.region(0,3);
 648         if (matcher.find())
 649             failCount++;
 650         matcher.reset("catdog");
 651         matcher.region(3,6);
 652         if (matcher.find())
 653             failCount++;
 654         matcher.useTransparentBounds(false);
 655         matcher.reset("dogcat");
 656         matcher.region(0,3);
 657         if (!matcher.find())
 658             failCount++;
 659         matcher.reset("catdog");
 660         matcher.region(3,6);
 661         if (!matcher.find())
 662             failCount++;
 663 
 664         report("Region bounds transparency");
 665     }
 666 
 667     // This test is for 4945394
 668     private static void findFromTest() throws Exception {
 669         String message = "This is 40 $0 message.";
 670         Pattern pat = Pattern.compile("\\$0");
 671         Matcher match = pat.matcher(message);
 672         if (!match.find())
 673             failCount++;
 674         if (match.find())
 675             failCount++;
 676         if (match.find())
 677             failCount++;
 678         report("Check for alternating find");
 679     }
 680 
 681     // This test is for 4872664 and 4892980
 682     private static void negatedCharClassTest() throws Exception {
 683         Pattern pattern = Pattern.compile("[^>]");
 684         Matcher matcher = pattern.matcher("\u203A");
 685         if (!matcher.matches())
 686             failCount++;
 687         pattern = Pattern.compile("[^fr]");
 688         matcher = pattern.matcher("a");
 689         if (!matcher.find())
 690             failCount++;
 691         matcher.reset("\u203A");
 692         if (!matcher.find())
 693             failCount++;
 694         String s = "for";
 695         String result[] = s.split("[^fr]");
 696         if (!result[0].equals("f"))
 697             failCount++;
 698         if (!result[1].equals("r"))
 699             failCount++;
 700         s = "f\u203Ar";
 701         result = s.split("[^fr]");
 702         if (!result[0].equals("f"))
 703             failCount++;
 704         if (!result[1].equals("r"))
 705             failCount++;
 706 
 707         // Test adding to bits, subtracting a node, then adding to bits again
 708         pattern = Pattern.compile("[^f\u203Ar]");
 709         matcher = pattern.matcher("a");
 710         if (!matcher.find())
 711             failCount++;
 712         matcher.reset("f");
 713         if (matcher.find())
 714             failCount++;
 715         matcher.reset("\u203A");
 716         if (matcher.find())
 717             failCount++;
 718         matcher.reset("r");
 719         if (matcher.find())
 720             failCount++;
 721         matcher.reset("\u203B");
 722         if (!matcher.find())
 723             failCount++;
 724 
 725         // Test subtracting a node, adding to bits, subtracting again
 726         pattern = Pattern.compile("[^\u203Ar\u203B]");
 727         matcher = pattern.matcher("a");
 728         if (!matcher.find())
 729             failCount++;
 730         matcher.reset("\u203A");
 731         if (matcher.find())
 732             failCount++;
 733         matcher.reset("r");
 734         if (matcher.find())
 735             failCount++;
 736         matcher.reset("\u203B");
 737         if (matcher.find())
 738             failCount++;
 739         matcher.reset("\u203C");
 740         if (!matcher.find())
 741             failCount++;
 742 
 743         report("Negated Character Class");
 744     }
 745 
 746     // This test is for 4628291
 747     private static void toStringTest() throws Exception {
 748         Pattern pattern = Pattern.compile("b+");
 749         if (pattern.toString() != "b+")
 750             failCount++;
 751         Matcher matcher = pattern.matcher("aaabbbccc");
 752         String matcherString = matcher.toString(); // unspecified
 753         matcher.find();
 754         matcherString = matcher.toString(); // unspecified
 755         matcher.region(0,3);
 756         matcherString = matcher.toString(); // unspecified
 757         matcher.reset();
 758         matcherString = matcher.toString(); // unspecified
 759         report("toString");
 760     }
 761 
 762     // This test is for 4808962
 763     private static void literalPatternTest() throws Exception {
 764         int flags = Pattern.LITERAL;
 765 
 766         Pattern pattern = Pattern.compile("abc\\t$^", flags);
 767         check(pattern, "abc\\t$^", true);
 768 
 769         pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
 770         check(pattern, "abc\\t$^", true);
 771 
 772         pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
 773         check(pattern, "\\Qa^$bcabc\\E", true);
 774         check(pattern, "a^$bcabc", false);
 775 
 776         pattern = Pattern.compile("\\\\Q\\\\E");
 777         check(pattern, "\\Q\\E", true);
 778 
 779         pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
 780         check(pattern, "abcefg\\Q\\Ehij", true);
 781 
 782         pattern = Pattern.compile("\\\\\\Q\\\\E");
 783         check(pattern, "\\\\\\\\", true);
 784 
 785         pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
 786         check(pattern, "\\Qa^$bcabc\\E", true);
 787         check(pattern, "a^$bcabc", false);
 788 
 789         pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
 790         check(pattern, "\\Qabc\\Edef", true);
 791         check(pattern, "abcdef", false);
 792 
 793         pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
 794         check(pattern, "abc\\Edef", true);
 795         check(pattern, "abcdef", false);
 796 
 797         pattern = Pattern.compile(Pattern.quote("\\E"));
 798         check(pattern, "\\E", true);
 799 
 800         pattern = Pattern.compile("((((abc.+?:)", flags);
 801         check(pattern, "((((abc.+?:)", true);
 802 
 803         flags |= Pattern.MULTILINE;
 804 
 805         pattern = Pattern.compile("^cat$", flags);
 806         check(pattern, "abc^cat$def", true);
 807         check(pattern, "cat", false);
 808 
 809         flags |= Pattern.CASE_INSENSITIVE;
 810 
 811         pattern = Pattern.compile("abcdef", flags);
 812         check(pattern, "ABCDEF", true);
 813         check(pattern, "AbCdEf", true);
 814 
 815         flags |= Pattern.DOTALL;
 816 
 817         pattern = Pattern.compile("a...b", flags);
 818         check(pattern, "A...b", true);
 819         check(pattern, "Axxxb", false);
 820 
 821         flags |= Pattern.CANON_EQ;
 822 
 823         Pattern p = Pattern.compile("testa\u030a", flags);
 824         check(pattern, "testa\u030a", false);
 825         check(pattern, "test\u00e5", false);
 826 
 827         // Supplementary character test
 828         flags = Pattern.LITERAL;
 829 
 830         pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
 831         check(pattern, toSupplementaries("abc\\t$^"), true);
 832 
 833         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
 834         check(pattern, toSupplementaries("abc\\t$^"), true);
 835 
 836         pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
 837         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 838         check(pattern, toSupplementaries("a^$bcabc"), false);
 839 
 840         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
 841         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 842         check(pattern, toSupplementaries("a^$bcabc"), false);
 843 
 844         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
 845         check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
 846         check(pattern, toSupplementaries("abcdef"), false);
 847 
 848         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
 849         check(pattern, toSupplementaries("abc\\Edef"), true);
 850         check(pattern, toSupplementaries("abcdef"), false);
 851 
 852         pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
 853         check(pattern, toSupplementaries("((((abc.+?:)"), true);
 854 
 855         flags |= Pattern.MULTILINE;
 856 
 857         pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
 858         check(pattern, toSupplementaries("abc^cat$def"), true);
 859         check(pattern, toSupplementaries("cat"), false);
 860 
 861         flags |= Pattern.DOTALL;
 862 
 863         // note: this is case-sensitive.
 864         pattern = Pattern.compile(toSupplementaries("a...b"), flags);
 865         check(pattern, toSupplementaries("a...b"), true);
 866         check(pattern, toSupplementaries("axxxb"), false);
 867 
 868         flags |= Pattern.CANON_EQ;
 869 
 870         String t = toSupplementaries("test");
 871         p = Pattern.compile(t + "a\u030a", flags);
 872         check(pattern, t + "a\u030a", false);
 873         check(pattern, t + "\u00e5", false);
 874 
 875         report("Literal pattern");
 876     }
 877 
 878     // This test is for 4803179
 879     // This test is also for 4808962, replacement parts
 880     private static void literalReplacementTest() throws Exception {
 881         int flags = Pattern.LITERAL;
 882 
 883         Pattern pattern = Pattern.compile("abc", flags);
 884         Matcher matcher = pattern.matcher("zzzabczzz");
 885         String replaceTest = "$0";
 886         String result = matcher.replaceAll(replaceTest);
 887         if (!result.equals("zzzabczzz"))
 888             failCount++;
 889 
 890         matcher.reset();
 891         String literalReplacement = matcher.quoteReplacement(replaceTest);
 892         result = matcher.replaceAll(literalReplacement);
 893         if (!result.equals("zzz$0zzz"))
 894             failCount++;
 895 
 896         matcher.reset();
 897         replaceTest = "\\t$\\$";
 898         literalReplacement = matcher.quoteReplacement(replaceTest);
 899         result = matcher.replaceAll(literalReplacement);
 900         if (!result.equals("zzz\\t$\\$zzz"))
 901             failCount++;
 902 
 903         // Supplementary character test
 904         pattern = Pattern.compile(toSupplementaries("abc"), flags);
 905         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
 906         replaceTest = "$0";
 907         result = matcher.replaceAll(replaceTest);
 908         if (!result.equals(toSupplementaries("zzzabczzz")))
 909             failCount++;
 910 
 911         matcher.reset();
 912         literalReplacement = matcher.quoteReplacement(replaceTest);
 913         result = matcher.replaceAll(literalReplacement);
 914         if (!result.equals(toSupplementaries("zzz$0zzz")))
 915             failCount++;
 916 
 917         matcher.reset();
 918         replaceTest = "\\t$\\$";
 919         literalReplacement = matcher.quoteReplacement(replaceTest);
 920         result = matcher.replaceAll(literalReplacement);
 921         if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
 922             failCount++;
 923 
 924         // IAE should be thrown if backslash or '$' is the last character
 925         // in replacement string
 926         try {
 927             "\uac00".replaceAll("\uac00", "$");
 928             failCount++;
 929         } catch (IllegalArgumentException iie) {
 930         } catch (Exception e) {
 931             failCount++;
 932         }
 933         try {
 934             "\uac00".replaceAll("\uac00", "\\");
 935             failCount++;
 936         } catch (IllegalArgumentException iie) {
 937         } catch (Exception e) {
 938             failCount++;
 939         }
 940         report("Literal replacement");
 941     }
 942 
 943     // This test is for 4757029
 944     private static void regionTest() throws Exception {
 945         Pattern pattern = Pattern.compile("abc");
 946         Matcher matcher = pattern.matcher("abcdefabc");
 947 
 948         matcher.region(0,9);
 949         if (!matcher.find())
 950             failCount++;
 951         if (!matcher.find())
 952             failCount++;
 953         matcher.region(0,3);
 954         if (!matcher.find())
 955            failCount++;
 956         matcher.region(3,6);
 957         if (matcher.find())
 958            failCount++;
 959         matcher.region(0,2);
 960         if (matcher.find())
 961            failCount++;
 962 
 963         expectRegionFail(matcher, 1, -1);
 964         expectRegionFail(matcher, -1, -1);
 965         expectRegionFail(matcher, -1, 1);
 966         expectRegionFail(matcher, 5, 3);
 967         expectRegionFail(matcher, 5, 12);
 968         expectRegionFail(matcher, 12, 12);
 969 
 970         pattern = Pattern.compile("^abc$");
 971         matcher = pattern.matcher("zzzabczzz");
 972         matcher.region(0,9);
 973         if (matcher.find())
 974             failCount++;
 975         matcher.region(3,6);
 976         if (!matcher.find())
 977            failCount++;
 978         matcher.region(3,6);
 979         matcher.useAnchoringBounds(false);
 980         if (matcher.find())
 981            failCount++;
 982 
 983         // Supplementary character test
 984         pattern = Pattern.compile(toSupplementaries("abc"));
 985         matcher = pattern.matcher(toSupplementaries("abcdefabc"));
 986         matcher.region(0,9*2);
 987         if (!matcher.find())
 988             failCount++;
 989         if (!matcher.find())
 990             failCount++;
 991         matcher.region(0,3*2);
 992         if (!matcher.find())
 993            failCount++;
 994         matcher.region(1,3*2);
 995         if (matcher.find())
 996            failCount++;
 997         matcher.region(3*2,6*2);
 998         if (matcher.find())
 999            failCount++;
1000         matcher.region(0,2*2);
1001         if (matcher.find())
1002            failCount++;
1003         matcher.region(0,2*2+1);
1004         if (matcher.find())
1005            failCount++;
1006 
1007         expectRegionFail(matcher, 1*2, -1);
1008         expectRegionFail(matcher, -1, -1);
1009         expectRegionFail(matcher, -1, 1*2);
1010         expectRegionFail(matcher, 5*2, 3*2);
1011         expectRegionFail(matcher, 5*2, 12*2);
1012         expectRegionFail(matcher, 12*2, 12*2);
1013 
1014         pattern = Pattern.compile(toSupplementaries("^abc$"));
1015         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
1016         matcher.region(0,9*2);
1017         if (matcher.find())
1018             failCount++;
1019         matcher.region(3*2,6*2);
1020         if (!matcher.find())
1021            failCount++;
1022         matcher.region(3*2+1,6*2);
1023         if (matcher.find())
1024            failCount++;
1025         matcher.region(3*2,6*2-1);
1026         if (matcher.find())
1027            failCount++;
1028         matcher.region(3*2,6*2);
1029         matcher.useAnchoringBounds(false);
1030         if (matcher.find())
1031            failCount++;
1032         report("Regions");
1033     }
1034 
1035     private static void expectRegionFail(Matcher matcher, int index1,
1036                                          int index2)
1037     {
1038         try {
1039             matcher.region(index1, index2);
1040             failCount++;
1041         } catch (IndexOutOfBoundsException ioobe) {
1042             // Correct result
1043         } catch (IllegalStateException ise) {
1044             // Correct result
1045         }
1046     }
1047 
1048     // This test is for 4803197
1049     private static void escapedSegmentTest() throws Exception {
1050 
1051         Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
1052         check(pattern, "dir1\\dir2", true);
1053 
1054         pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
1055         check(pattern, "dir1\\dir2\\", true);
1056 
1057         pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
1058         check(pattern, "dir1\\dir2\\", true);
1059 
1060         // Supplementary character test
1061         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
1062         check(pattern, toSupplementaries("dir1\\dir2"), true);
1063 
1064         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
1065         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1066 
1067         pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1068         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1069 
1070         report("Escaped segment");
1071     }
1072 
1073     // This test is for 4792284
1074     private static void nonCaptureRepetitionTest() throws Exception {
1075         String input = "abcdefgh;";
1076 
1077         String[] patterns = new String[] {
1078             "(?:\\w{4})+;",
1079             "(?:\\w{8})*;",
1080             "(?:\\w{2}){2,4};",
1081             "(?:\\w{4}){2,};",   // only matches the
1082             ".*?(?:\\w{5})+;",   //     specified minimum
1083             ".*?(?:\\w{9})*;",   //     number of reps - OK
1084             "(?:\\w{4})+?;",     // lazy repetition - OK
1085             "(?:\\w{4})++;",     // possessive repetition - OK
1086             "(?:\\w{2,}?)+;",    // non-deterministic - OK
1087             "(\\w{4})+;",        // capturing group - OK
1088         };
1089 
1090         for (int i = 0; i < patterns.length; i++) {
1091             // Check find()
1092             check(patterns[i], 0, input, input, true);
1093             // Check matches()
1094             Pattern p = Pattern.compile(patterns[i]);
1095             Matcher m = p.matcher(input);
1096 
1097             if (m.matches()) {
1098                 if (!m.group(0).equals(input))
1099                     failCount++;
1100             } else {
1101                 failCount++;
1102             }
1103         }
1104 
1105         report("Non capturing repetition");
1106     }
1107 
1108     // This test is for 6358731
1109     private static void notCapturedGroupCurlyMatchTest() throws Exception {
1110         Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1111         Matcher matcher = pattern.matcher("abcd");
1112         if (!matcher.matches() ||
1113              matcher.group(1) != null ||
1114              !matcher.group(2).equals("abcd")) {
1115             failCount++;
1116         }
1117         report("Not captured GroupCurly");
1118     }
1119 
1120     // This test is for 4706545
1121     private static void javaCharClassTest() throws Exception {
1122         for (int i=0; i<1000; i++) {
1123             char c = (char)generator.nextInt();
1124             check("{javaLowerCase}", c, Character.isLowerCase(c));
1125             check("{javaUpperCase}", c, Character.isUpperCase(c));
1126             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1127             check("{javaTitleCase}", c, Character.isTitleCase(c));
1128             check("{javaDigit}", c, Character.isDigit(c));
1129             check("{javaDefined}", c, Character.isDefined(c));
1130             check("{javaLetter}", c, Character.isLetter(c));
1131             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1132             check("{javaJavaIdentifierStart}", c,
1133                   Character.isJavaIdentifierStart(c));
1134             check("{javaJavaIdentifierPart}", c,
1135                   Character.isJavaIdentifierPart(c));
1136             check("{javaUnicodeIdentifierStart}", c,
1137                   Character.isUnicodeIdentifierStart(c));
1138             check("{javaUnicodeIdentifierPart}", c,
1139                   Character.isUnicodeIdentifierPart(c));
1140             check("{javaIdentifierIgnorable}", c,
1141                   Character.isIdentifierIgnorable(c));
1142             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1143             check("{javaWhitespace}", c, Character.isWhitespace(c));
1144             check("{javaISOControl}", c, Character.isISOControl(c));
1145             check("{javaMirrored}", c, Character.isMirrored(c));
1146 
1147         }
1148 
1149         // Supplementary character test
1150         for (int i=0; i<1000; i++) {
1151             int c = generator.nextInt(Character.MAX_CODE_POINT
1152                                       - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1153                         + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1154             check("{javaLowerCase}", c, Character.isLowerCase(c));
1155             check("{javaUpperCase}", c, Character.isUpperCase(c));
1156             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1157             check("{javaTitleCase}", c, Character.isTitleCase(c));
1158             check("{javaDigit}", c, Character.isDigit(c));
1159             check("{javaDefined}", c, Character.isDefined(c));
1160             check("{javaLetter}", c, Character.isLetter(c));
1161             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1162             check("{javaJavaIdentifierStart}", c,
1163                   Character.isJavaIdentifierStart(c));
1164             check("{javaJavaIdentifierPart}", c,
1165                   Character.isJavaIdentifierPart(c));
1166             check("{javaUnicodeIdentifierStart}", c,
1167                   Character.isUnicodeIdentifierStart(c));
1168             check("{javaUnicodeIdentifierPart}", c,
1169                   Character.isUnicodeIdentifierPart(c));
1170             check("{javaIdentifierIgnorable}", c,
1171                   Character.isIdentifierIgnorable(c));
1172             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1173             check("{javaWhitespace}", c, Character.isWhitespace(c));
1174             check("{javaISOControl}", c, Character.isISOControl(c));
1175             check("{javaMirrored}", c, Character.isMirrored(c));
1176         }
1177 
1178         report("Java character classes");
1179     }
1180 
1181     // This test is for 4523620
1182     /*
1183     private static void numOccurrencesTest() throws Exception {
1184         Pattern pattern = Pattern.compile("aaa");
1185 
1186         if (pattern.numOccurrences("aaaaaa", false) != 2)
1187             failCount++;
1188         if (pattern.numOccurrences("aaaaaa", true) != 4)
1189             failCount++;
1190 
1191         pattern = Pattern.compile("^");
1192         if (pattern.numOccurrences("aaaaaa", false) != 1)
1193             failCount++;
1194         if (pattern.numOccurrences("aaaaaa", true) != 1)
1195             failCount++;
1196 
1197         report("Number of Occurrences");
1198     }
1199     */
1200 
1201     // This test is for 4776374
1202     private static void caretBetweenTerminatorsTest() throws Exception {
1203         int flags1 = Pattern.DOTALL;
1204         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1205         int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1206         int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1207 
1208         check("^....", flags1, "test\ntest", "test", true);
1209         check(".....^", flags1, "test\ntest", "test", false);
1210         check(".....^", flags1, "test\n", "test", false);
1211         check("....^", flags1, "test\r\n", "test", false);
1212 
1213         check("^....", flags2, "test\ntest", "test", true);
1214         check("....^", flags2, "test\ntest", "test", false);
1215         check(".....^", flags2, "test\n", "test", false);
1216         check("....^", flags2, "test\r\n", "test", false);
1217 
1218         check("^....", flags3, "test\ntest", "test", true);
1219         check(".....^", flags3, "test\ntest", "test\n", true);
1220         check(".....^", flags3, "test\u0085test", "test\u0085", false);
1221         check(".....^", flags3, "test\n", "test", false);
1222         check(".....^", flags3, "test\r\n", "test", false);
1223         check("......^", flags3, "test\r\ntest", "test\r\n", true);
1224 
1225         check("^....", flags4, "test\ntest", "test", true);
1226         check(".....^", flags3, "test\ntest", "test\n", true);
1227         check(".....^", flags4, "test\u0085test", "test\u0085", true);
1228         check(".....^", flags4, "test\n", "test\n", false);
1229         check(".....^", flags4, "test\r\n", "test\r", false);
1230 
1231         // Supplementary character test
1232         String t = toSupplementaries("test");
1233         check("^....", flags1, t+"\n"+t, t, true);
1234         check(".....^", flags1, t+"\n"+t, t, false);
1235         check(".....^", flags1, t+"\n", t, false);
1236         check("....^", flags1, t+"\r\n", t, false);
1237 
1238         check("^....", flags2, t+"\n"+t, t, true);
1239         check("....^", flags2, t+"\n"+t, t, false);
1240         check(".....^", flags2, t+"\n", t, false);
1241         check("....^", flags2, t+"\r\n", t, false);
1242 
1243         check("^....", flags3, t+"\n"+t, t, true);
1244         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1245         check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1246         check(".....^", flags3, t+"\n", t, false);
1247         check(".....^", flags3, t+"\r\n", t, false);
1248         check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1249 
1250         check("^....", flags4, t+"\n"+t, t, true);
1251         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1252         check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1253         check(".....^", flags4, t+"\n", t+"\n", false);
1254         check(".....^", flags4, t+"\r\n", t+"\r", false);
1255 
1256         report("Caret between terminators");
1257     }
1258 
1259     // This test is for 4727935
1260     private static void dollarAtEndTest() throws Exception {
1261         int flags1 = Pattern.DOTALL;
1262         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1263         int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1264 
1265         check("....$", flags1, "test\n", "test", true);
1266         check("....$", flags1, "test\r\n", "test", true);
1267         check(".....$", flags1, "test\n", "test\n", true);
1268         check(".....$", flags1, "test\u0085", "test\u0085", true);
1269         check("....$", flags1, "test\u0085", "test", true);
1270 
1271         check("....$", flags2, "test\n", "test", true);
1272         check(".....$", flags2, "test\n", "test\n", true);
1273         check(".....$", flags2, "test\u0085", "test\u0085", true);
1274         check("....$", flags2, "test\u0085", "est\u0085", true);
1275 
1276         check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1277         check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1278         check("....$blah", flags3, "test\nblah", "!!!!", false);
1279         check(".....$blah", flags3, "test\nblah", "!!!!", false);
1280 
1281         // Supplementary character test
1282         String t = toSupplementaries("test");
1283         String b = toSupplementaries("blah");
1284         check("....$", flags1, t+"\n", t, true);
1285         check("....$", flags1, t+"\r\n", t, true);
1286         check(".....$", flags1, t+"\n", t+"\n", true);
1287         check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1288         check("....$", flags1, t+"\u0085", t, true);
1289 
1290         check("....$", flags2, t+"\n", t, true);
1291         check(".....$", flags2, t+"\n", t+"\n", true);
1292         check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1293         check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1294 
1295         check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1296         check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1297         check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1298         check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1299 
1300         report("Dollar at End");
1301     }
1302 
1303     // This test is for 4711773
1304     private static void multilineDollarTest() throws Exception {
1305         Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1306         Matcher matcher = findCR.matcher("first bit\nsecond bit");
1307         matcher.find();
1308         if (matcher.start(0) != 9)
1309             failCount++;
1310         matcher.find();
1311         if (matcher.start(0) != 20)
1312             failCount++;
1313 
1314         // Supplementary character test
1315         matcher = findCR.matcher(toSupplementaries("first  bit\n second  bit")); // double BMP chars
1316         matcher.find();
1317         if (matcher.start(0) != 9*2)
1318             failCount++;
1319         matcher.find();
1320         if (matcher.start(0) != 20*2)
1321             failCount++;
1322 
1323         report("Multiline Dollar");
1324     }
1325 
1326     private static void reluctantRepetitionTest() throws Exception {
1327         Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1328         check(p, "1 word word word 2", true);
1329         check(p, "1 wor wo w 2", true);
1330         check(p, "1 word word 2", true);
1331         check(p, "1 word 2", true);
1332         check(p, "1 wo w w 2", true);
1333         check(p, "1 wo w 2", true);
1334         check(p, "1 wor w 2", true);
1335 
1336         p = Pattern.compile("([a-z])+?c");
1337         Matcher m = p.matcher("ababcdefdec");
1338         check(m, "ababc");
1339 
1340         // Supplementary character test
1341         p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1342         m = p.matcher(toSupplementaries("ababcdefdec"));
1343         check(m, toSupplementaries("ababc"));
1344 
1345         report("Reluctant Repetition");
1346     }
1347 
1348     private static void serializeTest() throws Exception {
1349         String patternStr = "(b)";
1350         String matchStr = "b";
1351         Pattern pattern = Pattern.compile(patternStr);
1352         ByteArrayOutputStream baos = new ByteArrayOutputStream();
1353         ObjectOutputStream oos = new ObjectOutputStream(baos);
1354         oos.writeObject(pattern);
1355         oos.close();
1356         ObjectInputStream ois = new ObjectInputStream(
1357             new ByteArrayInputStream(baos.toByteArray()));
1358         Pattern serializedPattern = (Pattern)ois.readObject();
1359         ois.close();
1360         Matcher matcher = serializedPattern.matcher(matchStr);
1361         if (!matcher.matches())
1362             failCount++;
1363         if (matcher.groupCount() != 1)
1364             failCount++;
1365 
1366         report("Serialization");
1367     }
1368 
1369     private static void gTest() {
1370         Pattern pattern = Pattern.compile("\\G\\w");
1371         Matcher matcher = pattern.matcher("abc#x#x");
1372         matcher.find();
1373         matcher.find();
1374         matcher.find();
1375         if (matcher.find())
1376             failCount++;
1377 
1378         pattern = Pattern.compile("\\GA*");
1379         matcher = pattern.matcher("1A2AA3");
1380         matcher.find();
1381         if (matcher.find())
1382             failCount++;
1383 
1384         pattern = Pattern.compile("\\GA*");
1385         matcher = pattern.matcher("1A2AA3");
1386         if (!matcher.find(1))
1387             failCount++;
1388         matcher.find();
1389         if (matcher.find())
1390             failCount++;
1391 
1392         report("\\G");
1393     }
1394 
1395     private static void zTest() {
1396         Pattern pattern = Pattern.compile("foo\\Z");
1397         // Positives
1398         check(pattern, "foo\u0085", true);
1399         check(pattern, "foo\u2028", true);
1400         check(pattern, "foo\u2029", true);
1401         check(pattern, "foo\n", true);
1402         check(pattern, "foo\r", true);
1403         check(pattern, "foo\r\n", true);
1404         // Negatives
1405         check(pattern, "fooo", false);
1406         check(pattern, "foo\n\r", false);
1407 
1408         pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1409         // Positives
1410         check(pattern, "foo", true);
1411         check(pattern, "foo\n", true);
1412         // Negatives
1413         check(pattern, "foo\r", false);
1414         check(pattern, "foo\u0085", false);
1415         check(pattern, "foo\u2028", false);
1416         check(pattern, "foo\u2029", false);
1417 
1418         report("\\Z");
1419     }
1420 
1421     private static void replaceFirstTest() {
1422         Pattern pattern = Pattern.compile("(ab)(c*)");
1423         Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1424         if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1425             failCount++;
1426 
1427         matcher.reset("zzzabccczzzabcczzzabccczzz");
1428         if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1429             failCount++;
1430 
1431         matcher.reset("zzzabccczzzabcczzzabccczzz");
1432         String result = matcher.replaceFirst("$1");
1433         if (!result.equals("zzzabzzzabcczzzabccczzz"))
1434             failCount++;
1435 
1436         matcher.reset("zzzabccczzzabcczzzabccczzz");
1437         result = matcher.replaceFirst("$2");
1438         if (!result.equals("zzzccczzzabcczzzabccczzz"))
1439             failCount++;
1440 
1441         pattern = Pattern.compile("a*");
1442         matcher = pattern.matcher("aaaaaaaaaa");
1443         if (!matcher.replaceFirst("test").equals("test"))
1444             failCount++;
1445 
1446         pattern = Pattern.compile("a+");
1447         matcher = pattern.matcher("zzzaaaaaaaaaa");
1448         if (!matcher.replaceFirst("test").equals("zzztest"))
1449             failCount++;
1450 
1451         // Supplementary character test
1452         pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1453         matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1454         if (!matcher.replaceFirst(toSupplementaries("test"))
1455                 .equals(toSupplementaries("testzzzabcczzzabccc")))
1456             failCount++;
1457 
1458         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1459         if (!matcher.replaceFirst(toSupplementaries("test")).
1460             equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1461             failCount++;
1462 
1463         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1464         result = matcher.replaceFirst("$1");
1465         if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1466             failCount++;
1467 
1468         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1469         result = matcher.replaceFirst("$2");
1470         if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1471             failCount++;
1472 
1473         pattern = Pattern.compile(toSupplementaries("a*"));
1474         matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1475         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1476             failCount++;
1477 
1478         pattern = Pattern.compile(toSupplementaries("a+"));
1479         matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1480         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1481             failCount++;
1482 
1483         report("Replace First");
1484     }
1485 
1486     private static void unixLinesTest() {
1487         Pattern pattern = Pattern.compile(".*");
1488         Matcher matcher = pattern.matcher("aa\u2028blah");
1489         matcher.find();
1490         if (!matcher.group(0).equals("aa"))
1491             failCount++;
1492 
1493         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1494         matcher = pattern.matcher("aa\u2028blah");
1495         matcher.find();
1496         if (!matcher.group(0).equals("aa\u2028blah"))
1497             failCount++;
1498 
1499         pattern = Pattern.compile("[az]$",
1500                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1501         matcher = pattern.matcher("aa\u2028zz");
1502         check(matcher, "a\u2028", false);
1503 
1504         // Supplementary character test
1505         pattern = Pattern.compile(".*");
1506         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1507         matcher.find();
1508         if (!matcher.group(0).equals(toSupplementaries("aa")))
1509             failCount++;
1510 
1511         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1512         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1513         matcher.find();
1514         if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1515             failCount++;
1516 
1517         pattern = Pattern.compile(toSupplementaries("[az]$"),
1518                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1519         matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1520         check(matcher, toSupplementaries("a\u2028"), false);
1521 
1522         report("Unix Lines");
1523     }
1524 
1525     private static void commentsTest() {
1526         int flags = Pattern.COMMENTS;
1527 
1528         Pattern pattern = Pattern.compile("aa \\# aa", flags);
1529         Matcher matcher = pattern.matcher("aa#aa");
1530         if (!matcher.matches())
1531             failCount++;
1532 
1533         pattern = Pattern.compile("aa  # blah", flags);
1534         matcher = pattern.matcher("aa");
1535         if (!matcher.matches())
1536             failCount++;
1537 
1538         pattern = Pattern.compile("aa blah", flags);
1539         matcher = pattern.matcher("aablah");
1540         if (!matcher.matches())
1541              failCount++;
1542 
1543         pattern = Pattern.compile("aa  # blah blech  ", flags);
1544         matcher = pattern.matcher("aa");
1545         if (!matcher.matches())
1546             failCount++;
1547 
1548         pattern = Pattern.compile("aa  # blah\n  ", flags);
1549         matcher = pattern.matcher("aa");
1550         if (!matcher.matches())
1551             failCount++;
1552 
1553         pattern = Pattern.compile("aa  # blah\nbc # blech", flags);
1554         matcher = pattern.matcher("aabc");
1555         if (!matcher.matches())
1556              failCount++;
1557 
1558         pattern = Pattern.compile("aa  # blah\nbc# blech", flags);
1559         matcher = pattern.matcher("aabc");
1560         if (!matcher.matches())
1561              failCount++;
1562 
1563         pattern = Pattern.compile("aa  # blah\nbc\\# blech", flags);
1564         matcher = pattern.matcher("aabc#blech");
1565         if (!matcher.matches())
1566              failCount++;
1567 
1568         // Supplementary character test
1569         pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1570         matcher = pattern.matcher(toSupplementaries("aa#aa"));
1571         if (!matcher.matches())
1572             failCount++;
1573 
1574         pattern = Pattern.compile(toSupplementaries("aa  # blah"), flags);
1575         matcher = pattern.matcher(toSupplementaries("aa"));
1576         if (!matcher.matches())
1577             failCount++;
1578 
1579         pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1580         matcher = pattern.matcher(toSupplementaries("aablah"));
1581         if (!matcher.matches())
1582              failCount++;
1583 
1584         pattern = Pattern.compile(toSupplementaries("aa  # blah blech  "), flags);
1585         matcher = pattern.matcher(toSupplementaries("aa"));
1586         if (!matcher.matches())
1587             failCount++;
1588 
1589         pattern = Pattern.compile(toSupplementaries("aa  # blah\n  "), flags);
1590         matcher = pattern.matcher(toSupplementaries("aa"));
1591         if (!matcher.matches())
1592             failCount++;
1593 
1594         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc # blech"), flags);
1595         matcher = pattern.matcher(toSupplementaries("aabc"));
1596         if (!matcher.matches())
1597              failCount++;
1598 
1599         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc# blech"), flags);
1600         matcher = pattern.matcher(toSupplementaries("aabc"));
1601         if (!matcher.matches())
1602              failCount++;
1603 
1604         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc\\# blech"), flags);
1605         matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1606         if (!matcher.matches())
1607              failCount++;
1608 
1609         report("Comments");
1610     }
1611 
1612     private static void caseFoldingTest() { // bug 4504687
1613         int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1614         Pattern pattern = Pattern.compile("aa", flags);
1615         Matcher matcher = pattern.matcher("ab");
1616         if (matcher.matches())
1617             failCount++;
1618 
1619         pattern = Pattern.compile("aA", flags);
1620         matcher = pattern.matcher("ab");
1621         if (matcher.matches())
1622             failCount++;
1623 
1624         pattern = Pattern.compile("aa", flags);
1625         matcher = pattern.matcher("aB");
1626         if (matcher.matches())
1627             failCount++;
1628         matcher = pattern.matcher("Ab");
1629         if (matcher.matches())
1630             failCount++;
1631 
1632         // ASCII               "a"
1633         // Latin-1 Supplement  "a" + grave
1634         // Cyrillic            "a"
1635         String[] patterns = new String[] {
1636             //single
1637             "a", "\u00e0", "\u0430",
1638             //slice
1639             "ab", "\u00e0\u00e1", "\u0430\u0431",
1640             //class single
1641             "[a]", "[\u00e0]", "[\u0430]",
1642             //class range
1643             "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1644             //back reference
1645             "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1646         };
1647 
1648         String[] texts = new String[] {
1649             "A", "\u00c0", "\u0410",
1650             "AB", "\u00c0\u00c1", "\u0410\u0411",
1651             "A", "\u00c0", "\u0410",
1652             "B", "\u00c2", "\u0411",
1653             "aA", "\u00e0\u00c0", "\u0430\u0410"
1654         };
1655 
1656         boolean[] expected = new boolean[] {
1657             true, false, false,
1658             true, false, false,
1659             true, false, false,
1660             true, false, false,
1661             true, false, false
1662         };
1663 
1664         flags = Pattern.CASE_INSENSITIVE;
1665         for (int i = 0; i < patterns.length; i++) {
1666             pattern = Pattern.compile(patterns[i], flags);
1667             matcher = pattern.matcher(texts[i]);
1668             if (matcher.matches() != expected[i]) {
1669                 System.out.println("<1> Failed at " + i);
1670                 failCount++;
1671             }
1672         }
1673 
1674         flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1675         for (int i = 0; i < patterns.length; i++) {
1676             pattern = Pattern.compile(patterns[i], flags);
1677             matcher = pattern.matcher(texts[i]);
1678             if (!matcher.matches()) {
1679                 System.out.println("<2> Failed at " + i);
1680                 failCount++;
1681             }
1682         }
1683         // flag unicode_case alone should do nothing
1684         flags = Pattern.UNICODE_CASE;
1685         for (int i = 0; i < patterns.length; i++) {
1686             pattern = Pattern.compile(patterns[i], flags);
1687             matcher = pattern.matcher(texts[i]);
1688             if (matcher.matches()) {
1689                 System.out.println("<3> Failed at " + i);
1690                 failCount++;
1691             }
1692         }
1693 
1694         // Special cases: i, I, u+0131 and u+0130
1695         flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1696         pattern = Pattern.compile("[h-j]+", flags);
1697         if (!pattern.matcher("\u0131\u0130").matches())
1698             failCount++;
1699         report("Case Folding");
1700     }
1701 
1702     private static void appendTest() {
1703         Pattern pattern = Pattern.compile("(ab)(cd)");
1704         Matcher matcher = pattern.matcher("abcd");
1705         String result = matcher.replaceAll("$2$1");
1706         if (!result.equals("cdab"))
1707             failCount++;
1708 
1709         String  s1 = "Swap all: first = 123, second = 456";
1710         String  s2 = "Swap one: first = 123, second = 456";
1711         String  r  = "$3$2$1";
1712         pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1713         matcher = pattern.matcher(s1);
1714 
1715         result = matcher.replaceAll(r);
1716         if (!result.equals("Swap all: 123 = first, 456 = second"))
1717             failCount++;
1718 
1719         matcher = pattern.matcher(s2);
1720 
1721         if (matcher.find()) {
1722             StringBuffer sb = new StringBuffer();
1723             matcher.appendReplacement(sb, r);
1724             matcher.appendTail(sb);
1725             result = sb.toString();
1726             if (!result.equals("Swap one: 123 = first, second = 456"))
1727                 failCount++;
1728         }
1729 
1730         // Supplementary character test
1731         pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1732         matcher = pattern.matcher(toSupplementaries("abcd"));
1733         result = matcher.replaceAll("$2$1");
1734         if (!result.equals(toSupplementaries("cdab")))
1735             failCount++;
1736 
1737         s1 = toSupplementaries("Swap all: first = 123, second = 456");
1738         s2 = toSupplementaries("Swap one: first = 123, second = 456");
1739         r  = toSupplementaries("$3$2$1");
1740         pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1741         matcher = pattern.matcher(s1);
1742 
1743         result = matcher.replaceAll(r);
1744         if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1745             failCount++;
1746 
1747         matcher = pattern.matcher(s2);
1748 
1749         if (matcher.find()) {
1750             StringBuffer sb = new StringBuffer();
1751             matcher.appendReplacement(sb, r);
1752             matcher.appendTail(sb);
1753             result = sb.toString();
1754             if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1755                 failCount++;
1756         }
1757         report("Append");
1758     }
1759 
1760     private static void splitTest() {
1761         Pattern pattern = Pattern.compile(":");
1762         String[] result = pattern.split("foo:and:boo", 2);
1763         if (!result[0].equals("foo"))
1764             failCount++;
1765         if (!result[1].equals("and:boo"))
1766             failCount++;
1767         // Supplementary character test
1768         Pattern patternX = Pattern.compile(toSupplementaries("X"));
1769         result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1770         if (!result[0].equals(toSupplementaries("foo")))
1771             failCount++;
1772         if (!result[1].equals(toSupplementaries("andXboo")))
1773             failCount++;
1774 
1775         CharBuffer cb = CharBuffer.allocate(100);
1776         cb.put("foo:and:boo");
1777         cb.flip();
1778         result = pattern.split(cb);
1779         if (!result[0].equals("foo"))
1780             failCount++;
1781         if (!result[1].equals("and"))
1782             failCount++;
1783         if (!result[2].equals("boo"))
1784             failCount++;
1785 
1786         // Supplementary character test
1787         CharBuffer cbs = CharBuffer.allocate(100);
1788         cbs.put(toSupplementaries("fooXandXboo"));
1789         cbs.flip();
1790         result = patternX.split(cbs);
1791         if (!result[0].equals(toSupplementaries("foo")))
1792             failCount++;
1793         if (!result[1].equals(toSupplementaries("and")))
1794             failCount++;
1795         if (!result[2].equals(toSupplementaries("boo")))
1796             failCount++;
1797 
1798         String source = "0123456789";
1799         for (int limit=-2; limit<3; limit++) {
1800             for (int x=0; x<10; x++) {
1801                 result = source.split(Integer.toString(x), limit);
1802                 int expectedLength = limit < 1 ? 2 : limit;
1803 
1804                 if ((limit == 0) && (x == 9)) {
1805                     // expected dropping of ""
1806                     if (result.length != 1)
1807                         failCount++;
1808                     if (!result[0].equals("012345678")) {
1809                         failCount++;
1810                     }
1811                 } else {
1812                     if (result.length != expectedLength) {
1813                         failCount++;
1814                     }
1815                     if (!result[0].equals(source.substring(0,x))) {
1816                         if (limit != 1) {
1817                             failCount++;
1818                         } else {
1819                             if (!result[0].equals(source.substring(0,10))) {
1820                                 failCount++;
1821                             }
1822                         }
1823                     }
1824                     if (expectedLength > 1) { // Check segment 2
1825                         if (!result[1].equals(source.substring(x+1,10)))
1826                             failCount++;
1827                     }
1828                 }
1829             }
1830         }
1831         // Check the case for no match found
1832         for (int limit=-2; limit<3; limit++) {
1833             result = source.split("e", limit);
1834             if (result.length != 1)
1835                 failCount++;
1836             if (!result[0].equals(source))
1837                 failCount++;
1838         }
1839         // Check the case for limit == 0, source = "";
1840         // split() now returns 0-length for empty source "" see #6559590
1841         source = "";
1842         result = source.split("e", 0);
1843         if (result.length != 1)
1844             failCount++;
1845         if (!result[0].equals(source))
1846             failCount++;
1847 
1848         // Check both split() and splitAsStraem(), especially for zero-lenth
1849         // input and zero-lenth match cases
1850         String[][] input = new String[][] {
1851             { " ",           "Abc Efg Hij" },   // normal non-zero-match
1852             { " ",           " Abc Efg Hij" },  // leading empty str for non-zero-match
1853             { " ",           "Abc  Efg Hij" },  // non-zero-match in the middle
1854             { "(?=\\p{Lu})", "AbcEfgHij" },     // no leading empty str for zero-match
1855             { "(?=\\p{Lu})", "AbcEfg" },
1856             { "(?=\\p{Lu})", "Abc" },
1857             { " ",           "" },              // zero-length input
1858             { ".*",          "" },
1859 
1860             // some tests from PatternStreamTest.java
1861             { "4",       "awgqwefg1fefw4vssv1vvv1" },
1862             { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" },
1863             { "1",       "awgqwefg1fefw4vssv1vvv1" },
1864             { "1",       "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" },
1865             { "\u56da",  "1\u56da23\u56da456\u56da7890" },
1866             { "\u56da",  "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" },
1867             { "\u56da",  "" },
1868             { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs
1869             { "o",       "boo:and:foo" },
1870             { "o",       "booooo:and:fooooo" },
1871             { "o",       "fooooo:" },
1872         };
1873 
1874         String[][] expected = new String[][] {
1875             { "Abc", "Efg", "Hij" },
1876             { "", "Abc", "Efg", "Hij" },
1877             { "Abc", "", "Efg", "Hij" },
1878             { "Abc", "Efg", "Hij" },
1879             { "Abc", "Efg" },
1880             { "Abc" },
1881             { "" },
1882             { "" },
1883 
1884             { "awgqwefg1fefw", "vssv1vvv1" },
1885             { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" },
1886             { "awgqwefg", "fefw4vssv", "vvv" },
1887             { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" },
1888             { "1", "23", "456", "7890" },
1889             { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" },
1890             { "" },
1891             { "This", "is", "testing", "", "with", "different", "separators" },
1892             { "b", "", ":and:f" },
1893             { "b", "", "", "", "", ":and:f" },
1894             { "f", "", "", "", "", ":" },
1895         };
1896         for (int i = 0; i < input.length; i++) {
1897             pattern = Pattern.compile(input[i][0]);
1898             if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) {
1899                 failCount++;
1900             }
1901             if (input[i][1].length() > 0 &&  // splitAsStream() return empty resulting
1902                                              // array for zero-length input for now
1903                 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(),
1904                                expected[i])) {
1905                 failCount++;
1906             }
1907         }
1908         report("Split");
1909     }
1910 
1911     private static void negationTest() {
1912         Pattern pattern = Pattern.compile("[\\[@^]+");
1913         Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1914         if (!matcher.find())
1915             failCount++;
1916         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1917             failCount++;
1918         pattern = Pattern.compile("[@\\[^]+");
1919         matcher = pattern.matcher("@@@@[[[[^^^^");
1920         if (!matcher.find())
1921             failCount++;
1922         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1923             failCount++;
1924         pattern = Pattern.compile("[@\\[^@]+");
1925         matcher = pattern.matcher("@@@@[[[[^^^^");
1926         if (!matcher.find())
1927             failCount++;
1928         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1929             failCount++;
1930 
1931         pattern = Pattern.compile("\\)");
1932         matcher = pattern.matcher("xxx)xxx");
1933         if (!matcher.find())
1934             failCount++;
1935 
1936         report("Negation");
1937     }
1938 
1939     private static void ampersandTest() {
1940         Pattern pattern = Pattern.compile("[&@]+");
1941         check(pattern, "@@@@&&&&", true);
1942 
1943         pattern = Pattern.compile("[@&]+");
1944         check(pattern, "@@@@&&&&", true);
1945 
1946         pattern = Pattern.compile("[@\\&]+");
1947         check(pattern, "@@@@&&&&", true);
1948 
1949         report("Ampersand");
1950     }
1951 
1952     private static void octalTest() throws Exception {
1953         Pattern pattern = Pattern.compile("\\u0007");
1954         Matcher matcher = pattern.matcher("\u0007");
1955         if (!matcher.matches())
1956             failCount++;
1957         pattern = Pattern.compile("\\07");
1958         matcher = pattern.matcher("\u0007");
1959         if (!matcher.matches())
1960             failCount++;
1961         pattern = Pattern.compile("\\007");
1962         matcher = pattern.matcher("\u0007");
1963         if (!matcher.matches())
1964             failCount++;
1965         pattern = Pattern.compile("\\0007");
1966         matcher = pattern.matcher("\u0007");
1967         if (!matcher.matches())
1968             failCount++;
1969         pattern = Pattern.compile("\\040");
1970         matcher = pattern.matcher("\u0020");
1971         if (!matcher.matches())
1972             failCount++;
1973         pattern = Pattern.compile("\\0403");
1974         matcher = pattern.matcher("\u00203");
1975         if (!matcher.matches())
1976             failCount++;
1977         pattern = Pattern.compile("\\0103");
1978         matcher = pattern.matcher("\u0043");
1979         if (!matcher.matches())
1980             failCount++;
1981 
1982         report("Octal");
1983     }
1984 
1985     private static void longPatternTest() throws Exception {
1986         try {
1987             Pattern pattern = Pattern.compile(
1988                 "a 32-character-long pattern xxxx");
1989             pattern = Pattern.compile("a 33-character-long pattern xxxxx");
1990             pattern = Pattern.compile("a thirty four character long regex");
1991             StringBuffer patternToBe = new StringBuffer(101);
1992             for (int i=0; i<100; i++)
1993                 patternToBe.append((char)(97 + i%26));
1994             pattern = Pattern.compile(patternToBe.toString());
1995         } catch (PatternSyntaxException e) {
1996             failCount++;
1997         }
1998 
1999         // Supplementary character test
2000         try {
2001             Pattern pattern = Pattern.compile(
2002                 toSupplementaries("a 32-character-long pattern xxxx"));
2003             pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
2004             pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
2005             StringBuffer patternToBe = new StringBuffer(101*2);
2006             for (int i=0; i<100; i++)
2007                 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
2008                                                      + 97 + i%26));
2009             pattern = Pattern.compile(patternToBe.toString());
2010         } catch (PatternSyntaxException e) {
2011             failCount++;
2012         }
2013         report("LongPattern");
2014     }
2015 
2016     private static void group0Test() throws Exception {
2017         Pattern pattern = Pattern.compile("(tes)ting");
2018         Matcher matcher = pattern.matcher("testing");
2019         check(matcher, "testing");
2020 
2021         matcher.reset("testing");
2022         if (matcher.lookingAt()) {
2023             if (!matcher.group(0).equals("testing"))
2024                 failCount++;
2025         } else {
2026             failCount++;
2027         }
2028 
2029         matcher.reset("testing");
2030         if (matcher.matches()) {
2031             if (!matcher.group(0).equals("testing"))
2032                 failCount++;
2033         } else {
2034             failCount++;
2035         }
2036 
2037         pattern = Pattern.compile("(tes)ting");
2038         matcher = pattern.matcher("testing");
2039         if (matcher.lookingAt()) {
2040             if (!matcher.group(0).equals("testing"))
2041                 failCount++;
2042         } else {
2043             failCount++;
2044         }
2045 
2046         pattern = Pattern.compile("^(tes)ting");
2047         matcher = pattern.matcher("testing");
2048         if (matcher.matches()) {
2049             if (!matcher.group(0).equals("testing"))
2050                 failCount++;
2051         } else {
2052             failCount++;
2053         }
2054 
2055         // Supplementary character test
2056         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2057         matcher = pattern.matcher(toSupplementaries("testing"));
2058         check(matcher, toSupplementaries("testing"));
2059 
2060         matcher.reset(toSupplementaries("testing"));
2061         if (matcher.lookingAt()) {
2062             if (!matcher.group(0).equals(toSupplementaries("testing")))
2063                 failCount++;
2064         } else {
2065             failCount++;
2066         }
2067 
2068         matcher.reset(toSupplementaries("testing"));
2069         if (matcher.matches()) {
2070             if (!matcher.group(0).equals(toSupplementaries("testing")))
2071                 failCount++;
2072         } else {
2073             failCount++;
2074         }
2075 
2076         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2077         matcher = pattern.matcher(toSupplementaries("testing"));
2078         if (matcher.lookingAt()) {
2079             if (!matcher.group(0).equals(toSupplementaries("testing")))
2080                 failCount++;
2081         } else {
2082             failCount++;
2083         }
2084 
2085         pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
2086         matcher = pattern.matcher(toSupplementaries("testing"));
2087         if (matcher.matches()) {
2088             if (!matcher.group(0).equals(toSupplementaries("testing")))
2089                 failCount++;
2090         } else {
2091             failCount++;
2092         }
2093 
2094         report("Group0");
2095     }
2096 
2097     private static void findIntTest() throws Exception {
2098         Pattern p = Pattern.compile("blah");
2099         Matcher m = p.matcher("zzzzblahzzzzzblah");
2100         boolean result = m.find(2);
2101         if (!result)
2102             failCount++;
2103 
2104         p = Pattern.compile("$");
2105         m = p.matcher("1234567890");
2106         result = m.find(10);
2107         if (!result)
2108             failCount++;
2109         try {
2110             result = m.find(11);
2111             failCount++;
2112         } catch (IndexOutOfBoundsException e) {
2113             // correct result
2114         }
2115 
2116         // Supplementary character test
2117         p = Pattern.compile(toSupplementaries("blah"));
2118         m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
2119         result = m.find(2);
2120         if (!result)
2121             failCount++;
2122 
2123         report("FindInt");
2124     }
2125 
2126     private static void emptyPatternTest() throws Exception {
2127         Pattern p = Pattern.compile("");
2128         Matcher m = p.matcher("foo");
2129 
2130         // Should find empty pattern at beginning of input
2131         boolean result = m.find();
2132         if (result != true)
2133             failCount++;
2134         if (m.start() != 0)
2135             failCount++;
2136 
2137         // Should not match entire input if input is not empty
2138         m.reset();
2139         result = m.matches();
2140         if (result == true)
2141             failCount++;
2142 
2143         try {
2144             m.start(0);
2145             failCount++;
2146         } catch (IllegalStateException e) {
2147             // Correct result
2148         }
2149 
2150         // Should match entire input if input is empty
2151         m.reset("");
2152         result = m.matches();
2153         if (result != true)
2154             failCount++;
2155 
2156         result = Pattern.matches("", "");
2157         if (result != true)
2158             failCount++;
2159 
2160         result = Pattern.matches("", "foo");
2161         if (result == true)
2162             failCount++;
2163         report("EmptyPattern");
2164     }
2165 
2166     private static void charClassTest() throws Exception {
2167         Pattern pattern = Pattern.compile("blah[ab]]blech");
2168         check(pattern, "blahb]blech", true);
2169 
2170         pattern = Pattern.compile("[abc[def]]");
2171         check(pattern, "b", true);
2172 
2173         // Supplementary character tests
2174         pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2175         check(pattern, toSupplementaries("blahb]blech"), true);
2176 
2177         pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2178         check(pattern, toSupplementaries("b"), true);
2179 
2180         try {
2181             // u00ff when UNICODE_CASE
2182             pattern = Pattern.compile("[ab\u00ffcd]",
2183                                       Pattern.CASE_INSENSITIVE|
2184                                       Pattern.UNICODE_CASE);
2185             check(pattern, "ab\u00ffcd", true);
2186             check(pattern, "Ab\u0178Cd", true);
2187 
2188             // u00b5 when UNICODE_CASE
2189             pattern = Pattern.compile("[ab\u00b5cd]",
2190                                       Pattern.CASE_INSENSITIVE|
2191                                       Pattern.UNICODE_CASE);
2192             check(pattern, "ab\u00b5cd", true);
2193             check(pattern, "Ab\u039cCd", true);
2194         } catch (Exception e) { failCount++; }
2195 
2196         /* Special cases
2197            (1)LatinSmallLetterLongS u+017f
2198            (2)LatinSmallLetterDotlessI u+0131
2199            (3)LatineCapitalLetterIWithDotAbove u+0130
2200            (4)KelvinSign u+212a
2201            (5)AngstromSign u+212b
2202         */
2203         int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2204         pattern = Pattern.compile("[sik\u00c5]+", flags);
2205         if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2206             failCount++;
2207 
2208         report("CharClass");
2209     }
2210 
2211     private static void caretTest() throws Exception {
2212         Pattern pattern = Pattern.compile("\\w*");
2213         Matcher matcher = pattern.matcher("a#bc#def##g");
2214         check(matcher, "a");
2215         check(matcher, "");
2216         check(matcher, "bc");
2217         check(matcher, "");
2218         check(matcher, "def");
2219         check(matcher, "");
2220         check(matcher, "");
2221         check(matcher, "g");
2222         check(matcher, "");
2223         if (matcher.find())
2224             failCount++;
2225 
2226         pattern = Pattern.compile("^\\w*");
2227         matcher = pattern.matcher("a#bc#def##g");
2228         check(matcher, "a");
2229         if (matcher.find())
2230             failCount++;
2231 
2232         pattern = Pattern.compile("\\w");
2233         matcher = pattern.matcher("abc##x");
2234         check(matcher, "a");
2235         check(matcher, "b");
2236         check(matcher, "c");
2237         check(matcher, "x");
2238         if (matcher.find())
2239             failCount++;
2240 
2241         pattern = Pattern.compile("^\\w");
2242         matcher = pattern.matcher("abc##x");
2243         check(matcher, "a");
2244         if (matcher.find())
2245             failCount++;
2246 
2247         pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2248         matcher = pattern.matcher("abcdef-ghi\njklmno");
2249         check(matcher, "abc");
2250         if (matcher.find())
2251             failCount++;
2252 
2253         pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2254         matcher = pattern.matcher("abcdef-ghi\njklmno");
2255         check(matcher, "abc");
2256         check(matcher, "jkl");
2257         if (matcher.find())
2258             failCount++;
2259 
2260         pattern = Pattern.compile("^", Pattern.MULTILINE);
2261         matcher = pattern.matcher("this is some text");
2262         String result = matcher.replaceAll("X");
2263         if (!result.equals("Xthis is some text"))
2264             failCount++;
2265 
2266         pattern = Pattern.compile("^");
2267         matcher = pattern.matcher("this is some text");
2268         result = matcher.replaceAll("X");
2269         if (!result.equals("Xthis is some text"))
2270             failCount++;
2271 
2272         pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2273         matcher = pattern.matcher("this is some text\n");
2274         result = matcher.replaceAll("X");
2275         if (!result.equals("Xthis is some text\n"))
2276             failCount++;
2277 
2278         report("Caret");
2279     }
2280 
2281     private static void groupCaptureTest() throws Exception {
2282         // Independent group
2283         Pattern pattern = Pattern.compile("x+(?>y+)z+");
2284         Matcher matcher = pattern.matcher("xxxyyyzzz");
2285         matcher.find();
2286         try {
2287             String blah = matcher.group(1);
2288             failCount++;
2289         } catch (IndexOutOfBoundsException ioobe) {
2290             // Good result
2291         }
2292         // Pure group
2293         pattern = Pattern.compile("x+(?:y+)z+");
2294         matcher = pattern.matcher("xxxyyyzzz");
2295         matcher.find();
2296         try {
2297             String blah = matcher.group(1);
2298             failCount++;
2299         } catch (IndexOutOfBoundsException ioobe) {
2300             // Good result
2301         }
2302 
2303         // Supplementary character tests
2304         // Independent group
2305         pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2306         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2307         matcher.find();
2308         try {
2309             String blah = matcher.group(1);
2310             failCount++;
2311         } catch (IndexOutOfBoundsException ioobe) {
2312             // Good result
2313         }
2314         // Pure group
2315         pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2316         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2317         matcher.find();
2318         try {
2319             String blah = matcher.group(1);
2320             failCount++;
2321         } catch (IndexOutOfBoundsException ioobe) {
2322             // Good result
2323         }
2324 
2325         report("GroupCapture");
2326     }
2327 
2328     private static void backRefTest() throws Exception {
2329         Pattern pattern = Pattern.compile("(a*)bc\\1");
2330         check(pattern, "zzzaabcazzz", true);
2331 
2332         pattern = Pattern.compile("(a*)bc\\1");
2333         check(pattern, "zzzaabcaazzz", true);
2334 
2335         pattern = Pattern.compile("(abc)(def)\\1");
2336         check(pattern, "abcdefabc", true);
2337 
2338         pattern = Pattern.compile("(abc)(def)\\3");
2339         check(pattern, "abcdefabc", false);
2340 
2341         try {
2342             for (int i = 1; i < 10; i++) {
2343                 // Make sure backref 1-9 are always accepted
2344                 pattern = Pattern.compile("abcdef\\" + i);
2345                 // and fail to match if the target group does not exit
2346                 check(pattern, "abcdef", false);
2347             }
2348         } catch(PatternSyntaxException e) {
2349             failCount++;
2350         }
2351 
2352         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2353         check(pattern, "abcdefghija", false);
2354         check(pattern, "abcdefghija1", true);
2355 
2356         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2357         check(pattern, "abcdefghijkk", true);
2358 
2359         pattern = Pattern.compile("(a)bcdefghij\\11");
2360         check(pattern, "abcdefghija1", true);
2361 
2362         // Supplementary character tests
2363         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2364         check(pattern, toSupplementaries("zzzaabcazzz"), true);
2365 
2366         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2367         check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2368 
2369         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2370         check(pattern, toSupplementaries("abcdefabc"), true);
2371 
2372         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2373         check(pattern, toSupplementaries("abcdefabc"), false);
2374 
2375         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2376         check(pattern, toSupplementaries("abcdefghija"), false);
2377         check(pattern, toSupplementaries("abcdefghija1"), true);
2378 
2379         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2380         check(pattern, toSupplementaries("abcdefghijkk"), true);
2381 
2382         report("BackRef");
2383     }
2384 
2385     /**
2386      * Unicode Technical Report #18, section 2.6 End of Line
2387      * There is no empty line to be matched in the sequence \u000D\u000A
2388      * but there is an empty line in the sequence \u000A\u000D.
2389      */
2390     private static void anchorTest() throws Exception {
2391         Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2392         Matcher m = p.matcher("blah1\r\nblah2");
2393         m.find();
2394         m.find();
2395         if (!m.group().equals("blah2"))
2396             failCount++;
2397 
2398         m.reset("blah1\n\rblah2");
2399         m.find();
2400         m.find();
2401         m.find();
2402         if (!m.group().equals("blah2"))
2403             failCount++;
2404 
2405         // Test behavior of $ with \r\n at end of input
2406         p = Pattern.compile(".+$");
2407         m = p.matcher("blah1\r\n");
2408         if (!m.find())
2409             failCount++;
2410        if (!m.group().equals("blah1"))
2411             failCount++;
2412         if (m.find())
2413             failCount++;
2414 
2415         // Test behavior of $ with \r\n at end of input in multiline
2416         p = Pattern.compile(".+$", Pattern.MULTILINE);
2417         m = p.matcher("blah1\r\n");
2418         if (!m.find())
2419             failCount++;
2420         if (m.find())
2421             failCount++;
2422 
2423         // Test for $ recognition of \u0085 for bug 4527731
2424         p = Pattern.compile(".+$", Pattern.MULTILINE);
2425         m = p.matcher("blah1\u0085");
2426         if (!m.find())
2427             failCount++;
2428 
2429         // Supplementary character test
2430         p = Pattern.compile("^.*$", Pattern.MULTILINE);
2431         m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2432         m.find();
2433         m.find();
2434         if (!m.group().equals(toSupplementaries("blah2")))
2435             failCount++;
2436 
2437         m.reset(toSupplementaries("blah1\n\rblah2"));
2438         m.find();
2439         m.find();
2440         m.find();
2441         if (!m.group().equals(toSupplementaries("blah2")))
2442             failCount++;
2443 
2444         // Test behavior of $ with \r\n at end of input
2445         p = Pattern.compile(".+$");
2446         m = p.matcher(toSupplementaries("blah1\r\n"));
2447         if (!m.find())
2448             failCount++;
2449         if (!m.group().equals(toSupplementaries("blah1")))
2450             failCount++;
2451         if (m.find())
2452             failCount++;
2453 
2454         // Test behavior of $ with \r\n at end of input in multiline
2455         p = Pattern.compile(".+$", Pattern.MULTILINE);
2456         m = p.matcher(toSupplementaries("blah1\r\n"));
2457         if (!m.find())
2458             failCount++;
2459         if (m.find())
2460             failCount++;
2461 
2462         // Test for $ recognition of \u0085 for bug 4527731
2463         p = Pattern.compile(".+$", Pattern.MULTILINE);
2464         m = p.matcher(toSupplementaries("blah1\u0085"));
2465         if (!m.find())
2466             failCount++;
2467 
2468         report("Anchors");
2469     }
2470 
2471     /**
2472      * A basic sanity test of Matcher.lookingAt().
2473      */
2474     private static void lookingAtTest() throws Exception {
2475         Pattern p = Pattern.compile("(ab)(c*)");
2476         Matcher m = p.matcher("abccczzzabcczzzabccc");
2477 
2478         if (!m.lookingAt())
2479             failCount++;
2480 
2481         if (!m.group().equals(m.group(0)))
2482             failCount++;
2483 
2484         m = p.matcher("zzzabccczzzabcczzzabccczzz");
2485         if (m.lookingAt())
2486             failCount++;
2487 
2488         // Supplementary character test
2489         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2490         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2491 
2492         if (!m.lookingAt())
2493             failCount++;
2494 
2495         if (!m.group().equals(m.group(0)))
2496             failCount++;
2497 
2498         m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2499         if (m.lookingAt())
2500             failCount++;
2501 
2502         report("Looking At");
2503     }
2504 
2505     /**
2506      * A basic sanity test of Matcher.matches().
2507      */
2508     private static void matchesTest() throws Exception {
2509         // matches()
2510         Pattern p = Pattern.compile("ulb(c*)");
2511         Matcher m = p.matcher("ulbcccccc");
2512         if (!m.matches())
2513             failCount++;
2514 
2515         // find() but not matches()
2516         m.reset("zzzulbcccccc");
2517         if (m.matches())
2518             failCount++;
2519 
2520         // lookingAt() but not matches()
2521         m.reset("ulbccccccdef");
2522         if (m.matches())
2523             failCount++;
2524 
2525         // matches()
2526         p = Pattern.compile("a|ad");
2527         m = p.matcher("ad");
2528         if (!m.matches())
2529             failCount++;
2530 
2531         // Supplementary character test
2532         // matches()
2533         p = Pattern.compile(toSupplementaries("ulb(c*)"));
2534         m = p.matcher(toSupplementaries("ulbcccccc"));
2535         if (!m.matches())
2536             failCount++;
2537 
2538         // find() but not matches()
2539         m.reset(toSupplementaries("zzzulbcccccc"));
2540         if (m.matches())
2541             failCount++;
2542 
2543         // lookingAt() but not matches()
2544         m.reset(toSupplementaries("ulbccccccdef"));
2545         if (m.matches())
2546             failCount++;
2547 
2548         // matches()
2549         p = Pattern.compile(toSupplementaries("a|ad"));
2550         m = p.matcher(toSupplementaries("ad"));
2551         if (!m.matches())
2552             failCount++;
2553 
2554         report("Matches");
2555     }
2556 
2557     /**
2558      * A basic sanity test of Pattern.matches().
2559      */
2560     private static void patternMatchesTest() throws Exception {
2561         // matches()
2562         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2563                              toSupplementaries("ulbcccccc")))
2564             failCount++;
2565 
2566         // find() but not matches()
2567         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2568                             toSupplementaries("zzzulbcccccc")))
2569             failCount++;
2570 
2571         // lookingAt() but not matches()
2572         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2573                             toSupplementaries("ulbccccccdef")))
2574             failCount++;
2575 
2576         // Supplementary character test
2577         // matches()
2578         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2579                              toSupplementaries("ulbcccccc")))
2580             failCount++;
2581 
2582         // find() but not matches()
2583         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2584                             toSupplementaries("zzzulbcccccc")))
2585             failCount++;
2586 
2587         // lookingAt() but not matches()
2588         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2589                             toSupplementaries("ulbccccccdef")))
2590             failCount++;
2591 
2592         report("Pattern Matches");
2593     }
2594 
2595     /**
2596      * Canonical equivalence testing. Tests the ability of the engine
2597      * to match sequences that are not explicitly specified in the
2598      * pattern when they are considered equivalent by the Unicode Standard.
2599      */
2600     private static void ceTest() throws Exception {
2601         // Decomposed char outside char classes
2602         Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2603         Matcher m = p.matcher("test\u00e5");
2604         if (!m.matches())
2605             failCount++;
2606 
2607         m.reset("testa\u030a");
2608         if (!m.matches())
2609             failCount++;
2610 
2611         // Composed char outside char classes
2612         p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2613         m = p.matcher("test\u00e5");
2614         if (!m.matches())
2615             failCount++;
2616 
2617         m.reset("testa\u030a");
2618         if (!m.find())
2619             failCount++;
2620 
2621         // Decomposed char inside a char class
2622         p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2623         m = p.matcher("test\u00e5");
2624         if (!m.find())
2625             failCount++;
2626 
2627         m.reset("testa\u030a");
2628         if (!m.find())
2629             failCount++;
2630 
2631         // Composed char inside a char class
2632         p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2633         m = p.matcher("test\u00e5");
2634         if (!m.find())
2635             failCount++;
2636 
2637         m.reset("testa\u0300");
2638         if (!m.find())
2639             failCount++;
2640 
2641         m.reset("testa\u030a");
2642         if (!m.find())
2643             failCount++;
2644 
2645         // Marks that cannot legally change order and be equivalent
2646         p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2647         check(p, "testa\u0308\u0300", true);
2648         check(p, "testa\u0300\u0308", false);
2649 
2650         // Marks that can legally change order and be equivalent
2651         p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2652         check(p, "testa\u0308\u0323", true);
2653         check(p, "testa\u0323\u0308", true);
2654 
2655         // Test all equivalences of the sequence a\u0308\u0323\u0300
2656         p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2657         check(p, "testa\u0308\u0323\u0300", true);
2658         check(p, "testa\u0323\u0308\u0300", true);
2659         check(p, "testa\u0308\u0300\u0323", true);
2660         check(p, "test\u00e4\u0323\u0300", true);
2661         check(p, "test\u00e4\u0300\u0323", true);
2662 
2663         /*
2664          * The following canonical equivalence tests don't work. Bug id: 4916384.
2665          *
2666         // Decomposed hangul (jamos)
2667         p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2668         m = p.matcher("\u1100\u1161");
2669         if (!m.matches())
2670             failCount++;
2671 
2672         m.reset("\uac00");
2673         if (!m.matches())
2674             failCount++;
2675 
2676         // Composed hangul
2677         p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2678         m = p.matcher("\u1100\u1161");
2679         if (!m.matches())
2680             failCount++;
2681 
2682         m.reset("\uac00");
2683         if (!m.matches())
2684             failCount++;
2685 
2686         // Decomposed supplementary outside char classes
2687         p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2688         m = p.matcher("test\ud834\uddc0");
2689         if (!m.matches())
2690             failCount++;
2691 
2692         m.reset("test\ud834\uddbc\ud834\udd6f");
2693         if (!m.matches())
2694             failCount++;
2695 
2696         // Composed supplementary outside char classes
2697         p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2698         m.reset("test\ud834\uddbc\ud834\udd6f");
2699         if (!m.matches())
2700             failCount++;
2701 
2702         m = p.matcher("test\ud834\uddc0");
2703         if (!m.matches())
2704             failCount++;
2705 
2706         */
2707 
2708         report("Canonical Equivalence");
2709     }
2710 
2711     /**
2712      * A basic sanity test of Matcher.replaceAll().
2713      */
2714     private static void globalSubstitute() throws Exception {
2715         // Global substitution with a literal
2716         Pattern p = Pattern.compile("(ab)(c*)");
2717         Matcher m = p.matcher("abccczzzabcczzzabccc");
2718         if (!m.replaceAll("test").equals("testzzztestzzztest"))
2719             failCount++;
2720 
2721         m.reset("zzzabccczzzabcczzzabccczzz");
2722         if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2723             failCount++;
2724 
2725         // Global substitution with groups
2726         m.reset("zzzabccczzzabcczzzabccczzz");
2727         String result = m.replaceAll("$1");
2728         if (!result.equals("zzzabzzzabzzzabzzz"))
2729             failCount++;
2730 
2731         // Supplementary character test
2732         // Global substitution with a literal
2733         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2734         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2735         if (!m.replaceAll(toSupplementaries("test")).
2736             equals(toSupplementaries("testzzztestzzztest")))
2737             failCount++;
2738 
2739         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2740         if (!m.replaceAll(toSupplementaries("test")).
2741             equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2742             failCount++;
2743 
2744         // Global substitution with groups
2745         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2746         result = m.replaceAll("$1");
2747         if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2748             failCount++;
2749 
2750         report("Global Substitution");
2751     }
2752 
2753     /**
2754      * Tests the usage of Matcher.appendReplacement() with literal
2755      * and group substitutions.
2756      */
2757     private static void stringbufferSubstitute() throws Exception {
2758         // SB substitution with literal
2759         String blah = "zzzblahzzz";
2760         Pattern p = Pattern.compile("blah");
2761         Matcher m = p.matcher(blah);
2762         StringBuffer result = new StringBuffer();
2763         try {
2764             m.appendReplacement(result, "blech");
2765             failCount++;
2766         } catch (IllegalStateException e) {
2767         }
2768         m.find();
2769         m.appendReplacement(result, "blech");
2770         if (!result.toString().equals("zzzblech"))
2771             failCount++;
2772 
2773         m.appendTail(result);
2774         if (!result.toString().equals("zzzblechzzz"))
2775             failCount++;
2776 
2777         // SB substitution with groups
2778         blah = "zzzabcdzzz";
2779         p = Pattern.compile("(ab)(cd)*");
2780         m = p.matcher(blah);
2781         result = new StringBuffer();
2782         try {
2783             m.appendReplacement(result, "$1");
2784             failCount++;
2785         } catch (IllegalStateException e) {
2786         }
2787         m.find();
2788         m.appendReplacement(result, "$1");
2789         if (!result.toString().equals("zzzab"))
2790             failCount++;
2791 
2792         m.appendTail(result);
2793         if (!result.toString().equals("zzzabzzz"))
2794             failCount++;
2795 
2796         // SB substitution with 3 groups
2797         blah = "zzzabcdcdefzzz";
2798         p = Pattern.compile("(ab)(cd)*(ef)");
2799         m = p.matcher(blah);
2800         result = new StringBuffer();
2801         try {
2802             m.appendReplacement(result, "$1w$2w$3");
2803             failCount++;
2804         } catch (IllegalStateException e) {
2805         }
2806         m.find();
2807         m.appendReplacement(result, "$1w$2w$3");
2808         if (!result.toString().equals("zzzabwcdwef"))
2809             failCount++;
2810 
2811         m.appendTail(result);
2812         if (!result.toString().equals("zzzabwcdwefzzz"))
2813             failCount++;
2814 
2815         // SB substitution with groups and three matches
2816         // skipping middle match
2817         blah = "zzzabcdzzzabcddzzzabcdzzz";
2818         p = Pattern.compile("(ab)(cd*)");
2819         m = p.matcher(blah);
2820         result = new StringBuffer();
2821         try {
2822             m.appendReplacement(result, "$1");
2823             failCount++;
2824         } catch (IllegalStateException e) {
2825         }
2826         m.find();
2827         m.appendReplacement(result, "$1");
2828         if (!result.toString().equals("zzzab"))
2829             failCount++;
2830 
2831         m.find();
2832         m.find();
2833         m.appendReplacement(result, "$2");
2834         if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2835             failCount++;
2836 
2837         m.appendTail(result);
2838         if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2839             failCount++;
2840 
2841         // Check to make sure escaped $ is ignored
2842         blah = "zzzabcdcdefzzz";
2843         p = Pattern.compile("(ab)(cd)*(ef)");
2844         m = p.matcher(blah);
2845         result = new StringBuffer();
2846         m.find();
2847         m.appendReplacement(result, "$1w\\$2w$3");
2848         if (!result.toString().equals("zzzabw$2wef"))
2849             failCount++;
2850 
2851         m.appendTail(result);
2852         if (!result.toString().equals("zzzabw$2wefzzz"))
2853             failCount++;
2854 
2855         // Check to make sure a reference to nonexistent group causes error
2856         blah = "zzzabcdcdefzzz";
2857         p = Pattern.compile("(ab)(cd)*(ef)");
2858         m = p.matcher(blah);
2859         result = new StringBuffer();
2860         m.find();
2861         try {
2862             m.appendReplacement(result, "$1w$5w$3");
2863             failCount++;
2864         } catch (IndexOutOfBoundsException ioobe) {
2865             // Correct result
2866         }
2867 
2868         // Check double digit group references
2869         blah = "zzz123456789101112zzz";
2870         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2871         m = p.matcher(blah);
2872         result = new StringBuffer();
2873         m.find();
2874         m.appendReplacement(result, "$1w$11w$3");
2875         if (!result.toString().equals("zzz1w11w3"))
2876             failCount++;
2877 
2878         // Check to make sure it backs off $15 to $1 if only three groups
2879         blah = "zzzabcdcdefzzz";
2880         p = Pattern.compile("(ab)(cd)*(ef)");
2881         m = p.matcher(blah);
2882         result = new StringBuffer();
2883         m.find();
2884         m.appendReplacement(result, "$1w$15w$3");
2885         if (!result.toString().equals("zzzabwab5wef"))
2886             failCount++;
2887 
2888 
2889         // Supplementary character test
2890         // SB substitution with literal
2891         blah = toSupplementaries("zzzblahzzz");
2892         p = Pattern.compile(toSupplementaries("blah"));
2893         m = p.matcher(blah);
2894         result = new StringBuffer();
2895         try {
2896             m.appendReplacement(result, toSupplementaries("blech"));
2897             failCount++;
2898         } catch (IllegalStateException e) {
2899         }
2900         m.find();
2901         m.appendReplacement(result, toSupplementaries("blech"));
2902         if (!result.toString().equals(toSupplementaries("zzzblech")))
2903             failCount++;
2904 
2905         m.appendTail(result);
2906         if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2907             failCount++;
2908 
2909         // SB substitution with groups
2910         blah = toSupplementaries("zzzabcdzzz");
2911         p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2912         m = p.matcher(blah);
2913         result = new StringBuffer();
2914         try {
2915             m.appendReplacement(result, "$1");
2916             failCount++;
2917         } catch (IllegalStateException e) {
2918         }
2919         m.find();
2920         m.appendReplacement(result, "$1");
2921         if (!result.toString().equals(toSupplementaries("zzzab")))
2922             failCount++;
2923 
2924         m.appendTail(result);
2925         if (!result.toString().equals(toSupplementaries("zzzabzzz")))
2926             failCount++;
2927 
2928         // SB substitution with 3 groups
2929         blah = toSupplementaries("zzzabcdcdefzzz");
2930         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2931         m = p.matcher(blah);
2932         result = new StringBuffer();
2933         try {
2934             m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2935             failCount++;
2936         } catch (IllegalStateException e) {
2937         }
2938         m.find();
2939         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2940         if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
2941             failCount++;
2942 
2943         m.appendTail(result);
2944         if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
2945             failCount++;
2946 
2947         // SB substitution with groups and three matches
2948         // skipping middle match
2949         blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2950         p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2951         m = p.matcher(blah);
2952         result = new StringBuffer();
2953         try {
2954             m.appendReplacement(result, "$1");
2955             failCount++;
2956         } catch (IllegalStateException e) {
2957         }
2958         m.find();
2959         m.appendReplacement(result, "$1");
2960         if (!result.toString().equals(toSupplementaries("zzzab")))
2961             failCount++;
2962 
2963         m.find();
2964         m.find();
2965         m.appendReplacement(result, "$2");
2966         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
2967             failCount++;
2968 
2969         m.appendTail(result);
2970         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
2971             failCount++;
2972 
2973         // Check to make sure escaped $ is ignored
2974         blah = toSupplementaries("zzzabcdcdefzzz");
2975         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2976         m = p.matcher(blah);
2977         result = new StringBuffer();
2978         m.find();
2979         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2980         if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
2981             failCount++;
2982 
2983         m.appendTail(result);
2984         if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
2985             failCount++;
2986 
2987         // Check to make sure a reference to nonexistent group causes error
2988         blah = toSupplementaries("zzzabcdcdefzzz");
2989         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2990         m = p.matcher(blah);
2991         result = new StringBuffer();
2992         m.find();
2993         try {
2994             m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
2995             failCount++;
2996         } catch (IndexOutOfBoundsException ioobe) {
2997             // Correct result
2998         }
2999 
3000         // Check double digit group references
3001         blah = toSupplementaries("zzz123456789101112zzz");
3002         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3003         m = p.matcher(blah);
3004         result = new StringBuffer();
3005         m.find();
3006         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
3007         if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
3008             failCount++;
3009 
3010         // Check to make sure it backs off $15 to $1 if only three groups
3011         blah = toSupplementaries("zzzabcdcdefzzz");
3012         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3013         m = p.matcher(blah);
3014         result = new StringBuffer();
3015         m.find();
3016         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3017         if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
3018             failCount++;
3019 
3020         // Check nothing has been appended into the output buffer if
3021         // the replacement string triggers IllegalArgumentException.
3022         p = Pattern.compile("(abc)");
3023         m = p.matcher("abcd");
3024         result = new StringBuffer();
3025         m.find();
3026         try {
3027             m.appendReplacement(result, ("xyz$g"));
3028             failCount++;
3029         } catch (IllegalArgumentException iae) {
3030             if (result.length() != 0)
3031                 failCount++;
3032         }
3033 
3034         report("SB Substitution");
3035     }
3036 
3037     /**
3038      * Tests the usage of Matcher.appendReplacement() with literal
3039      * and group substitutions.
3040      */
3041     private static void stringbuilderSubstitute() throws Exception {
3042         // SB substitution with literal
3043         String blah = "zzzblahzzz";
3044         Pattern p = Pattern.compile("blah");
3045         Matcher m = p.matcher(blah);
3046         StringBuilder result = new StringBuilder();
3047         try {
3048             m.appendReplacement(result, "blech");
3049             failCount++;
3050         } catch (IllegalStateException e) {
3051         }
3052         m.find();
3053         m.appendReplacement(result, "blech");
3054         if (!result.toString().equals("zzzblech"))
3055             failCount++;
3056 
3057         m.appendTail(result);
3058         if (!result.toString().equals("zzzblechzzz"))
3059             failCount++;
3060 
3061         // SB substitution with groups
3062         blah = "zzzabcdzzz";
3063         p = Pattern.compile("(ab)(cd)*");
3064         m = p.matcher(blah);
3065         result = new StringBuilder();
3066         try {
3067             m.appendReplacement(result, "$1");
3068             failCount++;
3069         } catch (IllegalStateException e) {
3070         }
3071         m.find();
3072         m.appendReplacement(result, "$1");
3073         if (!result.toString().equals("zzzab"))
3074             failCount++;
3075 
3076         m.appendTail(result);
3077         if (!result.toString().equals("zzzabzzz"))
3078             failCount++;
3079 
3080         // SB substitution with 3 groups
3081         blah = "zzzabcdcdefzzz";
3082         p = Pattern.compile("(ab)(cd)*(ef)");
3083         m = p.matcher(blah);
3084         result = new StringBuilder();
3085         try {
3086             m.appendReplacement(result, "$1w$2w$3");
3087             failCount++;
3088         } catch (IllegalStateException e) {
3089         }
3090         m.find();
3091         m.appendReplacement(result, "$1w$2w$3");
3092         if (!result.toString().equals("zzzabwcdwef"))
3093             failCount++;
3094 
3095         m.appendTail(result);
3096         if (!result.toString().equals("zzzabwcdwefzzz"))
3097             failCount++;
3098 
3099         // SB substitution with groups and three matches
3100         // skipping middle match
3101         blah = "zzzabcdzzzabcddzzzabcdzzz";
3102         p = Pattern.compile("(ab)(cd*)");
3103         m = p.matcher(blah);
3104         result = new StringBuilder();
3105         try {
3106             m.appendReplacement(result, "$1");
3107             failCount++;
3108         } catch (IllegalStateException e) {
3109         }
3110         m.find();
3111         m.appendReplacement(result, "$1");
3112         if (!result.toString().equals("zzzab"))
3113             failCount++;
3114 
3115         m.find();
3116         m.find();
3117         m.appendReplacement(result, "$2");
3118         if (!result.toString().equals("zzzabzzzabcddzzzcd"))
3119             failCount++;
3120 
3121         m.appendTail(result);
3122         if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
3123             failCount++;
3124 
3125         // Check to make sure escaped $ is ignored
3126         blah = "zzzabcdcdefzzz";
3127         p = Pattern.compile("(ab)(cd)*(ef)");
3128         m = p.matcher(blah);
3129         result = new StringBuilder();
3130         m.find();
3131         m.appendReplacement(result, "$1w\\$2w$3");
3132         if (!result.toString().equals("zzzabw$2wef"))
3133             failCount++;
3134 
3135         m.appendTail(result);
3136         if (!result.toString().equals("zzzabw$2wefzzz"))
3137             failCount++;
3138 
3139         // Check to make sure a reference to nonexistent group causes error
3140         blah = "zzzabcdcdefzzz";
3141         p = Pattern.compile("(ab)(cd)*(ef)");
3142         m = p.matcher(blah);
3143         result = new StringBuilder();
3144         m.find();
3145         try {
3146             m.appendReplacement(result, "$1w$5w$3");
3147             failCount++;
3148         } catch (IndexOutOfBoundsException ioobe) {
3149             // Correct result
3150         }
3151 
3152         // Check double digit group references
3153         blah = "zzz123456789101112zzz";
3154         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3155         m = p.matcher(blah);
3156         result = new StringBuilder();
3157         m.find();
3158         m.appendReplacement(result, "$1w$11w$3");
3159         if (!result.toString().equals("zzz1w11w3"))
3160             failCount++;
3161 
3162         // Check to make sure it backs off $15 to $1 if only three groups
3163         blah = "zzzabcdcdefzzz";
3164         p = Pattern.compile("(ab)(cd)*(ef)");
3165         m = p.matcher(blah);
3166         result = new StringBuilder();
3167         m.find();
3168         m.appendReplacement(result, "$1w$15w$3");
3169         if (!result.toString().equals("zzzabwab5wef"))
3170             failCount++;
3171 
3172 
3173         // Supplementary character test
3174         // SB substitution with literal
3175         blah = toSupplementaries("zzzblahzzz");
3176         p = Pattern.compile(toSupplementaries("blah"));
3177         m = p.matcher(blah);
3178         result = new StringBuilder();
3179         try {
3180             m.appendReplacement(result, toSupplementaries("blech"));
3181             failCount++;
3182         } catch (IllegalStateException e) {
3183         }
3184         m.find();
3185         m.appendReplacement(result, toSupplementaries("blech"));
3186         if (!result.toString().equals(toSupplementaries("zzzblech")))
3187             failCount++;
3188         m.appendTail(result);
3189         if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
3190             failCount++;
3191 
3192         // SB substitution with groups
3193         blah = toSupplementaries("zzzabcdzzz");
3194         p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
3195         m = p.matcher(blah);
3196         result = new StringBuilder();
3197         try {
3198             m.appendReplacement(result, "$1");
3199             failCount++;
3200         } catch (IllegalStateException e) {
3201         }
3202         m.find();
3203         m.appendReplacement(result, "$1");
3204         if (!result.toString().equals(toSupplementaries("zzzab")))
3205             failCount++;
3206 
3207         m.appendTail(result);
3208         if (!result.toString().equals(toSupplementaries("zzzabzzz")))
3209             failCount++;
3210 
3211         // SB substitution with 3 groups
3212         blah = toSupplementaries("zzzabcdcdefzzz");
3213         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3214         m = p.matcher(blah);
3215         result = new StringBuilder();
3216         try {
3217             m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3218             failCount++;
3219         } catch (IllegalStateException e) {
3220         }
3221         m.find();
3222         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3223         if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
3224             failCount++;
3225 
3226         m.appendTail(result);
3227         if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
3228             failCount++;
3229 
3230         // SB substitution with groups and three matches
3231         // skipping middle match
3232         blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
3233         p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
3234         m = p.matcher(blah);
3235         result = new StringBuilder();
3236         try {
3237             m.appendReplacement(result, "$1");
3238             failCount++;
3239         } catch (IllegalStateException e) {
3240         }
3241         m.find();
3242         m.appendReplacement(result, "$1");
3243         if (!result.toString().equals(toSupplementaries("zzzab")))
3244             failCount++;
3245 
3246         m.find();
3247         m.find();
3248         m.appendReplacement(result, "$2");
3249         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
3250             failCount++;
3251 
3252         m.appendTail(result);
3253         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
3254             failCount++;
3255 
3256         // Check to make sure escaped $ is ignored
3257         blah = toSupplementaries("zzzabcdcdefzzz");
3258         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3259         m = p.matcher(blah);
3260         result = new StringBuilder();
3261         m.find();
3262         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
3263         if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
3264             failCount++;
3265 
3266         m.appendTail(result);
3267         if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
3268             failCount++;
3269 
3270         // Check to make sure a reference to nonexistent group causes error
3271         blah = toSupplementaries("zzzabcdcdefzzz");
3272         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3273         m = p.matcher(blah);
3274         result = new StringBuilder();
3275         m.find();
3276         try {
3277             m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
3278             failCount++;
3279         } catch (IndexOutOfBoundsException ioobe) {
3280             // Correct result
3281         }
3282         // Check double digit group references
3283         blah = toSupplementaries("zzz123456789101112zzz");
3284         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3285         m = p.matcher(blah);
3286         result = new StringBuilder();
3287         m.find();
3288         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
3289         if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
3290             failCount++;
3291 
3292         // Check to make sure it backs off $15 to $1 if only three groups
3293         blah = toSupplementaries("zzzabcdcdefzzz");
3294         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3295         m = p.matcher(blah);
3296         result = new StringBuilder();
3297         m.find();
3298         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3299         if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
3300             failCount++;
3301         // Check nothing has been appended into the output buffer if
3302         // the replacement string triggers IllegalArgumentException.
3303         p = Pattern.compile("(abc)");
3304         m = p.matcher("abcd");
3305         result = new StringBuilder();
3306         m.find();
3307         try {
3308             m.appendReplacement(result, ("xyz$g"));
3309             failCount++;
3310         } catch (IllegalArgumentException iae) {
3311             if (result.length() != 0)
3312                 failCount++;
3313         }
3314         report("SB Substitution 2");
3315     }
3316 
3317     /*
3318      * 5 groups of characters are created to make a substitution string.
3319      * A base string will be created including random lead chars, the
3320      * substitution string, and random trailing chars.
3321      * A pattern containing the 5 groups is searched for and replaced with:
3322      * random group + random string + random group.
3323      * The results are checked for correctness.
3324      */
3325     private static void substitutionBasher() {
3326         for (int runs = 0; runs<1000; runs++) {
3327             // Create a base string to work in
3328             int leadingChars = generator.nextInt(10);
3329             StringBuffer baseBuffer = new StringBuffer(100);
3330             String leadingString = getRandomAlphaString(leadingChars);
3331             baseBuffer.append(leadingString);
3332 
3333             // Create 5 groups of random number of random chars
3334             // Create the string to substitute
3335             // Create the pattern string to search for
3336             StringBuffer bufferToSub = new StringBuffer(25);
3337             StringBuffer bufferToPat = new StringBuffer(50);
3338             String[] groups = new String[5];
3339             for(int i=0; i<5; i++) {
3340                 int aGroupSize = generator.nextInt(5)+1;
3341                 groups[i] = getRandomAlphaString(aGroupSize);
3342                 bufferToSub.append(groups[i]);
3343                 bufferToPat.append('(');
3344                 bufferToPat.append(groups[i]);
3345                 bufferToPat.append(')');
3346             }
3347             String stringToSub = bufferToSub.toString();
3348             String pattern = bufferToPat.toString();
3349 
3350             // Place sub string into working string at random index
3351             baseBuffer.append(stringToSub);
3352 
3353             // Append random chars to end
3354             int trailingChars = generator.nextInt(10);
3355             String trailingString = getRandomAlphaString(trailingChars);
3356             baseBuffer.append(trailingString);
3357             String baseString = baseBuffer.toString();
3358 
3359             // Create test pattern and matcher
3360             Pattern p = Pattern.compile(pattern);
3361             Matcher m = p.matcher(baseString);
3362 
3363             // Reject candidate if pattern happens to start early
3364             m.find();
3365             if (m.start() < leadingChars)
3366                 continue;
3367 
3368             // Reject candidate if more than one match
3369             if (m.find())
3370                 continue;
3371 
3372             // Construct a replacement string with :
3373             // random group + random string + random group
3374             StringBuffer bufferToRep = new StringBuffer();
3375             int groupIndex1 = generator.nextInt(5);
3376             bufferToRep.append("$" + (groupIndex1 + 1));
3377             String randomMidString = getRandomAlphaString(5);
3378             bufferToRep.append(randomMidString);
3379             int groupIndex2 = generator.nextInt(5);
3380             bufferToRep.append("$" + (groupIndex2 + 1));
3381             String replacement = bufferToRep.toString();
3382 
3383             // Do the replacement
3384             String result = m.replaceAll(replacement);
3385 
3386             // Construct expected result
3387             StringBuffer bufferToRes = new StringBuffer();
3388             bufferToRes.append(leadingString);
3389             bufferToRes.append(groups[groupIndex1]);
3390             bufferToRes.append(randomMidString);
3391             bufferToRes.append(groups[groupIndex2]);
3392             bufferToRes.append(trailingString);
3393             String expectedResult = bufferToRes.toString();
3394 
3395             // Check results
3396             if (!result.equals(expectedResult))
3397                 failCount++;
3398         }
3399 
3400         report("Substitution Basher");
3401     }
3402 
3403     /*
3404      * 5 groups of characters are created to make a substitution string.
3405      * A base string will be created including random lead chars, the
3406      * substitution string, and random trailing chars.
3407      * A pattern containing the 5 groups is searched for and replaced with:
3408      * random group + random string + random group.
3409      * The results are checked for correctness.
3410      */
3411     private static void substitutionBasher2() {
3412         for (int runs = 0; runs<1000; runs++) {
3413             // Create a base string to work in
3414             int leadingChars = generator.nextInt(10);
3415             StringBuilder baseBuffer = new StringBuilder(100);
3416             String leadingString = getRandomAlphaString(leadingChars);
3417             baseBuffer.append(leadingString);
3418 
3419             // Create 5 groups of random number of random chars
3420             // Create the string to substitute
3421             // Create the pattern string to search for
3422             StringBuilder bufferToSub = new StringBuilder(25);
3423             StringBuilder bufferToPat = new StringBuilder(50);
3424             String[] groups = new String[5];
3425             for(int i=0; i<5; i++) {
3426                 int aGroupSize = generator.nextInt(5)+1;
3427                 groups[i] = getRandomAlphaString(aGroupSize);
3428                 bufferToSub.append(groups[i]);
3429                 bufferToPat.append('(');
3430                 bufferToPat.append(groups[i]);
3431                 bufferToPat.append(')');
3432             }
3433             String stringToSub = bufferToSub.toString();
3434             String pattern = bufferToPat.toString();
3435 
3436             // Place sub string into working string at random index
3437             baseBuffer.append(stringToSub);
3438 
3439             // Append random chars to end
3440             int trailingChars = generator.nextInt(10);
3441             String trailingString = getRandomAlphaString(trailingChars);
3442             baseBuffer.append(trailingString);
3443             String baseString = baseBuffer.toString();
3444 
3445             // Create test pattern and matcher
3446             Pattern p = Pattern.compile(pattern);
3447             Matcher m = p.matcher(baseString);
3448 
3449             // Reject candidate if pattern happens to start early
3450             m.find();
3451             if (m.start() < leadingChars)
3452                 continue;
3453 
3454             // Reject candidate if more than one match
3455             if (m.find())
3456                 continue;
3457 
3458             // Construct a replacement string with :
3459             // random group + random string + random group
3460             StringBuilder bufferToRep = new StringBuilder();
3461             int groupIndex1 = generator.nextInt(5);
3462             bufferToRep.append("$" + (groupIndex1 + 1));
3463             String randomMidString = getRandomAlphaString(5);
3464             bufferToRep.append(randomMidString);
3465             int groupIndex2 = generator.nextInt(5);
3466             bufferToRep.append("$" + (groupIndex2 + 1));
3467             String replacement = bufferToRep.toString();
3468 
3469             // Do the replacement
3470             String result = m.replaceAll(replacement);
3471 
3472             // Construct expected result
3473             StringBuilder bufferToRes = new StringBuilder();
3474             bufferToRes.append(leadingString);
3475             bufferToRes.append(groups[groupIndex1]);
3476             bufferToRes.append(randomMidString);
3477             bufferToRes.append(groups[groupIndex2]);
3478             bufferToRes.append(trailingString);
3479             String expectedResult = bufferToRes.toString();
3480 
3481             // Check results
3482             if (!result.equals(expectedResult)) {
3483                 failCount++;
3484             }
3485         }
3486 
3487         report("Substitution Basher 2");
3488     }
3489 
3490     /**
3491      * Checks the handling of some escape sequences that the Pattern
3492      * class should process instead of the java compiler. These are
3493      * not in the file because the escapes should be be processed
3494      * by the Pattern class when the regex is compiled.
3495      */
3496     private static void escapes() throws Exception {
3497         Pattern p = Pattern.compile("\\043");
3498         Matcher m = p.matcher("#");
3499         if (!m.find())
3500             failCount++;
3501 
3502         p = Pattern.compile("\\x23");
3503         m = p.matcher("#");
3504         if (!m.find())
3505             failCount++;
3506 
3507         p = Pattern.compile("\\u0023");
3508         m = p.matcher("#");
3509         if (!m.find())
3510             failCount++;
3511 
3512         report("Escape sequences");
3513     }
3514 
3515     /**
3516      * Checks the handling of blank input situations. These
3517      * tests are incompatible with my test file format.
3518      */
3519     private static void blankInput() throws Exception {
3520         Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3521         Matcher m = p.matcher("");
3522         if (m.find())
3523             failCount++;
3524 
3525         p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3526         m = p.matcher("");
3527         if (!m.find())
3528             failCount++;
3529 
3530         p = Pattern.compile("abc");
3531         m = p.matcher("");
3532         if (m.find())
3533             failCount++;
3534 
3535         p = Pattern.compile("a*");
3536         m = p.matcher("");
3537         if (!m.find())
3538             failCount++;
3539 
3540         report("Blank input");
3541     }
3542 
3543     /**
3544      * Tests the Boyer-Moore pattern matching of a character sequence
3545      * on randomly generated patterns.
3546      */
3547     private static void bm() throws Exception {
3548         doBnM('a');
3549         report("Boyer Moore (ASCII)");
3550 
3551         doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3552         report("Boyer Moore (Supplementary)");
3553     }
3554 
3555     private static void doBnM(int baseCharacter) throws Exception {
3556         int achar=0;
3557 
3558         for (int i=0; i<100; i++) {
3559             // Create a short pattern to search for
3560             int patternLength = generator.nextInt(7) + 4;
3561             StringBuffer patternBuffer = new StringBuffer(patternLength);
3562             String pattern;
3563             retry: for (;;) {
3564                 for (int x=0; x<patternLength; x++) {
3565                     int ch = baseCharacter + generator.nextInt(26);
3566                     if (Character.isSupplementaryCodePoint(ch)) {
3567                         patternBuffer.append(Character.toChars(ch));
3568                     } else {
3569                         patternBuffer.append((char)ch);
3570                     }
3571                 }
3572                 pattern = patternBuffer.toString();
3573 
3574                 // Avoid patterns that start and end with the same substring
3575                 // See JDK-6854417
3576                 for (int x=1; x < pattern.length(); x++) {
3577                     if (pattern.startsWith(pattern.substring(x)))
3578                         continue retry;
3579                 }
3580                 break;
3581             }
3582             Pattern p = Pattern.compile(pattern);
3583 
3584             // Create a buffer with random ASCII chars that does
3585             // not match the sample
3586             String toSearch = null;
3587             StringBuffer s = null;
3588             Matcher m = p.matcher("");
3589             do {
3590                 s = new StringBuffer(100);
3591                 for (int x=0; x<100; x++) {
3592                     int ch = baseCharacter + generator.nextInt(26);
3593                     if (Character.isSupplementaryCodePoint(ch)) {
3594                         s.append(Character.toChars(ch));
3595                     } else {
3596                         s.append((char)ch);
3597                     }
3598                 }
3599                 toSearch = s.toString();
3600                 m.reset(toSearch);
3601             } while (m.find());
3602 
3603             // Insert the pattern at a random spot
3604             int insertIndex = generator.nextInt(99);
3605             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3606                 insertIndex++;
3607             s = s.insert(insertIndex, pattern);
3608             toSearch = s.toString();
3609 
3610             // Make sure that the pattern is found
3611             m.reset(toSearch);
3612             if (!m.find())
3613                 failCount++;
3614 
3615             // Make sure that the match text is the pattern
3616             if (!m.group().equals(pattern))
3617                 failCount++;
3618 
3619             // Make sure match occured at insertion point
3620             if (m.start() != insertIndex)
3621                 failCount++;
3622         }
3623     }
3624 
3625     /**
3626      * Tests the matching of slices on randomly generated patterns.
3627      * The Boyer-Moore optimization is not done on these patterns
3628      * because it uses unicode case folding.
3629      */
3630     private static void slice() throws Exception {
3631         doSlice(Character.MAX_VALUE);
3632         report("Slice");
3633 
3634         doSlice(Character.MAX_CODE_POINT);
3635         report("Slice (Supplementary)");
3636     }
3637 
3638     private static void doSlice(int maxCharacter) throws Exception {
3639         Random generator = new Random();
3640         int achar=0;
3641 
3642         for (int i=0; i<100; i++) {
3643             // Create a short pattern to search for
3644             int patternLength = generator.nextInt(7) + 4;
3645             StringBuffer patternBuffer = new StringBuffer(patternLength);
3646             for (int x=0; x<patternLength; x++) {
3647                 int randomChar = 0;
3648                 while (!Character.isLetterOrDigit(randomChar))
3649                     randomChar = generator.nextInt(maxCharacter);
3650                 if (Character.isSupplementaryCodePoint(randomChar)) {
3651                     patternBuffer.append(Character.toChars(randomChar));
3652                 } else {
3653                     patternBuffer.append((char) randomChar);
3654                 }
3655             }
3656             String pattern =  patternBuffer.toString();
3657             Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3658 
3659             // Create a buffer with random chars that does not match the sample
3660             String toSearch = null;
3661             StringBuffer s = null;
3662             Matcher m = p.matcher("");
3663             do {
3664                 s = new StringBuffer(100);
3665                 for (int x=0; x<100; x++) {
3666                     int randomChar = 0;
3667                     while (!Character.isLetterOrDigit(randomChar))
3668                         randomChar = generator.nextInt(maxCharacter);
3669                     if (Character.isSupplementaryCodePoint(randomChar)) {
3670                         s.append(Character.toChars(randomChar));
3671                     } else {
3672                         s.append((char) randomChar);
3673                     }
3674                 }
3675                 toSearch = s.toString();
3676                 m.reset(toSearch);
3677             } while (m.find());
3678 
3679             // Insert the pattern at a random spot
3680             int insertIndex = generator.nextInt(99);
3681             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3682                 insertIndex++;
3683             s = s.insert(insertIndex, pattern);
3684             toSearch = s.toString();
3685 
3686             // Make sure that the pattern is found
3687             m.reset(toSearch);
3688             if (!m.find())
3689                 failCount++;
3690 
3691             // Make sure that the match text is the pattern
3692             if (!m.group().equals(pattern))
3693                 failCount++;
3694 
3695             // Make sure match occured at insertion point
3696             if (m.start() != insertIndex)
3697                 failCount++;
3698         }
3699     }
3700 
3701     private static void explainFailure(String pattern, String data,
3702                                        String expected, String actual) {
3703         System.err.println("----------------------------------------");
3704         System.err.println("Pattern = "+pattern);
3705         System.err.println("Data = "+data);
3706         System.err.println("Expected = " + expected);
3707         System.err.println("Actual   = " + actual);
3708     }
3709 
3710     private static void explainFailure(String pattern, String data,
3711                                        Throwable t) {
3712         System.err.println("----------------------------------------");
3713         System.err.println("Pattern = "+pattern);
3714         System.err.println("Data = "+data);
3715         t.printStackTrace(System.err);
3716     }
3717 
3718     // Testing examples from a file
3719 
3720     /**
3721      * Goes through the file "TestCases.txt" and creates many patterns
3722      * described in the file, matching the patterns against input lines in
3723      * the file, and comparing the results against the correct results
3724      * also found in the file. The file format is described in comments
3725      * at the head of the file.
3726      */
3727     private static void processFile(String fileName) throws Exception {
3728         File testCases = new File(System.getProperty("test.src", "."),
3729                                   fileName);
3730         FileInputStream in = new FileInputStream(testCases);
3731         BufferedReader r = new BufferedReader(new InputStreamReader(in));
3732 
3733         // Process next test case.
3734         String aLine;
3735         while((aLine = r.readLine()) != null) {
3736             // Read a line for pattern
3737             String patternString = grabLine(r);
3738             Pattern p = null;
3739             try {
3740                 p = compileTestPattern(patternString);
3741             } catch (PatternSyntaxException e) {
3742                 String dataString = grabLine(r);
3743                 String expectedResult = grabLine(r);
3744                 if (expectedResult.startsWith("error"))
3745                     continue;
3746                 explainFailure(patternString, dataString, e);
3747                 failCount++;
3748                 continue;
3749             }
3750 
3751             // Read a line for input string
3752             String dataString = grabLine(r);
3753             Matcher m = p.matcher(dataString);
3754             StringBuffer result = new StringBuffer();
3755 
3756             // Check for IllegalStateExceptions before a match
3757             failCount += preMatchInvariants(m);
3758 
3759             boolean found = m.find();
3760 
3761             if (found)
3762                 failCount += postTrueMatchInvariants(m);
3763             else
3764                 failCount += postFalseMatchInvariants(m);
3765 
3766             if (found) {
3767                 result.append("true ");
3768                 result.append(m.group(0) + " ");
3769             } else {
3770                 result.append("false ");
3771             }
3772 
3773             result.append(m.groupCount());
3774 
3775             if (found) {
3776                 for (int i=1; i<m.groupCount()+1; i++)
3777                     if (m.group(i) != null)
3778                         result.append(" " +m.group(i));
3779             }
3780 
3781             // Read a line for the expected result
3782             String expectedResult = grabLine(r);
3783 
3784             if (!result.toString().equals(expectedResult)) {
3785                 explainFailure(patternString, dataString, expectedResult, result.toString());
3786                 failCount++;
3787             }
3788         }
3789 
3790         report(fileName);
3791     }
3792 
3793     private static int preMatchInvariants(Matcher m) {
3794         int failCount = 0;
3795         try {
3796             m.start();
3797             failCount++;
3798         } catch (IllegalStateException ise) {}
3799         try {
3800             m.end();
3801             failCount++;
3802         } catch (IllegalStateException ise) {}
3803         try {
3804             m.group();
3805             failCount++;
3806         } catch (IllegalStateException ise) {}
3807         return failCount;
3808     }
3809 
3810     private static int postFalseMatchInvariants(Matcher m) {
3811         int failCount = 0;
3812         try {
3813             m.group();
3814             failCount++;
3815         } catch (IllegalStateException ise) {}
3816         try {
3817             m.start();
3818             failCount++;
3819         } catch (IllegalStateException ise) {}
3820         try {
3821             m.end();
3822             failCount++;
3823         } catch (IllegalStateException ise) {}
3824         return failCount;
3825     }
3826 
3827     private static int postTrueMatchInvariants(Matcher m) {
3828         int failCount = 0;
3829         //assert(m.start() = m.start(0);
3830         if (m.start() != m.start(0))
3831             failCount++;
3832         //assert(m.end() = m.end(0);
3833         if (m.start() != m.start(0))
3834             failCount++;
3835         //assert(m.group() = m.group(0);
3836         if (!m.group().equals(m.group(0)))
3837             failCount++;
3838         try {
3839             m.group(50);
3840             failCount++;
3841         } catch (IndexOutOfBoundsException ise) {}
3842 
3843         return failCount;
3844     }
3845 
3846     private static Pattern compileTestPattern(String patternString) {
3847         if (!patternString.startsWith("'")) {
3848             return Pattern.compile(patternString);
3849         }
3850 
3851         int break1 = patternString.lastIndexOf("'");
3852         String flagString = patternString.substring(
3853                                           break1+1, patternString.length());
3854         patternString = patternString.substring(1, break1);
3855 
3856         if (flagString.equals("i"))
3857             return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3858 
3859         if (flagString.equals("m"))
3860             return Pattern.compile(patternString, Pattern.MULTILINE);
3861 
3862         return Pattern.compile(patternString);
3863     }
3864 
3865     /**
3866      * Reads a line from the input file. Keeps reading lines until a non
3867      * empty non comment line is read. If the line contains a \n then
3868      * these two characters are replaced by a newline char. If a \\uxxxx
3869      * sequence is read then the sequence is replaced by the unicode char.
3870      */
3871     private static String grabLine(BufferedReader r) throws Exception {
3872         int index = 0;
3873         String line = r.readLine();
3874         while (line.startsWith("//") || line.length() < 1)
3875             line = r.readLine();
3876         while ((index = line.indexOf("\\n")) != -1) {
3877             StringBuffer temp = new StringBuffer(line);
3878             temp.replace(index, index+2, "\n");
3879             line = temp.toString();
3880         }
3881         while ((index = line.indexOf("\\u")) != -1) {
3882             StringBuffer temp = new StringBuffer(line);
3883             String value = temp.substring(index+2, index+6);
3884             char aChar = (char)Integer.parseInt(value, 16);
3885             String unicodeChar = "" + aChar;
3886             temp.replace(index, index+6, unicodeChar);
3887             line = temp.toString();
3888         }
3889 
3890         return line;
3891     }
3892 
3893     private static void check(Pattern p, String s, String g, String expected) {
3894         Matcher m = p.matcher(s);
3895         m.find();
3896         if (!m.group(g).equals(expected) ||
3897             s.charAt(m.start(g)) != expected.charAt(0) ||
3898             s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1))
3899             failCount++;
3900     }
3901 
3902     private static void checkReplaceFirst(String p, String s, String r, String expected)
3903     {
3904         if (!expected.equals(Pattern.compile(p)
3905                                     .matcher(s)
3906                                     .replaceFirst(r)))
3907             failCount++;
3908     }
3909 
3910     private static void checkReplaceAll(String p, String s, String r, String expected)
3911     {
3912         if (!expected.equals(Pattern.compile(p)
3913                                     .matcher(s)
3914                                     .replaceAll(r)))
3915             failCount++;
3916     }
3917 
3918     private static void checkExpectedFail(String p) {
3919         try {
3920             Pattern.compile(p);
3921         } catch (PatternSyntaxException pse) {
3922             //pse.printStackTrace();
3923             return;
3924         }
3925         failCount++;
3926     }
3927 
3928     private static void checkExpectedIAE(Matcher m, String g) {
3929         m.find();
3930         try {
3931             m.group(g);
3932         } catch (IllegalArgumentException x) {
3933             //iae.printStackTrace();
3934             try {
3935                 m.start(g);
3936             } catch (IllegalArgumentException xx) {
3937                 try {
3938                     m.start(g);
3939                 } catch (IllegalArgumentException xxx) {
3940                     return;
3941                 }
3942             }
3943         }
3944         failCount++;
3945     }
3946 
3947     private static void checkExpectedNPE(Matcher m) {
3948         m.find();
3949         try {
3950             m.group(null);
3951         } catch (NullPointerException x) {
3952             try {
3953                 m.start(null);
3954             } catch (NullPointerException xx) {
3955                 try {
3956                     m.end(null);
3957                 } catch (NullPointerException xxx) {
3958                     return;
3959                 }
3960             }
3961         }
3962         failCount++;
3963     }
3964 
3965     private static void namedGroupCaptureTest() throws Exception {
3966         check(Pattern.compile("x+(?<gname>y+)z+"),
3967               "xxxyyyzzz",
3968               "gname",
3969               "yyy");
3970 
3971         check(Pattern.compile("x+(?<gname8>y+)z+"),
3972               "xxxyyyzzz",
3973               "gname8",
3974               "yyy");
3975 
3976         //backref
3977         Pattern pattern = Pattern.compile("(a*)bc\\1");
3978         check(pattern, "zzzaabcazzz", true);  // found "abca"
3979 
3980         check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
3981               "zzzaabcaazzz", true);
3982 
3983         check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
3984               "abcdefabc", true);
3985 
3986         check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
3987               "abcdefghijkk", true);
3988 
3989         // Supplementary character tests
3990         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3991               toSupplementaries("zzzaabcazzz"), true);
3992 
3993         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3994               toSupplementaries("zzzaabcaazzz"), true);
3995 
3996         check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
3997               toSupplementaries("abcdefabc"), true);
3998 
3999         check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
4000                               "(?<gname>" +
4001                               toSupplementaries("k)") + "\\k<gname>"),
4002               toSupplementaries("abcdefghijkk"), true);
4003 
4004         check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
4005               "xxxyyyzzzyyy",
4006               "gname",
4007               "yyy");
4008 
4009         //replaceFirst/All
4010         checkReplaceFirst("(?<gn>ab)(c*)",
4011                           "abccczzzabcczzzabccc",
4012                           "${gn}",
4013                           "abzzzabcczzzabccc");
4014 
4015         checkReplaceAll("(?<gn>ab)(c*)",
4016                         "abccczzzabcczzzabccc",
4017                         "${gn}",
4018                         "abzzzabzzzab");
4019 
4020 
4021         checkReplaceFirst("(?<gn>ab)(c*)",
4022                           "zzzabccczzzabcczzzabccczzz",
4023                           "${gn}",
4024                           "zzzabzzzabcczzzabccczzz");
4025 
4026         checkReplaceAll("(?<gn>ab)(c*)",
4027                         "zzzabccczzzabcczzzabccczzz",
4028                         "${gn}",
4029                         "zzzabzzzabzzzabzzz");
4030 
4031         checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
4032                           "zzzabccczzzabcczzzabccczzz",
4033                           "${gn2}",
4034                           "zzzccczzzabcczzzabccczzz");
4035 
4036         checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
4037                         "zzzabccczzzabcczzzabccczzz",
4038                         "${gn2}",
4039                         "zzzccczzzcczzzccczzz");
4040 
4041         //toSupplementaries("(ab)(c*)"));
4042         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4043                            ")(?<gn2>" + toSupplementaries("c") + "*)",
4044                           toSupplementaries("abccczzzabcczzzabccc"),
4045                           "${gn1}",
4046                           toSupplementaries("abzzzabcczzzabccc"));
4047 
4048 
4049         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4050                         ")(?<gn2>" + toSupplementaries("c") + "*)",
4051                         toSupplementaries("abccczzzabcczzzabccc"),
4052                         "${gn1}",
4053                         toSupplementaries("abzzzabzzzab"));
4054 
4055         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4056                            ")(?<gn2>" + toSupplementaries("c") + "*)",
4057                           toSupplementaries("abccczzzabcczzzabccc"),
4058                           "${gn2}",
4059                           toSupplementaries("ccczzzabcczzzabccc"));
4060 
4061 
4062         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4063                         ")(?<gn2>" + toSupplementaries("c") + "*)",
4064                         toSupplementaries("abccczzzabcczzzabccc"),
4065                         "${gn2}",
4066                         toSupplementaries("ccczzzcczzzccc"));
4067 
4068         checkReplaceFirst("(?<dog>Dog)AndCat",
4069                           "zzzDogAndCatzzzDogAndCatzzz",
4070                           "${dog}",
4071                           "zzzDogzzzDogAndCatzzz");
4072 
4073 
4074         checkReplaceAll("(?<dog>Dog)AndCat",
4075                           "zzzDogAndCatzzzDogAndCatzzz",
4076                           "${dog}",
4077                           "zzzDogzzzDogzzz");
4078 
4079         // backref in Matcher & String
4080         if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
4081             !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
4082             failCount++;
4083 
4084         // negative
4085         checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
4086         checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
4087         checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
4088         checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
4089         checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
4090         checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
4091                          "gnameX");
4092         checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
4093         report("NamedGroupCapture");
4094     }
4095 
4096     // This is for bug 6919132
4097     private static void nonBmpClassComplementTest() throws Exception {
4098         Pattern p = Pattern.compile("\\P{Lu}");
4099         Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4100 
4101         if (m.find() && m.start() == 1)
4102             failCount++;
4103 
4104         // from a unicode category
4105         p = Pattern.compile("\\P{Lu}");
4106         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4107         if (m.find())
4108             failCount++;
4109         if (!m.hitEnd())
4110             failCount++;
4111 
4112         // block
4113         p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
4114         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4115         if (m.find() && m.start() == 1)
4116             failCount++;
4117 
4118         p = Pattern.compile("\\P{sc=GRANTHA}");
4119         m = p.matcher(new String(new int[] {0x11350}, 0, 1));
4120         if (m.find() && m.start() == 1)
4121             failCount++;
4122 
4123         report("NonBmpClassComplement");
4124     }
4125 
4126     private static void unicodePropertiesTest() throws Exception {
4127         // different forms
4128         if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
4129             !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
4130             !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
4131             !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
4132             !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
4133             !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
4134             !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
4135             !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
4136             !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
4137             !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
4138             failCount++;
4139 
4140         Matcher common  = Pattern.compile("\\p{script=Common}").matcher("");
4141         Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
4142         Matcher lastSM  = common;
4143         Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
4144 
4145         Matcher latin  = Pattern.compile("\\p{block=basic_latin}").matcher("");
4146         Matcher greek  = Pattern.compile("\\p{InGreek}").matcher("");
4147         Matcher lastBM = latin;
4148         Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
4149 
4150         for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
4151             if (cp >= 0x30000 && (cp & 0x70) == 0){
4152                 continue;  // only pick couple code points, they are the same
4153             }
4154 
4155             // Unicode Script
4156             Character.UnicodeScript script = Character.UnicodeScript.of(cp);
4157             Matcher m;
4158             String str = new String(Character.toChars(cp));
4159             if (script == lastScript) {
4160                  m = lastSM;
4161                  m.reset(str);
4162             } else {
4163                  m  = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
4164             }
4165             if (!m.matches()) {
4166                 failCount++;
4167             }
4168             Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
4169             other.reset(str);
4170             if (other.matches()) {
4171                 failCount++;
4172             }
4173             lastSM = m;
4174             lastScript = script;
4175 
4176             // Unicode Block
4177             Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
4178             if (block == null) {
4179                 //System.out.printf("Not a Block: cp=%x%n", cp);
4180                 continue;
4181             }
4182             if (block == lastBlock) {
4183                  m = lastBM;
4184                  m.reset(str);
4185             } else {
4186                  m  = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
4187             }
4188             if (!m.matches()) {
4189                 failCount++;
4190             }
4191             other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
4192             other.reset(str);
4193             if (other.matches()) {
4194                 failCount++;
4195             }
4196             lastBM = m;
4197             lastBlock = block;
4198         }
4199         report("unicodeProperties");
4200     }
4201 
4202     private static void unicodeHexNotationTest() throws Exception {
4203 
4204         // negative
4205         checkExpectedFail("\\x{-23}");
4206         checkExpectedFail("\\x{110000}");
4207         checkExpectedFail("\\x{}");
4208         checkExpectedFail("\\x{AB[ef]");
4209 
4210         // codepoint
4211         check("^\\x{1033c}$",              "\uD800\uDF3C", true);
4212         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
4213         check("^\\x{D800}\\x{DF3c}+$",     "\uD800\uDF3C", false);
4214         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
4215 
4216         // in class
4217         check("^[\\x{D800}\\x{DF3c}]+$",   "\uD800\uDF3C", false);
4218         check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
4219         check("^[\\x{D800}\\x{DF3C}]+$",   "\uD800\uDF3C", false);
4220         check("^[\\x{DF3C}\\x{D800}]+$",   "\uD800\uDF3C", false);
4221         check("^[\\x{D800}\\x{DF3C}]+$",   "\uDF3C\uD800", true);
4222         check("^[\\x{DF3C}\\x{D800}]+$",   "\uDF3C\uD800", true);
4223 
4224         for (int cp = 0; cp <= 0x10FFFF; cp++) {
4225              String s = "A" + new String(Character.toChars(cp)) + "B";
4226              String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
4227                                              : String.format("\\u%04x\\u%04x",
4228                                                (int) Character.toChars(cp)[0],
4229                                                (int) Character.toChars(cp)[1]);
4230              String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
4231              if (!Pattern.matches("A" + hexUTF16 + "B", s))
4232                  failCount++;
4233              if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
4234                  failCount++;
4235              if (!Pattern.matches("A" + hexCodePoint + "B", s))
4236                  failCount++;
4237              if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
4238                  failCount++;
4239          }
4240          report("unicodeHexNotation");
4241     }
4242 
4243     private static void unicodeClassesTest() throws Exception {
4244 
4245         Matcher lower  = Pattern.compile("\\p{Lower}").matcher("");
4246         Matcher upper  = Pattern.compile("\\p{Upper}").matcher("");
4247         Matcher ASCII  = Pattern.compile("\\p{ASCII}").matcher("");
4248         Matcher alpha  = Pattern.compile("\\p{Alpha}").matcher("");
4249         Matcher digit  = Pattern.compile("\\p{Digit}").matcher("");
4250         Matcher alnum  = Pattern.compile("\\p{Alnum}").matcher("");
4251         Matcher punct  = Pattern.compile("\\p{Punct}").matcher("");
4252         Matcher graph  = Pattern.compile("\\p{Graph}").matcher("");
4253         Matcher print  = Pattern.compile("\\p{Print}").matcher("");
4254         Matcher blank  = Pattern.compile("\\p{Blank}").matcher("");
4255         Matcher cntrl  = Pattern.compile("\\p{Cntrl}").matcher("");
4256         Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
4257         Matcher space  = Pattern.compile("\\p{Space}").matcher("");
4258         Matcher bound  = Pattern.compile("\\b").matcher("");
4259         Matcher word   = Pattern.compile("\\w++").matcher("");
4260         // UNICODE_CHARACTER_CLASS
4261         Matcher lowerU  = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4262         Matcher upperU  = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4263         Matcher ASCIIU  = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4264         Matcher alphaU  = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4265         Matcher digitU  = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4266         Matcher alnumU  = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4267         Matcher punctU  = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4268         Matcher graphU  = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4269         Matcher printU  = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4270         Matcher blankU  = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4271         Matcher cntrlU  = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4272         Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4273         Matcher spaceU  = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4274         Matcher boundU  = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4275         Matcher wordU   = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4276         // embedded flag (?U)
4277         Matcher lowerEU  = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4278         Matcher graphEU  = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4279         Matcher wordEU   = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4280 
4281         Matcher bwb    = Pattern.compile("\\b\\w\\b").matcher("");
4282         Matcher bwbU   = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4283         Matcher bwbEU  = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4284         // properties
4285         Matcher lowerP  = Pattern.compile("\\p{IsLowerCase}").matcher("");
4286         Matcher upperP  = Pattern.compile("\\p{IsUpperCase}").matcher("");
4287         Matcher titleP  = Pattern.compile("\\p{IsTitleCase}").matcher("");
4288         Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
4289         Matcher alphaP  = Pattern.compile("\\p{IsAlphabetic}").matcher("");
4290         Matcher ideogP  = Pattern.compile("\\p{IsIdeographic}").matcher("");
4291         Matcher cntrlP  = Pattern.compile("\\p{IsControl}").matcher("");
4292         Matcher spaceP  = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
4293         Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
4294         Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
4295         Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
4296 
4297         // javaMethod
4298         Matcher lowerJ  = Pattern.compile("\\p{javaLowerCase}").matcher("");
4299         Matcher upperJ  = Pattern.compile("\\p{javaUpperCase}").matcher("");
4300         Matcher alphaJ  = Pattern.compile("\\p{javaAlphabetic}").matcher("");
4301         Matcher ideogJ  = Pattern.compile("\\p{javaIdeographic}").matcher("");
4302 
4303         for (int cp = 1; cp < 0x30000; cp++) {
4304             String str = new String(Character.toChars(cp));
4305             int type = Character.getType(cp);
4306             if (// lower
4307                 POSIX_ASCII.isLower(cp)   != lower.reset(str).matches()  ||
4308                 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
4309                 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
4310                 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
4311                 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
4312                 // upper
4313                 POSIX_ASCII.isUpper(cp)   != upper.reset(str).matches()  ||
4314                 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
4315                 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
4316                 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
4317                 // alpha
4318                 POSIX_ASCII.isAlpha(cp)   != alpha.reset(str).matches()  ||
4319                 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
4320                 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
4321                 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
4322                 // digit
4323                 POSIX_ASCII.isDigit(cp)   != digit.reset(str).matches()  ||
4324                 Character.isDigit(cp)     != digitU.reset(str).matches() ||
4325                 // alnum
4326                 POSIX_ASCII.isAlnum(cp)   != alnum.reset(str).matches()  ||
4327                 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
4328                 // punct
4329                 POSIX_ASCII.isPunct(cp)   != punct.reset(str).matches()  ||
4330                 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
4331                 // graph
4332                 POSIX_ASCII.isGraph(cp)   != graph.reset(str).matches()  ||
4333                 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
4334                 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
4335                 // blank
4336                 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
4337                                           != blank.reset(str).matches()  ||
4338                 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
4339                 // print
4340                 POSIX_ASCII.isPrint(cp)   != print.reset(str).matches()  ||
4341                 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
4342                 // cntrl
4343                 POSIX_ASCII.isCntrl(cp)   != cntrl.reset(str).matches()  ||
4344                 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
4345                 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
4346                 // hexdigit
4347                 POSIX_ASCII.isHexDigit(cp)   != xdigit.reset(str).matches()  ||
4348                 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
4349                 // space
4350                 POSIX_ASCII.isSpace(cp)   != space.reset(str).matches()  ||
4351                 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
4352                 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
4353                 // word
4354                 POSIX_ASCII.isWord(cp)   != word.reset(str).matches()  ||
4355                 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
4356                 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
4357                 // bwordb
4358                 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
4359                 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
4360                 // properties
4361                 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
4362                 Character.isLetter(cp)    != letterP.reset(str).matches()||
4363                 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
4364                 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
4365                 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
4366                 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
4367                 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches())
4368                 failCount++;
4369         }
4370 
4371         // bounds/word align
4372         twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
4373         if (!bwbU.reset("\u0180sherman\u0400").matches())
4374             failCount++;
4375         twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
4376         if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
4377             failCount++;
4378         twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
4379         if (!bwbU.reset("\u0724\u0739\u0724").matches())
4380             failCount++;
4381         if (!bwbEU.reset("\u0724\u0739\u0724").matches())
4382             failCount++;
4383         report("unicodePredefinedClasses");
4384     }
4385 
4386     private static void unicodeCharacterNameTest() throws Exception {
4387 
4388         for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) {
4389             if (!Character.isValidCodePoint(cp) ||
4390                 Character.getType(cp) == Character.UNASSIGNED)
4391                 continue;
4392             String str = new String(Character.toChars(cp));
4393             // single
4394             String p = "\\N{" + Character.getName(cp) + "}";
4395             if (!Pattern.compile(p).matcher(str).matches()) {
4396                 failCount++;
4397             }
4398             // class[c]
4399             p = "[\\N{" + Character.getName(cp) + "}]";
4400             if (!Pattern.compile(p).matcher(str).matches()) {
4401                 failCount++;
4402             }
4403         }
4404 
4405         // range
4406         for (int i = 0; i < 10; i++) {
4407             int start = generator.nextInt(20);
4408             int end = start + generator.nextInt(200);
4409             String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]";
4410             String str;
4411             for (int cp = start; cp < end; cp++) {
4412                 str = new String(Character.toChars(cp));
4413                 if (!Pattern.compile(p).matcher(str).matches()) {
4414                     failCount++;
4415                 }
4416             }
4417             str = new String(Character.toChars(end + 10));
4418             if (Pattern.compile(p).matcher(str).matches()) {
4419                 failCount++;
4420             }
4421         }
4422 
4423         // slice
4424         for (int i = 0; i < 10; i++) {
4425             int n = generator.nextInt(256);
4426             int[] buf = new int[n];
4427             StringBuffer sb = new StringBuffer(1024);
4428             for (int j = 0; j < n; j++) {
4429                 int cp = generator.nextInt(1000);
4430                 if (!Character.isValidCodePoint(cp) ||
4431                     Character.getType(cp) == Character.UNASSIGNED)
4432                     cp = 0x4e00;    // just use 4e00
4433                 sb.append("\\N{" + Character.getName(cp) + "}");
4434                 buf[j] = cp;
4435             }
4436             String p = sb.toString();
4437             String str = new String(buf, 0, buf.length);
4438             if (!Pattern.compile(p).matcher(str).matches()) {
4439                 failCount++;
4440             }
4441         }
4442         report("unicodeCharacterName");
4443     }
4444 
4445     private static void horizontalAndVerticalWSTest() throws Exception {
4446         String hws = new String (new char[] {
4447                                      0x09, 0x20, 0xa0, 0x1680, 0x180e,
4448                                      0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
4449                                      0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
4450                                      0x202f, 0x205f, 0x3000 });
4451         String vws = new String (new char[] {
4452                                      0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
4453         if (!Pattern.compile("\\h+").matcher(hws).matches() ||
4454             !Pattern.compile("[\\h]+").matcher(hws).matches())
4455             failCount++;
4456         if (Pattern.compile("\\H").matcher(hws).find() ||
4457             Pattern.compile("[\\H]").matcher(hws).find())
4458             failCount++;
4459         if (!Pattern.compile("\\v+").matcher(vws).matches() ||
4460             !Pattern.compile("[\\v]+").matcher(vws).matches())
4461             failCount++;
4462         if (Pattern.compile("\\V").matcher(vws).find() ||
4463             Pattern.compile("[\\V]").matcher(vws).find())
4464             failCount++;
4465         String prefix = "abcd";
4466         String suffix = "efgh";
4467         String ng = "A";
4468         for (int i = 0; i < hws.length(); i++) {
4469             String c = String.valueOf(hws.charAt(i));
4470             Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
4471             if (!m.find() || !c.equals(m.group()))
4472                 failCount++;
4473             m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
4474             if (!m.find() || !c.equals(m.group()))
4475                 failCount++;
4476 
4477             m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
4478             if (!m.find() || !ng.equals(m.group()))
4479                 failCount++;
4480             m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
4481             if (!m.find() || !ng.equals(m.group()))
4482                 failCount++;
4483         }
4484         for (int i = 0; i < vws.length(); i++) {
4485             String c = String.valueOf(vws.charAt(i));
4486             Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
4487             if (!m.find() || !c.equals(m.group()))
4488                 failCount++;
4489             m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
4490             if (!m.find() || !c.equals(m.group()))
4491                 failCount++;
4492 
4493             m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
4494             if (!m.find() || !ng.equals(m.group()))
4495                 failCount++;
4496             m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
4497             if (!m.find() || !ng.equals(m.group()))
4498                 failCount++;
4499         }
4500         // \v in range is interpreted as 0x0B. This is the undocumented behavior
4501         if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
4502             failCount++;
4503         report("horizontalAndVerticalWSTest");
4504     }
4505 
4506     private static void linebreakTest() throws Exception {
4507         String linebreaks = new String (new char[] {
4508             0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
4509         String crnl = "\r\n";
4510         if (!Pattern.compile("\\R+").matcher(linebreaks).matches() ||
4511             !Pattern.compile("\\R").matcher(crnl).matches() ||
4512             Pattern.compile("\\R\\R").matcher(crnl).matches())
4513             failCount++;
4514         report("linebreakTest");
4515     }
4516 
4517     // #7189363
4518     private static void branchTest() throws Exception {
4519         if (!Pattern.compile("(a)?bc|d").matcher("d").find() ||     // greedy
4520             !Pattern.compile("(a)+bc|d").matcher("d").find() ||
4521             !Pattern.compile("(a)*bc|d").matcher("d").find() ||
4522             !Pattern.compile("(a)??bc|d").matcher("d").find() ||    // reluctant
4523             !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
4524             !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
4525             !Pattern.compile("(a)?+bc|d").matcher("d").find() ||    // possessive
4526             !Pattern.compile("(a)++bc|d").matcher("d").find() ||
4527             !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
4528             !Pattern.compile("(a)?bc|d").matcher("d").matches() ||  // greedy
4529             !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
4530             !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
4531             !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
4532             !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
4533             !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
4534             !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
4535             !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
4536             !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
4537             !Pattern.compile("(a)?bc|de").matcher("de").find() ||   // others
4538             !Pattern.compile("(a)??bc|de").matcher("de").find() ||
4539             !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
4540             !Pattern.compile("(a)??bc|de").matcher("de").matches())
4541             failCount++;
4542         report("branchTest");
4543     }
4544 
4545     // This test is for 8007395
4546     private static void groupCurlyNotFoundSuppTest() throws Exception {
4547         String input = "test this as \ud83d\ude0d";
4548         for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
4549                                           "test(.)*(@[a-zA-Z.]+)",
4550                                           "test([^B])+(@[a-zA-Z.]+)",
4551                                           "test([^B])*(@[a-zA-Z.]+)",
4552                                           "test(\\P{IsControl})+(@[a-zA-Z.]+)",
4553                                           "test(\\P{IsControl})*(@[a-zA-Z.]+)",
4554                                         }) {
4555             Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
4556                                .matcher(input);
4557             try {
4558                 if (m.find()) {
4559                     failCount++;
4560                 }
4561             } catch (Exception x) {
4562                 failCount++;
4563             }
4564         }
4565         report("GroupCurly NotFoundSupp");
4566     }
4567 
4568     // This test is for 8023647
4569     private static void groupCurlyBackoffTest() throws Exception {
4570         if (!"abc1c".matches("(\\w)+1\\1") ||
4571             "abc11".matches("(\\w)+1\\1")) {
4572             failCount++;
4573         }
4574         report("GroupCurly backoff");
4575     }
4576 
4577     // This test is for 8012646
4578     private static void patternAsPredicate() throws Exception {
4579         Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
4580 
4581         if (p.test("")) {
4582             failCount++;
4583         }
4584         if (!p.test("word")) {
4585             failCount++;
4586         }
4587         if (p.test("1234")) {
4588             failCount++;
4589         }
4590         report("Pattern.asPredicate");
4591     }
4592 
4593     // This test is for 8035975
4594     private static void invalidFlags() throws Exception {
4595         for (int flag = 1; flag != 0; flag <<= 1) {
4596             switch (flag) {
4597             case Pattern.CASE_INSENSITIVE:
4598             case Pattern.MULTILINE:
4599             case Pattern.DOTALL:
4600             case Pattern.UNICODE_CASE:
4601             case Pattern.CANON_EQ:
4602             case Pattern.UNIX_LINES:
4603             case Pattern.LITERAL:
4604             case Pattern.UNICODE_CHARACTER_CLASS:
4605             case Pattern.COMMENTS:
4606                 // valid flag, continue
4607                 break;
4608             default:
4609                 try {
4610                     Pattern.compile(".", flag);
4611                     failCount++;
4612                 } catch (IllegalArgumentException expected) {
4613                 }
4614             }
4615         }
4616         report("Invalid compile flags");
4617     }
4618 
4619     private static void grapheme() throws Exception {
4620         Files.lines(Paths.get(System.getProperty("test.src", "."),
4621                               "GraphemeBreakTest.txt"))
4622             .filter( ln -> ln.length() != 0 && !ln.startsWith("#") )
4623             .forEach( ln -> {
4624                     ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", "");
4625                     // System.out.println(str);
4626                     String[] strs = ln.split("\u00f7|\u00d7");
4627                     StringBuilder src = new StringBuilder();
4628                     ArrayList<String> graphemes = new ArrayList<>();
4629                     StringBuilder buf = new StringBuilder();
4630                     int offBk = 0;
4631                     for (String str : strs) {
4632                         if (str.length() == 0)  // first empty str
4633                             continue;
4634                         int cp = Integer.parseInt(str, 16);
4635                         src.appendCodePoint(cp);
4636                         buf.appendCodePoint(cp);
4637                         offBk += (str.length() + 1);
4638                         if (ln.charAt(offBk) == '\u00f7') {    // DIV
4639                             graphemes.add(buf.toString());
4640                             buf = new StringBuilder();
4641                         }
4642                     }
4643                     Pattern p = Pattern.compile("\\X");
4644                     Matcher m = p.matcher(src.toString());
4645                     Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}");
4646                     for (String g : graphemes) {
4647                         // System.out.printf("     grapheme:=[%s]%n", g);
4648                         // (1) test \\X directly
4649                         if (!m.find() || !m.group().equals(g)) {
4650                             System.out.println("Failed \\X [" + ln + "] : " + g);
4651                             failCount++;
4652                         }
4653                         // (2) test \\b{g} + \\X  via Scanner
4654                         boolean hasNext = s.hasNext(p);
4655                         // if (!s.hasNext() || !s.next().equals(next)) {
4656                         if (!s.hasNext(p) || !s.next(p).equals(g)) {
4657                             System.out.println("Failed b{g} [" + ln + "] : " + g);
4658                             failCount++;
4659                         }
4660                     }
4661                 });
4662         // some sanity checks
4663         if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() ||
4664             !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() ||
4665             !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches())
4666             failCount++;
4667         // make sure "\b{n}" still works
4668         if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches())
4669             failCount++;
4670         report("Unicode extended grapheme cluster");
4671     }
4672 }