1 /*
   2  * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /**
  27  * @test
  28  * @summary tests RegExp framework
  29  * @author Mike McCloskey
  30  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
  31  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
  32  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  33  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  34  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  35  * 6350801 6676425 6878475 6919132 6931676 6948903 7014645
  36  */
  37 
  38 import java.util.regex.*;
  39 import java.util.Random;
  40 import java.io.*;
  41 import java.util.*;
  42 import java.nio.CharBuffer;
  43 
  44 /**
  45  * This is a test class created to check the operation of
  46  * the Pattern and Matcher classes.
  47  */
  48 public class RegExTest {
  49 
    // Shared pseudo-random source; getRandomAlphaString() draws its letters from it.
    private static Random generator = new Random();
    // Latched to true by report() once a test group finishes with failures;
    // main() reads it to decide the overall outcome.
    private static boolean failure = false;
    // Failed checks recorded by the test group currently running;
    // report() prints the value and resets it to zero.
    private static int failCount = 0;
  53 
  54     /**
  55      * Main to interpret arguments and run several tests.
  56      *
  57      */
  58     public static void main(String[] args) throws Exception {
  59         // Most of the tests are in a file
  60         processFile("TestCases.txt");
  61         //processFile("PerlCases.txt");
  62         processFile("BMPTestCases.txt");
  63         processFile("SupplementaryTestCases.txt");
  64 
  65         // These test many randomly generated char patterns
  66         bm();
  67         slice();
  68 
  69         // These are hard to put into the file
  70         escapes();
  71         blankInput();
  72 
  73         // Substitition tests on randomly generated sequences
  74         globalSubstitute();
  75         stringbufferSubstitute();
  76         substitutionBasher();
  77 
  78         // Canonical Equivalence
  79         ceTest();
  80 
  81         // Anchors
  82         anchorTest();
  83 
  84         // boolean match calls
  85         matchesTest();
  86         lookingAtTest();
  87 
  88         // Pattern API
  89         patternMatchesTest();
  90 
  91         // Misc
  92         lookbehindTest();
  93         nullArgumentTest();
  94         backRefTest();
  95         groupCaptureTest();
  96         caretTest();
  97         charClassTest();
  98         emptyPatternTest();
  99         findIntTest();
 100         group0Test();
 101         longPatternTest();
 102         octalTest();
 103         ampersandTest();
 104         negationTest();
 105         splitTest();
 106         appendTest();
 107         caseFoldingTest();
 108         commentsTest();
 109         unixLinesTest();
 110         replaceFirstTest();
 111         gTest();
 112         zTest();
 113         serializeTest();
 114         reluctantRepetitionTest();
 115         multilineDollarTest();
 116         dollarAtEndTest();
 117         caretBetweenTerminatorsTest();
 118         // This RFE rejected in Tiger numOccurrencesTest();
 119         javaCharClassTest();
 120         nonCaptureRepetitionTest();
 121         notCapturedGroupCurlyMatchTest();
 122         escapedSegmentTest();
 123         literalPatternTest();
 124         literalReplacementTest();
 125         regionTest();
 126         toStringTest();
 127         negatedCharClassTest();
 128         findFromTest();
 129         boundsTest();
 130         unicodeWordBoundsTest();
 131         caretAtEndTest();
 132         wordSearchTest();
 133         hitEndTest();
 134         toMatchResultTest();
 135         surrogatesInClassTest();
 136         namedGroupCaptureTest();
 137         nonBmpClassComplementTest();
 138         unicodePropertiesTest();
 139         unicodeHexNotationTest();
 140         if (failure)
 141             throw new RuntimeException("Failure in the RE handling.");
 142         else
 143             System.err.println("OKAY: All tests passed.");
 144     }
 145 
 146     // Utility functions
 147 
 148     private static String getRandomAlphaString(int length) {
 149         StringBuffer buf = new StringBuffer(length);
 150         for (int i=0; i<length; i++) {
 151             char randChar = (char)(97 + generator.nextInt(26));
 152             buf.append(randChar);
 153         }
 154         return buf.toString();
 155     }
 156 
 157     private static void check(Matcher m, String expected) {
 158         m.find();
 159         if (!m.group().equals(expected))
 160             failCount++;
 161     }
 162 
 163     private static void check(Matcher m, String result, boolean expected) {
 164         m.find();
 165         if (m.group().equals(result) != expected)
 166             failCount++;
 167     }
 168 
 169     private static void check(Pattern p, String s, boolean expected) {
 170         if (p.matcher(s).find() != expected)
 171             failCount++;
 172     }
 173 
 174     private static void check(String p, String s, boolean expected) {
 175         Matcher matcher = Pattern.compile(p).matcher(s);
 176         if (matcher.find() != expected)
 177             failCount++;
 178     }
 179 
 180     private static void check(String p, char c, boolean expected) {
 181         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 182         Pattern pattern = Pattern.compile(propertyPattern);
 183         char[] ca = new char[1]; ca[0] = c;
 184         Matcher matcher = pattern.matcher(new String(ca));
 185         if (!matcher.find())
 186             failCount++;
 187     }
 188 
 189     private static void check(String p, int codePoint, boolean expected) {
 190         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 191         Pattern pattern = Pattern.compile(propertyPattern);
 192         char[] ca = Character.toChars(codePoint);
 193         Matcher matcher = pattern.matcher(new String(ca));
 194         if (!matcher.find())
 195             failCount++;
 196     }
 197 
 198     private static void check(String p, int flag, String input, String s,
 199                               boolean expected)
 200     {
 201         Pattern pattern = Pattern.compile(p, flag);
 202         Matcher matcher = pattern.matcher(input);
 203         if (expected)
 204             check(matcher, s, expected);
 205         else
 206             check(pattern, input, false);
 207     }
 208 
 209     private static void report(String testName) {
 210         int spacesToAdd = 30 - testName.length();
 211         StringBuffer paddedNameBuffer = new StringBuffer(testName);
 212         for (int i=0; i<spacesToAdd; i++)
 213             paddedNameBuffer.append(" ");
 214         String paddedName = paddedNameBuffer.toString();
 215         System.err.println(paddedName + ": " +
 216                            (failCount==0 ? "Passed":"Failed("+failCount+")"));
 217         if (failCount > 0)
 218             failure = true;
 219         failCount = 0;
 220     }
 221 
 222     /**
 223      * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
 224      * supplementary characters. This method does NOT fully take care
 225      * of the regex syntax.
 226      */
 227     private static String toSupplementaries(String s) {
 228         int length = s.length();
 229         StringBuffer sb = new StringBuffer(length * 2);
 230 
 231         for (int i = 0; i < length; ) {
 232             char c = s.charAt(i++);
 233             if (c == '\\') {
 234                 sb.append(c);
 235                 if (i < length) {
 236                     c = s.charAt(i++);
 237                     sb.append(c);
 238                     if (c == 'u') {
 239                         // assume no syntax error
 240                         sb.append(s.charAt(i++));
 241                         sb.append(s.charAt(i++));
 242                         sb.append(s.charAt(i++));
 243                         sb.append(s.charAt(i++));
 244                     }
 245                 }
 246             } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
 247                 sb.append('\ud800').append((char)('\udc00'+c));
 248             } else {
 249                 sb.append(c);
 250             }
 251         }
 252         return sb.toString();
 253     }
 254 
 255     // Regular expression tests
 256 
 257     // This is for bug 6178785
 258     // Test if an expected NPE gets thrown when passing in a null argument
 259     private static boolean check(Runnable test) {
 260         try {
 261             test.run();
 262             failCount++;
 263             return false;
 264         } catch (NullPointerException npe) {
 265             return true;
 266         }
 267     }
 268 
 269     private static void nullArgumentTest() {
 270         check(new Runnable() { public void run() { Pattern.compile(null); }});
 271         check(new Runnable() { public void run() { Pattern.matches(null, null); }});
 272         check(new Runnable() { public void run() { Pattern.matches("xyz", null);}});
 273         check(new Runnable() { public void run() { Pattern.quote(null);}});
 274         check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}});
 275         check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}});
 276 
 277         final Matcher m = Pattern.compile("xyz").matcher("xyz");
 278         m.matches();
 279         check(new Runnable() { public void run() { m.appendTail(null);}});
 280         check(new Runnable() { public void run() { m.replaceAll(null);}});
 281         check(new Runnable() { public void run() { m.replaceFirst(null);}});
 282         check(new Runnable() { public void run() { m.appendReplacement(null, null);}});
 283         check(new Runnable() { public void run() { m.reset(null);}});
 284         check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}});
 285         //check(new Runnable() { public void run() { m.usePattern(null);}});
 286 
 287         report("Null Argument");
 288     }
 289 
 290     // This is for bug6635133
 291     // Test if surrogate pair in Unicode escapes can be handled correctly.
 292     private static void surrogatesInClassTest() throws Exception {
 293         Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
 294         Matcher matcher = pattern.matcher("\ud834\udd22");
 295         if (!matcher.find())
 296             failCount++;
 297     }
 298 
 299     // This is for bug 4988891
 300     // Test toMatchResult to see that it is a copy of the Matcher
 301     // that is not affected by subsequent operations on the original
 302     private static void toMatchResultTest() throws Exception {
 303         Pattern pattern = Pattern.compile("squid");
 304         Matcher matcher = pattern.matcher(
 305             "agiantsquidofdestinyasmallsquidoffate");
 306         matcher.find();
 307         int matcherStart1 = matcher.start();
 308         MatchResult mr = matcher.toMatchResult();
 309         if (mr == matcher)
 310             failCount++;
 311         int resultStart1 = mr.start();
 312         if (matcherStart1 != resultStart1)
 313             failCount++;
 314         matcher.find();
 315         int matcherStart2 = matcher.start();
 316         int resultStart2 = mr.start();
 317         if (matcherStart2 == resultStart2)
 318             failCount++;
 319         if (resultStart1 != resultStart2)
 320             failCount++;
 321         MatchResult mr2 = matcher.toMatchResult();
 322         if (mr == mr2)
 323             failCount++;
 324         if (mr2.start() != matcherStart2)
 325             failCount++;
 326         report("toMatchResult is a copy");
 327     }
 328 
 329     // This is for bug 5013885
 330     // Must test a slice to see if it reports hitEnd correctly
 331     private static void hitEndTest() throws Exception {
 332         // Basic test of Slice node
 333         Pattern p = Pattern.compile("^squidattack");
 334         Matcher m = p.matcher("squack");
 335         m.find();
 336         if (m.hitEnd())
 337             failCount++;
 338         m.reset("squid");
 339         m.find();
 340         if (!m.hitEnd())
 341             failCount++;
 342 
 343         // Test Slice, SliceA and SliceU nodes
 344         for (int i=0; i<3; i++) {
 345             int flags = 0;
 346             if (i==1) flags = Pattern.CASE_INSENSITIVE;
 347             if (i==2) flags = Pattern.UNICODE_CASE;
 348             p = Pattern.compile("^abc", flags);
 349             m = p.matcher("ad");
 350             m.find();
 351             if (m.hitEnd())
 352                 failCount++;
 353             m.reset("ab");
 354             m.find();
 355             if (!m.hitEnd())
 356                 failCount++;
 357         }
 358 
 359         // Test Boyer-Moore node
 360         p = Pattern.compile("catattack");
 361         m = p.matcher("attack");
 362         m.find();
 363         if (!m.hitEnd())
 364             failCount++;
 365 
 366         p = Pattern.compile("catattack");
 367         m = p.matcher("attackattackattackcatatta");
 368         m.find();
 369         if (!m.hitEnd())
 370             failCount++;
 371         report("hitEnd from a Slice");
 372     }
 373 
 374     // This is for bug 4997476
 375     // It is weird code submitted by customer demonstrating a regression
 376     private static void wordSearchTest() throws Exception {
 377         String testString = new String("word1 word2 word3");
 378         Pattern p = Pattern.compile("\\b");
 379         Matcher m = p.matcher(testString);
 380         int position = 0;
 381         int start = 0;
 382         while (m.find(position)) {
 383             start = m.start();
 384             if (start == testString.length())
 385                 break;
 386             if (m.find(start+1)) {
 387                 position = m.start();
 388             } else {
 389                 position = testString.length();
 390             }
 391             if (testString.substring(start, position).equals(" "))
 392                 continue;
 393             if (!testString.substring(start, position-1).startsWith("word"))
 394                 failCount++;
 395         }
 396         report("Customer word search");
 397     }
 398 
 399     // This is for bug 4994840
 400     private static void caretAtEndTest() throws Exception {
 401         // Problem only occurs with multiline patterns
 402         // containing a beginning-of-line caret "^" followed
 403         // by an expression that also matches the empty string.
 404         Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
 405         Matcher matcher = pattern.matcher("\r");
 406         matcher.find();
 407         matcher.find();
 408         report("Caret at end");
 409     }
 410 
 411     // This test is for 4979006
 412     // Check to see if word boundary construct properly handles unicode
 413     // non spacing marks
 414     private static void unicodeWordBoundsTest() throws Exception {
 415         String spaces = "  ";
 416         String wordChar = "a";
 417         String nsm = "\u030a";
 418 
 419         assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
 420 
 421         Pattern pattern = Pattern.compile("\\b");
 422         Matcher matcher = pattern.matcher("");
 423         // S=other B=word character N=non spacing mark .=word boundary
 424         // SS.BB.SS
 425         String input = spaces + wordChar + wordChar + spaces;
 426         twoFindIndexes(input, matcher, 2, 4);
 427         // SS.BBN.SS
 428         input = spaces + wordChar +wordChar + nsm + spaces;
 429         twoFindIndexes(input, matcher, 2, 5);
 430         // SS.BN.SS
 431         input = spaces + wordChar + nsm + spaces;
 432         twoFindIndexes(input, matcher, 2, 4);
 433         // SS.BNN.SS
 434         input = spaces + wordChar + nsm + nsm + spaces;
 435         twoFindIndexes(input, matcher, 2, 5);
 436         // SSN.BB.SS
 437         input = spaces + nsm + wordChar + wordChar + spaces;
 438         twoFindIndexes(input, matcher, 3, 5);
 439         // SS.BNB.SS
 440         input = spaces + wordChar + nsm + wordChar + spaces;
 441         twoFindIndexes(input, matcher, 2, 5);
 442         // SSNNSS
 443         input = spaces + nsm + nsm + spaces;
 444         matcher.reset(input);
 445         if (matcher.find())
 446             failCount++;
 447         // SSN.BBN.SS
 448         input = spaces + nsm + wordChar + wordChar + nsm + spaces;
 449         twoFindIndexes(input, matcher, 3, 6);
 450 
 451         report("Unicode word boundary");
 452     }
 453 
 454     private static void twoFindIndexes(String input, Matcher matcher, int a,
 455                                        int b) throws Exception
 456     {
 457         matcher.reset(input);
 458         matcher.find();
 459         if (matcher.start() != a)
 460             failCount++;
 461         matcher.find();
 462         if (matcher.start() != b)
 463             failCount++;
 464     }
 465 
 466     // This test is for 6284152
 467     static void check(String regex, String input, String[] expected) {
 468         List<String> result = new ArrayList<String>();
 469         Pattern p = Pattern.compile(regex);
 470         Matcher m = p.matcher(input);
 471         while (m.find()) {
 472             result.add(m.group());
 473         }
 474         if (!Arrays.asList(expected).equals(result))
 475             failCount++;
 476     }
 477 
 478     private static void lookbehindTest() throws Exception {
 479         //Positive
 480         check("(?<=%.{0,5})foo\\d",
 481               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 482               new String[]{"foo1", "foo2", "foo3"});
 483 
 484         //boundary at end of the lookbehind sub-regex should work consistently
 485         //with the boundary just after the lookbehind sub-regex
 486         check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
 487         check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
 488         check("(?<!abc )\\bfoo", "abc foo", new String[0]);
 489         check("(?<!abc \\b)foo", "abc foo", new String[0]);
 490 
 491         //Negative
 492         check("(?<!%.{0,5})foo\\d",
 493               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 494               new String[] {"foo4", "foo5"});
 495 
 496         //Positive greedy
 497         check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
 498 
 499         //Positive reluctant
 500         check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
 501 
 502         //supplementary
 503         check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 504               new String[] {"fo\ud800\udc00o"});
 505         check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 506               new String[] {"fo\ud800\udc00o"});
 507         check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
 508               new String[] {"fo\ud800\udc00o"});
 509         check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
 510               new String[] {"fo\ud800\udc00o"});
 511         report("Lookbehind");
 512     }
 513 
 514     // This test is for 4938995
 515     // Check to see if weak region boundaries are transparent to
 516     // lookahead and lookbehind constructs
 517     private static void boundsTest() throws Exception {
 518         String fullMessage = "catdogcat";
 519         Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
 520         Matcher matcher = pattern.matcher("catdogca");
 521         matcher.useTransparentBounds(true);
 522         if (matcher.find())
 523             failCount++;
 524         matcher.reset("atdogcat");
 525         if (matcher.find())
 526             failCount++;
 527         matcher.reset(fullMessage);
 528         if (!matcher.find())
 529             failCount++;
 530         matcher.reset(fullMessage);
 531         matcher.region(0,9);
 532         if (!matcher.find())
 533             failCount++;
 534         matcher.reset(fullMessage);
 535         matcher.region(0,6);
 536         if (!matcher.find())
 537             failCount++;
 538         matcher.reset(fullMessage);
 539         matcher.region(3,6);
 540         if (!matcher.find())
 541             failCount++;
 542         matcher.useTransparentBounds(false);
 543         if (matcher.find())
 544             failCount++;
 545 
 546         // Negative lookahead/lookbehind
 547         pattern = Pattern.compile("(?<!cat)dog(?!cat)");
 548         matcher = pattern.matcher("dogcat");
 549         matcher.useTransparentBounds(true);
 550         matcher.region(0,3);
 551         if (matcher.find())
 552             failCount++;
 553         matcher.reset("catdog");
 554         matcher.region(3,6);
 555         if (matcher.find())
 556             failCount++;
 557         matcher.useTransparentBounds(false);
 558         matcher.reset("dogcat");
 559         matcher.region(0,3);
 560         if (!matcher.find())
 561             failCount++;
 562         matcher.reset("catdog");
 563         matcher.region(3,6);
 564         if (!matcher.find())
 565             failCount++;
 566 
 567         report("Region bounds transparency");
 568     }
 569 
 570     // This test is for 4945394
 571     private static void findFromTest() throws Exception {
 572         String message = "This is 40 $0 message.";
 573         Pattern pat = Pattern.compile("\\$0");
 574         Matcher match = pat.matcher(message);
 575         if (!match.find())
 576             failCount++;
 577         if (match.find())
 578             failCount++;
 579         if (match.find())
 580             failCount++;
 581         report("Check for alternating find");
 582     }
 583 
 584     // This test is for 4872664 and 4892980
 585     private static void negatedCharClassTest() throws Exception {
 586         Pattern pattern = Pattern.compile("[^>]");
 587         Matcher matcher = pattern.matcher("\u203A");
 588         if (!matcher.matches())
 589             failCount++;
 590         pattern = Pattern.compile("[^fr]");
 591         matcher = pattern.matcher("a");
 592         if (!matcher.find())
 593             failCount++;
 594         matcher.reset("\u203A");
 595         if (!matcher.find())
 596             failCount++;
 597         String s = "for";
 598         String result[] = s.split("[^fr]");
 599         if (!result[0].equals("f"))
 600             failCount++;
 601         if (!result[1].equals("r"))
 602             failCount++;
 603         s = "f\u203Ar";
 604         result = s.split("[^fr]");
 605         if (!result[0].equals("f"))
 606             failCount++;
 607         if (!result[1].equals("r"))
 608             failCount++;
 609 
 610         // Test adding to bits, subtracting a node, then adding to bits again
 611         pattern = Pattern.compile("[^f\u203Ar]");
 612         matcher = pattern.matcher("a");
 613         if (!matcher.find())
 614             failCount++;
 615         matcher.reset("f");
 616         if (matcher.find())
 617             failCount++;
 618         matcher.reset("\u203A");
 619         if (matcher.find())
 620             failCount++;
 621         matcher.reset("r");
 622         if (matcher.find())
 623             failCount++;
 624         matcher.reset("\u203B");
 625         if (!matcher.find())
 626             failCount++;
 627 
 628         // Test subtracting a node, adding to bits, subtracting again
 629         pattern = Pattern.compile("[^\u203Ar\u203B]");
 630         matcher = pattern.matcher("a");
 631         if (!matcher.find())
 632             failCount++;
 633         matcher.reset("\u203A");
 634         if (matcher.find())
 635             failCount++;
 636         matcher.reset("r");
 637         if (matcher.find())
 638             failCount++;
 639         matcher.reset("\u203B");
 640         if (matcher.find())
 641             failCount++;
 642         matcher.reset("\u203C");
 643         if (!matcher.find())
 644             failCount++;
 645 
 646         report("Negated Character Class");
 647     }
 648 
 649     // This test is for 4628291
 650     private static void toStringTest() throws Exception {
 651         Pattern pattern = Pattern.compile("b+");
 652         if (pattern.toString() != "b+")
 653             failCount++;
 654         Matcher matcher = pattern.matcher("aaabbbccc");
 655         String matcherString = matcher.toString(); // unspecified
 656         matcher.find();
 657         matcherString = matcher.toString(); // unspecified
 658         matcher.region(0,3);
 659         matcherString = matcher.toString(); // unspecified
 660         matcher.reset();
 661         matcherString = matcher.toString(); // unspecified
 662         report("toString");
 663     }
 664 
 665     // This test is for 4808962
 666     private static void literalPatternTest() throws Exception {
 667         int flags = Pattern.LITERAL;
 668 
 669         Pattern pattern = Pattern.compile("abc\\t$^", flags);
 670         check(pattern, "abc\\t$^", true);
 671 
 672         pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
 673         check(pattern, "abc\\t$^", true);
 674 
 675         pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
 676         check(pattern, "\\Qa^$bcabc\\E", true);
 677         check(pattern, "a^$bcabc", false);
 678 
 679         pattern = Pattern.compile("\\\\Q\\\\E");
 680         check(pattern, "\\Q\\E", true);
 681 
 682         pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
 683         check(pattern, "abcefg\\Q\\Ehij", true);
 684 
 685         pattern = Pattern.compile("\\\\\\Q\\\\E");
 686         check(pattern, "\\\\\\\\", true);
 687 
 688         pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
 689         check(pattern, "\\Qa^$bcabc\\E", true);
 690         check(pattern, "a^$bcabc", false);
 691 
 692         pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
 693         check(pattern, "\\Qabc\\Edef", true);
 694         check(pattern, "abcdef", false);
 695 
 696         pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
 697         check(pattern, "abc\\Edef", true);
 698         check(pattern, "abcdef", false);
 699 
 700         pattern = Pattern.compile(Pattern.quote("\\E"));
 701         check(pattern, "\\E", true);
 702 
 703         pattern = Pattern.compile("((((abc.+?:)", flags);
 704         check(pattern, "((((abc.+?:)", true);
 705 
 706         flags |= Pattern.MULTILINE;
 707 
 708         pattern = Pattern.compile("^cat$", flags);
 709         check(pattern, "abc^cat$def", true);
 710         check(pattern, "cat", false);
 711 
 712         flags |= Pattern.CASE_INSENSITIVE;
 713 
 714         pattern = Pattern.compile("abcdef", flags);
 715         check(pattern, "ABCDEF", true);
 716         check(pattern, "AbCdEf", true);
 717 
 718         flags |= Pattern.DOTALL;
 719 
 720         pattern = Pattern.compile("a...b", flags);
 721         check(pattern, "A...b", true);
 722         check(pattern, "Axxxb", false);
 723 
 724         flags |= Pattern.CANON_EQ;
 725 
 726         Pattern p = Pattern.compile("testa\u030a", flags);
 727         check(pattern, "testa\u030a", false);
 728         check(pattern, "test\u00e5", false);
 729 
 730         // Supplementary character test
 731         flags = Pattern.LITERAL;
 732 
 733         pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
 734         check(pattern, toSupplementaries("abc\\t$^"), true);
 735 
 736         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
 737         check(pattern, toSupplementaries("abc\\t$^"), true);
 738 
 739         pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
 740         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 741         check(pattern, toSupplementaries("a^$bcabc"), false);
 742 
 743         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
 744         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 745         check(pattern, toSupplementaries("a^$bcabc"), false);
 746 
 747         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
 748         check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
 749         check(pattern, toSupplementaries("abcdef"), false);
 750 
 751         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
 752         check(pattern, toSupplementaries("abc\\Edef"), true);
 753         check(pattern, toSupplementaries("abcdef"), false);
 754 
 755         pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
 756         check(pattern, toSupplementaries("((((abc.+?:)"), true);
 757 
 758         flags |= Pattern.MULTILINE;
 759 
 760         pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
 761         check(pattern, toSupplementaries("abc^cat$def"), true);
 762         check(pattern, toSupplementaries("cat"), false);
 763 
 764         flags |= Pattern.DOTALL;
 765 
 766         // note: this is case-sensitive.
 767         pattern = Pattern.compile(toSupplementaries("a...b"), flags);
 768         check(pattern, toSupplementaries("a...b"), true);
 769         check(pattern, toSupplementaries("axxxb"), false);
 770 
 771         flags |= Pattern.CANON_EQ;
 772 
 773         String t = toSupplementaries("test");
 774         p = Pattern.compile(t + "a\u030a", flags);
 775         check(pattern, t + "a\u030a", false);
 776         check(pattern, t + "\u00e5", false);
 777 
 778         report("Literal pattern");
 779     }
 780 
 781     // This test is for 4803179
 782     // This test is also for 4808962, replacement parts
 783     private static void literalReplacementTest() throws Exception {
 784         int flags = Pattern.LITERAL;
 785 
 786         Pattern pattern = Pattern.compile("abc", flags);
 787         Matcher matcher = pattern.matcher("zzzabczzz");
 788         String replaceTest = "$0";
 789         String result = matcher.replaceAll(replaceTest);
 790         if (!result.equals("zzzabczzz"))
 791             failCount++;
 792 
 793         matcher.reset();
 794         String literalReplacement = matcher.quoteReplacement(replaceTest);
 795         result = matcher.replaceAll(literalReplacement);
 796         if (!result.equals("zzz$0zzz"))
 797             failCount++;
 798 
 799         matcher.reset();
 800         replaceTest = "\\t$\\$";
 801         literalReplacement = matcher.quoteReplacement(replaceTest);
 802         result = matcher.replaceAll(literalReplacement);
 803         if (!result.equals("zzz\\t$\\$zzz"))
 804             failCount++;
 805 
 806         // Supplementary character test
 807         pattern = Pattern.compile(toSupplementaries("abc"), flags);
 808         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
 809         replaceTest = "$0";
 810         result = matcher.replaceAll(replaceTest);
 811         if (!result.equals(toSupplementaries("zzzabczzz")))
 812             failCount++;
 813 
 814         matcher.reset();
 815         literalReplacement = matcher.quoteReplacement(replaceTest);
 816         result = matcher.replaceAll(literalReplacement);
 817         if (!result.equals(toSupplementaries("zzz$0zzz")))
 818             failCount++;
 819 
 820         matcher.reset();
 821         replaceTest = "\\t$\\$";
 822         literalReplacement = matcher.quoteReplacement(replaceTest);
 823         result = matcher.replaceAll(literalReplacement);
 824         if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
 825             failCount++;
 826 
 827         report("Literal replacement");
 828     }
 829 
 830     // This test is for 4757029
 831     private static void regionTest() throws Exception {
 832         Pattern pattern = Pattern.compile("abc");
 833         Matcher matcher = pattern.matcher("abcdefabc");
 834 
 835         matcher.region(0,9);
 836         if (!matcher.find())
 837             failCount++;
 838         if (!matcher.find())
 839             failCount++;
 840         matcher.region(0,3);
 841         if (!matcher.find())
 842            failCount++;
 843         matcher.region(3,6);
 844         if (matcher.find())
 845            failCount++;
 846         matcher.region(0,2);
 847         if (matcher.find())
 848            failCount++;
 849 
 850         expectRegionFail(matcher, 1, -1);
 851         expectRegionFail(matcher, -1, -1);
 852         expectRegionFail(matcher, -1, 1);
 853         expectRegionFail(matcher, 5, 3);
 854         expectRegionFail(matcher, 5, 12);
 855         expectRegionFail(matcher, 12, 12);
 856 
 857         pattern = Pattern.compile("^abc$");
 858         matcher = pattern.matcher("zzzabczzz");
 859         matcher.region(0,9);
 860         if (matcher.find())
 861             failCount++;
 862         matcher.region(3,6);
 863         if (!matcher.find())
 864            failCount++;
 865         matcher.region(3,6);
 866         matcher.useAnchoringBounds(false);
 867         if (matcher.find())
 868            failCount++;
 869 
 870         // Supplementary character test
 871         pattern = Pattern.compile(toSupplementaries("abc"));
 872         matcher = pattern.matcher(toSupplementaries("abcdefabc"));
 873         matcher.region(0,9*2);
 874         if (!matcher.find())
 875             failCount++;
 876         if (!matcher.find())
 877             failCount++;
 878         matcher.region(0,3*2);
 879         if (!matcher.find())
 880            failCount++;
 881         matcher.region(1,3*2);
 882         if (matcher.find())
 883            failCount++;
 884         matcher.region(3*2,6*2);
 885         if (matcher.find())
 886            failCount++;
 887         matcher.region(0,2*2);
 888         if (matcher.find())
 889            failCount++;
 890         matcher.region(0,2*2+1);
 891         if (matcher.find())
 892            failCount++;
 893 
 894         expectRegionFail(matcher, 1*2, -1);
 895         expectRegionFail(matcher, -1, -1);
 896         expectRegionFail(matcher, -1, 1*2);
 897         expectRegionFail(matcher, 5*2, 3*2);
 898         expectRegionFail(matcher, 5*2, 12*2);
 899         expectRegionFail(matcher, 12*2, 12*2);
 900 
 901         pattern = Pattern.compile(toSupplementaries("^abc$"));
 902         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
 903         matcher.region(0,9*2);
 904         if (matcher.find())
 905             failCount++;
 906         matcher.region(3*2,6*2);
 907         if (!matcher.find())
 908            failCount++;
 909         matcher.region(3*2+1,6*2);
 910         if (matcher.find())
 911            failCount++;
 912         matcher.region(3*2,6*2-1);
 913         if (matcher.find())
 914            failCount++;
 915         matcher.region(3*2,6*2);
 916         matcher.useAnchoringBounds(false);
 917         if (matcher.find())
 918            failCount++;
 919         report("Regions");
 920     }
 921 
 922     private static void expectRegionFail(Matcher matcher, int index1,
 923                                          int index2)
 924     {
 925         try {
 926             matcher.region(index1, index2);
 927             failCount++;
 928         } catch (IndexOutOfBoundsException ioobe) {
 929             // Correct result
 930         } catch (IllegalStateException ise) {
 931             // Correct result
 932         }
 933     }
 934 
 935     // This test is for 4803197
 936     private static void escapedSegmentTest() throws Exception {
 937 
 938         Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
 939         check(pattern, "dir1\\dir2", true);
 940 
 941         pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
 942         check(pattern, "dir1\\dir2\\", true);
 943 
 944         pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
 945         check(pattern, "dir1\\dir2\\", true);
 946 
 947         // Supplementary character test
 948         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
 949         check(pattern, toSupplementaries("dir1\\dir2"), true);
 950 
 951         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
 952         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
 953 
 954         pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
 955         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
 956 
 957         report("Escaped segment");
 958     }
 959 
 960     // This test is for 4792284
 961     private static void nonCaptureRepetitionTest() throws Exception {
 962         String input = "abcdefgh;";
 963 
 964         String[] patterns = new String[] {
 965             "(?:\\w{4})+;",
 966             "(?:\\w{8})*;",
 967             "(?:\\w{2}){2,4};",
 968             "(?:\\w{4}){2,};",   // only matches the
 969             ".*?(?:\\w{5})+;",   //     specified minimum
 970             ".*?(?:\\w{9})*;",   //     number of reps - OK
 971             "(?:\\w{4})+?;",     // lazy repetition - OK
 972             "(?:\\w{4})++;",     // possessive repetition - OK
 973             "(?:\\w{2,}?)+;",    // non-deterministic - OK
 974             "(\\w{4})+;",        // capturing group - OK
 975         };
 976 
 977         for (int i = 0; i < patterns.length; i++) {
 978             // Check find()
 979             check(patterns[i], 0, input, input, true);
 980             // Check matches()
 981             Pattern p = Pattern.compile(patterns[i]);
 982             Matcher m = p.matcher(input);
 983 
 984             if (m.matches()) {
 985                 if (!m.group(0).equals(input))
 986                     failCount++;
 987             } else {
 988                 failCount++;
 989             }
 990         }
 991 
 992         report("Non capturing repetition");
 993     }
 994 
 995     // This test is for 6358731
 996     private static void notCapturedGroupCurlyMatchTest() throws Exception {
 997         Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
 998         Matcher matcher = pattern.matcher("abcd");
 999         if (!matcher.matches() ||
1000              matcher.group(1) != null ||
1001              !matcher.group(2).equals("abcd")) {
1002             failCount++;
1003         }
1004         report("Not captured GroupCurly");
1005     }
1006 
1007     // This test is for 4706545
1008     private static void javaCharClassTest() throws Exception {
1009         for (int i=0; i<1000; i++) {
1010             char c = (char)generator.nextInt();
1011             check("{javaLowerCase}", c, Character.isLowerCase(c));
1012             check("{javaUpperCase}", c, Character.isUpperCase(c));
1013             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1014             check("{javaTitleCase}", c, Character.isTitleCase(c));
1015             check("{javaDigit}", c, Character.isDigit(c));
1016             check("{javaDefined}", c, Character.isDefined(c));
1017             check("{javaLetter}", c, Character.isLetter(c));
1018             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1019             check("{javaJavaIdentifierStart}", c,
1020                   Character.isJavaIdentifierStart(c));
1021             check("{javaJavaIdentifierPart}", c,
1022                   Character.isJavaIdentifierPart(c));
1023             check("{javaUnicodeIdentifierStart}", c,
1024                   Character.isUnicodeIdentifierStart(c));
1025             check("{javaUnicodeIdentifierPart}", c,
1026                   Character.isUnicodeIdentifierPart(c));
1027             check("{javaIdentifierIgnorable}", c,
1028                   Character.isIdentifierIgnorable(c));
1029             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1030             check("{javaWhitespace}", c, Character.isWhitespace(c));
1031             check("{javaISOControl}", c, Character.isISOControl(c));
1032             check("{javaMirrored}", c, Character.isMirrored(c));
1033 
1034         }
1035 
1036         // Supplementary character test
1037         for (int i=0; i<1000; i++) {
1038             int c = generator.nextInt(Character.MAX_CODE_POINT
1039                                       - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1040                         + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1041             check("{javaLowerCase}", c, Character.isLowerCase(c));
1042             check("{javaUpperCase}", c, Character.isUpperCase(c));
1043             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1044             check("{javaTitleCase}", c, Character.isTitleCase(c));
1045             check("{javaDigit}", c, Character.isDigit(c));
1046             check("{javaDefined}", c, Character.isDefined(c));
1047             check("{javaLetter}", c, Character.isLetter(c));
1048             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1049             check("{javaJavaIdentifierStart}", c,
1050                   Character.isJavaIdentifierStart(c));
1051             check("{javaJavaIdentifierPart}", c,
1052                   Character.isJavaIdentifierPart(c));
1053             check("{javaUnicodeIdentifierStart}", c,
1054                   Character.isUnicodeIdentifierStart(c));
1055             check("{javaUnicodeIdentifierPart}", c,
1056                   Character.isUnicodeIdentifierPart(c));
1057             check("{javaIdentifierIgnorable}", c,
1058                   Character.isIdentifierIgnorable(c));
1059             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1060             check("{javaWhitespace}", c, Character.isWhitespace(c));
1061             check("{javaISOControl}", c, Character.isISOControl(c));
1062             check("{javaMirrored}", c, Character.isMirrored(c));
1063         }
1064 
1065         report("Java character classes");
1066     }
1067 
1068     // This test is for 4523620
1069     /*
1070     private static void numOccurrencesTest() throws Exception {
1071         Pattern pattern = Pattern.compile("aaa");
1072 
1073         if (pattern.numOccurrences("aaaaaa", false) != 2)
1074             failCount++;
1075         if (pattern.numOccurrences("aaaaaa", true) != 4)
1076             failCount++;
1077 
1078         pattern = Pattern.compile("^");
1079         if (pattern.numOccurrences("aaaaaa", false) != 1)
1080             failCount++;
1081         if (pattern.numOccurrences("aaaaaa", true) != 1)
1082             failCount++;
1083 
1084         report("Number of Occurrences");
1085     }
1086     */
1087 
1088     // This test is for 4776374
1089     private static void caretBetweenTerminatorsTest() throws Exception {
1090         int flags1 = Pattern.DOTALL;
1091         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1092         int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1093         int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1094 
1095         check("^....", flags1, "test\ntest", "test", true);
1096         check(".....^", flags1, "test\ntest", "test", false);
1097         check(".....^", flags1, "test\n", "test", false);
1098         check("....^", flags1, "test\r\n", "test", false);
1099 
1100         check("^....", flags2, "test\ntest", "test", true);
1101         check("....^", flags2, "test\ntest", "test", false);
1102         check(".....^", flags2, "test\n", "test", false);
1103         check("....^", flags2, "test\r\n", "test", false);
1104 
1105         check("^....", flags3, "test\ntest", "test", true);
1106         check(".....^", flags3, "test\ntest", "test\n", true);
1107         check(".....^", flags3, "test\u0085test", "test\u0085", false);
1108         check(".....^", flags3, "test\n", "test", false);
1109         check(".....^", flags3, "test\r\n", "test", false);
1110         check("......^", flags3, "test\r\ntest", "test\r\n", true);
1111 
1112         check("^....", flags4, "test\ntest", "test", true);
1113         check(".....^", flags3, "test\ntest", "test\n", true);
1114         check(".....^", flags4, "test\u0085test", "test\u0085", true);
1115         check(".....^", flags4, "test\n", "test\n", false);
1116         check(".....^", flags4, "test\r\n", "test\r", false);
1117 
1118         // Supplementary character test
1119         String t = toSupplementaries("test");
1120         check("^....", flags1, t+"\n"+t, t, true);
1121         check(".....^", flags1, t+"\n"+t, t, false);
1122         check(".....^", flags1, t+"\n", t, false);
1123         check("....^", flags1, t+"\r\n", t, false);
1124 
1125         check("^....", flags2, t+"\n"+t, t, true);
1126         check("....^", flags2, t+"\n"+t, t, false);
1127         check(".....^", flags2, t+"\n", t, false);
1128         check("....^", flags2, t+"\r\n", t, false);
1129 
1130         check("^....", flags3, t+"\n"+t, t, true);
1131         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1132         check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1133         check(".....^", flags3, t+"\n", t, false);
1134         check(".....^", flags3, t+"\r\n", t, false);
1135         check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1136 
1137         check("^....", flags4, t+"\n"+t, t, true);
1138         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1139         check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1140         check(".....^", flags4, t+"\n", t+"\n", false);
1141         check(".....^", flags4, t+"\r\n", t+"\r", false);
1142 
1143         report("Caret between terminators");
1144     }
1145 
1146     // This test is for 4727935
1147     private static void dollarAtEndTest() throws Exception {
1148         int flags1 = Pattern.DOTALL;
1149         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1150         int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1151 
1152         check("....$", flags1, "test\n", "test", true);
1153         check("....$", flags1, "test\r\n", "test", true);
1154         check(".....$", flags1, "test\n", "test\n", true);
1155         check(".....$", flags1, "test\u0085", "test\u0085", true);
1156         check("....$", flags1, "test\u0085", "test", true);
1157 
1158         check("....$", flags2, "test\n", "test", true);
1159         check(".....$", flags2, "test\n", "test\n", true);
1160         check(".....$", flags2, "test\u0085", "test\u0085", true);
1161         check("....$", flags2, "test\u0085", "est\u0085", true);
1162 
1163         check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1164         check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1165         check("....$blah", flags3, "test\nblah", "!!!!", false);
1166         check(".....$blah", flags3, "test\nblah", "!!!!", false);
1167 
1168         // Supplementary character test
1169         String t = toSupplementaries("test");
1170         String b = toSupplementaries("blah");
1171         check("....$", flags1, t+"\n", t, true);
1172         check("....$", flags1, t+"\r\n", t, true);
1173         check(".....$", flags1, t+"\n", t+"\n", true);
1174         check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1175         check("....$", flags1, t+"\u0085", t, true);
1176 
1177         check("....$", flags2, t+"\n", t, true);
1178         check(".....$", flags2, t+"\n", t+"\n", true);
1179         check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1180         check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1181 
1182         check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1183         check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1184         check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1185         check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1186 
1187         report("Dollar at End");
1188     }
1189 
1190     // This test is for 4711773
1191     private static void multilineDollarTest() throws Exception {
1192         Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1193         Matcher matcher = findCR.matcher("first bit\nsecond bit");
1194         matcher.find();
1195         if (matcher.start(0) != 9)
1196             failCount++;
1197         matcher.find();
1198         if (matcher.start(0) != 20)
1199             failCount++;
1200 
1201         // Supplementary character test
1202         matcher = findCR.matcher(toSupplementaries("first  bit\n second  bit")); // double BMP chars
1203         matcher.find();
1204         if (matcher.start(0) != 9*2)
1205             failCount++;
1206         matcher.find();
1207         if (matcher.start(0) != 20*2)
1208             failCount++;
1209 
1210         report("Multiline Dollar");
1211     }
1212 
1213     private static void reluctantRepetitionTest() throws Exception {
1214         Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1215         check(p, "1 word word word 2", true);
1216         check(p, "1 wor wo w 2", true);
1217         check(p, "1 word word 2", true);
1218         check(p, "1 word 2", true);
1219         check(p, "1 wo w w 2", true);
1220         check(p, "1 wo w 2", true);
1221         check(p, "1 wor w 2", true);
1222 
1223         p = Pattern.compile("([a-z])+?c");
1224         Matcher m = p.matcher("ababcdefdec");
1225         check(m, "ababc");
1226 
1227         // Supplementary character test
1228         p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1229         m = p.matcher(toSupplementaries("ababcdefdec"));
1230         check(m, toSupplementaries("ababc"));
1231 
1232         report("Reluctant Repetition");
1233     }
1234 
1235     private static void serializeTest() throws Exception {
1236         String patternStr = "(b)";
1237         String matchStr = "b";
1238         Pattern pattern = Pattern.compile(patternStr);
1239         ByteArrayOutputStream baos = new ByteArrayOutputStream();
1240         ObjectOutputStream oos = new ObjectOutputStream(baos);
1241         oos.writeObject(pattern);
1242         oos.close();
1243         ObjectInputStream ois = new ObjectInputStream(
1244             new ByteArrayInputStream(baos.toByteArray()));
1245         Pattern serializedPattern = (Pattern)ois.readObject();
1246         ois.close();
1247         Matcher matcher = serializedPattern.matcher(matchStr);
1248         if (!matcher.matches())
1249             failCount++;
1250         if (matcher.groupCount() != 1)
1251             failCount++;
1252 
1253         report("Serialization");
1254     }
1255 
1256     private static void gTest() {
1257         Pattern pattern = Pattern.compile("\\G\\w");
1258         Matcher matcher = pattern.matcher("abc#x#x");
1259         matcher.find();
1260         matcher.find();
1261         matcher.find();
1262         if (matcher.find())
1263             failCount++;
1264 
1265         pattern = Pattern.compile("\\GA*");
1266         matcher = pattern.matcher("1A2AA3");
1267         matcher.find();
1268         if (matcher.find())
1269             failCount++;
1270 
1271         pattern = Pattern.compile("\\GA*");
1272         matcher = pattern.matcher("1A2AA3");
1273         if (!matcher.find(1))
1274             failCount++;
1275         matcher.find();
1276         if (matcher.find())
1277             failCount++;
1278 
1279         report("\\G");
1280     }
1281 
1282     private static void zTest() {
1283         Pattern pattern = Pattern.compile("foo\\Z");
1284         // Positives
1285         check(pattern, "foo\u0085", true);
1286         check(pattern, "foo\u2028", true);
1287         check(pattern, "foo\u2029", true);
1288         check(pattern, "foo\n", true);
1289         check(pattern, "foo\r", true);
1290         check(pattern, "foo\r\n", true);
1291         // Negatives
1292         check(pattern, "fooo", false);
1293         check(pattern, "foo\n\r", false);
1294 
1295         pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1296         // Positives
1297         check(pattern, "foo", true);
1298         check(pattern, "foo\n", true);
1299         // Negatives
1300         check(pattern, "foo\r", false);
1301         check(pattern, "foo\u0085", false);
1302         check(pattern, "foo\u2028", false);
1303         check(pattern, "foo\u2029", false);
1304 
1305         report("\\Z");
1306     }
1307 
1308     private static void replaceFirstTest() {
1309         Pattern pattern = Pattern.compile("(ab)(c*)");
1310         Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1311         if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1312             failCount++;
1313 
1314         matcher.reset("zzzabccczzzabcczzzabccczzz");
1315         if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1316             failCount++;
1317 
1318         matcher.reset("zzzabccczzzabcczzzabccczzz");
1319         String result = matcher.replaceFirst("$1");
1320         if (!result.equals("zzzabzzzabcczzzabccczzz"))
1321             failCount++;
1322 
1323         matcher.reset("zzzabccczzzabcczzzabccczzz");
1324         result = matcher.replaceFirst("$2");
1325         if (!result.equals("zzzccczzzabcczzzabccczzz"))
1326             failCount++;
1327 
1328         pattern = Pattern.compile("a*");
1329         matcher = pattern.matcher("aaaaaaaaaa");
1330         if (!matcher.replaceFirst("test").equals("test"))
1331             failCount++;
1332 
1333         pattern = Pattern.compile("a+");
1334         matcher = pattern.matcher("zzzaaaaaaaaaa");
1335         if (!matcher.replaceFirst("test").equals("zzztest"))
1336             failCount++;
1337 
1338         // Supplementary character test
1339         pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1340         matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1341         if (!matcher.replaceFirst(toSupplementaries("test"))
1342                 .equals(toSupplementaries("testzzzabcczzzabccc")))
1343             failCount++;
1344 
1345         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1346         if (!matcher.replaceFirst(toSupplementaries("test")).
1347             equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1348             failCount++;
1349 
1350         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1351         result = matcher.replaceFirst("$1");
1352         if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1353             failCount++;
1354 
1355         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1356         result = matcher.replaceFirst("$2");
1357         if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1358             failCount++;
1359 
1360         pattern = Pattern.compile(toSupplementaries("a*"));
1361         matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1362         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1363             failCount++;
1364 
1365         pattern = Pattern.compile(toSupplementaries("a+"));
1366         matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1367         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1368             failCount++;
1369 
1370         report("Replace First");
1371     }
1372 
1373     private static void unixLinesTest() {
1374         Pattern pattern = Pattern.compile(".*");
1375         Matcher matcher = pattern.matcher("aa\u2028blah");
1376         matcher.find();
1377         if (!matcher.group(0).equals("aa"))
1378             failCount++;
1379 
1380         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1381         matcher = pattern.matcher("aa\u2028blah");
1382         matcher.find();
1383         if (!matcher.group(0).equals("aa\u2028blah"))
1384             failCount++;
1385 
1386         pattern = Pattern.compile("[az]$",
1387                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1388         matcher = pattern.matcher("aa\u2028zz");
1389         check(matcher, "a\u2028", false);
1390 
1391         // Supplementary character test
1392         pattern = Pattern.compile(".*");
1393         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1394         matcher.find();
1395         if (!matcher.group(0).equals(toSupplementaries("aa")))
1396             failCount++;
1397 
1398         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1399         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1400         matcher.find();
1401         if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1402             failCount++;
1403 
1404         pattern = Pattern.compile(toSupplementaries("[az]$"),
1405                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1406         matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1407         check(matcher, toSupplementaries("a\u2028"), false);
1408 
1409         report("Unix Lines");
1410     }
1411 
1412     private static void commentsTest() {
1413         int flags = Pattern.COMMENTS;
1414 
1415         Pattern pattern = Pattern.compile("aa \\# aa", flags);
1416         Matcher matcher = pattern.matcher("aa#aa");
1417         if (!matcher.matches())
1418             failCount++;
1419 
1420         pattern = Pattern.compile("aa  # blah", flags);
1421         matcher = pattern.matcher("aa");
1422         if (!matcher.matches())
1423             failCount++;
1424 
1425         pattern = Pattern.compile("aa blah", flags);
1426         matcher = pattern.matcher("aablah");
1427         if (!matcher.matches())
1428              failCount++;
1429 
1430         pattern = Pattern.compile("aa  # blah blech  ", flags);
1431         matcher = pattern.matcher("aa");
1432         if (!matcher.matches())
1433             failCount++;
1434 
1435         pattern = Pattern.compile("aa  # blah\n  ", flags);
1436         matcher = pattern.matcher("aa");
1437         if (!matcher.matches())
1438             failCount++;
1439 
1440         pattern = Pattern.compile("aa  # blah\nbc # blech", flags);
1441         matcher = pattern.matcher("aabc");
1442         if (!matcher.matches())
1443              failCount++;
1444 
1445         pattern = Pattern.compile("aa  # blah\nbc# blech", flags);
1446         matcher = pattern.matcher("aabc");
1447         if (!matcher.matches())
1448              failCount++;
1449 
1450         pattern = Pattern.compile("aa  # blah\nbc\\# blech", flags);
1451         matcher = pattern.matcher("aabc#blech");
1452         if (!matcher.matches())
1453              failCount++;
1454 
1455         // Supplementary character test
1456         pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1457         matcher = pattern.matcher(toSupplementaries("aa#aa"));
1458         if (!matcher.matches())
1459             failCount++;
1460 
1461         pattern = Pattern.compile(toSupplementaries("aa  # blah"), flags);
1462         matcher = pattern.matcher(toSupplementaries("aa"));
1463         if (!matcher.matches())
1464             failCount++;
1465 
1466         pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1467         matcher = pattern.matcher(toSupplementaries("aablah"));
1468         if (!matcher.matches())
1469              failCount++;
1470 
1471         pattern = Pattern.compile(toSupplementaries("aa  # blah blech  "), flags);
1472         matcher = pattern.matcher(toSupplementaries("aa"));
1473         if (!matcher.matches())
1474             failCount++;
1475 
1476         pattern = Pattern.compile(toSupplementaries("aa  # blah\n  "), flags);
1477         matcher = pattern.matcher(toSupplementaries("aa"));
1478         if (!matcher.matches())
1479             failCount++;
1480 
1481         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc # blech"), flags);
1482         matcher = pattern.matcher(toSupplementaries("aabc"));
1483         if (!matcher.matches())
1484              failCount++;
1485 
1486         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc# blech"), flags);
1487         matcher = pattern.matcher(toSupplementaries("aabc"));
1488         if (!matcher.matches())
1489              failCount++;
1490 
1491         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc\\# blech"), flags);
1492         matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1493         if (!matcher.matches())
1494              failCount++;
1495 
1496         report("Comments");
1497     }
1498 
1499     private static void caseFoldingTest() { // bug 4504687
1500         int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1501         Pattern pattern = Pattern.compile("aa", flags);
1502         Matcher matcher = pattern.matcher("ab");
1503         if (matcher.matches())
1504             failCount++;
1505 
1506         pattern = Pattern.compile("aA", flags);
1507         matcher = pattern.matcher("ab");
1508         if (matcher.matches())
1509             failCount++;
1510 
1511         pattern = Pattern.compile("aa", flags);
1512         matcher = pattern.matcher("aB");
1513         if (matcher.matches())
1514             failCount++;
1515         matcher = pattern.matcher("Ab");
1516         if (matcher.matches())
1517             failCount++;
1518 
1519         // ASCII               "a"
1520         // Latin-1 Supplement  "a" + grave
1521         // Cyrillic            "a"
1522         String[] patterns = new String[] {
1523             //single
1524             "a", "\u00e0", "\u0430",
1525             //slice
1526             "ab", "\u00e0\u00e1", "\u0430\u0431",
1527             //class single
1528             "[a]", "[\u00e0]", "[\u0430]",
1529             //class range
1530             "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1531             //back reference
1532             "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1533         };
1534 
1535         String[] texts = new String[] {
1536             "A", "\u00c0", "\u0410",
1537             "AB", "\u00c0\u00c1", "\u0410\u0411",
1538             "A", "\u00c0", "\u0410",
1539             "B", "\u00c2", "\u0411",
1540             "aA", "\u00e0\u00c0", "\u0430\u0410"
1541         };
1542 
1543         boolean[] expected = new boolean[] {
1544             true, false, false,
1545             true, false, false,
1546             true, false, false,
1547             true, false, false,
1548             true, false, false
1549         };
1550 
1551         flags = Pattern.CASE_INSENSITIVE;
1552         for (int i = 0; i < patterns.length; i++) {
1553             pattern = Pattern.compile(patterns[i], flags);
1554             matcher = pattern.matcher(texts[i]);
1555             if (matcher.matches() != expected[i]) {
1556                 System.out.println("<1> Failed at " + i);
1557                 failCount++;
1558             }
1559         }
1560 
1561         flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1562         for (int i = 0; i < patterns.length; i++) {
1563             pattern = Pattern.compile(patterns[i], flags);
1564             matcher = pattern.matcher(texts[i]);
1565             if (!matcher.matches()) {
1566                 System.out.println("<2> Failed at " + i);
1567                 failCount++;
1568             }
1569         }
1570         // flag unicode_case alone should do nothing
1571         flags = Pattern.UNICODE_CASE;
1572         for (int i = 0; i < patterns.length; i++) {
1573             pattern = Pattern.compile(patterns[i], flags);
1574             matcher = pattern.matcher(texts[i]);
1575             if (matcher.matches()) {
1576                 System.out.println("<3> Failed at " + i);
1577                 failCount++;
1578             }
1579         }
1580 
1581         // Special cases: i, I, u+0131 and u+0130
1582         flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1583         pattern = Pattern.compile("[h-j]+", flags);
1584         if (!pattern.matcher("\u0131\u0130").matches())
1585             failCount++;
1586         report("Case Folding");
1587     }
1588 
1589     private static void appendTest() {
1590         Pattern pattern = Pattern.compile("(ab)(cd)");
1591         Matcher matcher = pattern.matcher("abcd");
1592         String result = matcher.replaceAll("$2$1");
1593         if (!result.equals("cdab"))
1594             failCount++;
1595 
1596         String  s1 = "Swap all: first = 123, second = 456";
1597         String  s2 = "Swap one: first = 123, second = 456";
1598         String  r  = "$3$2$1";
1599         pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1600         matcher = pattern.matcher(s1);
1601 
1602         result = matcher.replaceAll(r);
1603         if (!result.equals("Swap all: 123 = first, 456 = second"))
1604             failCount++;
1605 
1606         matcher = pattern.matcher(s2);
1607 
1608         if (matcher.find()) {
1609             StringBuffer sb = new StringBuffer();
1610             matcher.appendReplacement(sb, r);
1611             matcher.appendTail(sb);
1612             result = sb.toString();
1613             if (!result.equals("Swap one: 123 = first, second = 456"))
1614                 failCount++;
1615         }
1616 
1617         // Supplementary character test
1618         pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1619         matcher = pattern.matcher(toSupplementaries("abcd"));
1620         result = matcher.replaceAll("$2$1");
1621         if (!result.equals(toSupplementaries("cdab")))
1622             failCount++;
1623 
1624         s1 = toSupplementaries("Swap all: first = 123, second = 456");
1625         s2 = toSupplementaries("Swap one: first = 123, second = 456");
1626         r  = toSupplementaries("$3$2$1");
1627         pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1628         matcher = pattern.matcher(s1);
1629 
1630         result = matcher.replaceAll(r);
1631         if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1632             failCount++;
1633 
1634         matcher = pattern.matcher(s2);
1635 
1636         if (matcher.find()) {
1637             StringBuffer sb = new StringBuffer();
1638             matcher.appendReplacement(sb, r);
1639             matcher.appendTail(sb);
1640             result = sb.toString();
1641             if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1642                 failCount++;
1643         }
1644         report("Append");
1645     }
1646 
1647     private static void splitTest() {
1648         Pattern pattern = Pattern.compile(":");
1649         String[] result = pattern.split("foo:and:boo", 2);
1650         if (!result[0].equals("foo"))
1651             failCount++;
1652         if (!result[1].equals("and:boo"))
1653             failCount++;
1654         // Supplementary character test
1655         Pattern patternX = Pattern.compile(toSupplementaries("X"));
1656         result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1657         if (!result[0].equals(toSupplementaries("foo")))
1658             failCount++;
1659         if (!result[1].equals(toSupplementaries("andXboo")))
1660             failCount++;
1661 
1662         CharBuffer cb = CharBuffer.allocate(100);
1663         cb.put("foo:and:boo");
1664         cb.flip();
1665         result = pattern.split(cb);
1666         if (!result[0].equals("foo"))
1667             failCount++;
1668         if (!result[1].equals("and"))
1669             failCount++;
1670         if (!result[2].equals("boo"))
1671             failCount++;
1672 
1673         // Supplementary character test
1674         CharBuffer cbs = CharBuffer.allocate(100);
1675         cbs.put(toSupplementaries("fooXandXboo"));
1676         cbs.flip();
1677         result = patternX.split(cbs);
1678         if (!result[0].equals(toSupplementaries("foo")))
1679             failCount++;
1680         if (!result[1].equals(toSupplementaries("and")))
1681             failCount++;
1682         if (!result[2].equals(toSupplementaries("boo")))
1683             failCount++;
1684 
1685         String source = "0123456789";
1686         for (int limit=-2; limit<3; limit++) {
1687             for (int x=0; x<10; x++) {
1688                 result = source.split(Integer.toString(x), limit);
1689                 int expectedLength = limit < 1 ? 2 : limit;
1690 
1691                 if ((limit == 0) && (x == 9)) {
1692                     // expected dropping of ""
1693                     if (result.length != 1)
1694                         failCount++;
1695                     if (!result[0].equals("012345678")) {
1696                         failCount++;
1697                     }
1698                 } else {
1699                     if (result.length != expectedLength) {
1700                         failCount++;
1701                     }
1702                     if (!result[0].equals(source.substring(0,x))) {
1703                         if (limit != 1) {
1704                             failCount++;
1705                         } else {
1706                             if (!result[0].equals(source.substring(0,10))) {
1707                                 failCount++;
1708                             }
1709                         }
1710                     }
1711                     if (expectedLength > 1) { // Check segment 2
1712                         if (!result[1].equals(source.substring(x+1,10)))
1713                             failCount++;
1714                     }
1715                 }
1716             }
1717         }
1718         // Check the case for no match found
1719         for (int limit=-2; limit<3; limit++) {
1720             result = source.split("e", limit);
1721             if (result.length != 1)
1722                 failCount++;
1723             if (!result[0].equals(source))
1724                 failCount++;
1725         }
1726         // Check the case for limit == 0, source = "";
1727         source = "";
1728         result = source.split("e", 0);
1729         if (result.length != 1)
1730             failCount++;
1731         if (!result[0].equals(source))
1732             failCount++;
1733 
1734         report("Split");
1735     }
1736 
1737     private static void negationTest() {
1738         Pattern pattern = Pattern.compile("[\\[@^]+");
1739         Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1740         if (!matcher.find())
1741             failCount++;
1742         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1743             failCount++;
1744         pattern = Pattern.compile("[@\\[^]+");
1745         matcher = pattern.matcher("@@@@[[[[^^^^");
1746         if (!matcher.find())
1747             failCount++;
1748         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1749             failCount++;
1750         pattern = Pattern.compile("[@\\[^@]+");
1751         matcher = pattern.matcher("@@@@[[[[^^^^");
1752         if (!matcher.find())
1753             failCount++;
1754         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1755             failCount++;
1756 
1757         pattern = Pattern.compile("\\)");
1758         matcher = pattern.matcher("xxx)xxx");
1759         if (!matcher.find())
1760             failCount++;
1761 
1762         report("Negation");
1763     }
1764 
1765     private static void ampersandTest() {
1766         Pattern pattern = Pattern.compile("[&@]+");
1767         check(pattern, "@@@@&&&&", true);
1768 
1769         pattern = Pattern.compile("[@&]+");
1770         check(pattern, "@@@@&&&&", true);
1771 
1772         pattern = Pattern.compile("[@\\&]+");
1773         check(pattern, "@@@@&&&&", true);
1774 
1775         report("Ampersand");
1776     }
1777 
1778     private static void octalTest() throws Exception {
1779         Pattern pattern = Pattern.compile("\\u0007");
1780         Matcher matcher = pattern.matcher("\u0007");
1781         if (!matcher.matches())
1782             failCount++;
1783         pattern = Pattern.compile("\\07");
1784         matcher = pattern.matcher("\u0007");
1785         if (!matcher.matches())
1786             failCount++;
1787         pattern = Pattern.compile("\\007");
1788         matcher = pattern.matcher("\u0007");
1789         if (!matcher.matches())
1790             failCount++;
1791         pattern = Pattern.compile("\\0007");
1792         matcher = pattern.matcher("\u0007");
1793         if (!matcher.matches())
1794             failCount++;
1795         pattern = Pattern.compile("\\040");
1796         matcher = pattern.matcher("\u0020");
1797         if (!matcher.matches())
1798             failCount++;
1799         pattern = Pattern.compile("\\0403");
1800         matcher = pattern.matcher("\u00203");
1801         if (!matcher.matches())
1802             failCount++;
1803         pattern = Pattern.compile("\\0103");
1804         matcher = pattern.matcher("\u0043");
1805         if (!matcher.matches())
1806             failCount++;
1807 
1808         report("Octal");
1809     }
1810 
1811     private static void longPatternTest() throws Exception {
1812         try {
1813             Pattern pattern = Pattern.compile(
1814                 "a 32-character-long pattern xxxx");
1815             pattern = Pattern.compile("a 33-character-long pattern xxxxx");
1816             pattern = Pattern.compile("a thirty four character long regex");
1817             StringBuffer patternToBe = new StringBuffer(101);
1818             for (int i=0; i<100; i++)
1819                 patternToBe.append((char)(97 + i%26));
1820             pattern = Pattern.compile(patternToBe.toString());
1821         } catch (PatternSyntaxException e) {
1822             failCount++;
1823         }
1824 
1825         // Supplementary character test
1826         try {
1827             Pattern pattern = Pattern.compile(
1828                 toSupplementaries("a 32-character-long pattern xxxx"));
1829             pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
1830             pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
1831             StringBuffer patternToBe = new StringBuffer(101*2);
1832             for (int i=0; i<100; i++)
1833                 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
1834                                                      + 97 + i%26));
1835             pattern = Pattern.compile(patternToBe.toString());
1836         } catch (PatternSyntaxException e) {
1837             failCount++;
1838         }
1839         report("LongPattern");
1840     }
1841 
1842     private static void group0Test() throws Exception {
1843         Pattern pattern = Pattern.compile("(tes)ting");
1844         Matcher matcher = pattern.matcher("testing");
1845         check(matcher, "testing");
1846 
1847         matcher.reset("testing");
1848         if (matcher.lookingAt()) {
1849             if (!matcher.group(0).equals("testing"))
1850                 failCount++;
1851         } else {
1852             failCount++;
1853         }
1854 
1855         matcher.reset("testing");
1856         if (matcher.matches()) {
1857             if (!matcher.group(0).equals("testing"))
1858                 failCount++;
1859         } else {
1860             failCount++;
1861         }
1862 
1863         pattern = Pattern.compile("(tes)ting");
1864         matcher = pattern.matcher("testing");
1865         if (matcher.lookingAt()) {
1866             if (!matcher.group(0).equals("testing"))
1867                 failCount++;
1868         } else {
1869             failCount++;
1870         }
1871 
1872         pattern = Pattern.compile("^(tes)ting");
1873         matcher = pattern.matcher("testing");
1874         if (matcher.matches()) {
1875             if (!matcher.group(0).equals("testing"))
1876                 failCount++;
1877         } else {
1878             failCount++;
1879         }
1880 
1881         // Supplementary character test
1882         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1883         matcher = pattern.matcher(toSupplementaries("testing"));
1884         check(matcher, toSupplementaries("testing"));
1885 
1886         matcher.reset(toSupplementaries("testing"));
1887         if (matcher.lookingAt()) {
1888             if (!matcher.group(0).equals(toSupplementaries("testing")))
1889                 failCount++;
1890         } else {
1891             failCount++;
1892         }
1893 
1894         matcher.reset(toSupplementaries("testing"));
1895         if (matcher.matches()) {
1896             if (!matcher.group(0).equals(toSupplementaries("testing")))
1897                 failCount++;
1898         } else {
1899             failCount++;
1900         }
1901 
1902         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1903         matcher = pattern.matcher(toSupplementaries("testing"));
1904         if (matcher.lookingAt()) {
1905             if (!matcher.group(0).equals(toSupplementaries("testing")))
1906                 failCount++;
1907         } else {
1908             failCount++;
1909         }
1910 
1911         pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
1912         matcher = pattern.matcher(toSupplementaries("testing"));
1913         if (matcher.matches()) {
1914             if (!matcher.group(0).equals(toSupplementaries("testing")))
1915                 failCount++;
1916         } else {
1917             failCount++;
1918         }
1919 
1920         report("Group0");
1921     }
1922 
1923     private static void findIntTest() throws Exception {
1924         Pattern p = Pattern.compile("blah");
1925         Matcher m = p.matcher("zzzzblahzzzzzblah");
1926         boolean result = m.find(2);
1927         if (!result)
1928             failCount++;
1929 
1930         p = Pattern.compile("$");
1931         m = p.matcher("1234567890");
1932         result = m.find(10);
1933         if (!result)
1934             failCount++;
1935         try {
1936             result = m.find(11);
1937             failCount++;
1938         } catch (IndexOutOfBoundsException e) {
1939             // correct result
1940         }
1941 
1942         // Supplementary character test
1943         p = Pattern.compile(toSupplementaries("blah"));
1944         m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
1945         result = m.find(2);
1946         if (!result)
1947             failCount++;
1948 
1949         report("FindInt");
1950     }
1951 
1952     private static void emptyPatternTest() throws Exception {
1953         Pattern p = Pattern.compile("");
1954         Matcher m = p.matcher("foo");
1955 
1956         // Should find empty pattern at beginning of input
1957         boolean result = m.find();
1958         if (result != true)
1959             failCount++;
1960         if (m.start() != 0)
1961             failCount++;
1962 
1963         // Should not match entire input if input is not empty
1964         m.reset();
1965         result = m.matches();
1966         if (result == true)
1967             failCount++;
1968 
1969         try {
1970             m.start(0);
1971             failCount++;
1972         } catch (IllegalStateException e) {
1973             // Correct result
1974         }
1975 
1976         // Should match entire input if input is empty
1977         m.reset("");
1978         result = m.matches();
1979         if (result != true)
1980             failCount++;
1981 
1982         result = Pattern.matches("", "");
1983         if (result != true)
1984             failCount++;
1985 
1986         result = Pattern.matches("", "foo");
1987         if (result == true)
1988             failCount++;
1989         report("EmptyPattern");
1990     }
1991 
1992     private static void charClassTest() throws Exception {
1993         Pattern pattern = Pattern.compile("blah[ab]]blech");
1994         check(pattern, "blahb]blech", true);
1995 
1996         pattern = Pattern.compile("[abc[def]]");
1997         check(pattern, "b", true);
1998 
1999         // Supplementary character tests
2000         pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2001         check(pattern, toSupplementaries("blahb]blech"), true);
2002 
2003         pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2004         check(pattern, toSupplementaries("b"), true);
2005 
2006         try {
2007             // u00ff when UNICODE_CASE
2008             pattern = Pattern.compile("[ab\u00ffcd]",
2009                                       Pattern.CASE_INSENSITIVE|
2010                                       Pattern.UNICODE_CASE);
2011             check(pattern, "ab\u00ffcd", true);
2012             check(pattern, "Ab\u0178Cd", true);
2013 
2014             // u00b5 when UNICODE_CASE
2015             pattern = Pattern.compile("[ab\u00b5cd]",
2016                                       Pattern.CASE_INSENSITIVE|
2017                                       Pattern.UNICODE_CASE);
2018             check(pattern, "ab\u00b5cd", true);
2019             check(pattern, "Ab\u039cCd", true);
2020         } catch (Exception e) { failCount++; }
2021 
2022         /* Special cases
2023            (1)LatinSmallLetterLongS u+017f
2024            (2)LatinSmallLetterDotlessI u+0131
2025            (3)LatineCapitalLetterIWithDotAbove u+0130
2026            (4)KelvinSign u+212a
2027            (5)AngstromSign u+212b
2028         */
2029         int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2030         pattern = Pattern.compile("[sik\u00c5]+", flags);
2031         if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2032             failCount++;
2033 
2034         report("CharClass");
2035     }
2036 
2037     private static void caretTest() throws Exception {
2038         Pattern pattern = Pattern.compile("\\w*");
2039         Matcher matcher = pattern.matcher("a#bc#def##g");
2040         check(matcher, "a");
2041         check(matcher, "");
2042         check(matcher, "bc");
2043         check(matcher, "");
2044         check(matcher, "def");
2045         check(matcher, "");
2046         check(matcher, "");
2047         check(matcher, "g");
2048         check(matcher, "");
2049         if (matcher.find())
2050             failCount++;
2051 
2052         pattern = Pattern.compile("^\\w*");
2053         matcher = pattern.matcher("a#bc#def##g");
2054         check(matcher, "a");
2055         if (matcher.find())
2056             failCount++;
2057 
2058         pattern = Pattern.compile("\\w");
2059         matcher = pattern.matcher("abc##x");
2060         check(matcher, "a");
2061         check(matcher, "b");
2062         check(matcher, "c");
2063         check(matcher, "x");
2064         if (matcher.find())
2065             failCount++;
2066 
2067         pattern = Pattern.compile("^\\w");
2068         matcher = pattern.matcher("abc##x");
2069         check(matcher, "a");
2070         if (matcher.find())
2071             failCount++;
2072 
2073         pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2074         matcher = pattern.matcher("abcdef-ghi\njklmno");
2075         check(matcher, "abc");
2076         if (matcher.find())
2077             failCount++;
2078 
2079         pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2080         matcher = pattern.matcher("abcdef-ghi\njklmno");
2081         check(matcher, "abc");
2082         check(matcher, "jkl");
2083         if (matcher.find())
2084             failCount++;
2085 
2086         pattern = Pattern.compile("^", Pattern.MULTILINE);
2087         matcher = pattern.matcher("this is some text");
2088         String result = matcher.replaceAll("X");
2089         if (!result.equals("Xthis is some text"))
2090             failCount++;
2091 
2092         pattern = Pattern.compile("^");
2093         matcher = pattern.matcher("this is some text");
2094         result = matcher.replaceAll("X");
2095         if (!result.equals("Xthis is some text"))
2096             failCount++;
2097 
2098         pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2099         matcher = pattern.matcher("this is some text\n");
2100         result = matcher.replaceAll("X");
2101         if (!result.equals("Xthis is some text\n"))
2102             failCount++;
2103 
2104         report("Caret");
2105     }
2106 
2107     private static void groupCaptureTest() throws Exception {
2108         // Independent group
2109         Pattern pattern = Pattern.compile("x+(?>y+)z+");
2110         Matcher matcher = pattern.matcher("xxxyyyzzz");
2111         matcher.find();
2112         try {
2113             String blah = matcher.group(1);
2114             failCount++;
2115         } catch (IndexOutOfBoundsException ioobe) {
2116             // Good result
2117         }
2118         // Pure group
2119         pattern = Pattern.compile("x+(?:y+)z+");
2120         matcher = pattern.matcher("xxxyyyzzz");
2121         matcher.find();
2122         try {
2123             String blah = matcher.group(1);
2124             failCount++;
2125         } catch (IndexOutOfBoundsException ioobe) {
2126             // Good result
2127         }
2128 
2129         // Supplementary character tests
2130         // Independent group
2131         pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2132         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2133         matcher.find();
2134         try {
2135             String blah = matcher.group(1);
2136             failCount++;
2137         } catch (IndexOutOfBoundsException ioobe) {
2138             // Good result
2139         }
2140         // Pure group
2141         pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2142         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2143         matcher.find();
2144         try {
2145             String blah = matcher.group(1);
2146             failCount++;
2147         } catch (IndexOutOfBoundsException ioobe) {
2148             // Good result
2149         }
2150 
2151         report("GroupCapture");
2152     }
2153 
2154     private static void backRefTest() throws Exception {
2155         Pattern pattern = Pattern.compile("(a*)bc\\1");
2156         check(pattern, "zzzaabcazzz", true);
2157 
2158         pattern = Pattern.compile("(a*)bc\\1");
2159         check(pattern, "zzzaabcaazzz", true);
2160 
2161         pattern = Pattern.compile("(abc)(def)\\1");
2162         check(pattern, "abcdefabc", true);
2163 
2164         pattern = Pattern.compile("(abc)(def)\\3");
2165         check(pattern, "abcdefabc", false);
2166 
2167         try {
2168             for (int i = 1; i < 10; i++) {
2169                 // Make sure backref 1-9 are always accepted
2170                 pattern = Pattern.compile("abcdef\\" + i);
2171                 // and fail to match if the target group does not exit
2172                 check(pattern, "abcdef", false);
2173             }
2174         } catch(PatternSyntaxException e) {
2175             failCount++;
2176         }
2177 
2178         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2179         check(pattern, "abcdefghija", false);
2180         check(pattern, "abcdefghija1", true);
2181 
2182         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2183         check(pattern, "abcdefghijkk", true);
2184 
2185         pattern = Pattern.compile("(a)bcdefghij\\11");
2186         check(pattern, "abcdefghija1", true);
2187 
2188         // Supplementary character tests
2189         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2190         check(pattern, toSupplementaries("zzzaabcazzz"), true);
2191 
2192         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2193         check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2194 
2195         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2196         check(pattern, toSupplementaries("abcdefabc"), true);
2197 
2198         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2199         check(pattern, toSupplementaries("abcdefabc"), false);
2200 
2201         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2202         check(pattern, toSupplementaries("abcdefghija"), false);
2203         check(pattern, toSupplementaries("abcdefghija1"), true);
2204 
2205         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2206         check(pattern, toSupplementaries("abcdefghijkk"), true);
2207 
2208         report("BackRef");
2209     }
2210 
2211     /**
2212      * Unicode Technical Report #18, section 2.6 End of Line
2213      * There is no empty line to be matched in the sequence \u000D\u000A
2214      * but there is an empty line in the sequence \u000A\u000D.
2215      */
2216     private static void anchorTest() throws Exception {
2217         Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2218         Matcher m = p.matcher("blah1\r\nblah2");
2219         m.find();
2220         m.find();
2221         if (!m.group().equals("blah2"))
2222             failCount++;
2223 
2224         m.reset("blah1\n\rblah2");
2225         m.find();
2226         m.find();
2227         m.find();
2228         if (!m.group().equals("blah2"))
2229             failCount++;
2230 
2231         // Test behavior of $ with \r\n at end of input
2232         p = Pattern.compile(".+$");
2233         m = p.matcher("blah1\r\n");
2234         if (!m.find())
2235             failCount++;
2236        if (!m.group().equals("blah1"))
2237             failCount++;
2238         if (m.find())
2239             failCount++;
2240 
2241         // Test behavior of $ with \r\n at end of input in multiline
2242         p = Pattern.compile(".+$", Pattern.MULTILINE);
2243         m = p.matcher("blah1\r\n");
2244         if (!m.find())
2245             failCount++;
2246         if (m.find())
2247             failCount++;
2248 
2249         // Test for $ recognition of \u0085 for bug 4527731
2250         p = Pattern.compile(".+$", Pattern.MULTILINE);
2251         m = p.matcher("blah1\u0085");
2252         if (!m.find())
2253             failCount++;
2254 
2255         // Supplementary character test
2256         p = Pattern.compile("^.*$", Pattern.MULTILINE);
2257         m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2258         m.find();
2259         m.find();
2260         if (!m.group().equals(toSupplementaries("blah2")))
2261             failCount++;
2262 
2263         m.reset(toSupplementaries("blah1\n\rblah2"));
2264         m.find();
2265         m.find();
2266         m.find();
2267         if (!m.group().equals(toSupplementaries("blah2")))
2268             failCount++;
2269 
2270         // Test behavior of $ with \r\n at end of input
2271         p = Pattern.compile(".+$");
2272         m = p.matcher(toSupplementaries("blah1\r\n"));
2273         if (!m.find())
2274             failCount++;
2275         if (!m.group().equals(toSupplementaries("blah1")))
2276             failCount++;
2277         if (m.find())
2278             failCount++;
2279 
2280         // Test behavior of $ with \r\n at end of input in multiline
2281         p = Pattern.compile(".+$", Pattern.MULTILINE);
2282         m = p.matcher(toSupplementaries("blah1\r\n"));
2283         if (!m.find())
2284             failCount++;
2285         if (m.find())
2286             failCount++;
2287 
2288         // Test for $ recognition of \u0085 for bug 4527731
2289         p = Pattern.compile(".+$", Pattern.MULTILINE);
2290         m = p.matcher(toSupplementaries("blah1\u0085"));
2291         if (!m.find())
2292             failCount++;
2293 
2294         report("Anchors");
2295     }
2296 
2297     /**
2298      * A basic sanity test of Matcher.lookingAt().
2299      */
2300     private static void lookingAtTest() throws Exception {
2301         Pattern p = Pattern.compile("(ab)(c*)");
2302         Matcher m = p.matcher("abccczzzabcczzzabccc");
2303 
2304         if (!m.lookingAt())
2305             failCount++;
2306 
2307         if (!m.group().equals(m.group(0)))
2308             failCount++;
2309 
2310         m = p.matcher("zzzabccczzzabcczzzabccczzz");
2311         if (m.lookingAt())
2312             failCount++;
2313 
2314         // Supplementary character test
2315         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2316         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2317 
2318         if (!m.lookingAt())
2319             failCount++;
2320 
2321         if (!m.group().equals(m.group(0)))
2322             failCount++;
2323 
2324         m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2325         if (m.lookingAt())
2326             failCount++;
2327 
2328         report("Looking At");
2329     }
2330 
2331     /**
2332      * A basic sanity test of Matcher.matches().
2333      */
2334     private static void matchesTest() throws Exception {
2335         // matches()
2336         Pattern p = Pattern.compile("ulb(c*)");
2337         Matcher m = p.matcher("ulbcccccc");
2338         if (!m.matches())
2339             failCount++;
2340 
2341         // find() but not matches()
2342         m.reset("zzzulbcccccc");
2343         if (m.matches())
2344             failCount++;
2345 
2346         // lookingAt() but not matches()
2347         m.reset("ulbccccccdef");
2348         if (m.matches())
2349             failCount++;
2350 
2351         // matches()
2352         p = Pattern.compile("a|ad");
2353         m = p.matcher("ad");
2354         if (!m.matches())
2355             failCount++;
2356 
2357         // Supplementary character test
2358         // matches()
2359         p = Pattern.compile(toSupplementaries("ulb(c*)"));
2360         m = p.matcher(toSupplementaries("ulbcccccc"));
2361         if (!m.matches())
2362             failCount++;
2363 
2364         // find() but not matches()
2365         m.reset(toSupplementaries("zzzulbcccccc"));
2366         if (m.matches())
2367             failCount++;
2368 
2369         // lookingAt() but not matches()
2370         m.reset(toSupplementaries("ulbccccccdef"));
2371         if (m.matches())
2372             failCount++;
2373 
2374         // matches()
2375         p = Pattern.compile(toSupplementaries("a|ad"));
2376         m = p.matcher(toSupplementaries("ad"));
2377         if (!m.matches())
2378             failCount++;
2379 
2380         report("Matches");
2381     }
2382 
2383     /**
2384      * A basic sanity test of Pattern.matches().
2385      */
2386     private static void patternMatchesTest() throws Exception {
2387         // matches()
2388         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2389                              toSupplementaries("ulbcccccc")))
2390             failCount++;
2391 
2392         // find() but not matches()
2393         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2394                             toSupplementaries("zzzulbcccccc")))
2395             failCount++;
2396 
2397         // lookingAt() but not matches()
2398         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2399                             toSupplementaries("ulbccccccdef")))
2400             failCount++;
2401 
2402         // Supplementary character test
2403         // matches()
2404         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2405                              toSupplementaries("ulbcccccc")))
2406             failCount++;
2407 
2408         // find() but not matches()
2409         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2410                             toSupplementaries("zzzulbcccccc")))
2411             failCount++;
2412 
2413         // lookingAt() but not matches()
2414         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2415                             toSupplementaries("ulbccccccdef")))
2416             failCount++;
2417 
2418         report("Pattern Matches");
2419     }
2420 
2421     /**
2422      * Canonical equivalence testing. Tests the ability of the engine
2423      * to match sequences that are not explicitly specified in the
2424      * pattern when they are considered equivalent by the Unicode Standard.
2425      */
2426     private static void ceTest() throws Exception {
2427         // Decomposed char outside char classes
2428         Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2429         Matcher m = p.matcher("test\u00e5");
2430         if (!m.matches())
2431             failCount++;
2432 
2433         m.reset("testa\u030a");
2434         if (!m.matches())
2435             failCount++;
2436 
2437         // Composed char outside char classes
2438         p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2439         m = p.matcher("test\u00e5");
2440         if (!m.matches())
2441             failCount++;
2442 
2443         m.reset("testa\u030a");
2444         if (!m.find())
2445             failCount++;
2446 
2447         // Decomposed char inside a char class
2448         p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2449         m = p.matcher("test\u00e5");
2450         if (!m.find())
2451             failCount++;
2452 
2453         m.reset("testa\u030a");
2454         if (!m.find())
2455             failCount++;
2456 
2457         // Composed char inside a char class
2458         p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2459         m = p.matcher("test\u00e5");
2460         if (!m.find())
2461             failCount++;
2462 
2463         m.reset("testa\u0300");
2464         if (!m.find())
2465             failCount++;
2466 
2467         m.reset("testa\u030a");
2468         if (!m.find())
2469             failCount++;
2470 
2471         // Marks that cannot legally change order and be equivalent
2472         p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2473         check(p, "testa\u0308\u0300", true);
2474         check(p, "testa\u0300\u0308", false);
2475 
2476         // Marks that can legally change order and be equivalent
2477         p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2478         check(p, "testa\u0308\u0323", true);
2479         check(p, "testa\u0323\u0308", true);
2480 
2481         // Test all equivalences of the sequence a\u0308\u0323\u0300
2482         p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2483         check(p, "testa\u0308\u0323\u0300", true);
2484         check(p, "testa\u0323\u0308\u0300", true);
2485         check(p, "testa\u0308\u0300\u0323", true);
2486         check(p, "test\u00e4\u0323\u0300", true);
2487         check(p, "test\u00e4\u0300\u0323", true);
2488 
2489         /*
2490          * The following canonical equivalence tests don't work. Bug id: 4916384.
2491          *
2492         // Decomposed hangul (jamos)
2493         p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2494         m = p.matcher("\u1100\u1161");
2495         if (!m.matches())
2496             failCount++;
2497 
2498         m.reset("\uac00");
2499         if (!m.matches())
2500             failCount++;
2501 
2502         // Composed hangul
2503         p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2504         m = p.matcher("\u1100\u1161");
2505         if (!m.matches())
2506             failCount++;
2507 
2508         m.reset("\uac00");
2509         if (!m.matches())
2510             failCount++;
2511 
2512         // Decomposed supplementary outside char classes
2513         p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2514         m = p.matcher("test\ud834\uddc0");
2515         if (!m.matches())
2516             failCount++;
2517 
2518         m.reset("test\ud834\uddbc\ud834\udd6f");
2519         if (!m.matches())
2520             failCount++;
2521 
2522         // Composed supplementary outside char classes
2523         p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2524         m.reset("test\ud834\uddbc\ud834\udd6f");
2525         if (!m.matches())
2526             failCount++;
2527 
2528         m = p.matcher("test\ud834\uddc0");
2529         if (!m.matches())
2530             failCount++;
2531 
2532         */
2533 
2534         report("Canonical Equivalence");
2535     }
2536 
2537     /**
2538      * A basic sanity test of Matcher.replaceAll().
2539      */
2540     private static void globalSubstitute() throws Exception {
2541         // Global substitution with a literal
2542         Pattern p = Pattern.compile("(ab)(c*)");
2543         Matcher m = p.matcher("abccczzzabcczzzabccc");
2544         if (!m.replaceAll("test").equals("testzzztestzzztest"))
2545             failCount++;
2546 
2547         m.reset("zzzabccczzzabcczzzabccczzz");
2548         if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2549             failCount++;
2550 
2551         // Global substitution with groups
2552         m.reset("zzzabccczzzabcczzzabccczzz");
2553         String result = m.replaceAll("$1");
2554         if (!result.equals("zzzabzzzabzzzabzzz"))
2555             failCount++;
2556 
2557         // Supplementary character test
2558         // Global substitution with a literal
2559         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2560         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2561         if (!m.replaceAll(toSupplementaries("test")).
2562             equals(toSupplementaries("testzzztestzzztest")))
2563             failCount++;
2564 
2565         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2566         if (!m.replaceAll(toSupplementaries("test")).
2567             equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2568             failCount++;
2569 
2570         // Global substitution with groups
2571         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2572         result = m.replaceAll("$1");
2573         if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2574             failCount++;
2575 
2576         report("Global Substitution");
2577     }
2578 
2579     /**
2580      * Tests the usage of Matcher.appendReplacement() with literal
2581      * and group substitutions.
2582      */
2583     private static void stringbufferSubstitute() throws Exception {
2584         // SB substitution with literal
2585         String blah = "zzzblahzzz";
2586         Pattern p = Pattern.compile("blah");
2587         Matcher m = p.matcher(blah);
2588         StringBuffer result = new StringBuffer();
2589         try {
2590             m.appendReplacement(result, "blech");
2591             failCount++;
2592         } catch (IllegalStateException e) {
2593         }
2594         m.find();
2595         m.appendReplacement(result, "blech");
2596         if (!result.toString().equals("zzzblech"))
2597             failCount++;
2598 
2599         m.appendTail(result);
2600         if (!result.toString().equals("zzzblechzzz"))
2601             failCount++;
2602 
2603         // SB substitution with groups
2604         blah = "zzzabcdzzz";
2605         p = Pattern.compile("(ab)(cd)*");
2606         m = p.matcher(blah);
2607         result = new StringBuffer();
2608         try {
2609             m.appendReplacement(result, "$1");
2610             failCount++;
2611         } catch (IllegalStateException e) {
2612         }
2613         m.find();
2614         m.appendReplacement(result, "$1");
2615         if (!result.toString().equals("zzzab"))
2616             failCount++;
2617 
2618         m.appendTail(result);
2619         if (!result.toString().equals("zzzabzzz"))
2620             failCount++;
2621 
2622         // SB substitution with 3 groups
2623         blah = "zzzabcdcdefzzz";
2624         p = Pattern.compile("(ab)(cd)*(ef)");
2625         m = p.matcher(blah);
2626         result = new StringBuffer();
2627         try {
2628             m.appendReplacement(result, "$1w$2w$3");
2629             failCount++;
2630         } catch (IllegalStateException e) {
2631         }
2632         m.find();
2633         m.appendReplacement(result, "$1w$2w$3");
2634         if (!result.toString().equals("zzzabwcdwef"))
2635             failCount++;
2636 
2637         m.appendTail(result);
2638         if (!result.toString().equals("zzzabwcdwefzzz"))
2639             failCount++;
2640 
2641         // SB substitution with groups and three matches
2642         // skipping middle match
2643         blah = "zzzabcdzzzabcddzzzabcdzzz";
2644         p = Pattern.compile("(ab)(cd*)");
2645         m = p.matcher(blah);
2646         result = new StringBuffer();
2647         try {
2648             m.appendReplacement(result, "$1");
2649             failCount++;
2650         } catch (IllegalStateException e) {
2651         }
2652         m.find();
2653         m.appendReplacement(result, "$1");
2654         if (!result.toString().equals("zzzab"))
2655             failCount++;
2656 
2657         m.find();
2658         m.find();
2659         m.appendReplacement(result, "$2");
2660         if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2661             failCount++;
2662 
2663         m.appendTail(result);
2664         if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2665             failCount++;
2666 
2667         // Check to make sure escaped $ is ignored
2668         blah = "zzzabcdcdefzzz";
2669         p = Pattern.compile("(ab)(cd)*(ef)");
2670         m = p.matcher(blah);
2671         result = new StringBuffer();
2672         m.find();
2673         m.appendReplacement(result, "$1w\\$2w$3");
2674         if (!result.toString().equals("zzzabw$2wef"))
2675             failCount++;
2676 
2677         m.appendTail(result);
2678         if (!result.toString().equals("zzzabw$2wefzzz"))
2679             failCount++;
2680 
2681         // Check to make sure a reference to nonexistent group causes error
2682         blah = "zzzabcdcdefzzz";
2683         p = Pattern.compile("(ab)(cd)*(ef)");
2684         m = p.matcher(blah);
2685         result = new StringBuffer();
2686         m.find();
2687         try {
2688             m.appendReplacement(result, "$1w$5w$3");
2689             failCount++;
2690         } catch (IndexOutOfBoundsException ioobe) {
2691             // Correct result
2692         }
2693 
2694         // Check double digit group references
2695         blah = "zzz123456789101112zzz";
2696         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2697         m = p.matcher(blah);
2698         result = new StringBuffer();
2699         m.find();
2700         m.appendReplacement(result, "$1w$11w$3");
2701         if (!result.toString().equals("zzz1w11w3"))
2702             failCount++;
2703 
2704         // Check to make sure it backs off $15 to $1 if only three groups
2705         blah = "zzzabcdcdefzzz";
2706         p = Pattern.compile("(ab)(cd)*(ef)");
2707         m = p.matcher(blah);
2708         result = new StringBuffer();
2709         m.find();
2710         m.appendReplacement(result, "$1w$15w$3");
2711         if (!result.toString().equals("zzzabwab5wef"))
2712             failCount++;
2713 
2714 
2715         // Supplementary character test
2716         // SB substitution with literal
2717         blah = toSupplementaries("zzzblahzzz");
2718         p = Pattern.compile(toSupplementaries("blah"));
2719         m = p.matcher(blah);
2720         result = new StringBuffer();
2721         try {
2722             m.appendReplacement(result, toSupplementaries("blech"));
2723             failCount++;
2724         } catch (IllegalStateException e) {
2725         }
2726         m.find();
2727         m.appendReplacement(result, toSupplementaries("blech"));
2728         if (!result.toString().equals(toSupplementaries("zzzblech")))
2729             failCount++;
2730 
2731         m.appendTail(result);
2732         if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2733             failCount++;
2734 
2735         // SB substitution with groups
2736         blah = toSupplementaries("zzzabcdzzz");
2737         p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2738         m = p.matcher(blah);
2739         result = new StringBuffer();
2740         try {
2741             m.appendReplacement(result, "$1");
2742             failCount++;
2743         } catch (IllegalStateException e) {
2744         }
2745         m.find();
2746         m.appendReplacement(result, "$1");
2747         if (!result.toString().equals(toSupplementaries("zzzab")))
2748             failCount++;
2749 
2750         m.appendTail(result);
2751         if (!result.toString().equals(toSupplementaries("zzzabzzz")))
2752             failCount++;
2753 
2754         // SB substitution with 3 groups
2755         blah = toSupplementaries("zzzabcdcdefzzz");
2756         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2757         m = p.matcher(blah);
2758         result = new StringBuffer();
2759         try {
2760             m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2761             failCount++;
2762         } catch (IllegalStateException e) {
2763         }
2764         m.find();
2765         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2766         if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
2767             failCount++;
2768 
2769         m.appendTail(result);
2770         if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
2771             failCount++;
2772 
2773         // SB substitution with groups and three matches
2774         // skipping middle match
2775         blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2776         p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2777         m = p.matcher(blah);
2778         result = new StringBuffer();
2779         try {
2780             m.appendReplacement(result, "$1");
2781             failCount++;
2782         } catch (IllegalStateException e) {
2783         }
2784         m.find();
2785         m.appendReplacement(result, "$1");
2786         if (!result.toString().equals(toSupplementaries("zzzab")))
2787             failCount++;
2788 
2789         m.find();
2790         m.find();
2791         m.appendReplacement(result, "$2");
2792         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
2793             failCount++;
2794 
2795         m.appendTail(result);
2796         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
2797             failCount++;
2798 
2799         // Check to make sure escaped $ is ignored
2800         blah = toSupplementaries("zzzabcdcdefzzz");
2801         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2802         m = p.matcher(blah);
2803         result = new StringBuffer();
2804         m.find();
2805         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2806         if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
2807             failCount++;
2808 
2809         m.appendTail(result);
2810         if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
2811             failCount++;
2812 
2813         // Check to make sure a reference to nonexistent group causes error
2814         blah = toSupplementaries("zzzabcdcdefzzz");
2815         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2816         m = p.matcher(blah);
2817         result = new StringBuffer();
2818         m.find();
2819         try {
2820             m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
2821             failCount++;
2822         } catch (IndexOutOfBoundsException ioobe) {
2823             // Correct result
2824         }
2825 
2826         // Check double digit group references
2827         blah = toSupplementaries("zzz123456789101112zzz");
2828         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2829         m = p.matcher(blah);
2830         result = new StringBuffer();
2831         m.find();
2832         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
2833         if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
2834             failCount++;
2835 
2836         // Check to make sure it backs off $15 to $1 if only three groups
2837         blah = toSupplementaries("zzzabcdcdefzzz");
2838         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2839         m = p.matcher(blah);
2840         result = new StringBuffer();
2841         m.find();
2842         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
2843         if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
2844             failCount++;
2845 
2846         // Check nothing has been appended into the output buffer if
2847         // the replacement string triggers IllegalArgumentException.
2848         p = Pattern.compile("(abc)");
2849         m = p.matcher("abcd");
2850         result = new StringBuffer();
2851         m.find();
2852         try {
2853             m.appendReplacement(result, ("xyz$g"));
2854             failCount++;
2855         } catch (IllegalArgumentException iae) {
2856             if (result.length() != 0)
2857                 failCount++;
2858         }
2859 
2860         report("SB Substitution");
2861     }
2862 
2863     /*
2864      * 5 groups of characters are created to make a substitution string.
2865      * A base string will be created including random lead chars, the
2866      * substitution string, and random trailing chars.
2867      * A pattern containing the 5 groups is searched for and replaced with:
2868      * random group + random string + random group.
2869      * The results are checked for correctness.
2870      */
2871     private static void substitutionBasher() {
2872         for (int runs = 0; runs<1000; runs++) {
2873             // Create a base string to work in
2874             int leadingChars = generator.nextInt(10);
2875             StringBuffer baseBuffer = new StringBuffer(100);
2876             String leadingString = getRandomAlphaString(leadingChars);
2877             baseBuffer.append(leadingString);
2878 
2879             // Create 5 groups of random number of random chars
2880             // Create the string to substitute
2881             // Create the pattern string to search for
2882             StringBuffer bufferToSub = new StringBuffer(25);
2883             StringBuffer bufferToPat = new StringBuffer(50);
2884             String[] groups = new String[5];
2885             for(int i=0; i<5; i++) {
2886                 int aGroupSize = generator.nextInt(5)+1;
2887                 groups[i] = getRandomAlphaString(aGroupSize);
2888                 bufferToSub.append(groups[i]);
2889                 bufferToPat.append('(');
2890                 bufferToPat.append(groups[i]);
2891                 bufferToPat.append(')');
2892             }
2893             String stringToSub = bufferToSub.toString();
2894             String pattern = bufferToPat.toString();
2895 
2896             // Place sub string into working string at random index
2897             baseBuffer.append(stringToSub);
2898 
2899             // Append random chars to end
2900             int trailingChars = generator.nextInt(10);
2901             String trailingString = getRandomAlphaString(trailingChars);
2902             baseBuffer.append(trailingString);
2903             String baseString = baseBuffer.toString();
2904 
2905             // Create test pattern and matcher
2906             Pattern p = Pattern.compile(pattern);
2907             Matcher m = p.matcher(baseString);
2908 
2909             // Reject candidate if pattern happens to start early
2910             m.find();
2911             if (m.start() < leadingChars)
2912                 continue;
2913 
2914             // Reject candidate if more than one match
2915             if (m.find())
2916                 continue;
2917 
2918             // Construct a replacement string with :
2919             // random group + random string + random group
2920             StringBuffer bufferToRep = new StringBuffer();
2921             int groupIndex1 = generator.nextInt(5);
2922             bufferToRep.append("$" + (groupIndex1 + 1));
2923             String randomMidString = getRandomAlphaString(5);
2924             bufferToRep.append(randomMidString);
2925             int groupIndex2 = generator.nextInt(5);
2926             bufferToRep.append("$" + (groupIndex2 + 1));
2927             String replacement = bufferToRep.toString();
2928 
2929             // Do the replacement
2930             String result = m.replaceAll(replacement);
2931 
2932             // Construct expected result
2933             StringBuffer bufferToRes = new StringBuffer();
2934             bufferToRes.append(leadingString);
2935             bufferToRes.append(groups[groupIndex1]);
2936             bufferToRes.append(randomMidString);
2937             bufferToRes.append(groups[groupIndex2]);
2938             bufferToRes.append(trailingString);
2939             String expectedResult = bufferToRes.toString();
2940 
2941             // Check results
2942             if (!result.equals(expectedResult))
2943                 failCount++;
2944         }
2945 
2946         report("Substitution Basher");
2947     }
2948 
2949     /**
2950      * Checks the handling of some escape sequences that the Pattern
2951      * class should process instead of the java compiler. These are
2952      * not in the file because the escapes should be be processed
2953      * by the Pattern class when the regex is compiled.
2954      */
2955     private static void escapes() throws Exception {
2956         Pattern p = Pattern.compile("\\043");
2957         Matcher m = p.matcher("#");
2958         if (!m.find())
2959             failCount++;
2960 
2961         p = Pattern.compile("\\x23");
2962         m = p.matcher("#");
2963         if (!m.find())
2964             failCount++;
2965 
2966         p = Pattern.compile("\\u0023");
2967         m = p.matcher("#");
2968         if (!m.find())
2969             failCount++;
2970 
2971         report("Escape sequences");
2972     }
2973 
2974     /**
2975      * Checks the handling of blank input situations. These
2976      * tests are incompatible with my test file format.
2977      */
2978     private static void blankInput() throws Exception {
2979         Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
2980         Matcher m = p.matcher("");
2981         if (m.find())
2982             failCount++;
2983 
2984         p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
2985         m = p.matcher("");
2986         if (!m.find())
2987             failCount++;
2988 
2989         p = Pattern.compile("abc");
2990         m = p.matcher("");
2991         if (m.find())
2992             failCount++;
2993 
2994         p = Pattern.compile("a*");
2995         m = p.matcher("");
2996         if (!m.find())
2997             failCount++;
2998 
2999         report("Blank input");
3000     }
3001 
3002     /**
3003      * Tests the Boyer-Moore pattern matching of a character sequence
3004      * on randomly generated patterns.
3005      */
3006     private static void bm() throws Exception {
3007         doBnM('a');
3008         report("Boyer Moore (ASCII)");
3009 
3010         doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3011         report("Boyer Moore (Supplementary)");
3012     }
3013 
3014     private static void doBnM(int baseCharacter) throws Exception {
3015         int achar=0;
3016 
3017         for (int i=0; i<100; i++) {
3018             // Create a short pattern to search for
3019             int patternLength = generator.nextInt(7) + 4;
3020             StringBuffer patternBuffer = new StringBuffer(patternLength);
3021             for (int x=0; x<patternLength; x++) {
3022                 int ch = baseCharacter + generator.nextInt(26);
3023                 if (Character.isSupplementaryCodePoint(ch)) {
3024                     patternBuffer.append(Character.toChars(ch));
3025                 } else {
3026                     patternBuffer.append((char)ch);
3027                 }
3028             }
3029             String pattern =  patternBuffer.toString();
3030             Pattern p = Pattern.compile(pattern);
3031 
3032             // Create a buffer with random ASCII chars that does
3033             // not match the sample
3034             String toSearch = null;
3035             StringBuffer s = null;
3036             Matcher m = p.matcher("");
3037             do {
3038                 s = new StringBuffer(100);
3039                 for (int x=0; x<100; x++) {
3040                     int ch = baseCharacter + generator.nextInt(26);
3041                     if (Character.isSupplementaryCodePoint(ch)) {
3042                         s.append(Character.toChars(ch));
3043                     } else {
3044                         s.append((char)ch);
3045                     }
3046                 }
3047                 toSearch = s.toString();
3048                 m.reset(toSearch);
3049             } while (m.find());
3050 
3051             // Insert the pattern at a random spot
3052             int insertIndex = generator.nextInt(99);
3053             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3054                 insertIndex++;
3055             s = s.insert(insertIndex, pattern);
3056             toSearch = s.toString();
3057 
3058             // Make sure that the pattern is found
3059             m.reset(toSearch);
3060             if (!m.find())
3061                 failCount++;
3062 
3063             // Make sure that the match text is the pattern
3064             if (!m.group().equals(pattern))
3065                 failCount++;
3066 
3067             // Make sure match occured at insertion point
3068             if (m.start() != insertIndex)
3069                 failCount++;
3070         }
3071     }
3072 
3073     /**
3074      * Tests the matching of slices on randomly generated patterns.
3075      * The Boyer-Moore optimization is not done on these patterns
3076      * because it uses unicode case folding.
3077      */
3078     private static void slice() throws Exception {
3079         doSlice(Character.MAX_VALUE);
3080         report("Slice");
3081 
3082         doSlice(Character.MAX_CODE_POINT);
3083         report("Slice (Supplementary)");
3084     }
3085 
3086     private static void doSlice(int maxCharacter) throws Exception {
3087         Random generator = new Random();
3088         int achar=0;
3089 
3090         for (int i=0; i<100; i++) {
3091             // Create a short pattern to search for
3092             int patternLength = generator.nextInt(7) + 4;
3093             StringBuffer patternBuffer = new StringBuffer(patternLength);
3094             for (int x=0; x<patternLength; x++) {
3095                 int randomChar = 0;
3096                 while (!Character.isLetterOrDigit(randomChar))
3097                     randomChar = generator.nextInt(maxCharacter);
3098                 if (Character.isSupplementaryCodePoint(randomChar)) {
3099                     patternBuffer.append(Character.toChars(randomChar));
3100                 } else {
3101                     patternBuffer.append((char) randomChar);
3102                 }
3103             }
3104             String pattern =  patternBuffer.toString();
3105             Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3106 
3107             // Create a buffer with random chars that does not match the sample
3108             String toSearch = null;
3109             StringBuffer s = null;
3110             Matcher m = p.matcher("");
3111             do {
3112                 s = new StringBuffer(100);
3113                 for (int x=0; x<100; x++) {
3114                     int randomChar = 0;
3115                     while (!Character.isLetterOrDigit(randomChar))
3116                         randomChar = generator.nextInt(maxCharacter);
3117                     if (Character.isSupplementaryCodePoint(randomChar)) {
3118                         s.append(Character.toChars(randomChar));
3119                     } else {
3120                         s.append((char) randomChar);
3121                     }
3122                 }
3123                 toSearch = s.toString();
3124                 m.reset(toSearch);
3125             } while (m.find());
3126 
3127             // Insert the pattern at a random spot
3128             int insertIndex = generator.nextInt(99);
3129             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3130                 insertIndex++;
3131             s = s.insert(insertIndex, pattern);
3132             toSearch = s.toString();
3133 
3134             // Make sure that the pattern is found
3135             m.reset(toSearch);
3136             if (!m.find())
3137                 failCount++;
3138 
3139             // Make sure that the match text is the pattern
3140             if (!m.group().equals(pattern))
3141                 failCount++;
3142 
3143             // Make sure match occured at insertion point
3144             if (m.start() != insertIndex)
3145                 failCount++;
3146         }
3147     }
3148 
3149     private static void explainFailure(String pattern, String data,
3150                                        String expected, String actual) {
3151         System.err.println("----------------------------------------");
3152         System.err.println("Pattern = "+pattern);
3153         System.err.println("Data = "+data);
3154         System.err.println("Expected = " + expected);
3155         System.err.println("Actual   = " + actual);
3156     }
3157 
3158     private static void explainFailure(String pattern, String data,
3159                                        Throwable t) {
3160         System.err.println("----------------------------------------");
3161         System.err.println("Pattern = "+pattern);
3162         System.err.println("Data = "+data);
3163         t.printStackTrace(System.err);
3164     }
3165 
3166     // Testing examples from a file
3167 
3168     /**
3169      * Goes through the file "TestCases.txt" and creates many patterns
3170      * described in the file, matching the patterns against input lines in
3171      * the file, and comparing the results against the correct results
3172      * also found in the file. The file format is described in comments
3173      * at the head of the file.
3174      */
3175     private static void processFile(String fileName) throws Exception {
3176         File testCases = new File(System.getProperty("test.src", "."),
3177                                   fileName);
3178         FileInputStream in = new FileInputStream(testCases);
3179         BufferedReader r = new BufferedReader(new InputStreamReader(in));
3180 
3181         // Process next test case.
3182         String aLine;
3183         while((aLine = r.readLine()) != null) {
3184             // Read a line for pattern
3185             String patternString = grabLine(r);
3186             Pattern p = null;
3187             try {
3188                 p = compileTestPattern(patternString);
3189             } catch (PatternSyntaxException e) {
3190                 String dataString = grabLine(r);
3191                 String expectedResult = grabLine(r);
3192                 if (expectedResult.startsWith("error"))
3193                     continue;
3194                 explainFailure(patternString, dataString, e);
3195                 failCount++;
3196                 continue;
3197             }
3198 
3199             // Read a line for input string
3200             String dataString = grabLine(r);
3201             Matcher m = p.matcher(dataString);
3202             StringBuffer result = new StringBuffer();
3203 
3204             // Check for IllegalStateExceptions before a match
3205             failCount += preMatchInvariants(m);
3206 
3207             boolean found = m.find();
3208 
3209             if (found)
3210                 failCount += postTrueMatchInvariants(m);
3211             else
3212                 failCount += postFalseMatchInvariants(m);
3213 
3214             if (found) {
3215                 result.append("true ");
3216                 result.append(m.group(0) + " ");
3217             } else {
3218                 result.append("false ");
3219             }
3220 
3221             result.append(m.groupCount());
3222 
3223             if (found) {
3224                 for (int i=1; i<m.groupCount()+1; i++)
3225                     if (m.group(i) != null)
3226                         result.append(" " +m.group(i));
3227             }
3228 
3229             // Read a line for the expected result
3230             String expectedResult = grabLine(r);
3231 
3232             if (!result.toString().equals(expectedResult)) {
3233                 explainFailure(patternString, dataString, expectedResult, result.toString());
3234                 failCount++;
3235             }
3236         }
3237 
3238         report(fileName);
3239     }
3240 
3241     private static int preMatchInvariants(Matcher m) {
3242         int failCount = 0;
3243         try {
3244             m.start();
3245             failCount++;
3246         } catch (IllegalStateException ise) {}
3247         try {
3248             m.end();
3249             failCount++;
3250         } catch (IllegalStateException ise) {}
3251         try {
3252             m.group();
3253             failCount++;
3254         } catch (IllegalStateException ise) {}
3255         return failCount;
3256     }
3257 
3258     private static int postFalseMatchInvariants(Matcher m) {
3259         int failCount = 0;
3260         try {
3261             m.group();
3262             failCount++;
3263         } catch (IllegalStateException ise) {}
3264         try {
3265             m.start();
3266             failCount++;
3267         } catch (IllegalStateException ise) {}
3268         try {
3269             m.end();
3270             failCount++;
3271         } catch (IllegalStateException ise) {}
3272         return failCount;
3273     }
3274 
3275     private static int postTrueMatchInvariants(Matcher m) {
3276         int failCount = 0;
3277         //assert(m.start() = m.start(0);
3278         if (m.start() != m.start(0))
3279             failCount++;
3280         //assert(m.end() = m.end(0);
3281         if (m.start() != m.start(0))
3282             failCount++;
3283         //assert(m.group() = m.group(0);
3284         if (!m.group().equals(m.group(0)))
3285             failCount++;
3286         try {
3287             m.group(50);
3288             failCount++;
3289         } catch (IndexOutOfBoundsException ise) {}
3290 
3291         return failCount;
3292     }
3293 
3294     private static Pattern compileTestPattern(String patternString) {
3295         if (!patternString.startsWith("'")) {
3296             return Pattern.compile(patternString);
3297         }
3298 
3299         int break1 = patternString.lastIndexOf("'");
3300         String flagString = patternString.substring(
3301                                           break1+1, patternString.length());
3302         patternString = patternString.substring(1, break1);
3303 
3304         if (flagString.equals("i"))
3305             return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3306 
3307         if (flagString.equals("m"))
3308             return Pattern.compile(patternString, Pattern.MULTILINE);
3309 
3310         return Pattern.compile(patternString);
3311     }
3312 
3313     /**
3314      * Reads a line from the input file. Keeps reading lines until a non
3315      * empty non comment line is read. If the line contains a \n then
3316      * these two characters are replaced by a newline char. If a \\uxxxx
3317      * sequence is read then the sequence is replaced by the unicode char.
3318      */
3319     private static String grabLine(BufferedReader r) throws Exception {
3320         int index = 0;
3321         String line = r.readLine();
3322         while (line.startsWith("//") || line.length() < 1)
3323             line = r.readLine();
3324         while ((index = line.indexOf("\\n")) != -1) {
3325             StringBuffer temp = new StringBuffer(line);
3326             temp.replace(index, index+2, "\n");
3327             line = temp.toString();
3328         }
3329         while ((index = line.indexOf("\\u")) != -1) {
3330             StringBuffer temp = new StringBuffer(line);
3331             String value = temp.substring(index+2, index+6);
3332             char aChar = (char)Integer.parseInt(value, 16);
3333             String unicodeChar = "" + aChar;
3334             temp.replace(index, index+6, unicodeChar);
3335             line = temp.toString();
3336         }
3337 
3338         return line;
3339     }
3340 
3341     private static void check(Pattern p, String s, String g, String expected) {
3342         Matcher m = p.matcher(s);
3343         m.find();
3344         if (!m.group(g).equals(expected))
3345             failCount++;
3346     }
3347 
3348     private static void checkReplaceFirst(String p, String s, String r, String expected)
3349     {
3350         if (!expected.equals(Pattern.compile(p)
3351                                     .matcher(s)
3352                                     .replaceFirst(r)))
3353             failCount++;
3354     }
3355 
3356     private static void checkReplaceAll(String p, String s, String r, String expected)
3357     {
3358         if (!expected.equals(Pattern.compile(p)
3359                                     .matcher(s)
3360                                     .replaceAll(r)))
3361             failCount++;
3362     }
3363 
3364     private static void checkExpectedFail(String p) {
3365         try {
3366             Pattern.compile(p);
3367         } catch (PatternSyntaxException pse) {
3368             //pse.printStackTrace();
3369             return;
3370         }
3371         failCount++;
3372     }
3373 
3374     private static void checkExpectedFail(Matcher m, String g) {
3375         m.find();
3376         try {
3377             m.group(g);
3378         } catch (IllegalArgumentException iae) {
3379             //iae.printStackTrace();
3380             return;
3381         } catch (NullPointerException npe) {
3382             return;
3383         }
3384         failCount++;
3385     }
3386 
3387 
3388     private static void namedGroupCaptureTest() throws Exception {
3389         check(Pattern.compile("x+(?<gname>y+)z+"),
3390               "xxxyyyzzz",
3391               "gname",
3392               "yyy");
3393 
3394         check(Pattern.compile("x+(?<gname8>y+)z+"),
3395               "xxxyyyzzz",
3396               "gname8",
3397               "yyy");
3398 
3399         //backref
3400         Pattern pattern = Pattern.compile("(a*)bc\\1");
3401         check(pattern, "zzzaabcazzz", true);  // found "abca"
3402 
3403         check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
3404               "zzzaabcaazzz", true);
3405 
3406         check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
3407               "abcdefabc", true);
3408 
3409         check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
3410               "abcdefghijkk", true);
3411 
3412         // Supplementary character tests
3413         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3414               toSupplementaries("zzzaabcazzz"), true);
3415 
3416         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3417               toSupplementaries("zzzaabcaazzz"), true);
3418 
3419         check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
3420               toSupplementaries("abcdefabc"), true);
3421 
3422         check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
3423                               "(?<gname>" +
3424                               toSupplementaries("k)") + "\\k<gname>"),
3425               toSupplementaries("abcdefghijkk"), true);
3426 
3427         check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
3428               "xxxyyyzzzyyy",
3429               "gname",
3430               "yyy");
3431 
3432         //replaceFirst/All
3433         checkReplaceFirst("(?<gn>ab)(c*)",
3434                           "abccczzzabcczzzabccc",
3435                           "${gn}",
3436                           "abzzzabcczzzabccc");
3437 
3438         checkReplaceAll("(?<gn>ab)(c*)",
3439                         "abccczzzabcczzzabccc",
3440                         "${gn}",
3441                         "abzzzabzzzab");
3442 
3443 
3444         checkReplaceFirst("(?<gn>ab)(c*)",
3445                           "zzzabccczzzabcczzzabccczzz",
3446                           "${gn}",
3447                           "zzzabzzzabcczzzabccczzz");
3448 
3449         checkReplaceAll("(?<gn>ab)(c*)",
3450                         "zzzabccczzzabcczzzabccczzz",
3451                         "${gn}",
3452                         "zzzabzzzabzzzabzzz");
3453 
3454         checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
3455                           "zzzabccczzzabcczzzabccczzz",
3456                           "${gn2}",
3457                           "zzzccczzzabcczzzabccczzz");
3458 
3459         checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
3460                         "zzzabccczzzabcczzzabccczzz",
3461                         "${gn2}",
3462                         "zzzccczzzcczzzccczzz");
3463 
3464         //toSupplementaries("(ab)(c*)"));
3465         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3466                            ")(?<gn2>" + toSupplementaries("c") + "*)",
3467                           toSupplementaries("abccczzzabcczzzabccc"),
3468                           "${gn1}",
3469                           toSupplementaries("abzzzabcczzzabccc"));
3470 
3471 
3472         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3473                         ")(?<gn2>" + toSupplementaries("c") + "*)",
3474                         toSupplementaries("abccczzzabcczzzabccc"),
3475                         "${gn1}",
3476                         toSupplementaries("abzzzabzzzab"));
3477 
3478         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3479                            ")(?<gn2>" + toSupplementaries("c") + "*)",
3480                           toSupplementaries("abccczzzabcczzzabccc"),
3481                           "${gn2}",
3482                           toSupplementaries("ccczzzabcczzzabccc"));
3483 
3484 
3485         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3486                         ")(?<gn2>" + toSupplementaries("c") + "*)",
3487                         toSupplementaries("abccczzzabcczzzabccc"),
3488                         "${gn2}",
3489                         toSupplementaries("ccczzzcczzzccc"));
3490 
3491         checkReplaceFirst("(?<dog>Dog)AndCat",
3492                           "zzzDogAndCatzzzDogAndCatzzz",
3493                           "${dog}",
3494                           "zzzDogzzzDogAndCatzzz");
3495 
3496 
3497         checkReplaceAll("(?<dog>Dog)AndCat",
3498                           "zzzDogAndCatzzzDogAndCatzzz",
3499                           "${dog}",
3500                           "zzzDogzzzDogzzz");
3501 
3502         // backref in Matcher & String
3503         if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
3504             !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
3505             failCount++;
3506 
3507         // negative
3508         checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
3509         checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
3510         checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
3511         checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
3512         checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
3513         checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3514                           "gnameX");
3515         checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3516                           null);
3517         report("NamedGroupCapture");
3518     }
3519 
    // This is for bug 6919132
3521     private static void nonBmpClassComplementTest() throws Exception {
3522         Pattern p = Pattern.compile("\\P{Lu}");
3523         Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3524         if (m.find() && m.start() == 1)
3525             failCount++;
3526 
3527         // from a unicode category
3528         p = Pattern.compile("\\P{Lu}");
3529         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3530         if (m.find())
3531             failCount++;
3532         if (!m.hitEnd())
3533             failCount++;
3534 
3535         // block
3536         p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
3537         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3538         if (m.find() && m.start() == 1)
3539             failCount++;
3540 
3541         report("NonBmpClassComplement");
3542     }
3543 
3544     private static void unicodePropertiesTest() throws Exception {
3545         // different forms
3546         if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
3547             !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
3548             !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
3549             !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
3550             !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
3551             !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
3552             !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
3553             !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
3554             !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
3555             !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
3556             failCount++;
3557 
3558         Matcher common  = Pattern.compile("\\p{script=Common}").matcher("");
3559         Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
3560         Matcher lastSM  = common;
3561         Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
3562 
3563         Matcher latin  = Pattern.compile("\\p{block=basic_latin}").matcher("");
3564         Matcher greek  = Pattern.compile("\\p{InGreek}").matcher("");
3565         Matcher lastBM = latin;
3566         Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
3567 
3568         for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
3569             if (cp >= 0x30000 && (cp & 0x70) == 0){
3570                 continue;  // only pick couple code points, they are the same
3571             }
3572 
3573             // Unicode Script
3574             Character.UnicodeScript script = Character.UnicodeScript.of(cp);
3575             Matcher m;
3576             String str = new String(Character.toChars(cp));
3577             if (script == lastScript) {
3578                  m = lastSM;
3579                  m.reset(str);
3580             } else {
3581                  m  = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
3582             }
3583             if (!m.matches()) {
3584                 failCount++;
3585             }
3586             Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
3587             other.reset(str);
3588             if (other.matches()) {
3589                 failCount++;
3590             }
3591             lastSM = m;
3592             lastScript = script;
3593 
3594             // Unicode Block
3595             Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
3596             if (block == null) {
3597                 //System.out.printf("Not a Block: cp=%x%n", cp);
3598                 continue;
3599             }
3600             if (block == lastBlock) {
3601                  m = lastBM;
3602                  m.reset(str);
3603             } else {
3604                  m  = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
3605             }
3606             if (!m.matches()) {
3607                 failCount++;
3608             }
3609             other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
3610             other.reset(str);
3611             if (other.matches()) {
3612                 failCount++;
3613             }
3614             lastBM = m;
3615             lastBlock = block;
3616         }
3617         report("unicodeProperties");
3618     }
3619 
3620     private static void unicodeHexNotationTest() throws Exception {
3621 
3622         // negative
3623         checkExpectedFail("\\x{-23}");
3624         checkExpectedFail("\\x{110000}");
3625         checkExpectedFail("\\x{}");
3626         checkExpectedFail("\\x{AB[ef]");
3627 
3628         // codepoint
3629         check("^\\x{1033c}$",              "\uD800\uDF3C", true);
3630         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
3631         check("^\\x{D800}\\x{DF3c}+$",     "\uD800\uDF3C", false);
3632         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
3633 
3634         // in class
3635         check("^[\\x{D800}\\x{DF3c}]+$",   "\uD800\uDF3C", false);
3636         check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
3637         check("^[\\x{D800}\\x{DF3C}]+$",   "\uD800\uDF3C", false);
3638         check("^[\\x{DF3C}\\x{D800}]+$",   "\uD800\uDF3C", false);
3639         check("^[\\x{D800}\\x{DF3C}]+$",   "\uDF3C\uD800", true);
3640         check("^[\\x{DF3C}\\x{D800}]+$",   "\uDF3C\uD800", true);
3641 
3642         for (int cp = 0; cp <= 0x10FFFF; cp++) {
3643              String s = "A" + new String(Character.toChars(cp)) + "B";
3644              String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
3645                                              : String.format("\\u%04x\\u%04x",
3646                                                (int) Character.toChars(cp)[0],
3647                                                (int) Character.toChars(cp)[1]);
3648              String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
3649              if (!Pattern.matches("A" + hexUTF16 + "B", s))
3650                  failCount++;
3651              if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
3652                  failCount++;
3653              if (!Pattern.matches("A" + hexCodePoint + "B", s))
3654                  failCount++;
3655              if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
3656                  failCount++;
3657          }
3658          report("unicodeHexNotation");
3659      }
3660 }