1 /*
   2  * Copyright 1999-2009 Sun Microsystems, Inc.  All Rights Reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Sun designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Sun in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  22  * CA 95054 USA or visit www.sun.com if you need additional information or
  23  * have any questions.
  24  */
  25 
  26 /**
  27  * @test
  28  * @summary tests RegExp framework
  29  * @author Mike McCloskey
  30  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
  31  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
  32  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  33  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  34  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  35  * 6350801 6676425 6878475 6919132 6931676
  36  */
  37 
  38 import java.util.regex.*;
  39 import java.util.Random;
  40 import java.io.*;
  41 import java.util.*;
  42 import java.nio.CharBuffer;
  43 
  44 /**
  45  * This is a test class created to check the operation of
  46  * the Pattern and Matcher classes.
  47  */
  48 public class RegExTest {
  49 
  50     private static Random generator = new Random();
  51     private static boolean failure = false;
  52     private static int failCount = 0;
  53 
  54     /**
  55      * Main to interpret arguments and run several tests.
  56      *
  57      */
  58     public static void main(String[] args) throws Exception {
  59         // Most of the tests are in a file
  60         processFile("TestCases.txt");
  61         //processFile("PerlCases.txt");
  62         processFile("BMPTestCases.txt");
  63         processFile("SupplementaryTestCases.txt");
  64 
  65         // These test many randomly generated char patterns
  66         bm();
  67         slice();
  68 
  69         // These are hard to put into the file
  70         escapes();
  71         blankInput();
  72 
  73         // Substitition tests on randomly generated sequences
  74         globalSubstitute();
  75         stringbufferSubstitute();
  76         substitutionBasher();
  77 
  78         // Canonical Equivalence
  79         ceTest();
  80 
  81         // Anchors
  82         anchorTest();
  83 
  84         // boolean match calls
  85         matchesTest();
  86         lookingAtTest();
  87 
  88         // Pattern API
  89         patternMatchesTest();
  90 
  91         // Misc
  92         lookbehindTest();
  93         nullArgumentTest();
  94         backRefTest();
  95         groupCaptureTest();
  96         caretTest();
  97         charClassTest();
  98         emptyPatternTest();
  99         findIntTest();
 100         group0Test();
 101         longPatternTest();
 102         octalTest();
 103         ampersandTest();
 104         negationTest();
 105         splitTest();
 106         appendTest();
 107         caseFoldingTest();
 108         commentsTest();
 109         unixLinesTest();
 110         replaceFirstTest();
 111         gTest();
 112         zTest();
 113         serializeTest();
 114         reluctantRepetitionTest();
 115         multilineDollarTest();
 116         dollarAtEndTest();
 117         caretBetweenTerminatorsTest();
 118         // This RFE rejected in Tiger numOccurrencesTest();
 119         javaCharClassTest();
 120         nonCaptureRepetitionTest();
 121         notCapturedGroupCurlyMatchTest();
 122         escapedSegmentTest();
 123         literalPatternTest();
 124         literalReplacementTest();
 125         regionTest();
 126         toStringTest();
 127         negatedCharClassTest();
 128         findFromTest();
 129         boundsTest();
 130         unicodeWordBoundsTest();
 131         caretAtEndTest();
 132         wordSearchTest();
 133         hitEndTest();
 134         toMatchResultTest();
 135         surrogatesInClassTest();
 136         namedGroupCaptureTest();
 137         nonBmpClassComplementTest();
 138 
 139         if (failure)
 140             throw new RuntimeException("Failure in the RE handling.");
 141         else
 142             System.err.println("OKAY: All tests passed.");
 143     }
 144 
 145     // Utility functions
 146 
 147     private static String getRandomAlphaString(int length) {
 148         StringBuffer buf = new StringBuffer(length);
 149         for (int i=0; i<length; i++) {
 150             char randChar = (char)(97 + generator.nextInt(26));
 151             buf.append(randChar);
 152         }
 153         return buf.toString();
 154     }
 155 
 156     private static void check(Matcher m, String expected) {
 157         m.find();
 158         if (!m.group().equals(expected))
 159             failCount++;
 160     }
 161 
 162     private static void check(Matcher m, String result, boolean expected) {
 163         m.find();
 164         if (m.group().equals(result))
 165             failCount += (expected) ? 0 : 1;
 166         else
 167             failCount += (expected) ? 1 : 0;
 168     }
 169 
 170     private static void check(Pattern p, String s, boolean expected) {
 171         Matcher matcher = p.matcher(s);
 172         if (matcher.find())
 173             failCount += (expected) ? 0 : 1;
 174         else
 175             failCount += (expected) ? 1 : 0;
 176     }
 177 
 178     private static void check(String p, char c, boolean expected) {
 179         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 180         Pattern pattern = Pattern.compile(propertyPattern);
 181         char[] ca = new char[1]; ca[0] = c;
 182         Matcher matcher = pattern.matcher(new String(ca));
 183         if (!matcher.find())
 184             failCount++;
 185     }
 186 
 187     private static void check(String p, int codePoint, boolean expected) {
 188         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 189         Pattern pattern = Pattern.compile(propertyPattern);
 190         char[] ca = Character.toChars(codePoint);
 191         Matcher matcher = pattern.matcher(new String(ca));
 192         if (!matcher.find())
 193             failCount++;
 194     }
 195 
 196     private static void check(String p, int flag, String input, String s,
 197                               boolean expected)
 198     {
 199         Pattern pattern = Pattern.compile(p, flag);
 200         Matcher matcher = pattern.matcher(input);
 201         if (expected)
 202             check(matcher, s, expected);
 203         else
 204             check(pattern, input, false);
 205     }
 206 
 207     private static void report(String testName) {
 208         int spacesToAdd = 30 - testName.length();
 209         StringBuffer paddedNameBuffer = new StringBuffer(testName);
 210         for (int i=0; i<spacesToAdd; i++)
 211             paddedNameBuffer.append(" ");
 212         String paddedName = paddedNameBuffer.toString();
 213         System.err.println(paddedName + ": " +
 214                            (failCount==0 ? "Passed":"Failed("+failCount+")"));
 215         if (failCount > 0)
 216             failure = true;
 217         failCount = 0;
 218     }
 219 
 220     /**
 221      * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
 222      * supplementary characters. This method does NOT fully take care
 223      * of the regex syntax.
 224      */
 225     private static String toSupplementaries(String s) {
 226         int length = s.length();
 227         StringBuffer sb = new StringBuffer(length * 2);
 228 
 229         for (int i = 0; i < length; ) {
 230             char c = s.charAt(i++);
 231             if (c == '\\') {
 232                 sb.append(c);
 233                 if (i < length) {
 234                     c = s.charAt(i++);
 235                     sb.append(c);
 236                     if (c == 'u') {
 237                         // assume no syntax error
 238                         sb.append(s.charAt(i++));
 239                         sb.append(s.charAt(i++));
 240                         sb.append(s.charAt(i++));
 241                         sb.append(s.charAt(i++));
 242                     }
 243                 }
 244             } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
 245                 sb.append('\ud800').append((char)('\udc00'+c));
 246             } else {
 247                 sb.append(c);
 248             }
 249         }
 250         return sb.toString();
 251     }
 252 
 253     // Regular expression tests
 254 
 255     // This is for bug 6178785
 256     // Test if an expected NPE gets thrown when passing in a null argument
 257     private static boolean check(Runnable test) {
 258         try {
 259             test.run();
 260             failCount++;
 261             return false;
 262         } catch (NullPointerException npe) {
 263             return true;
 264         }
 265     }
 266 
 267     private static void nullArgumentTest() {
 268         check(new Runnable() { public void run() { Pattern.compile(null); }});
 269         check(new Runnable() { public void run() { Pattern.matches(null, null); }});
 270         check(new Runnable() { public void run() { Pattern.matches("xyz", null);}});
 271         check(new Runnable() { public void run() { Pattern.quote(null);}});
 272         check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}});
 273         check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}});
 274 
 275         final Matcher m = Pattern.compile("xyz").matcher("xyz");
 276         m.matches();
 277         check(new Runnable() { public void run() { m.appendTail(null);}});
 278         check(new Runnable() { public void run() { m.replaceAll(null);}});
 279         check(new Runnable() { public void run() { m.replaceFirst(null);}});
 280         check(new Runnable() { public void run() { m.appendReplacement(null, null);}});
 281         check(new Runnable() { public void run() { m.reset(null);}});
 282         check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}});
 283         //check(new Runnable() { public void run() { m.usePattern(null);}});
 284 
 285         report("Null Argument");
 286     }
 287 
 288     // This is for bug6635133
 289     // Test if surrogate pair in Unicode escapes can be handled correctly.
 290     private static void surrogatesInClassTest() throws Exception {
 291         Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
 292         Matcher matcher = pattern.matcher("\ud834\udd22");
 293         if (!matcher.find())
 294             failCount++;
 295     }
 296 
 297     // This is for bug 4988891
 298     // Test toMatchResult to see that it is a copy of the Matcher
 299     // that is not affected by subsequent operations on the original
 300     private static void toMatchResultTest() throws Exception {
 301         Pattern pattern = Pattern.compile("squid");
 302         Matcher matcher = pattern.matcher(
 303             "agiantsquidofdestinyasmallsquidoffate");
 304         matcher.find();
 305         int matcherStart1 = matcher.start();
 306         MatchResult mr = matcher.toMatchResult();
 307         if (mr == matcher)
 308             failCount++;
 309         int resultStart1 = mr.start();
 310         if (matcherStart1 != resultStart1)
 311             failCount++;
 312         matcher.find();
 313         int matcherStart2 = matcher.start();
 314         int resultStart2 = mr.start();
 315         if (matcherStart2 == resultStart2)
 316             failCount++;
 317         if (resultStart1 != resultStart2)
 318             failCount++;
 319         MatchResult mr2 = matcher.toMatchResult();
 320         if (mr == mr2)
 321             failCount++;
 322         if (mr2.start() != matcherStart2)
 323             failCount++;
 324         report("toMatchResult is a copy");
 325     }
 326 
 327     // This is for bug 5013885
 328     // Must test a slice to see if it reports hitEnd correctly
 329     private static void hitEndTest() throws Exception {
 330         // Basic test of Slice node
 331         Pattern p = Pattern.compile("^squidattack");
 332         Matcher m = p.matcher("squack");
 333         m.find();
 334         if (m.hitEnd())
 335             failCount++;
 336         m.reset("squid");
 337         m.find();
 338         if (!m.hitEnd())
 339             failCount++;
 340 
 341         // Test Slice, SliceA and SliceU nodes
 342         for (int i=0; i<3; i++) {
 343             int flags = 0;
 344             if (i==1) flags = Pattern.CASE_INSENSITIVE;
 345             if (i==2) flags = Pattern.UNICODE_CASE;
 346             p = Pattern.compile("^abc", flags);
 347             m = p.matcher("ad");
 348             m.find();
 349             if (m.hitEnd())
 350                 failCount++;
 351             m.reset("ab");
 352             m.find();
 353             if (!m.hitEnd())
 354                 failCount++;
 355         }
 356 
 357         // Test Boyer-Moore node
 358         p = Pattern.compile("catattack");
 359         m = p.matcher("attack");
 360         m.find();
 361         if (!m.hitEnd())
 362             failCount++;
 363 
 364         p = Pattern.compile("catattack");
 365         m = p.matcher("attackattackattackcatatta");
 366         m.find();
 367         if (!m.hitEnd())
 368             failCount++;
 369         report("hitEnd from a Slice");
 370     }
 371 
 372     // This is for bug 4997476
 373     // It is weird code submitted by customer demonstrating a regression
 374     private static void wordSearchTest() throws Exception {
 375         String testString = new String("word1 word2 word3");
 376         Pattern p = Pattern.compile("\\b");
 377         Matcher m = p.matcher(testString);
 378         int position = 0;
 379         int start = 0;
 380         while (m.find(position)) {
 381             start = m.start();
 382             if (start == testString.length())
 383                 break;
 384             if (m.find(start+1)) {
 385                 position = m.start();
 386             } else {
 387                 position = testString.length();
 388             }
 389             if (testString.substring(start, position).equals(" "))
 390                 continue;
 391             if (!testString.substring(start, position-1).startsWith("word"))
 392                 failCount++;
 393         }
 394         report("Customer word search");
 395     }
 396 
 397     // This is for bug 4994840
 398     private static void caretAtEndTest() throws Exception {
 399         // Problem only occurs with multiline patterns
 400         // containing a beginning-of-line caret "^" followed
 401         // by an expression that also matches the empty string.
 402         Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
 403         Matcher matcher = pattern.matcher("\r");
 404         matcher.find();
 405         matcher.find();
 406         report("Caret at end");
 407     }
 408 
 409     // This test is for 4979006
 410     // Check to see if word boundary construct properly handles unicode
 411     // non spacing marks
 412     private static void unicodeWordBoundsTest() throws Exception {
 413         String spaces = "  ";
 414         String wordChar = "a";
 415         String nsm = "\u030a";
 416 
 417         assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
 418 
 419         Pattern pattern = Pattern.compile("\\b");
 420         Matcher matcher = pattern.matcher("");
 421         // S=other B=word character N=non spacing mark .=word boundary
 422         // SS.BB.SS
 423         String input = spaces + wordChar + wordChar + spaces;
 424         twoFindIndexes(input, matcher, 2, 4);
 425         // SS.BBN.SS
 426         input = spaces + wordChar +wordChar + nsm + spaces;
 427         twoFindIndexes(input, matcher, 2, 5);
 428         // SS.BN.SS
 429         input = spaces + wordChar + nsm + spaces;
 430         twoFindIndexes(input, matcher, 2, 4);
 431         // SS.BNN.SS
 432         input = spaces + wordChar + nsm + nsm + spaces;
 433         twoFindIndexes(input, matcher, 2, 5);
 434         // SSN.BB.SS
 435         input = spaces + nsm + wordChar + wordChar + spaces;
 436         twoFindIndexes(input, matcher, 3, 5);
 437         // SS.BNB.SS
 438         input = spaces + wordChar + nsm + wordChar + spaces;
 439         twoFindIndexes(input, matcher, 2, 5);
 440         // SSNNSS
 441         input = spaces + nsm + nsm + spaces;
 442         matcher.reset(input);
 443         if (matcher.find())
 444             failCount++;
 445         // SSN.BBN.SS
 446         input = spaces + nsm + wordChar + wordChar + nsm + spaces;
 447         twoFindIndexes(input, matcher, 3, 6);
 448 
 449         report("Unicode word boundary");
 450     }
 451 
 452     private static void twoFindIndexes(String input, Matcher matcher, int a,
 453                                        int b) throws Exception
 454     {
 455         matcher.reset(input);
 456         matcher.find();
 457         if (matcher.start() != a)
 458             failCount++;
 459         matcher.find();
 460         if (matcher.start() != b)
 461             failCount++;
 462     }
 463 
 464     // This test is for 6284152
 465     static void check(String regex, String input, String[] expected) {
 466         List<String> result = new ArrayList<String>();
 467         Pattern p = Pattern.compile(regex);
 468         Matcher m = p.matcher(input);
 469         while (m.find()) {
 470             result.add(m.group());
 471         }
 472         if (!Arrays.asList(expected).equals(result))
 473             failCount++;
 474     }
 475 
 476     private static void lookbehindTest() throws Exception {
 477         //Positive
 478         check("(?<=%.{0,5})foo\\d",
 479               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 480               new String[]{"foo1", "foo2", "foo3"});
 481 
 482         //boundary at end of the lookbehind sub-regex should work consistently
 483         //with the boundary just after the lookbehind sub-regex
 484         check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
 485         check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
 486         check("(?<!abc )\\bfoo", "abc foo", new String[0]);
 487         check("(?<!abc \\b)foo", "abc foo", new String[0]);
 488 
 489         //Negative
 490         check("(?<!%.{0,5})foo\\d",
 491               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 492               new String[] {"foo4", "foo5"});
 493 
 494         //Positive greedy
 495         check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
 496 
 497         //Positive reluctant
 498         check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
 499 
 500         //supplementary
 501         check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 502               new String[] {"fo\ud800\udc00o"});
 503         check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 504               new String[] {"fo\ud800\udc00o"});
 505         check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
 506               new String[] {"fo\ud800\udc00o"});
 507         check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
 508               new String[] {"fo\ud800\udc00o"});
 509         report("Lookbehind");
 510     }
 511 
 512     // This test is for 4938995
 513     // Check to see if weak region boundaries are transparent to
 514     // lookahead and lookbehind constructs
 515     private static void boundsTest() throws Exception {
 516         String fullMessage = "catdogcat";
 517         Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
 518         Matcher matcher = pattern.matcher("catdogca");
 519         matcher.useTransparentBounds(true);
 520         if (matcher.find())
 521             failCount++;
 522         matcher.reset("atdogcat");
 523         if (matcher.find())
 524             failCount++;
 525         matcher.reset(fullMessage);
 526         if (!matcher.find())
 527             failCount++;
 528         matcher.reset(fullMessage);
 529         matcher.region(0,9);
 530         if (!matcher.find())
 531             failCount++;
 532         matcher.reset(fullMessage);
 533         matcher.region(0,6);
 534         if (!matcher.find())
 535             failCount++;
 536         matcher.reset(fullMessage);
 537         matcher.region(3,6);
 538         if (!matcher.find())
 539             failCount++;
 540         matcher.useTransparentBounds(false);
 541         if (matcher.find())
 542             failCount++;
 543 
 544         // Negative lookahead/lookbehind
 545         pattern = Pattern.compile("(?<!cat)dog(?!cat)");
 546         matcher = pattern.matcher("dogcat");
 547         matcher.useTransparentBounds(true);
 548         matcher.region(0,3);
 549         if (matcher.find())
 550             failCount++;
 551         matcher.reset("catdog");
 552         matcher.region(3,6);
 553         if (matcher.find())
 554             failCount++;
 555         matcher.useTransparentBounds(false);
 556         matcher.reset("dogcat");
 557         matcher.region(0,3);
 558         if (!matcher.find())
 559             failCount++;
 560         matcher.reset("catdog");
 561         matcher.region(3,6);
 562         if (!matcher.find())
 563             failCount++;
 564 
 565         report("Region bounds transparency");
 566     }
 567 
 568     // This test is for 4945394
 569     private static void findFromTest() throws Exception {
 570         String message = "This is 40 $0 message.";
 571         Pattern pat = Pattern.compile("\\$0");
 572         Matcher match = pat.matcher(message);
 573         if (!match.find())
 574             failCount++;
 575         if (match.find())
 576             failCount++;
 577         if (match.find())
 578             failCount++;
 579         report("Check for alternating find");
 580     }
 581 
 582     // This test is for 4872664 and 4892980
 583     private static void negatedCharClassTest() throws Exception {
 584         Pattern pattern = Pattern.compile("[^>]");
 585         Matcher matcher = pattern.matcher("\u203A");
 586         if (!matcher.matches())
 587             failCount++;
 588         pattern = Pattern.compile("[^fr]");
 589         matcher = pattern.matcher("a");
 590         if (!matcher.find())
 591             failCount++;
 592         matcher.reset("\u203A");
 593         if (!matcher.find())
 594             failCount++;
 595         String s = "for";
 596         String result[] = s.split("[^fr]");
 597         if (!result[0].equals("f"))
 598             failCount++;
 599         if (!result[1].equals("r"))
 600             failCount++;
 601         s = "f\u203Ar";
 602         result = s.split("[^fr]");
 603         if (!result[0].equals("f"))
 604             failCount++;
 605         if (!result[1].equals("r"))
 606             failCount++;
 607 
 608         // Test adding to bits, subtracting a node, then adding to bits again
 609         pattern = Pattern.compile("[^f\u203Ar]");
 610         matcher = pattern.matcher("a");
 611         if (!matcher.find())
 612             failCount++;
 613         matcher.reset("f");
 614         if (matcher.find())
 615             failCount++;
 616         matcher.reset("\u203A");
 617         if (matcher.find())
 618             failCount++;
 619         matcher.reset("r");
 620         if (matcher.find())
 621             failCount++;
 622         matcher.reset("\u203B");
 623         if (!matcher.find())
 624             failCount++;
 625 
 626         // Test subtracting a node, adding to bits, subtracting again
 627         pattern = Pattern.compile("[^\u203Ar\u203B]");
 628         matcher = pattern.matcher("a");
 629         if (!matcher.find())
 630             failCount++;
 631         matcher.reset("\u203A");
 632         if (matcher.find())
 633             failCount++;
 634         matcher.reset("r");
 635         if (matcher.find())
 636             failCount++;
 637         matcher.reset("\u203B");
 638         if (matcher.find())
 639             failCount++;
 640         matcher.reset("\u203C");
 641         if (!matcher.find())
 642             failCount++;
 643 
 644         report("Negated Character Class");
 645     }
 646 
 647     // This test is for 4628291
 648     private static void toStringTest() throws Exception {
 649         Pattern pattern = Pattern.compile("b+");
 650         if (pattern.toString() != "b+")
 651             failCount++;
 652         Matcher matcher = pattern.matcher("aaabbbccc");
 653         String matcherString = matcher.toString(); // unspecified
 654         matcher.find();
 655         matcherString = matcher.toString(); // unspecified
 656         matcher.region(0,3);
 657         matcherString = matcher.toString(); // unspecified
 658         matcher.reset();
 659         matcherString = matcher.toString(); // unspecified
 660         report("toString");
 661     }
 662 
 663     // This test is for 4808962
 664     private static void literalPatternTest() throws Exception {
 665         int flags = Pattern.LITERAL;
 666 
 667         Pattern pattern = Pattern.compile("abc\\t$^", flags);
 668         check(pattern, "abc\\t$^", true);
 669 
 670         pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
 671         check(pattern, "abc\\t$^", true);
 672 
 673         pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
 674         check(pattern, "\\Qa^$bcabc\\E", true);
 675         check(pattern, "a^$bcabc", false);
 676 
 677         pattern = Pattern.compile("\\\\Q\\\\E");
 678         check(pattern, "\\Q\\E", true);
 679 
 680         pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
 681         check(pattern, "abcefg\\Q\\Ehij", true);
 682 
 683         pattern = Pattern.compile("\\\\\\Q\\\\E");
 684         check(pattern, "\\\\\\\\", true);
 685 
 686         pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
 687         check(pattern, "\\Qa^$bcabc\\E", true);
 688         check(pattern, "a^$bcabc", false);
 689 
 690         pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
 691         check(pattern, "\\Qabc\\Edef", true);
 692         check(pattern, "abcdef", false);
 693 
 694         pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
 695         check(pattern, "abc\\Edef", true);
 696         check(pattern, "abcdef", false);
 697 
 698         pattern = Pattern.compile(Pattern.quote("\\E"));
 699         check(pattern, "\\E", true);
 700 
 701         pattern = Pattern.compile("((((abc.+?:)", flags);
 702         check(pattern, "((((abc.+?:)", true);
 703 
 704         flags |= Pattern.MULTILINE;
 705 
 706         pattern = Pattern.compile("^cat$", flags);
 707         check(pattern, "abc^cat$def", true);
 708         check(pattern, "cat", false);
 709 
 710         flags |= Pattern.CASE_INSENSITIVE;
 711 
 712         pattern = Pattern.compile("abcdef", flags);
 713         check(pattern, "ABCDEF", true);
 714         check(pattern, "AbCdEf", true);
 715 
 716         flags |= Pattern.DOTALL;
 717 
 718         pattern = Pattern.compile("a...b", flags);
 719         check(pattern, "A...b", true);
 720         check(pattern, "Axxxb", false);
 721 
 722         flags |= Pattern.CANON_EQ;
 723 
 724         Pattern p = Pattern.compile("testa\u030a", flags);
 725         check(pattern, "testa\u030a", false);
 726         check(pattern, "test\u00e5", false);
 727 
 728         // Supplementary character test
 729         flags = Pattern.LITERAL;
 730 
 731         pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
 732         check(pattern, toSupplementaries("abc\\t$^"), true);
 733 
 734         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
 735         check(pattern, toSupplementaries("abc\\t$^"), true);
 736 
 737         pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
 738         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 739         check(pattern, toSupplementaries("a^$bcabc"), false);
 740 
 741         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
 742         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 743         check(pattern, toSupplementaries("a^$bcabc"), false);
 744 
 745         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
 746         check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
 747         check(pattern, toSupplementaries("abcdef"), false);
 748 
 749         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
 750         check(pattern, toSupplementaries("abc\\Edef"), true);
 751         check(pattern, toSupplementaries("abcdef"), false);
 752 
 753         pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
 754         check(pattern, toSupplementaries("((((abc.+?:)"), true);
 755 
 756         flags |= Pattern.MULTILINE;
 757 
 758         pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
 759         check(pattern, toSupplementaries("abc^cat$def"), true);
 760         check(pattern, toSupplementaries("cat"), false);
 761 
 762         flags |= Pattern.DOTALL;
 763 
 764         // note: this is case-sensitive.
 765         pattern = Pattern.compile(toSupplementaries("a...b"), flags);
 766         check(pattern, toSupplementaries("a...b"), true);
 767         check(pattern, toSupplementaries("axxxb"), false);
 768 
 769         flags |= Pattern.CANON_EQ;
 770 
 771         String t = toSupplementaries("test");
 772         p = Pattern.compile(t + "a\u030a", flags);
 773         check(pattern, t + "a\u030a", false);
 774         check(pattern, t + "\u00e5", false);
 775 
 776         report("Literal pattern");
 777     }
 778 
 779     // This test is for 4803179
 780     // This test is also for 4808962, replacement parts
 781     private static void literalReplacementTest() throws Exception {
 782         int flags = Pattern.LITERAL;
 783 
 784         Pattern pattern = Pattern.compile("abc", flags);
 785         Matcher matcher = pattern.matcher("zzzabczzz");
 786         String replaceTest = "$0";
 787         String result = matcher.replaceAll(replaceTest);
 788         if (!result.equals("zzzabczzz"))
 789             failCount++;
 790 
 791         matcher.reset();
 792         String literalReplacement = matcher.quoteReplacement(replaceTest);
 793         result = matcher.replaceAll(literalReplacement);
 794         if (!result.equals("zzz$0zzz"))
 795             failCount++;
 796 
 797         matcher.reset();
 798         replaceTest = "\\t$\\$";
 799         literalReplacement = matcher.quoteReplacement(replaceTest);
 800         result = matcher.replaceAll(literalReplacement);
 801         if (!result.equals("zzz\\t$\\$zzz"))
 802             failCount++;
 803 
 804         // Supplementary character test
 805         pattern = Pattern.compile(toSupplementaries("abc"), flags);
 806         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
 807         replaceTest = "$0";
 808         result = matcher.replaceAll(replaceTest);
 809         if (!result.equals(toSupplementaries("zzzabczzz")))
 810             failCount++;
 811 
 812         matcher.reset();
 813         literalReplacement = matcher.quoteReplacement(replaceTest);
 814         result = matcher.replaceAll(literalReplacement);
 815         if (!result.equals(toSupplementaries("zzz$0zzz")))
 816             failCount++;
 817 
 818         matcher.reset();
 819         replaceTest = "\\t$\\$";
 820         literalReplacement = matcher.quoteReplacement(replaceTest);
 821         result = matcher.replaceAll(literalReplacement);
 822         if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
 823             failCount++;
 824 
 825         report("Literal replacement");
 826     }
 827 
 828     // This test is for 4757029
 829     private static void regionTest() throws Exception {
 830         Pattern pattern = Pattern.compile("abc");
 831         Matcher matcher = pattern.matcher("abcdefabc");
 832 
 833         matcher.region(0,9);
 834         if (!matcher.find())
 835             failCount++;
 836         if (!matcher.find())
 837             failCount++;
 838         matcher.region(0,3);
 839         if (!matcher.find())
 840            failCount++;
 841         matcher.region(3,6);
 842         if (matcher.find())
 843            failCount++;
 844         matcher.region(0,2);
 845         if (matcher.find())
 846            failCount++;
 847 
 848         expectRegionFail(matcher, 1, -1);
 849         expectRegionFail(matcher, -1, -1);
 850         expectRegionFail(matcher, -1, 1);
 851         expectRegionFail(matcher, 5, 3);
 852         expectRegionFail(matcher, 5, 12);
 853         expectRegionFail(matcher, 12, 12);
 854 
 855         pattern = Pattern.compile("^abc$");
 856         matcher = pattern.matcher("zzzabczzz");
 857         matcher.region(0,9);
 858         if (matcher.find())
 859             failCount++;
 860         matcher.region(3,6);
 861         if (!matcher.find())
 862            failCount++;
 863         matcher.region(3,6);
 864         matcher.useAnchoringBounds(false);
 865         if (matcher.find())
 866            failCount++;
 867 
 868         // Supplementary character test
 869         pattern = Pattern.compile(toSupplementaries("abc"));
 870         matcher = pattern.matcher(toSupplementaries("abcdefabc"));
 871         matcher.region(0,9*2);
 872         if (!matcher.find())
 873             failCount++;
 874         if (!matcher.find())
 875             failCount++;
 876         matcher.region(0,3*2);
 877         if (!matcher.find())
 878            failCount++;
 879         matcher.region(1,3*2);
 880         if (matcher.find())
 881            failCount++;
 882         matcher.region(3*2,6*2);
 883         if (matcher.find())
 884            failCount++;
 885         matcher.region(0,2*2);
 886         if (matcher.find())
 887            failCount++;
 888         matcher.region(0,2*2+1);
 889         if (matcher.find())
 890            failCount++;
 891 
 892         expectRegionFail(matcher, 1*2, -1);
 893         expectRegionFail(matcher, -1, -1);
 894         expectRegionFail(matcher, -1, 1*2);
 895         expectRegionFail(matcher, 5*2, 3*2);
 896         expectRegionFail(matcher, 5*2, 12*2);
 897         expectRegionFail(matcher, 12*2, 12*2);
 898 
 899         pattern = Pattern.compile(toSupplementaries("^abc$"));
 900         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
 901         matcher.region(0,9*2);
 902         if (matcher.find())
 903             failCount++;
 904         matcher.region(3*2,6*2);
 905         if (!matcher.find())
 906            failCount++;
 907         matcher.region(3*2+1,6*2);
 908         if (matcher.find())
 909            failCount++;
 910         matcher.region(3*2,6*2-1);
 911         if (matcher.find())
 912            failCount++;
 913         matcher.region(3*2,6*2);
 914         matcher.useAnchoringBounds(false);
 915         if (matcher.find())
 916            failCount++;
 917         report("Regions");
 918     }
 919 
 920     private static void expectRegionFail(Matcher matcher, int index1,
 921                                          int index2)
 922     {
 923         try {
 924             matcher.region(index1, index2);
 925             failCount++;
 926         } catch (IndexOutOfBoundsException ioobe) {
 927             // Correct result
 928         } catch (IllegalStateException ise) {
 929             // Correct result
 930         }
 931     }
 932 
 933     // This test is for 4803197
 934     private static void escapedSegmentTest() throws Exception {
 935 
 936         Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
 937         check(pattern, "dir1\\dir2", true);
 938 
 939         pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
 940         check(pattern, "dir1\\dir2\\", true);
 941 
 942         pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
 943         check(pattern, "dir1\\dir2\\", true);
 944 
 945         // Supplementary character test
 946         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
 947         check(pattern, toSupplementaries("dir1\\dir2"), true);
 948 
 949         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
 950         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
 951 
 952         pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
 953         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
 954 
 955         report("Escaped segment");
 956     }
 957 
 958     // This test is for 4792284
 959     private static void nonCaptureRepetitionTest() throws Exception {
 960         String input = "abcdefgh;";
 961 
 962         String[] patterns = new String[] {
 963             "(?:\\w{4})+;",
 964             "(?:\\w{8})*;",
 965             "(?:\\w{2}){2,4};",
 966             "(?:\\w{4}){2,};",   // only matches the
 967             ".*?(?:\\w{5})+;",   //     specified minimum
 968             ".*?(?:\\w{9})*;",   //     number of reps - OK
 969             "(?:\\w{4})+?;",     // lazy repetition - OK
 970             "(?:\\w{4})++;",     // possessive repetition - OK
 971             "(?:\\w{2,}?)+;",    // non-deterministic - OK
 972             "(\\w{4})+;",        // capturing group - OK
 973         };
 974 
 975         for (int i = 0; i < patterns.length; i++) {
 976             // Check find()
 977             check(patterns[i], 0, input, input, true);
 978             // Check matches()
 979             Pattern p = Pattern.compile(patterns[i]);
 980             Matcher m = p.matcher(input);
 981 
 982             if (m.matches()) {
 983                 if (!m.group(0).equals(input))
 984                     failCount++;
 985             } else {
 986                 failCount++;
 987             }
 988         }
 989 
 990         report("Non capturing repetition");
 991     }
 992 
 993     // This test is for 6358731
 994     private static void notCapturedGroupCurlyMatchTest() throws Exception {
 995         Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
 996         Matcher matcher = pattern.matcher("abcd");
 997         if (!matcher.matches() ||
 998              matcher.group(1) != null ||
 999              !matcher.group(2).equals("abcd")) {
1000             failCount++;
1001         }
1002         report("Not captured GroupCurly");
1003     }
1004 
1005     // This test is for 4706545
1006     private static void javaCharClassTest() throws Exception {
1007         for (int i=0; i<1000; i++) {
1008             char c = (char)generator.nextInt();
1009             check("{javaLowerCase}", c, Character.isLowerCase(c));
1010             check("{javaUpperCase}", c, Character.isUpperCase(c));
1011             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1012             check("{javaTitleCase}", c, Character.isTitleCase(c));
1013             check("{javaDigit}", c, Character.isDigit(c));
1014             check("{javaDefined}", c, Character.isDefined(c));
1015             check("{javaLetter}", c, Character.isLetter(c));
1016             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1017             check("{javaJavaIdentifierStart}", c,
1018                   Character.isJavaIdentifierStart(c));
1019             check("{javaJavaIdentifierPart}", c,
1020                   Character.isJavaIdentifierPart(c));
1021             check("{javaUnicodeIdentifierStart}", c,
1022                   Character.isUnicodeIdentifierStart(c));
1023             check("{javaUnicodeIdentifierPart}", c,
1024                   Character.isUnicodeIdentifierPart(c));
1025             check("{javaIdentifierIgnorable}", c,
1026                   Character.isIdentifierIgnorable(c));
1027             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1028             check("{javaWhitespace}", c, Character.isWhitespace(c));
1029             check("{javaISOControl}", c, Character.isISOControl(c));
1030             check("{javaMirrored}", c, Character.isMirrored(c));
1031 
1032         }
1033 
1034         // Supplementary character test
1035         for (int i=0; i<1000; i++) {
1036             int c = generator.nextInt(Character.MAX_CODE_POINT
1037                                       - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1038                         + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1039             check("{javaLowerCase}", c, Character.isLowerCase(c));
1040             check("{javaUpperCase}", c, Character.isUpperCase(c));
1041             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1042             check("{javaTitleCase}", c, Character.isTitleCase(c));
1043             check("{javaDigit}", c, Character.isDigit(c));
1044             check("{javaDefined}", c, Character.isDefined(c));
1045             check("{javaLetter}", c, Character.isLetter(c));
1046             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1047             check("{javaJavaIdentifierStart}", c,
1048                   Character.isJavaIdentifierStart(c));
1049             check("{javaJavaIdentifierPart}", c,
1050                   Character.isJavaIdentifierPart(c));
1051             check("{javaUnicodeIdentifierStart}", c,
1052                   Character.isUnicodeIdentifierStart(c));
1053             check("{javaUnicodeIdentifierPart}", c,
1054                   Character.isUnicodeIdentifierPart(c));
1055             check("{javaIdentifierIgnorable}", c,
1056                   Character.isIdentifierIgnorable(c));
1057             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1058             check("{javaWhitespace}", c, Character.isWhitespace(c));
1059             check("{javaISOControl}", c, Character.isISOControl(c));
1060             check("{javaMirrored}", c, Character.isMirrored(c));
1061         }
1062 
1063         report("Java character classes");
1064     }
1065 
1066     // This test is for 4523620
1067     /*
1068     private static void numOccurrencesTest() throws Exception {
1069         Pattern pattern = Pattern.compile("aaa");
1070 
1071         if (pattern.numOccurrences("aaaaaa", false) != 2)
1072             failCount++;
1073         if (pattern.numOccurrences("aaaaaa", true) != 4)
1074             failCount++;
1075 
1076         pattern = Pattern.compile("^");
1077         if (pattern.numOccurrences("aaaaaa", false) != 1)
1078             failCount++;
1079         if (pattern.numOccurrences("aaaaaa", true) != 1)
1080             failCount++;
1081 
1082         report("Number of Occurrences");
1083     }
1084     */
1085 
1086     // This test is for 4776374
1087     private static void caretBetweenTerminatorsTest() throws Exception {
1088         int flags1 = Pattern.DOTALL;
1089         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1090         int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1091         int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1092 
1093         check("^....", flags1, "test\ntest", "test", true);
1094         check(".....^", flags1, "test\ntest", "test", false);
1095         check(".....^", flags1, "test\n", "test", false);
1096         check("....^", flags1, "test\r\n", "test", false);
1097 
1098         check("^....", flags2, "test\ntest", "test", true);
1099         check("....^", flags2, "test\ntest", "test", false);
1100         check(".....^", flags2, "test\n", "test", false);
1101         check("....^", flags2, "test\r\n", "test", false);
1102 
1103         check("^....", flags3, "test\ntest", "test", true);
1104         check(".....^", flags3, "test\ntest", "test\n", true);
1105         check(".....^", flags3, "test\u0085test", "test\u0085", false);
1106         check(".....^", flags3, "test\n", "test", false);
1107         check(".....^", flags3, "test\r\n", "test", false);
1108         check("......^", flags3, "test\r\ntest", "test\r\n", true);
1109 
1110         check("^....", flags4, "test\ntest", "test", true);
1111         check(".....^", flags3, "test\ntest", "test\n", true);
1112         check(".....^", flags4, "test\u0085test", "test\u0085", true);
1113         check(".....^", flags4, "test\n", "test\n", false);
1114         check(".....^", flags4, "test\r\n", "test\r", false);
1115 
1116         // Supplementary character test
1117         String t = toSupplementaries("test");
1118         check("^....", flags1, t+"\n"+t, t, true);
1119         check(".....^", flags1, t+"\n"+t, t, false);
1120         check(".....^", flags1, t+"\n", t, false);
1121         check("....^", flags1, t+"\r\n", t, false);
1122 
1123         check("^....", flags2, t+"\n"+t, t, true);
1124         check("....^", flags2, t+"\n"+t, t, false);
1125         check(".....^", flags2, t+"\n", t, false);
1126         check("....^", flags2, t+"\r\n", t, false);
1127 
1128         check("^....", flags3, t+"\n"+t, t, true);
1129         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1130         check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1131         check(".....^", flags3, t+"\n", t, false);
1132         check(".....^", flags3, t+"\r\n", t, false);
1133         check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1134 
1135         check("^....", flags4, t+"\n"+t, t, true);
1136         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1137         check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1138         check(".....^", flags4, t+"\n", t+"\n", false);
1139         check(".....^", flags4, t+"\r\n", t+"\r", false);
1140 
1141         report("Caret between terminators");
1142     }
1143 
1144     // This test is for 4727935
1145     private static void dollarAtEndTest() throws Exception {
1146         int flags1 = Pattern.DOTALL;
1147         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1148         int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1149 
1150         check("....$", flags1, "test\n", "test", true);
1151         check("....$", flags1, "test\r\n", "test", true);
1152         check(".....$", flags1, "test\n", "test\n", true);
1153         check(".....$", flags1, "test\u0085", "test\u0085", true);
1154         check("....$", flags1, "test\u0085", "test", true);
1155 
1156         check("....$", flags2, "test\n", "test", true);
1157         check(".....$", flags2, "test\n", "test\n", true);
1158         check(".....$", flags2, "test\u0085", "test\u0085", true);
1159         check("....$", flags2, "test\u0085", "est\u0085", true);
1160 
1161         check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1162         check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1163         check("....$blah", flags3, "test\nblah", "!!!!", false);
1164         check(".....$blah", flags3, "test\nblah", "!!!!", false);
1165 
1166         // Supplementary character test
1167         String t = toSupplementaries("test");
1168         String b = toSupplementaries("blah");
1169         check("....$", flags1, t+"\n", t, true);
1170         check("....$", flags1, t+"\r\n", t, true);
1171         check(".....$", flags1, t+"\n", t+"\n", true);
1172         check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1173         check("....$", flags1, t+"\u0085", t, true);
1174 
1175         check("....$", flags2, t+"\n", t, true);
1176         check(".....$", flags2, t+"\n", t+"\n", true);
1177         check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1178         check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1179 
1180         check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1181         check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1182         check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1183         check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1184 
1185         report("Dollar at End");
1186     }
1187 
1188     // This test is for 4711773
1189     private static void multilineDollarTest() throws Exception {
1190         Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1191         Matcher matcher = findCR.matcher("first bit\nsecond bit");
1192         matcher.find();
1193         if (matcher.start(0) != 9)
1194             failCount++;
1195         matcher.find();
1196         if (matcher.start(0) != 20)
1197             failCount++;
1198 
1199         // Supplementary character test
1200         matcher = findCR.matcher(toSupplementaries("first  bit\n second  bit")); // double BMP chars
1201         matcher.find();
1202         if (matcher.start(0) != 9*2)
1203             failCount++;
1204         matcher.find();
1205         if (matcher.start(0) != 20*2)
1206             failCount++;
1207 
1208         report("Multiline Dollar");
1209     }
1210 
1211     private static void reluctantRepetitionTest() throws Exception {
1212         Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1213         check(p, "1 word word word 2", true);
1214         check(p, "1 wor wo w 2", true);
1215         check(p, "1 word word 2", true);
1216         check(p, "1 word 2", true);
1217         check(p, "1 wo w w 2", true);
1218         check(p, "1 wo w 2", true);
1219         check(p, "1 wor w 2", true);
1220 
1221         p = Pattern.compile("([a-z])+?c");
1222         Matcher m = p.matcher("ababcdefdec");
1223         check(m, "ababc");
1224 
1225         // Supplementary character test
1226         p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1227         m = p.matcher(toSupplementaries("ababcdefdec"));
1228         check(m, toSupplementaries("ababc"));
1229 
1230         report("Reluctant Repetition");
1231     }
1232 
1233     private static void serializeTest() throws Exception {
1234         String patternStr = "(b)";
1235         String matchStr = "b";
1236         Pattern pattern = Pattern.compile(patternStr);
1237         ByteArrayOutputStream baos = new ByteArrayOutputStream();
1238         ObjectOutputStream oos = new ObjectOutputStream(baos);
1239         oos.writeObject(pattern);
1240         oos.close();
1241         ObjectInputStream ois = new ObjectInputStream(
1242             new ByteArrayInputStream(baos.toByteArray()));
1243         Pattern serializedPattern = (Pattern)ois.readObject();
1244         ois.close();
1245         Matcher matcher = serializedPattern.matcher(matchStr);
1246         if (!matcher.matches())
1247             failCount++;
1248         if (matcher.groupCount() != 1)
1249             failCount++;
1250 
1251         report("Serialization");
1252     }
1253 
1254     private static void gTest() {
1255         Pattern pattern = Pattern.compile("\\G\\w");
1256         Matcher matcher = pattern.matcher("abc#x#x");
1257         matcher.find();
1258         matcher.find();
1259         matcher.find();
1260         if (matcher.find())
1261             failCount++;
1262 
1263         pattern = Pattern.compile("\\GA*");
1264         matcher = pattern.matcher("1A2AA3");
1265         matcher.find();
1266         if (matcher.find())
1267             failCount++;
1268 
1269         pattern = Pattern.compile("\\GA*");
1270         matcher = pattern.matcher("1A2AA3");
1271         if (!matcher.find(1))
1272             failCount++;
1273         matcher.find();
1274         if (matcher.find())
1275             failCount++;
1276 
1277         report("\\G");
1278     }
1279 
1280     private static void zTest() {
1281         Pattern pattern = Pattern.compile("foo\\Z");
1282         // Positives
1283         check(pattern, "foo\u0085", true);
1284         check(pattern, "foo\u2028", true);
1285         check(pattern, "foo\u2029", true);
1286         check(pattern, "foo\n", true);
1287         check(pattern, "foo\r", true);
1288         check(pattern, "foo\r\n", true);
1289         // Negatives
1290         check(pattern, "fooo", false);
1291         check(pattern, "foo\n\r", false);
1292 
1293         pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1294         // Positives
1295         check(pattern, "foo", true);
1296         check(pattern, "foo\n", true);
1297         // Negatives
1298         check(pattern, "foo\r", false);
1299         check(pattern, "foo\u0085", false);
1300         check(pattern, "foo\u2028", false);
1301         check(pattern, "foo\u2029", false);
1302 
1303         report("\\Z");
1304     }
1305 
1306     private static void replaceFirstTest() {
1307         Pattern pattern = Pattern.compile("(ab)(c*)");
1308         Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1309         if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1310             failCount++;
1311 
1312         matcher.reset("zzzabccczzzabcczzzabccczzz");
1313         if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1314             failCount++;
1315 
1316         matcher.reset("zzzabccczzzabcczzzabccczzz");
1317         String result = matcher.replaceFirst("$1");
1318         if (!result.equals("zzzabzzzabcczzzabccczzz"))
1319             failCount++;
1320 
1321         matcher.reset("zzzabccczzzabcczzzabccczzz");
1322         result = matcher.replaceFirst("$2");
1323         if (!result.equals("zzzccczzzabcczzzabccczzz"))
1324             failCount++;
1325 
1326         pattern = Pattern.compile("a*");
1327         matcher = pattern.matcher("aaaaaaaaaa");
1328         if (!matcher.replaceFirst("test").equals("test"))
1329             failCount++;
1330 
1331         pattern = Pattern.compile("a+");
1332         matcher = pattern.matcher("zzzaaaaaaaaaa");
1333         if (!matcher.replaceFirst("test").equals("zzztest"))
1334             failCount++;
1335 
1336         // Supplementary character test
1337         pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1338         matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1339         if (!matcher.replaceFirst(toSupplementaries("test"))
1340                 .equals(toSupplementaries("testzzzabcczzzabccc")))
1341             failCount++;
1342 
1343         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1344         if (!matcher.replaceFirst(toSupplementaries("test")).
1345             equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1346             failCount++;
1347 
1348         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1349         result = matcher.replaceFirst("$1");
1350         if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1351             failCount++;
1352 
1353         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1354         result = matcher.replaceFirst("$2");
1355         if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1356             failCount++;
1357 
1358         pattern = Pattern.compile(toSupplementaries("a*"));
1359         matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1360         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1361             failCount++;
1362 
1363         pattern = Pattern.compile(toSupplementaries("a+"));
1364         matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1365         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1366             failCount++;
1367 
1368         report("Replace First");
1369     }
1370 
1371     private static void unixLinesTest() {
1372         Pattern pattern = Pattern.compile(".*");
1373         Matcher matcher = pattern.matcher("aa\u2028blah");
1374         matcher.find();
1375         if (!matcher.group(0).equals("aa"))
1376             failCount++;
1377 
1378         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1379         matcher = pattern.matcher("aa\u2028blah");
1380         matcher.find();
1381         if (!matcher.group(0).equals("aa\u2028blah"))
1382             failCount++;
1383 
1384         pattern = Pattern.compile("[az]$",
1385                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1386         matcher = pattern.matcher("aa\u2028zz");
1387         check(matcher, "a\u2028", false);
1388 
1389         // Supplementary character test
1390         pattern = Pattern.compile(".*");
1391         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1392         matcher.find();
1393         if (!matcher.group(0).equals(toSupplementaries("aa")))
1394             failCount++;
1395 
1396         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1397         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1398         matcher.find();
1399         if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1400             failCount++;
1401 
1402         pattern = Pattern.compile(toSupplementaries("[az]$"),
1403                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1404         matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1405         check(matcher, toSupplementaries("a\u2028"), false);
1406 
1407         report("Unix Lines");
1408     }
1409 
1410     private static void commentsTest() {
1411         int flags = Pattern.COMMENTS;
1412 
1413         Pattern pattern = Pattern.compile("aa \\# aa", flags);
1414         Matcher matcher = pattern.matcher("aa#aa");
1415         if (!matcher.matches())
1416             failCount++;
1417 
1418         pattern = Pattern.compile("aa  # blah", flags);
1419         matcher = pattern.matcher("aa");
1420         if (!matcher.matches())
1421             failCount++;
1422 
1423         pattern = Pattern.compile("aa blah", flags);
1424         matcher = pattern.matcher("aablah");
1425         if (!matcher.matches())
1426              failCount++;
1427 
1428         pattern = Pattern.compile("aa  # blah blech  ", flags);
1429         matcher = pattern.matcher("aa");
1430         if (!matcher.matches())
1431             failCount++;
1432 
1433         pattern = Pattern.compile("aa  # blah\n  ", flags);
1434         matcher = pattern.matcher("aa");
1435         if (!matcher.matches())
1436             failCount++;
1437 
1438         pattern = Pattern.compile("aa  # blah\nbc # blech", flags);
1439         matcher = pattern.matcher("aabc");
1440         if (!matcher.matches())
1441              failCount++;
1442 
1443         pattern = Pattern.compile("aa  # blah\nbc# blech", flags);
1444         matcher = pattern.matcher("aabc");
1445         if (!matcher.matches())
1446              failCount++;
1447 
1448         pattern = Pattern.compile("aa  # blah\nbc\\# blech", flags);
1449         matcher = pattern.matcher("aabc#blech");
1450         if (!matcher.matches())
1451              failCount++;
1452 
1453         // Supplementary character test
1454         pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1455         matcher = pattern.matcher(toSupplementaries("aa#aa"));
1456         if (!matcher.matches())
1457             failCount++;
1458 
1459         pattern = Pattern.compile(toSupplementaries("aa  # blah"), flags);
1460         matcher = pattern.matcher(toSupplementaries("aa"));
1461         if (!matcher.matches())
1462             failCount++;
1463 
1464         pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1465         matcher = pattern.matcher(toSupplementaries("aablah"));
1466         if (!matcher.matches())
1467              failCount++;
1468 
1469         pattern = Pattern.compile(toSupplementaries("aa  # blah blech  "), flags);
1470         matcher = pattern.matcher(toSupplementaries("aa"));
1471         if (!matcher.matches())
1472             failCount++;
1473 
1474         pattern = Pattern.compile(toSupplementaries("aa  # blah\n  "), flags);
1475         matcher = pattern.matcher(toSupplementaries("aa"));
1476         if (!matcher.matches())
1477             failCount++;
1478 
1479         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc # blech"), flags);
1480         matcher = pattern.matcher(toSupplementaries("aabc"));
1481         if (!matcher.matches())
1482              failCount++;
1483 
1484         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc# blech"), flags);
1485         matcher = pattern.matcher(toSupplementaries("aabc"));
1486         if (!matcher.matches())
1487              failCount++;
1488 
1489         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc\\# blech"), flags);
1490         matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1491         if (!matcher.matches())
1492              failCount++;
1493 
1494         report("Comments");
1495     }
1496 
1497     private static void caseFoldingTest() { // bug 4504687
1498         int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1499         Pattern pattern = Pattern.compile("aa", flags);
1500         Matcher matcher = pattern.matcher("ab");
1501         if (matcher.matches())
1502             failCount++;
1503 
1504         pattern = Pattern.compile("aA", flags);
1505         matcher = pattern.matcher("ab");
1506         if (matcher.matches())
1507             failCount++;
1508 
1509         pattern = Pattern.compile("aa", flags);
1510         matcher = pattern.matcher("aB");
1511         if (matcher.matches())
1512             failCount++;
1513         matcher = pattern.matcher("Ab");
1514         if (matcher.matches())
1515             failCount++;
1516 
1517         // ASCII               "a"
1518         // Latin-1 Supplement  "a" + grave
1519         // Cyrillic            "a"
1520         String[] patterns = new String[] {
1521             //single
1522             "a", "\u00e0", "\u0430",
1523             //slice
1524             "ab", "\u00e0\u00e1", "\u0430\u0431",
1525             //class single
1526             "[a]", "[\u00e0]", "[\u0430]",
1527             //class range
1528             "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1529             //back reference
1530             "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1531         };
1532 
1533         String[] texts = new String[] {
1534             "A", "\u00c0", "\u0410",
1535             "AB", "\u00c0\u00c1", "\u0410\u0411",
1536             "A", "\u00c0", "\u0410",
1537             "B", "\u00c2", "\u0411",
1538             "aA", "\u00e0\u00c0", "\u0430\u0410"
1539         };
1540 
1541         boolean[] expected = new boolean[] {
1542             true, false, false,
1543             true, false, false,
1544             true, false, false,
1545             true, false, false,
1546             true, false, false
1547         };
1548 
1549         flags = Pattern.CASE_INSENSITIVE;
1550         for (int i = 0; i < patterns.length; i++) {
1551             pattern = Pattern.compile(patterns[i], flags);
1552             matcher = pattern.matcher(texts[i]);
1553             if (matcher.matches() != expected[i]) {
1554                 System.out.println("<1> Failed at " + i);
1555                 failCount++;
1556             }
1557         }
1558 
1559         flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1560         for (int i = 0; i < patterns.length; i++) {
1561             pattern = Pattern.compile(patterns[i], flags);
1562             matcher = pattern.matcher(texts[i]);
1563             if (!matcher.matches()) {
1564                 System.out.println("<2> Failed at " + i);
1565                 failCount++;
1566             }
1567         }
1568         // flag unicode_case alone should do nothing
1569         flags = Pattern.UNICODE_CASE;
1570         for (int i = 0; i < patterns.length; i++) {
1571             pattern = Pattern.compile(patterns[i], flags);
1572             matcher = pattern.matcher(texts[i]);
1573             if (matcher.matches()) {
1574                 System.out.println("<3> Failed at " + i);
1575                 failCount++;
1576             }
1577         }
1578 
1579         // Special cases: i, I, u+0131 and u+0130
1580         flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1581         pattern = Pattern.compile("[h-j]+", flags);
1582         if (!pattern.matcher("\u0131\u0130").matches())
1583             failCount++;
1584         report("Case Folding");
1585     }
1586 
1587     private static void appendTest() {
1588         Pattern pattern = Pattern.compile("(ab)(cd)");
1589         Matcher matcher = pattern.matcher("abcd");
1590         String result = matcher.replaceAll("$2$1");
1591         if (!result.equals("cdab"))
1592             failCount++;
1593 
1594         String  s1 = "Swap all: first = 123, second = 456";
1595         String  s2 = "Swap one: first = 123, second = 456";
1596         String  r  = "$3$2$1";
1597         pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1598         matcher = pattern.matcher(s1);
1599 
1600         result = matcher.replaceAll(r);
1601         if (!result.equals("Swap all: 123 = first, 456 = second"))
1602             failCount++;
1603 
1604         matcher = pattern.matcher(s2);
1605 
1606         if (matcher.find()) {
1607             StringBuffer sb = new StringBuffer();
1608             matcher.appendReplacement(sb, r);
1609             matcher.appendTail(sb);
1610             result = sb.toString();
1611             if (!result.equals("Swap one: 123 = first, second = 456"))
1612                 failCount++;
1613         }
1614 
1615         // Supplementary character test
1616         pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1617         matcher = pattern.matcher(toSupplementaries("abcd"));
1618         result = matcher.replaceAll("$2$1");
1619         if (!result.equals(toSupplementaries("cdab")))
1620             failCount++;
1621 
1622         s1 = toSupplementaries("Swap all: first = 123, second = 456");
1623         s2 = toSupplementaries("Swap one: first = 123, second = 456");
1624         r  = toSupplementaries("$3$2$1");
1625         pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1626         matcher = pattern.matcher(s1);
1627 
1628         result = matcher.replaceAll(r);
1629         if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1630             failCount++;
1631 
1632         matcher = pattern.matcher(s2);
1633 
1634         if (matcher.find()) {
1635             StringBuffer sb = new StringBuffer();
1636             matcher.appendReplacement(sb, r);
1637             matcher.appendTail(sb);
1638             result = sb.toString();
1639             if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1640                 failCount++;
1641         }
1642         report("Append");
1643     }
1644 
1645     private static void splitTest() {
1646         Pattern pattern = Pattern.compile(":");
1647         String[] result = pattern.split("foo:and:boo", 2);
1648         if (!result[0].equals("foo"))
1649             failCount++;
1650         if (!result[1].equals("and:boo"))
1651             failCount++;
1652         // Supplementary character test
1653         Pattern patternX = Pattern.compile(toSupplementaries("X"));
1654         result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1655         if (!result[0].equals(toSupplementaries("foo")))
1656             failCount++;
1657         if (!result[1].equals(toSupplementaries("andXboo")))
1658             failCount++;
1659 
1660         CharBuffer cb = CharBuffer.allocate(100);
1661         cb.put("foo:and:boo");
1662         cb.flip();
1663         result = pattern.split(cb);
1664         if (!result[0].equals("foo"))
1665             failCount++;
1666         if (!result[1].equals("and"))
1667             failCount++;
1668         if (!result[2].equals("boo"))
1669             failCount++;
1670 
1671         // Supplementary character test
1672         CharBuffer cbs = CharBuffer.allocate(100);
1673         cbs.put(toSupplementaries("fooXandXboo"));
1674         cbs.flip();
1675         result = patternX.split(cbs);
1676         if (!result[0].equals(toSupplementaries("foo")))
1677             failCount++;
1678         if (!result[1].equals(toSupplementaries("and")))
1679             failCount++;
1680         if (!result[2].equals(toSupplementaries("boo")))
1681             failCount++;
1682 
1683         String source = "0123456789";
1684         for (int limit=-2; limit<3; limit++) {
1685             for (int x=0; x<10; x++) {
1686                 result = source.split(Integer.toString(x), limit);
1687                 int expectedLength = limit < 1 ? 2 : limit;
1688 
1689                 if ((limit == 0) && (x == 9)) {
1690                     // expected dropping of ""
1691                     if (result.length != 1)
1692                         failCount++;
1693                     if (!result[0].equals("012345678")) {
1694                         failCount++;
1695                     }
1696                 } else {
1697                     if (result.length != expectedLength) {
1698                         failCount++;
1699                     }
1700                     if (!result[0].equals(source.substring(0,x))) {
1701                         if (limit != 1) {
1702                             failCount++;
1703                         } else {
1704                             if (!result[0].equals(source.substring(0,10))) {
1705                                 failCount++;
1706                             }
1707                         }
1708                     }
1709                     if (expectedLength > 1) { // Check segment 2
1710                         if (!result[1].equals(source.substring(x+1,10)))
1711                             failCount++;
1712                     }
1713                 }
1714             }
1715         }
1716         // Check the case for no match found
1717         for (int limit=-2; limit<3; limit++) {
1718             result = source.split("e", limit);
1719             if (result.length != 1)
1720                 failCount++;
1721             if (!result[0].equals(source))
1722                 failCount++;
1723         }
1724         // Check the case for limit == 0, source = "";
1725         source = "";
1726         result = source.split("e", 0);
1727         if (result.length != 1)
1728             failCount++;
1729         if (!result[0].equals(source))
1730             failCount++;
1731 
1732         report("Split");
1733     }
1734 
1735     private static void negationTest() {
1736         Pattern pattern = Pattern.compile("[\\[@^]+");
1737         Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1738         if (!matcher.find())
1739             failCount++;
1740         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1741             failCount++;
1742         pattern = Pattern.compile("[@\\[^]+");
1743         matcher = pattern.matcher("@@@@[[[[^^^^");
1744         if (!matcher.find())
1745             failCount++;
1746         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1747             failCount++;
1748         pattern = Pattern.compile("[@\\[^@]+");
1749         matcher = pattern.matcher("@@@@[[[[^^^^");
1750         if (!matcher.find())
1751             failCount++;
1752         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1753             failCount++;
1754 
1755         pattern = Pattern.compile("\\)");
1756         matcher = pattern.matcher("xxx)xxx");
1757         if (!matcher.find())
1758             failCount++;
1759 
1760         report("Negation");
1761     }
1762 
1763     private static void ampersandTest() {
1764         Pattern pattern = Pattern.compile("[&@]+");
1765         check(pattern, "@@@@&&&&", true);
1766 
1767         pattern = Pattern.compile("[@&]+");
1768         check(pattern, "@@@@&&&&", true);
1769 
1770         pattern = Pattern.compile("[@\\&]+");
1771         check(pattern, "@@@@&&&&", true);
1772 
1773         report("Ampersand");
1774     }
1775 
1776     private static void octalTest() throws Exception {
1777         Pattern pattern = Pattern.compile("\\u0007");
1778         Matcher matcher = pattern.matcher("\u0007");
1779         if (!matcher.matches())
1780             failCount++;
1781         pattern = Pattern.compile("\\07");
1782         matcher = pattern.matcher("\u0007");
1783         if (!matcher.matches())
1784             failCount++;
1785         pattern = Pattern.compile("\\007");
1786         matcher = pattern.matcher("\u0007");
1787         if (!matcher.matches())
1788             failCount++;
1789         pattern = Pattern.compile("\\0007");
1790         matcher = pattern.matcher("\u0007");
1791         if (!matcher.matches())
1792             failCount++;
1793         pattern = Pattern.compile("\\040");
1794         matcher = pattern.matcher("\u0020");
1795         if (!matcher.matches())
1796             failCount++;
1797         pattern = Pattern.compile("\\0403");
1798         matcher = pattern.matcher("\u00203");
1799         if (!matcher.matches())
1800             failCount++;
1801         pattern = Pattern.compile("\\0103");
1802         matcher = pattern.matcher("\u0043");
1803         if (!matcher.matches())
1804             failCount++;
1805 
1806         report("Octal");
1807     }
1808 
1809     private static void longPatternTest() throws Exception {
1810         try {
1811             Pattern pattern = Pattern.compile(
1812                 "a 32-character-long pattern xxxx");
1813             pattern = Pattern.compile("a 33-character-long pattern xxxxx");
1814             pattern = Pattern.compile("a thirty four character long regex");
1815             StringBuffer patternToBe = new StringBuffer(101);
1816             for (int i=0; i<100; i++)
1817                 patternToBe.append((char)(97 + i%26));
1818             pattern = Pattern.compile(patternToBe.toString());
1819         } catch (PatternSyntaxException e) {
1820             failCount++;
1821         }
1822 
1823         // Supplementary character test
1824         try {
1825             Pattern pattern = Pattern.compile(
1826                 toSupplementaries("a 32-character-long pattern xxxx"));
1827             pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
1828             pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
1829             StringBuffer patternToBe = new StringBuffer(101*2);
1830             for (int i=0; i<100; i++)
1831                 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
1832                                                      + 97 + i%26));
1833             pattern = Pattern.compile(patternToBe.toString());
1834         } catch (PatternSyntaxException e) {
1835             failCount++;
1836         }
1837         report("LongPattern");
1838     }
1839 
1840     private static void group0Test() throws Exception {
1841         Pattern pattern = Pattern.compile("(tes)ting");
1842         Matcher matcher = pattern.matcher("testing");
1843         check(matcher, "testing");
1844 
1845         matcher.reset("testing");
1846         if (matcher.lookingAt()) {
1847             if (!matcher.group(0).equals("testing"))
1848                 failCount++;
1849         } else {
1850             failCount++;
1851         }
1852 
1853         matcher.reset("testing");
1854         if (matcher.matches()) {
1855             if (!matcher.group(0).equals("testing"))
1856                 failCount++;
1857         } else {
1858             failCount++;
1859         }
1860 
1861         pattern = Pattern.compile("(tes)ting");
1862         matcher = pattern.matcher("testing");
1863         if (matcher.lookingAt()) {
1864             if (!matcher.group(0).equals("testing"))
1865                 failCount++;
1866         } else {
1867             failCount++;
1868         }
1869 
1870         pattern = Pattern.compile("^(tes)ting");
1871         matcher = pattern.matcher("testing");
1872         if (matcher.matches()) {
1873             if (!matcher.group(0).equals("testing"))
1874                 failCount++;
1875         } else {
1876             failCount++;
1877         }
1878 
1879         // Supplementary character test
1880         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1881         matcher = pattern.matcher(toSupplementaries("testing"));
1882         check(matcher, toSupplementaries("testing"));
1883 
1884         matcher.reset(toSupplementaries("testing"));
1885         if (matcher.lookingAt()) {
1886             if (!matcher.group(0).equals(toSupplementaries("testing")))
1887                 failCount++;
1888         } else {
1889             failCount++;
1890         }
1891 
1892         matcher.reset(toSupplementaries("testing"));
1893         if (matcher.matches()) {
1894             if (!matcher.group(0).equals(toSupplementaries("testing")))
1895                 failCount++;
1896         } else {
1897             failCount++;
1898         }
1899 
1900         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1901         matcher = pattern.matcher(toSupplementaries("testing"));
1902         if (matcher.lookingAt()) {
1903             if (!matcher.group(0).equals(toSupplementaries("testing")))
1904                 failCount++;
1905         } else {
1906             failCount++;
1907         }
1908 
1909         pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
1910         matcher = pattern.matcher(toSupplementaries("testing"));
1911         if (matcher.matches()) {
1912             if (!matcher.group(0).equals(toSupplementaries("testing")))
1913                 failCount++;
1914         } else {
1915             failCount++;
1916         }
1917 
1918         report("Group0");
1919     }
1920 
1921     private static void findIntTest() throws Exception {
1922         Pattern p = Pattern.compile("blah");
1923         Matcher m = p.matcher("zzzzblahzzzzzblah");
1924         boolean result = m.find(2);
1925         if (!result)
1926             failCount++;
1927 
1928         p = Pattern.compile("$");
1929         m = p.matcher("1234567890");
1930         result = m.find(10);
1931         if (!result)
1932             failCount++;
1933         try {
1934             result = m.find(11);
1935             failCount++;
1936         } catch (IndexOutOfBoundsException e) {
1937             // correct result
1938         }
1939 
1940         // Supplementary character test
1941         p = Pattern.compile(toSupplementaries("blah"));
1942         m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
1943         result = m.find(2);
1944         if (!result)
1945             failCount++;
1946 
1947         report("FindInt");
1948     }
1949 
1950     private static void emptyPatternTest() throws Exception {
1951         Pattern p = Pattern.compile("");
1952         Matcher m = p.matcher("foo");
1953 
1954         // Should find empty pattern at beginning of input
1955         boolean result = m.find();
1956         if (result != true)
1957             failCount++;
1958         if (m.start() != 0)
1959             failCount++;
1960 
1961         // Should not match entire input if input is not empty
1962         m.reset();
1963         result = m.matches();
1964         if (result == true)
1965             failCount++;
1966 
1967         try {
1968             m.start(0);
1969             failCount++;
1970         } catch (IllegalStateException e) {
1971             // Correct result
1972         }
1973 
1974         // Should match entire input if input is empty
1975         m.reset("");
1976         result = m.matches();
1977         if (result != true)
1978             failCount++;
1979 
1980         result = Pattern.matches("", "");
1981         if (result != true)
1982             failCount++;
1983 
1984         result = Pattern.matches("", "foo");
1985         if (result == true)
1986             failCount++;
1987         report("EmptyPattern");
1988     }
1989 
1990     private static void charClassTest() throws Exception {
1991         Pattern pattern = Pattern.compile("blah[ab]]blech");
1992         check(pattern, "blahb]blech", true);
1993 
1994         pattern = Pattern.compile("[abc[def]]");
1995         check(pattern, "b", true);
1996 
1997         // Supplementary character tests
1998         pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
1999         check(pattern, toSupplementaries("blahb]blech"), true);
2000 
2001         pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2002         check(pattern, toSupplementaries("b"), true);
2003 
2004         try {
2005             // u00ff when UNICODE_CASE
2006             pattern = Pattern.compile("[ab\u00ffcd]",
2007                                       Pattern.CASE_INSENSITIVE|
2008                                       Pattern.UNICODE_CASE);
2009             check(pattern, "ab\u00ffcd", true);
2010             check(pattern, "Ab\u0178Cd", true);
2011 
2012             // u00b5 when UNICODE_CASE
2013             pattern = Pattern.compile("[ab\u00b5cd]",
2014                                       Pattern.CASE_INSENSITIVE|
2015                                       Pattern.UNICODE_CASE);
2016             check(pattern, "ab\u00b5cd", true);
2017             check(pattern, "Ab\u039cCd", true);
2018         } catch (Exception e) { failCount++; }
2019 
2020         /* Special cases
2021            (1)LatinSmallLetterLongS u+017f
2022            (2)LatinSmallLetterDotlessI u+0131
2023            (3)LatineCapitalLetterIWithDotAbove u+0130
2024            (4)KelvinSign u+212a
2025            (5)AngstromSign u+212b
2026         */
2027         int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2028         pattern = Pattern.compile("[sik\u00c5]+", flags);
2029         if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2030             failCount++;
2031 
2032         report("CharClass");
2033     }
2034 
2035     private static void caretTest() throws Exception {
2036         Pattern pattern = Pattern.compile("\\w*");
2037         Matcher matcher = pattern.matcher("a#bc#def##g");
2038         check(matcher, "a");
2039         check(matcher, "");
2040         check(matcher, "bc");
2041         check(matcher, "");
2042         check(matcher, "def");
2043         check(matcher, "");
2044         check(matcher, "");
2045         check(matcher, "g");
2046         check(matcher, "");
2047         if (matcher.find())
2048             failCount++;
2049 
2050         pattern = Pattern.compile("^\\w*");
2051         matcher = pattern.matcher("a#bc#def##g");
2052         check(matcher, "a");
2053         if (matcher.find())
2054             failCount++;
2055 
2056         pattern = Pattern.compile("\\w");
2057         matcher = pattern.matcher("abc##x");
2058         check(matcher, "a");
2059         check(matcher, "b");
2060         check(matcher, "c");
2061         check(matcher, "x");
2062         if (matcher.find())
2063             failCount++;
2064 
2065         pattern = Pattern.compile("^\\w");
2066         matcher = pattern.matcher("abc##x");
2067         check(matcher, "a");
2068         if (matcher.find())
2069             failCount++;
2070 
2071         pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2072         matcher = pattern.matcher("abcdef-ghi\njklmno");
2073         check(matcher, "abc");
2074         if (matcher.find())
2075             failCount++;
2076 
2077         pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2078         matcher = pattern.matcher("abcdef-ghi\njklmno");
2079         check(matcher, "abc");
2080         check(matcher, "jkl");
2081         if (matcher.find())
2082             failCount++;
2083 
2084         pattern = Pattern.compile("^", Pattern.MULTILINE);
2085         matcher = pattern.matcher("this is some text");
2086         String result = matcher.replaceAll("X");
2087         if (!result.equals("Xthis is some text"))
2088             failCount++;
2089 
2090         pattern = Pattern.compile("^");
2091         matcher = pattern.matcher("this is some text");
2092         result = matcher.replaceAll("X");
2093         if (!result.equals("Xthis is some text"))
2094             failCount++;
2095 
2096         pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2097         matcher = pattern.matcher("this is some text\n");
2098         result = matcher.replaceAll("X");
2099         if (!result.equals("Xthis is some text\n"))
2100             failCount++;
2101 
2102         report("Caret");
2103     }
2104 
2105     private static void groupCaptureTest() throws Exception {
2106         // Independent group
2107         Pattern pattern = Pattern.compile("x+(?>y+)z+");
2108         Matcher matcher = pattern.matcher("xxxyyyzzz");
2109         matcher.find();
2110         try {
2111             String blah = matcher.group(1);
2112             failCount++;
2113         } catch (IndexOutOfBoundsException ioobe) {
2114             // Good result
2115         }
2116         // Pure group
2117         pattern = Pattern.compile("x+(?:y+)z+");
2118         matcher = pattern.matcher("xxxyyyzzz");
2119         matcher.find();
2120         try {
2121             String blah = matcher.group(1);
2122             failCount++;
2123         } catch (IndexOutOfBoundsException ioobe) {
2124             // Good result
2125         }
2126 
2127         // Supplementary character tests
2128         // Independent group
2129         pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2130         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2131         matcher.find();
2132         try {
2133             String blah = matcher.group(1);
2134             failCount++;
2135         } catch (IndexOutOfBoundsException ioobe) {
2136             // Good result
2137         }
2138         // Pure group
2139         pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2140         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2141         matcher.find();
2142         try {
2143             String blah = matcher.group(1);
2144             failCount++;
2145         } catch (IndexOutOfBoundsException ioobe) {
2146             // Good result
2147         }
2148 
2149         report("GroupCapture");
2150     }
2151 
2152     private static void backRefTest() throws Exception {
2153         Pattern pattern = Pattern.compile("(a*)bc\\1");
2154         check(pattern, "zzzaabcazzz", true);
2155 
2156         pattern = Pattern.compile("(a*)bc\\1");
2157         check(pattern, "zzzaabcaazzz", true);
2158 
2159         pattern = Pattern.compile("(abc)(def)\\1");
2160         check(pattern, "abcdefabc", true);
2161 
2162         pattern = Pattern.compile("(abc)(def)\\3");
2163         check(pattern, "abcdefabc", false);
2164 
2165         try {
2166             for (int i = 1; i < 10; i++) {
2167                 // Make sure backref 1-9 are always accepted
2168                 pattern = Pattern.compile("abcdef\\" + i);
2169                 // and fail to match if the target group does not exit
2170                 check(pattern, "abcdef", false);
2171             }
2172         } catch(PatternSyntaxException e) {
2173             failCount++;
2174         }
2175 
2176         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2177         check(pattern, "abcdefghija", false);
2178         check(pattern, "abcdefghija1", true);
2179 
2180         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2181         check(pattern, "abcdefghijkk", true);
2182 
2183         pattern = Pattern.compile("(a)bcdefghij\\11");
2184         check(pattern, "abcdefghija1", true);
2185 
2186         // Supplementary character tests
2187         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2188         check(pattern, toSupplementaries("zzzaabcazzz"), true);
2189 
2190         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2191         check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2192 
2193         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2194         check(pattern, toSupplementaries("abcdefabc"), true);
2195 
2196         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2197         check(pattern, toSupplementaries("abcdefabc"), false);
2198 
2199         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2200         check(pattern, toSupplementaries("abcdefghija"), false);
2201         check(pattern, toSupplementaries("abcdefghija1"), true);
2202 
2203         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2204         check(pattern, toSupplementaries("abcdefghijkk"), true);
2205 
2206         report("BackRef");
2207     }
2208 
2209     /**
2210      * Unicode Technical Report #18, section 2.6 End of Line
2211      * There is no empty line to be matched in the sequence \u000D\u000A
2212      * but there is an empty line in the sequence \u000A\u000D.
2213      */
2214     private static void anchorTest() throws Exception {
2215         Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2216         Matcher m = p.matcher("blah1\r\nblah2");
2217         m.find();
2218         m.find();
2219         if (!m.group().equals("blah2"))
2220             failCount++;
2221 
2222         m.reset("blah1\n\rblah2");
2223         m.find();
2224         m.find();
2225         m.find();
2226         if (!m.group().equals("blah2"))
2227             failCount++;
2228 
2229         // Test behavior of $ with \r\n at end of input
2230         p = Pattern.compile(".+$");
2231         m = p.matcher("blah1\r\n");
2232         if (!m.find())
2233             failCount++;
2234        if (!m.group().equals("blah1"))
2235             failCount++;
2236         if (m.find())
2237             failCount++;
2238 
2239         // Test behavior of $ with \r\n at end of input in multiline
2240         p = Pattern.compile(".+$", Pattern.MULTILINE);
2241         m = p.matcher("blah1\r\n");
2242         if (!m.find())
2243             failCount++;
2244         if (m.find())
2245             failCount++;
2246 
2247         // Test for $ recognition of \u0085 for bug 4527731
2248         p = Pattern.compile(".+$", Pattern.MULTILINE);
2249         m = p.matcher("blah1\u0085");
2250         if (!m.find())
2251             failCount++;
2252 
2253         // Supplementary character test
2254         p = Pattern.compile("^.*$", Pattern.MULTILINE);
2255         m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2256         m.find();
2257         m.find();
2258         if (!m.group().equals(toSupplementaries("blah2")))
2259             failCount++;
2260 
2261         m.reset(toSupplementaries("blah1\n\rblah2"));
2262         m.find();
2263         m.find();
2264         m.find();
2265         if (!m.group().equals(toSupplementaries("blah2")))
2266             failCount++;
2267 
2268         // Test behavior of $ with \r\n at end of input
2269         p = Pattern.compile(".+$");
2270         m = p.matcher(toSupplementaries("blah1\r\n"));
2271         if (!m.find())
2272             failCount++;
2273         if (!m.group().equals(toSupplementaries("blah1")))
2274             failCount++;
2275         if (m.find())
2276             failCount++;
2277 
2278         // Test behavior of $ with \r\n at end of input in multiline
2279         p = Pattern.compile(".+$", Pattern.MULTILINE);
2280         m = p.matcher(toSupplementaries("blah1\r\n"));
2281         if (!m.find())
2282             failCount++;
2283         if (m.find())
2284             failCount++;
2285 
2286         // Test for $ recognition of \u0085 for bug 4527731
2287         p = Pattern.compile(".+$", Pattern.MULTILINE);
2288         m = p.matcher(toSupplementaries("blah1\u0085"));
2289         if (!m.find())
2290             failCount++;
2291 
2292         report("Anchors");
2293     }
2294 
2295     /**
2296      * A basic sanity test of Matcher.lookingAt().
2297      */
2298     private static void lookingAtTest() throws Exception {
2299         Pattern p = Pattern.compile("(ab)(c*)");
2300         Matcher m = p.matcher("abccczzzabcczzzabccc");
2301 
2302         if (!m.lookingAt())
2303             failCount++;
2304 
2305         if (!m.group().equals(m.group(0)))
2306             failCount++;
2307 
2308         m = p.matcher("zzzabccczzzabcczzzabccczzz");
2309         if (m.lookingAt())
2310             failCount++;
2311 
2312         // Supplementary character test
2313         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2314         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2315 
2316         if (!m.lookingAt())
2317             failCount++;
2318 
2319         if (!m.group().equals(m.group(0)))
2320             failCount++;
2321 
2322         m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2323         if (m.lookingAt())
2324             failCount++;
2325 
2326         report("Looking At");
2327     }
2328 
2329     /**
2330      * A basic sanity test of Matcher.matches().
2331      */
2332     private static void matchesTest() throws Exception {
2333         // matches()
2334         Pattern p = Pattern.compile("ulb(c*)");
2335         Matcher m = p.matcher("ulbcccccc");
2336         if (!m.matches())
2337             failCount++;
2338 
2339         // find() but not matches()
2340         m.reset("zzzulbcccccc");
2341         if (m.matches())
2342             failCount++;
2343 
2344         // lookingAt() but not matches()
2345         m.reset("ulbccccccdef");
2346         if (m.matches())
2347             failCount++;
2348 
2349         // matches()
2350         p = Pattern.compile("a|ad");
2351         m = p.matcher("ad");
2352         if (!m.matches())
2353             failCount++;
2354 
2355         // Supplementary character test
2356         // matches()
2357         p = Pattern.compile(toSupplementaries("ulb(c*)"));
2358         m = p.matcher(toSupplementaries("ulbcccccc"));
2359         if (!m.matches())
2360             failCount++;
2361 
2362         // find() but not matches()
2363         m.reset(toSupplementaries("zzzulbcccccc"));
2364         if (m.matches())
2365             failCount++;
2366 
2367         // lookingAt() but not matches()
2368         m.reset(toSupplementaries("ulbccccccdef"));
2369         if (m.matches())
2370             failCount++;
2371 
2372         // matches()
2373         p = Pattern.compile(toSupplementaries("a|ad"));
2374         m = p.matcher(toSupplementaries("ad"));
2375         if (!m.matches())
2376             failCount++;
2377 
2378         report("Matches");
2379     }
2380 
2381     /**
2382      * A basic sanity test of Pattern.matches().
2383      */
2384     private static void patternMatchesTest() throws Exception {
2385         // matches()
2386         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2387                              toSupplementaries("ulbcccccc")))
2388             failCount++;
2389 
2390         // find() but not matches()
2391         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2392                             toSupplementaries("zzzulbcccccc")))
2393             failCount++;
2394 
2395         // lookingAt() but not matches()
2396         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2397                             toSupplementaries("ulbccccccdef")))
2398             failCount++;
2399 
2400         // Supplementary character test
2401         // matches()
2402         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2403                              toSupplementaries("ulbcccccc")))
2404             failCount++;
2405 
2406         // find() but not matches()
2407         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2408                             toSupplementaries("zzzulbcccccc")))
2409             failCount++;
2410 
2411         // lookingAt() but not matches()
2412         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2413                             toSupplementaries("ulbccccccdef")))
2414             failCount++;
2415 
2416         report("Pattern Matches");
2417     }
2418 
2419     /**
2420      * Canonical equivalence testing. Tests the ability of the engine
2421      * to match sequences that are not explicitly specified in the
2422      * pattern when they are considered equivalent by the Unicode Standard.
2423      */
2424     private static void ceTest() throws Exception {
2425         // Decomposed char outside char classes
2426         Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2427         Matcher m = p.matcher("test\u00e5");
2428         if (!m.matches())
2429             failCount++;
2430 
2431         m.reset("testa\u030a");
2432         if (!m.matches())
2433             failCount++;
2434 
2435         // Composed char outside char classes
2436         p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2437         m = p.matcher("test\u00e5");
2438         if (!m.matches())
2439             failCount++;
2440 
2441         m.reset("testa\u030a");
2442         if (!m.find())
2443             failCount++;
2444 
2445         // Decomposed char inside a char class
2446         p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2447         m = p.matcher("test\u00e5");
2448         if (!m.find())
2449             failCount++;
2450 
2451         m.reset("testa\u030a");
2452         if (!m.find())
2453             failCount++;
2454 
2455         // Composed char inside a char class
2456         p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2457         m = p.matcher("test\u00e5");
2458         if (!m.find())
2459             failCount++;
2460 
2461         m.reset("testa\u0300");
2462         if (!m.find())
2463             failCount++;
2464 
2465         m.reset("testa\u030a");
2466         if (!m.find())
2467             failCount++;
2468 
2469         // Marks that cannot legally change order and be equivalent
2470         p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2471         check(p, "testa\u0308\u0300", true);
2472         check(p, "testa\u0300\u0308", false);
2473 
2474         // Marks that can legally change order and be equivalent
2475         p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2476         check(p, "testa\u0308\u0323", true);
2477         check(p, "testa\u0323\u0308", true);
2478 
2479         // Test all equivalences of the sequence a\u0308\u0323\u0300
2480         p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2481         check(p, "testa\u0308\u0323\u0300", true);
2482         check(p, "testa\u0323\u0308\u0300", true);
2483         check(p, "testa\u0308\u0300\u0323", true);
2484         check(p, "test\u00e4\u0323\u0300", true);
2485         check(p, "test\u00e4\u0300\u0323", true);
2486 
2487         /*
2488          * The following canonical equivalence tests don't work. Bug id: 4916384.
2489          *
2490         // Decomposed hangul (jamos)
2491         p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2492         m = p.matcher("\u1100\u1161");
2493         if (!m.matches())
2494             failCount++;
2495 
2496         m.reset("\uac00");
2497         if (!m.matches())
2498             failCount++;
2499 
2500         // Composed hangul
2501         p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2502         m = p.matcher("\u1100\u1161");
2503         if (!m.matches())
2504             failCount++;
2505 
2506         m.reset("\uac00");
2507         if (!m.matches())
2508             failCount++;
2509 
2510         // Decomposed supplementary outside char classes
2511         p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2512         m = p.matcher("test\ud834\uddc0");
2513         if (!m.matches())
2514             failCount++;
2515 
2516         m.reset("test\ud834\uddbc\ud834\udd6f");
2517         if (!m.matches())
2518             failCount++;
2519 
2520         // Composed supplementary outside char classes
2521         p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2522         m.reset("test\ud834\uddbc\ud834\udd6f");
2523         if (!m.matches())
2524             failCount++;
2525 
2526         m = p.matcher("test\ud834\uddc0");
2527         if (!m.matches())
2528             failCount++;
2529 
2530         */
2531 
2532         report("Canonical Equivalence");
2533     }
2534 
2535     /**
2536      * A basic sanity test of Matcher.replaceAll().
2537      */
2538     private static void globalSubstitute() throws Exception {
2539         // Global substitution with a literal
2540         Pattern p = Pattern.compile("(ab)(c*)");
2541         Matcher m = p.matcher("abccczzzabcczzzabccc");
2542         if (!m.replaceAll("test").equals("testzzztestzzztest"))
2543             failCount++;
2544 
2545         m.reset("zzzabccczzzabcczzzabccczzz");
2546         if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2547             failCount++;
2548 
2549         // Global substitution with groups
2550         m.reset("zzzabccczzzabcczzzabccczzz");
2551         String result = m.replaceAll("$1");
2552         if (!result.equals("zzzabzzzabzzzabzzz"))
2553             failCount++;
2554 
2555         // Supplementary character test
2556         // Global substitution with a literal
2557         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2558         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2559         if (!m.replaceAll(toSupplementaries("test")).
2560             equals(toSupplementaries("testzzztestzzztest")))
2561             failCount++;
2562 
2563         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2564         if (!m.replaceAll(toSupplementaries("test")).
2565             equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2566             failCount++;
2567 
2568         // Global substitution with groups
2569         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2570         result = m.replaceAll("$1");
2571         if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2572             failCount++;
2573 
2574         report("Global Substitution");
2575     }
2576 
2577     /**
2578      * Tests the usage of Matcher.appendReplacement() with literal
2579      * and group substitutions.
2580      */
2581     private static void stringbufferSubstitute() throws Exception {
2582         // SB substitution with literal
2583         String blah = "zzzblahzzz";
2584         Pattern p = Pattern.compile("blah");
2585         Matcher m = p.matcher(blah);
2586         StringBuffer result = new StringBuffer();
2587         try {
2588             m.appendReplacement(result, "blech");
2589             failCount++;
2590         } catch (IllegalStateException e) {
2591         }
2592         m.find();
2593         m.appendReplacement(result, "blech");
2594         if (!result.toString().equals("zzzblech"))
2595             failCount++;
2596 
2597         m.appendTail(result);
2598         if (!result.toString().equals("zzzblechzzz"))
2599             failCount++;
2600 
2601         // SB substitution with groups
2602         blah = "zzzabcdzzz";
2603         p = Pattern.compile("(ab)(cd)*");
2604         m = p.matcher(blah);
2605         result = new StringBuffer();
2606         try {
2607             m.appendReplacement(result, "$1");
2608             failCount++;
2609         } catch (IllegalStateException e) {
2610         }
2611         m.find();
2612         m.appendReplacement(result, "$1");
2613         if (!result.toString().equals("zzzab"))
2614             failCount++;
2615 
2616         m.appendTail(result);
2617         if (!result.toString().equals("zzzabzzz"))
2618             failCount++;
2619 
2620         // SB substitution with 3 groups
2621         blah = "zzzabcdcdefzzz";
2622         p = Pattern.compile("(ab)(cd)*(ef)");
2623         m = p.matcher(blah);
2624         result = new StringBuffer();
2625         try {
2626             m.appendReplacement(result, "$1w$2w$3");
2627             failCount++;
2628         } catch (IllegalStateException e) {
2629         }
2630         m.find();
2631         m.appendReplacement(result, "$1w$2w$3");
2632         if (!result.toString().equals("zzzabwcdwef"))
2633             failCount++;
2634 
2635         m.appendTail(result);
2636         if (!result.toString().equals("zzzabwcdwefzzz"))
2637             failCount++;
2638 
2639         // SB substitution with groups and three matches
2640         // skipping middle match
2641         blah = "zzzabcdzzzabcddzzzabcdzzz";
2642         p = Pattern.compile("(ab)(cd*)");
2643         m = p.matcher(blah);
2644         result = new StringBuffer();
2645         try {
2646             m.appendReplacement(result, "$1");
2647             failCount++;
2648         } catch (IllegalStateException e) {
2649         }
2650         m.find();
2651         m.appendReplacement(result, "$1");
2652         if (!result.toString().equals("zzzab"))
2653             failCount++;
2654 
2655         m.find();
2656         m.find();
2657         m.appendReplacement(result, "$2");
2658         if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2659             failCount++;
2660 
2661         m.appendTail(result);
2662         if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2663             failCount++;
2664 
2665         // Check to make sure escaped $ is ignored
2666         blah = "zzzabcdcdefzzz";
2667         p = Pattern.compile("(ab)(cd)*(ef)");
2668         m = p.matcher(blah);
2669         result = new StringBuffer();
2670         m.find();
2671         m.appendReplacement(result, "$1w\\$2w$3");
2672         if (!result.toString().equals("zzzabw$2wef"))
2673             failCount++;
2674 
2675         m.appendTail(result);
2676         if (!result.toString().equals("zzzabw$2wefzzz"))
2677             failCount++;
2678 
2679         // Check to make sure a reference to nonexistent group causes error
2680         blah = "zzzabcdcdefzzz";
2681         p = Pattern.compile("(ab)(cd)*(ef)");
2682         m = p.matcher(blah);
2683         result = new StringBuffer();
2684         m.find();
2685         try {
2686             m.appendReplacement(result, "$1w$5w$3");
2687             failCount++;
2688         } catch (IndexOutOfBoundsException ioobe) {
2689             // Correct result
2690         }
2691 
2692         // Check double digit group references
2693         blah = "zzz123456789101112zzz";
2694         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2695         m = p.matcher(blah);
2696         result = new StringBuffer();
2697         m.find();
2698         m.appendReplacement(result, "$1w$11w$3");
2699         if (!result.toString().equals("zzz1w11w3"))
2700             failCount++;
2701 
2702         // Check to make sure it backs off $15 to $1 if only three groups
2703         blah = "zzzabcdcdefzzz";
2704         p = Pattern.compile("(ab)(cd)*(ef)");
2705         m = p.matcher(blah);
2706         result = new StringBuffer();
2707         m.find();
2708         m.appendReplacement(result, "$1w$15w$3");
2709         if (!result.toString().equals("zzzabwab5wef"))
2710             failCount++;
2711 
2712 
2713         // Supplementary character test
2714         // SB substitution with literal
2715         blah = toSupplementaries("zzzblahzzz");
2716         p = Pattern.compile(toSupplementaries("blah"));
2717         m = p.matcher(blah);
2718         result = new StringBuffer();
2719         try {
2720             m.appendReplacement(result, toSupplementaries("blech"));
2721             failCount++;
2722         } catch (IllegalStateException e) {
2723         }
2724         m.find();
2725         m.appendReplacement(result, toSupplementaries("blech"));
2726         if (!result.toString().equals(toSupplementaries("zzzblech")))
2727             failCount++;
2728 
2729         m.appendTail(result);
2730         if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2731             failCount++;
2732 
2733         // SB substitution with groups
2734         blah = toSupplementaries("zzzabcdzzz");
2735         p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2736         m = p.matcher(blah);
2737         result = new StringBuffer();
2738         try {
2739             m.appendReplacement(result, "$1");
2740             failCount++;
2741         } catch (IllegalStateException e) {
2742         }
2743         m.find();
2744         m.appendReplacement(result, "$1");
2745         if (!result.toString().equals(toSupplementaries("zzzab")))
2746             failCount++;
2747 
2748         m.appendTail(result);
2749         if (!result.toString().equals(toSupplementaries("zzzabzzz")))
2750             failCount++;
2751 
2752         // SB substitution with 3 groups
2753         blah = toSupplementaries("zzzabcdcdefzzz");
2754         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2755         m = p.matcher(blah);
2756         result = new StringBuffer();
2757         try {
2758             m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2759             failCount++;
2760         } catch (IllegalStateException e) {
2761         }
2762         m.find();
2763         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2764         if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
2765             failCount++;
2766 
2767         m.appendTail(result);
2768         if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
2769             failCount++;
2770 
2771         // SB substitution with groups and three matches
2772         // skipping middle match
2773         blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2774         p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2775         m = p.matcher(blah);
2776         result = new StringBuffer();
2777         try {
2778             m.appendReplacement(result, "$1");
2779             failCount++;
2780         } catch (IllegalStateException e) {
2781         }
2782         m.find();
2783         m.appendReplacement(result, "$1");
2784         if (!result.toString().equals(toSupplementaries("zzzab")))
2785             failCount++;
2786 
2787         m.find();
2788         m.find();
2789         m.appendReplacement(result, "$2");
2790         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
2791             failCount++;
2792 
2793         m.appendTail(result);
2794         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
2795             failCount++;
2796 
2797         // Check to make sure escaped $ is ignored
2798         blah = toSupplementaries("zzzabcdcdefzzz");
2799         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2800         m = p.matcher(blah);
2801         result = new StringBuffer();
2802         m.find();
2803         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2804         if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
2805             failCount++;
2806 
2807         m.appendTail(result);
2808         if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
2809             failCount++;
2810 
2811         // Check to make sure a reference to nonexistent group causes error
2812         blah = toSupplementaries("zzzabcdcdefzzz");
2813         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2814         m = p.matcher(blah);
2815         result = new StringBuffer();
2816         m.find();
2817         try {
2818             m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
2819             failCount++;
2820         } catch (IndexOutOfBoundsException ioobe) {
2821             // Correct result
2822         }
2823 
2824         // Check double digit group references
2825         blah = toSupplementaries("zzz123456789101112zzz");
2826         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2827         m = p.matcher(blah);
2828         result = new StringBuffer();
2829         m.find();
2830         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
2831         if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
2832             failCount++;
2833 
2834         // Check to make sure it backs off $15 to $1 if only three groups
2835         blah = toSupplementaries("zzzabcdcdefzzz");
2836         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2837         m = p.matcher(blah);
2838         result = new StringBuffer();
2839         m.find();
2840         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
2841         if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
2842             failCount++;
2843 
2844         // Check nothing has been appended into the output buffer if
2845         // the replacement string triggers IllegalArgumentException.
2846         p = Pattern.compile("(abc)");
2847         m = p.matcher("abcd");
2848         result = new StringBuffer();
2849         m.find();
2850         try {
2851             m.appendReplacement(result, ("xyz$g"));
2852             failCount++;
2853         } catch (IllegalArgumentException iae) {
2854             if (result.length() != 0)
2855                 failCount++;
2856         }
2857 
2858         report("SB Substitution");
2859     }
2860 
2861     /*
2862      * 5 groups of characters are created to make a substitution string.
2863      * A base string will be created including random lead chars, the
2864      * substitution string, and random trailing chars.
2865      * A pattern containing the 5 groups is searched for and replaced with:
2866      * random group + random string + random group.
2867      * The results are checked for correctness.
2868      */
2869     private static void substitutionBasher() {
2870         for (int runs = 0; runs<1000; runs++) {
2871             // Create a base string to work in
2872             int leadingChars = generator.nextInt(10);
2873             StringBuffer baseBuffer = new StringBuffer(100);
2874             String leadingString = getRandomAlphaString(leadingChars);
2875             baseBuffer.append(leadingString);
2876 
2877             // Create 5 groups of random number of random chars
2878             // Create the string to substitute
2879             // Create the pattern string to search for
2880             StringBuffer bufferToSub = new StringBuffer(25);
2881             StringBuffer bufferToPat = new StringBuffer(50);
2882             String[] groups = new String[5];
2883             for(int i=0; i<5; i++) {
2884                 int aGroupSize = generator.nextInt(5)+1;
2885                 groups[i] = getRandomAlphaString(aGroupSize);
2886                 bufferToSub.append(groups[i]);
2887                 bufferToPat.append('(');
2888                 bufferToPat.append(groups[i]);
2889                 bufferToPat.append(')');
2890             }
2891             String stringToSub = bufferToSub.toString();
2892             String pattern = bufferToPat.toString();
2893 
2894             // Place sub string into working string at random index
2895             baseBuffer.append(stringToSub);
2896 
2897             // Append random chars to end
2898             int trailingChars = generator.nextInt(10);
2899             String trailingString = getRandomAlphaString(trailingChars);
2900             baseBuffer.append(trailingString);
2901             String baseString = baseBuffer.toString();
2902 
2903             // Create test pattern and matcher
2904             Pattern p = Pattern.compile(pattern);
2905             Matcher m = p.matcher(baseString);
2906 
2907             // Reject candidate if pattern happens to start early
2908             m.find();
2909             if (m.start() < leadingChars)
2910                 continue;
2911 
2912             // Reject candidate if more than one match
2913             if (m.find())
2914                 continue;
2915 
2916             // Construct a replacement string with :
2917             // random group + random string + random group
2918             StringBuffer bufferToRep = new StringBuffer();
2919             int groupIndex1 = generator.nextInt(5);
2920             bufferToRep.append("$" + (groupIndex1 + 1));
2921             String randomMidString = getRandomAlphaString(5);
2922             bufferToRep.append(randomMidString);
2923             int groupIndex2 = generator.nextInt(5);
2924             bufferToRep.append("$" + (groupIndex2 + 1));
2925             String replacement = bufferToRep.toString();
2926 
2927             // Do the replacement
2928             String result = m.replaceAll(replacement);
2929 
2930             // Construct expected result
2931             StringBuffer bufferToRes = new StringBuffer();
2932             bufferToRes.append(leadingString);
2933             bufferToRes.append(groups[groupIndex1]);
2934             bufferToRes.append(randomMidString);
2935             bufferToRes.append(groups[groupIndex2]);
2936             bufferToRes.append(trailingString);
2937             String expectedResult = bufferToRes.toString();
2938 
2939             // Check results
2940             if (!result.equals(expectedResult))
2941                 failCount++;
2942         }
2943 
2944         report("Substitution Basher");
2945     }
2946 
2947     /**
2948      * Checks the handling of some escape sequences that the Pattern
2949      * class should process instead of the java compiler. These are
2950      * not in the file because the escapes should be be processed
2951      * by the Pattern class when the regex is compiled.
2952      */
2953     private static void escapes() throws Exception {
2954         Pattern p = Pattern.compile("\\043");
2955         Matcher m = p.matcher("#");
2956         if (!m.find())
2957             failCount++;
2958 
2959         p = Pattern.compile("\\x23");
2960         m = p.matcher("#");
2961         if (!m.find())
2962             failCount++;
2963 
2964         p = Pattern.compile("\\u0023");
2965         m = p.matcher("#");
2966         if (!m.find())
2967             failCount++;
2968 
2969         report("Escape sequences");
2970     }
2971 
2972     /**
2973      * Checks the handling of blank input situations. These
2974      * tests are incompatible with my test file format.
2975      */
2976     private static void blankInput() throws Exception {
2977         Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
2978         Matcher m = p.matcher("");
2979         if (m.find())
2980             failCount++;
2981 
2982         p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
2983         m = p.matcher("");
2984         if (!m.find())
2985             failCount++;
2986 
2987         p = Pattern.compile("abc");
2988         m = p.matcher("");
2989         if (m.find())
2990             failCount++;
2991 
2992         p = Pattern.compile("a*");
2993         m = p.matcher("");
2994         if (!m.find())
2995             failCount++;
2996 
2997         report("Blank input");
2998     }
2999 
3000     /**
3001      * Tests the Boyer-Moore pattern matching of a character sequence
3002      * on randomly generated patterns.
3003      */
3004     private static void bm() throws Exception {
3005         doBnM('a');
3006         report("Boyer Moore (ASCII)");
3007 
3008         doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3009         report("Boyer Moore (Supplementary)");
3010     }
3011 
3012     private static void doBnM(int baseCharacter) throws Exception {
3013         int achar=0;
3014 
3015         for (int i=0; i<100; i++) {
3016             // Create a short pattern to search for
3017             int patternLength = generator.nextInt(7) + 4;
3018             StringBuffer patternBuffer = new StringBuffer(patternLength);
3019             for (int x=0; x<patternLength; x++) {
3020                 int ch = baseCharacter + generator.nextInt(26);
3021                 if (Character.isSupplementaryCodePoint(ch)) {
3022                     patternBuffer.append(Character.toChars(ch));
3023                 } else {
3024                     patternBuffer.append((char)ch);
3025                 }
3026             }
3027             String pattern =  patternBuffer.toString();
3028             Pattern p = Pattern.compile(pattern);
3029 
3030             // Create a buffer with random ASCII chars that does
3031             // not match the sample
3032             String toSearch = null;
3033             StringBuffer s = null;
3034             Matcher m = p.matcher("");
3035             do {
3036                 s = new StringBuffer(100);
3037                 for (int x=0; x<100; x++) {
3038                     int ch = baseCharacter + generator.nextInt(26);
3039                     if (Character.isSupplementaryCodePoint(ch)) {
3040                         s.append(Character.toChars(ch));
3041                     } else {
3042                         s.append((char)ch);
3043                     }
3044                 }
3045                 toSearch = s.toString();
3046                 m.reset(toSearch);
3047             } while (m.find());
3048 
3049             // Insert the pattern at a random spot
3050             int insertIndex = generator.nextInt(99);
3051             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3052                 insertIndex++;
3053             s = s.insert(insertIndex, pattern);
3054             toSearch = s.toString();
3055 
3056             // Make sure that the pattern is found
3057             m.reset(toSearch);
3058             if (!m.find())
3059                 failCount++;
3060 
3061             // Make sure that the match text is the pattern
3062             if (!m.group().equals(pattern))
3063                 failCount++;
3064 
3065             // Make sure match occured at insertion point
3066             if (m.start() != insertIndex)
3067                 failCount++;
3068         }
3069     }
3070 
3071     /**
3072      * Tests the matching of slices on randomly generated patterns.
3073      * The Boyer-Moore optimization is not done on these patterns
3074      * because it uses unicode case folding.
3075      */
3076     private static void slice() throws Exception {
3077         doSlice(Character.MAX_VALUE);
3078         report("Slice");
3079 
3080         doSlice(Character.MAX_CODE_POINT);
3081         report("Slice (Supplementary)");
3082     }
3083 
3084     private static void doSlice(int maxCharacter) throws Exception {
3085         Random generator = new Random();
3086         int achar=0;
3087 
3088         for (int i=0; i<100; i++) {
3089             // Create a short pattern to search for
3090             int patternLength = generator.nextInt(7) + 4;
3091             StringBuffer patternBuffer = new StringBuffer(patternLength);
3092             for (int x=0; x<patternLength; x++) {
3093                 int randomChar = 0;
3094                 while (!Character.isLetterOrDigit(randomChar))
3095                     randomChar = generator.nextInt(maxCharacter);
3096                 if (Character.isSupplementaryCodePoint(randomChar)) {
3097                     patternBuffer.append(Character.toChars(randomChar));
3098                 } else {
3099                     patternBuffer.append((char) randomChar);
3100                 }
3101             }
3102             String pattern =  patternBuffer.toString();
3103             Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3104 
3105             // Create a buffer with random chars that does not match the sample
3106             String toSearch = null;
3107             StringBuffer s = null;
3108             Matcher m = p.matcher("");
3109             do {
3110                 s = new StringBuffer(100);
3111                 for (int x=0; x<100; x++) {
3112                     int randomChar = 0;
3113                     while (!Character.isLetterOrDigit(randomChar))
3114                         randomChar = generator.nextInt(maxCharacter);
3115                     if (Character.isSupplementaryCodePoint(randomChar)) {
3116                         s.append(Character.toChars(randomChar));
3117                     } else {
3118                         s.append((char) randomChar);
3119                     }
3120                 }
3121                 toSearch = s.toString();
3122                 m.reset(toSearch);
3123             } while (m.find());
3124 
3125             // Insert the pattern at a random spot
3126             int insertIndex = generator.nextInt(99);
3127             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3128                 insertIndex++;
3129             s = s.insert(insertIndex, pattern);
3130             toSearch = s.toString();
3131 
3132             // Make sure that the pattern is found
3133             m.reset(toSearch);
3134             if (!m.find())
3135                 failCount++;
3136 
3137             // Make sure that the match text is the pattern
3138             if (!m.group().equals(pattern))
3139                 failCount++;
3140 
3141             // Make sure match occured at insertion point
3142             if (m.start() != insertIndex)
3143                 failCount++;
3144         }
3145     }
3146 
3147     private static void explainFailure(String pattern, String data,
3148                                        String expected, String actual) {
3149         System.err.println("----------------------------------------");
3150         System.err.println("Pattern = "+pattern);
3151         System.err.println("Data = "+data);
3152         System.err.println("Expected = " + expected);
3153         System.err.println("Actual   = " + actual);
3154     }
3155 
3156     private static void explainFailure(String pattern, String data,
3157                                        Throwable t) {
3158         System.err.println("----------------------------------------");
3159         System.err.println("Pattern = "+pattern);
3160         System.err.println("Data = "+data);
3161         t.printStackTrace(System.err);
3162     }
3163 
3164     // Testing examples from a file
3165 
3166     /**
3167      * Goes through the file "TestCases.txt" and creates many patterns
3168      * described in the file, matching the patterns against input lines in
3169      * the file, and comparing the results against the correct results
3170      * also found in the file. The file format is described in comments
3171      * at the head of the file.
3172      */
3173     private static void processFile(String fileName) throws Exception {
3174         File testCases = new File(System.getProperty("test.src", "."),
3175                                   fileName);
3176         FileInputStream in = new FileInputStream(testCases);
3177         BufferedReader r = new BufferedReader(new InputStreamReader(in));
3178 
3179         // Process next test case.
3180         String aLine;
3181         while((aLine = r.readLine()) != null) {
3182             // Read a line for pattern
3183             String patternString = grabLine(r);
3184             Pattern p = null;
3185             try {
3186                 p = compileTestPattern(patternString);
3187             } catch (PatternSyntaxException e) {
3188                 String dataString = grabLine(r);
3189                 String expectedResult = grabLine(r);
3190                 if (expectedResult.startsWith("error"))
3191                     continue;
3192                 explainFailure(patternString, dataString, e);
3193                 failCount++;
3194                 continue;
3195             }
3196 
3197             // Read a line for input string
3198             String dataString = grabLine(r);
3199             Matcher m = p.matcher(dataString);
3200             StringBuffer result = new StringBuffer();
3201 
3202             // Check for IllegalStateExceptions before a match
3203             failCount += preMatchInvariants(m);
3204 
3205             boolean found = m.find();
3206 
3207             if (found)
3208                 failCount += postTrueMatchInvariants(m);
3209             else
3210                 failCount += postFalseMatchInvariants(m);
3211 
3212             if (found) {
3213                 result.append("true ");
3214                 result.append(m.group(0) + " ");
3215             } else {
3216                 result.append("false ");
3217             }
3218 
3219             result.append(m.groupCount());
3220 
3221             if (found) {
3222                 for (int i=1; i<m.groupCount()+1; i++)
3223                     if (m.group(i) != null)
3224                         result.append(" " +m.group(i));
3225             }
3226 
3227             // Read a line for the expected result
3228             String expectedResult = grabLine(r);
3229 
3230             if (!result.toString().equals(expectedResult)) {
3231                 explainFailure(patternString, dataString, expectedResult, result.toString());
3232                 failCount++;
3233             }
3234         }
3235 
3236         report(fileName);
3237     }
3238 
3239     private static int preMatchInvariants(Matcher m) {
3240         int failCount = 0;
3241         try {
3242             m.start();
3243             failCount++;
3244         } catch (IllegalStateException ise) {}
3245         try {
3246             m.end();
3247             failCount++;
3248         } catch (IllegalStateException ise) {}
3249         try {
3250             m.group();
3251             failCount++;
3252         } catch (IllegalStateException ise) {}
3253         return failCount;
3254     }
3255 
3256     private static int postFalseMatchInvariants(Matcher m) {
3257         int failCount = 0;
3258         try {
3259             m.group();
3260             failCount++;
3261         } catch (IllegalStateException ise) {}
3262         try {
3263             m.start();
3264             failCount++;
3265         } catch (IllegalStateException ise) {}
3266         try {
3267             m.end();
3268             failCount++;
3269         } catch (IllegalStateException ise) {}
3270         return failCount;
3271     }
3272 
3273     private static int postTrueMatchInvariants(Matcher m) {
3274         int failCount = 0;
3275         //assert(m.start() = m.start(0);
3276         if (m.start() != m.start(0))
3277             failCount++;
3278         //assert(m.end() = m.end(0);
3279         if (m.start() != m.start(0))
3280             failCount++;
3281         //assert(m.group() = m.group(0);
3282         if (!m.group().equals(m.group(0)))
3283             failCount++;
3284         try {
3285             m.group(50);
3286             failCount++;
3287         } catch (IndexOutOfBoundsException ise) {}
3288 
3289         return failCount;
3290     }
3291 
3292     private static Pattern compileTestPattern(String patternString) {
3293         if (!patternString.startsWith("'")) {
3294             return Pattern.compile(patternString);
3295         }
3296 
3297         int break1 = patternString.lastIndexOf("'");
3298         String flagString = patternString.substring(
3299                                           break1+1, patternString.length());
3300         patternString = patternString.substring(1, break1);
3301 
3302         if (flagString.equals("i"))
3303             return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3304 
3305         if (flagString.equals("m"))
3306             return Pattern.compile(patternString, Pattern.MULTILINE);
3307 
3308         return Pattern.compile(patternString);
3309     }
3310 
3311     /**
3312      * Reads a line from the input file. Keeps reading lines until a non
3313      * empty non comment line is read. If the line contains a \n then
3314      * these two characters are replaced by a newline char. If a \\uxxxx
3315      * sequence is read then the sequence is replaced by the unicode char.
3316      */
3317     private static String grabLine(BufferedReader r) throws Exception {
3318         int index = 0;
3319         String line = r.readLine();
3320         while (line.startsWith("//") || line.length() < 1)
3321             line = r.readLine();
3322         while ((index = line.indexOf("\\n")) != -1) {
3323             StringBuffer temp = new StringBuffer(line);
3324             temp.replace(index, index+2, "\n");
3325             line = temp.toString();
3326         }
3327         while ((index = line.indexOf("\\u")) != -1) {
3328             StringBuffer temp = new StringBuffer(line);
3329             String value = temp.substring(index+2, index+6);
3330             char aChar = (char)Integer.parseInt(value, 16);
3331             String unicodeChar = "" + aChar;
3332             temp.replace(index, index+6, unicodeChar);
3333             line = temp.toString();
3334         }
3335 
3336         return line;
3337     }
3338 
3339     private static void check(Pattern p, String s, String g, String expected) {
3340         Matcher m = p.matcher(s);
3341         m.find();
3342         if (!m.group(g).equals(expected))
3343             failCount++;
3344     }
3345 
3346     private static void checkReplaceFirst(String p, String s, String r, String expected)
3347     {
3348         if (!expected.equals(Pattern.compile(p)
3349                                     .matcher(s)
3350                                     .replaceFirst(r)))
3351             failCount++;
3352     }
3353 
3354     private static void checkReplaceAll(String p, String s, String r, String expected)
3355     {
3356         if (!expected.equals(Pattern.compile(p)
3357                                     .matcher(s)
3358                                     .replaceAll(r)))
3359             failCount++;
3360     }
3361 
3362     private static void checkExpectedFail(String p) {
3363         try {
3364             Pattern.compile(p);
3365         } catch (PatternSyntaxException pse) {
3366             //pse.printStackTrace();
3367             return;
3368         }
3369         failCount++;
3370     }
3371 
3372     private static void checkExpectedFail(Matcher m, String g) {
3373         m.find();
3374         try {
3375             m.group(g);
3376         } catch (IllegalArgumentException iae) {
3377             //iae.printStackTrace();
3378             return;
3379         } catch (NullPointerException npe) {
3380             return;
3381         }
3382         failCount++;
3383     }
3384 
3385 
3386     private static void namedGroupCaptureTest() throws Exception {
3387         check(Pattern.compile("x+(?<gname>y+)z+"),
3388               "xxxyyyzzz",
3389               "gname",
3390               "yyy");
3391 
3392         check(Pattern.compile("x+(?<gname8>y+)z+"),
3393               "xxxyyyzzz",
3394               "gname8",
3395               "yyy");
3396 
3397         //backref
3398         Pattern pattern = Pattern.compile("(a*)bc\\1");
3399         check(pattern, "zzzaabcazzz", true);  // found "abca"
3400 
3401         check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
3402               "zzzaabcaazzz", true);
3403 
3404         check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
3405               "abcdefabc", true);
3406 
3407         check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
3408               "abcdefghijkk", true);
3409 
3410         // Supplementary character tests
3411         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3412               toSupplementaries("zzzaabcazzz"), true);
3413 
3414         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3415               toSupplementaries("zzzaabcaazzz"), true);
3416 
3417         check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
3418               toSupplementaries("abcdefabc"), true);
3419 
3420         check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
3421                               "(?<gname>" +
3422                               toSupplementaries("k)") + "\\k<gname>"),
3423               toSupplementaries("abcdefghijkk"), true);
3424 
3425         check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
3426               "xxxyyyzzzyyy",
3427               "gname",
3428               "yyy");
3429 
3430         //replaceFirst/All
3431         checkReplaceFirst("(?<gn>ab)(c*)",
3432                           "abccczzzabcczzzabccc",
3433                           "${gn}",
3434                           "abzzzabcczzzabccc");
3435 
3436         checkReplaceAll("(?<gn>ab)(c*)",
3437                         "abccczzzabcczzzabccc",
3438                         "${gn}",
3439                         "abzzzabzzzab");
3440 
3441 
3442         checkReplaceFirst("(?<gn>ab)(c*)",
3443                           "zzzabccczzzabcczzzabccczzz",
3444                           "${gn}",
3445                           "zzzabzzzabcczzzabccczzz");
3446 
3447         checkReplaceAll("(?<gn>ab)(c*)",
3448                         "zzzabccczzzabcczzzabccczzz",
3449                         "${gn}",
3450                         "zzzabzzzabzzzabzzz");
3451 
3452         checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
3453                           "zzzabccczzzabcczzzabccczzz",
3454                           "${gn2}",
3455                           "zzzccczzzabcczzzabccczzz");
3456 
3457         checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
3458                         "zzzabccczzzabcczzzabccczzz",
3459                         "${gn2}",
3460                         "zzzccczzzcczzzccczzz");
3461 
3462         //toSupplementaries("(ab)(c*)"));
3463         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3464                            ")(?<gn2>" + toSupplementaries("c") + "*)",
3465                           toSupplementaries("abccczzzabcczzzabccc"),
3466                           "${gn1}",
3467                           toSupplementaries("abzzzabcczzzabccc"));
3468 
3469 
3470         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3471                         ")(?<gn2>" + toSupplementaries("c") + "*)",
3472                         toSupplementaries("abccczzzabcczzzabccc"),
3473                         "${gn1}",
3474                         toSupplementaries("abzzzabzzzab"));
3475 
3476         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3477                            ")(?<gn2>" + toSupplementaries("c") + "*)",
3478                           toSupplementaries("abccczzzabcczzzabccc"),
3479                           "${gn2}",
3480                           toSupplementaries("ccczzzabcczzzabccc"));
3481 
3482 
3483         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3484                         ")(?<gn2>" + toSupplementaries("c") + "*)",
3485                         toSupplementaries("abccczzzabcczzzabccc"),
3486                         "${gn2}",
3487                         toSupplementaries("ccczzzcczzzccc"));
3488 
3489         checkReplaceFirst("(?<dog>Dog)AndCat",
3490                           "zzzDogAndCatzzzDogAndCatzzz",
3491                           "${dog}",
3492                           "zzzDogzzzDogAndCatzzz");
3493 
3494 
3495         checkReplaceAll("(?<dog>Dog)AndCat",
3496                           "zzzDogAndCatzzzDogAndCatzzz",
3497                           "${dog}",
3498                           "zzzDogzzzDogzzz");
3499 
3500         // backref in Matcher & String
3501         if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
3502             !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
3503             failCount++;
3504 
3505         // negative
3506         checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
3507         checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
3508         checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
3509         checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
3510         checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
3511         checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3512                           "gnameX");
3513         checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3514                           null);
3515         report("NamedGroupCapture");
3516     }
3517 
3518     // This is for bug 6919132
3519     private static void nonBmpClassComplementTest() throws Exception {
3520         Pattern p = Pattern.compile("\\P{Lu}");
3521         Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3522         if (m.find() && m.start() == 1)
3523             failCount++;
3524 
3525         // from a unicode category
3526         p = Pattern.compile("\\P{Lu}");
3527         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3528         if (m.find())
3529             failCount++;
3530         if (!m.hitEnd())
3531             failCount++;
3532 
3533         // block
3534         p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
3535         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3536         if (m.find() && m.start() == 1)
3537             failCount++;
3538 
3539         report("NonBmpClassComplement");
3540     }
3541 
3542 }