1 /*
   2  * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /**
  27  * @test
  28  * @summary tests RegExp framework
  29  * @author Mike McCloskey
  30  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
  31  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
  32  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  33  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  34  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  35  * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
  36  * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647
  37  */
  38 
  39 import java.util.regex.*;
  40 import java.util.Random;
  41 import java.io.*;
  42 import java.util.*;
  43 import java.nio.CharBuffer;
  44 import java.util.function.Predicate;
  45 
  46 /**
  47  * This is a test class created to check the operation of
  48  * the Pattern and Matcher classes.
  49  */
  50 public class RegExTest {
  51 
    // Random source used by getRandomAlphaString() (and presumably by other
    // randomized tests elsewhere in this class).
    private static Random generator = new Random();
    // Set to true by report() as soon as one test group has recorded a
    // failure; main() reads it to decide whether to throw.
    private static boolean failure = false;
    // Number of failures recorded by the check helpers for the test group
    // currently running; report() prints it and resets it to 0.
    private static int failCount = 0;
    // Name of the first test group for which report() saw a non-zero
    // failCount, or null while no group has failed.
    private static String firstFailure = null;
  56 
  57     /**
  58      * Main to interpret arguments and run several tests.
  59      *
  60      */
  61     public static void main(String[] args) throws Exception {
  62         // Most of the tests are in a file
  63         processFile("TestCases.txt");
  64         //processFile("PerlCases.txt");
  65         processFile("BMPTestCases.txt");
  66         processFile("SupplementaryTestCases.txt");
  67 
  68         // These test many randomly generated char patterns
  69         bm();
  70         slice();
  71 
  72         // These are hard to put into the file
  73         escapes();
  74         blankInput();
  75 
  76         // Substitition tests on randomly generated sequences
  77         globalSubstitute();
  78         stringbufferSubstitute();
  79         substitutionBasher();
  80 
  81         // Canonical Equivalence
  82         ceTest();
  83 
  84         // Anchors
  85         anchorTest();
  86 
  87         // boolean match calls
  88         matchesTest();
  89         lookingAtTest();
  90 
  91         // Pattern API
  92         patternMatchesTest();
  93 
  94         // Misc
  95         lookbehindTest();
  96         nullArgumentTest();
  97         backRefTest();
  98         groupCaptureTest();
  99         caretTest();
 100         charClassTest();
 101         emptyPatternTest();
 102         findIntTest();
 103         group0Test();
 104         longPatternTest();
 105         octalTest();
 106         ampersandTest();
 107         negationTest();
 108         splitTest();
 109         appendTest();
 110         caseFoldingTest();
 111         commentsTest();
 112         unixLinesTest();
 113         replaceFirstTest();
 114         gTest();
 115         zTest();
 116         serializeTest();
 117         reluctantRepetitionTest();
 118         multilineDollarTest();
 119         dollarAtEndTest();
 120         caretBetweenTerminatorsTest();
 121         // This RFE rejected in Tiger numOccurrencesTest();
 122         javaCharClassTest();
 123         nonCaptureRepetitionTest();
 124         notCapturedGroupCurlyMatchTest();
 125         escapedSegmentTest();
 126         literalPatternTest();
 127         literalReplacementTest();
 128         regionTest();
 129         toStringTest();
 130         negatedCharClassTest();
 131         findFromTest();
 132         boundsTest();
 133         unicodeWordBoundsTest();
 134         caretAtEndTest();
 135         wordSearchTest();
 136         hitEndTest();
 137         toMatchResultTest();
 138         surrogatesInClassTest();
 139         removeQEQuotingTest();
 140         namedGroupCaptureTest();
 141         nonBmpClassComplementTest();
 142         unicodePropertiesTest();
 143         unicodeHexNotationTest();
 144         unicodeClassesTest();
 145         horizontalAndVerticalWSTest();
 146         linebreakTest();
 147         branchTest();
 148         groupCurlyNotFoundSuppTest();
 149         groupCurlyBackoffTest();
 150         patternAsPredicate();
 151         if (failure) {
 152             throw new
 153                 RuntimeException("RegExTest failed, 1st failure: " +
 154                                  firstFailure);
 155         } else {
 156             System.err.println("OKAY: All tests passed.");
 157         }
 158     }
 159 
 160     // Utility functions
 161 
 162     private static String getRandomAlphaString(int length) {
 163         StringBuffer buf = new StringBuffer(length);
 164         for (int i=0; i<length; i++) {
 165             char randChar = (char)(97 + generator.nextInt(26));
 166             buf.append(randChar);
 167         }
 168         return buf.toString();
 169     }
 170 
 171     private static void check(Matcher m, String expected) {
 172         m.find();
 173         if (!m.group().equals(expected))
 174             failCount++;
 175     }
 176 
 177     private static void check(Matcher m, String result, boolean expected) {
 178         m.find();
 179         if (m.group().equals(result) != expected)
 180             failCount++;
 181     }
 182 
 183     private static void check(Pattern p, String s, boolean expected) {
 184         if (p.matcher(s).find() != expected)
 185             failCount++;
 186     }
 187 
 188     private static void check(String p, String s, boolean expected) {
 189         Matcher matcher = Pattern.compile(p).matcher(s);
 190         if (matcher.find() != expected)
 191             failCount++;
 192     }
 193 
 194     private static void check(String p, char c, boolean expected) {
 195         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 196         Pattern pattern = Pattern.compile(propertyPattern);
 197         char[] ca = new char[1]; ca[0] = c;
 198         Matcher matcher = pattern.matcher(new String(ca));
 199         if (!matcher.find())
 200             failCount++;
 201     }
 202 
 203     private static void check(String p, int codePoint, boolean expected) {
 204         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 205         Pattern pattern = Pattern.compile(propertyPattern);
 206         char[] ca = Character.toChars(codePoint);
 207         Matcher matcher = pattern.matcher(new String(ca));
 208         if (!matcher.find())
 209             failCount++;
 210     }
 211 
 212     private static void check(String p, int flag, String input, String s,
 213                               boolean expected)
 214     {
 215         Pattern pattern = Pattern.compile(p, flag);
 216         Matcher matcher = pattern.matcher(input);
 217         if (expected)
 218             check(matcher, s, expected);
 219         else
 220             check(pattern, input, false);
 221     }
 222 
 223     private static void report(String testName) {
 224         int spacesToAdd = 30 - testName.length();
 225         StringBuffer paddedNameBuffer = new StringBuffer(testName);
 226         for (int i=0; i<spacesToAdd; i++)
 227             paddedNameBuffer.append(" ");
 228         String paddedName = paddedNameBuffer.toString();
 229         System.err.println(paddedName + ": " +
 230                            (failCount==0 ? "Passed":"Failed("+failCount+")"));
 231         if (failCount > 0) {
 232             failure = true;
 233 
 234             if (firstFailure == null) {
 235                 firstFailure = testName;
 236             }
 237         }
 238 
 239         failCount = 0;
 240     }
 241 
 242     /**
 243      * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
 244      * supplementary characters. This method does NOT fully take care
 245      * of the regex syntax.
 246      */
 247     private static String toSupplementaries(String s) {
 248         int length = s.length();
 249         StringBuffer sb = new StringBuffer(length * 2);
 250 
 251         for (int i = 0; i < length; ) {
 252             char c = s.charAt(i++);
 253             if (c == '\\') {
 254                 sb.append(c);
 255                 if (i < length) {
 256                     c = s.charAt(i++);
 257                     sb.append(c);
 258                     if (c == 'u') {
 259                         // assume no syntax error
 260                         sb.append(s.charAt(i++));
 261                         sb.append(s.charAt(i++));
 262                         sb.append(s.charAt(i++));
 263                         sb.append(s.charAt(i++));
 264                     }
 265                 }
 266             } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
 267                 sb.append('\ud800').append((char)('\udc00'+c));
 268             } else {
 269                 sb.append(c);
 270             }
 271         }
 272         return sb.toString();
 273     }
 274 
 275     // Regular expression tests
 276 
 277     // This is for bug 6178785
 278     // Test if an expected NPE gets thrown when passing in a null argument
 279     private static boolean check(Runnable test) {
 280         try {
 281             test.run();
 282             failCount++;
 283             return false;
 284         } catch (NullPointerException npe) {
 285             return true;
 286         }
 287     }
 288 
 289     private static void nullArgumentTest() {
 290         check(new Runnable() { public void run() { Pattern.compile(null); }});
 291         check(new Runnable() { public void run() { Pattern.matches(null, null); }});
 292         check(new Runnable() { public void run() { Pattern.matches("xyz", null);}});
 293         check(new Runnable() { public void run() { Pattern.quote(null);}});
 294         check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}});
 295         check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}});
 296 
 297         final Matcher m = Pattern.compile("xyz").matcher("xyz");
 298         m.matches();
 299         check(new Runnable() { public void run() { m.appendTail(null);}});
 300         check(new Runnable() { public void run() { m.replaceAll(null);}});
 301         check(new Runnable() { public void run() { m.replaceFirst(null);}});
 302         check(new Runnable() { public void run() { m.appendReplacement(null, null);}});
 303         check(new Runnable() { public void run() { m.reset(null);}});
 304         check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}});
 305         //check(new Runnable() { public void run() { m.usePattern(null);}});
 306 
 307         report("Null Argument");
 308     }
 309 
 310     // This is for bug6635133
 311     // Test if surrogate pair in Unicode escapes can be handled correctly.
 312     private static void surrogatesInClassTest() throws Exception {
 313         Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
 314         Matcher matcher = pattern.matcher("\ud834\udd22");
 315         if (!matcher.find())
 316             failCount++;
 317 
 318         report("Surrogate pair in Unicode escape");
 319     }
 320 
 321     // This is for bug6990617
 322     // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
 323     // char encoding is only 2 or 3 digits instead of 4 and the first quoted
 324     // char is an octal digit.
 325     private static void removeQEQuotingTest() throws Exception {
 326         Pattern pattern =
 327             Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
 328         Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
 329         if (!matcher.find())
 330             failCount++;
 331 
 332         report("Remove Q/E Quoting");
 333     }
 334 
 335     // This is for bug 4988891
 336     // Test toMatchResult to see that it is a copy of the Matcher
 337     // that is not affected by subsequent operations on the original
 338     private static void toMatchResultTest() throws Exception {
 339         Pattern pattern = Pattern.compile("squid");
 340         Matcher matcher = pattern.matcher(
 341             "agiantsquidofdestinyasmallsquidoffate");
 342         matcher.find();
 343         int matcherStart1 = matcher.start();
 344         MatchResult mr = matcher.toMatchResult();
 345         if (mr == matcher)
 346             failCount++;
 347         int resultStart1 = mr.start();
 348         if (matcherStart1 != resultStart1)
 349             failCount++;
 350         matcher.find();
 351         int matcherStart2 = matcher.start();
 352         int resultStart2 = mr.start();
 353         if (matcherStart2 == resultStart2)
 354             failCount++;
 355         if (resultStart1 != resultStart2)
 356             failCount++;
 357         MatchResult mr2 = matcher.toMatchResult();
 358         if (mr == mr2)
 359             failCount++;
 360         if (mr2.start() != matcherStart2)
 361             failCount++;
 362         report("toMatchResult is a copy");
 363     }
 364 
 365     // This is for bug 5013885
 366     // Must test a slice to see if it reports hitEnd correctly
 367     private static void hitEndTest() throws Exception {
 368         // Basic test of Slice node
 369         Pattern p = Pattern.compile("^squidattack");
 370         Matcher m = p.matcher("squack");
 371         m.find();
 372         if (m.hitEnd())
 373             failCount++;
 374         m.reset("squid");
 375         m.find();
 376         if (!m.hitEnd())
 377             failCount++;
 378 
 379         // Test Slice, SliceA and SliceU nodes
 380         for (int i=0; i<3; i++) {
 381             int flags = 0;
 382             if (i==1) flags = Pattern.CASE_INSENSITIVE;
 383             if (i==2) flags = Pattern.UNICODE_CASE;
 384             p = Pattern.compile("^abc", flags);
 385             m = p.matcher("ad");
 386             m.find();
 387             if (m.hitEnd())
 388                 failCount++;
 389             m.reset("ab");
 390             m.find();
 391             if (!m.hitEnd())
 392                 failCount++;
 393         }
 394 
 395         // Test Boyer-Moore node
 396         p = Pattern.compile("catattack");
 397         m = p.matcher("attack");
 398         m.find();
 399         if (!m.hitEnd())
 400             failCount++;
 401 
 402         p = Pattern.compile("catattack");
 403         m = p.matcher("attackattackattackcatatta");
 404         m.find();
 405         if (!m.hitEnd())
 406             failCount++;
 407         report("hitEnd from a Slice");
 408     }
 409 
 410     // This is for bug 4997476
 411     // It is weird code submitted by customer demonstrating a regression
 412     private static void wordSearchTest() throws Exception {
 413         String testString = new String("word1 word2 word3");
 414         Pattern p = Pattern.compile("\\b");
 415         Matcher m = p.matcher(testString);
 416         int position = 0;
 417         int start = 0;
 418         while (m.find(position)) {
 419             start = m.start();
 420             if (start == testString.length())
 421                 break;
 422             if (m.find(start+1)) {
 423                 position = m.start();
 424             } else {
 425                 position = testString.length();
 426             }
 427             if (testString.substring(start, position).equals(" "))
 428                 continue;
 429             if (!testString.substring(start, position-1).startsWith("word"))
 430                 failCount++;
 431         }
 432         report("Customer word search");
 433     }
 434 
 435     // This is for bug 4994840
 436     private static void caretAtEndTest() throws Exception {
 437         // Problem only occurs with multiline patterns
 438         // containing a beginning-of-line caret "^" followed
 439         // by an expression that also matches the empty string.
 440         Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
 441         Matcher matcher = pattern.matcher("\r");
 442         matcher.find();
 443         matcher.find();
 444         report("Caret at end");
 445     }
 446 
 447     // This test is for 4979006
 448     // Check to see if word boundary construct properly handles unicode
 449     // non spacing marks
 450     private static void unicodeWordBoundsTest() throws Exception {
 451         String spaces = "  ";
 452         String wordChar = "a";
 453         String nsm = "\u030a";
 454 
 455         assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
 456 
 457         Pattern pattern = Pattern.compile("\\b");
 458         Matcher matcher = pattern.matcher("");
 459         // S=other B=word character N=non spacing mark .=word boundary
 460         // SS.BB.SS
 461         String input = spaces + wordChar + wordChar + spaces;
 462         twoFindIndexes(input, matcher, 2, 4);
 463         // SS.BBN.SS
 464         input = spaces + wordChar +wordChar + nsm + spaces;
 465         twoFindIndexes(input, matcher, 2, 5);
 466         // SS.BN.SS
 467         input = spaces + wordChar + nsm + spaces;
 468         twoFindIndexes(input, matcher, 2, 4);
 469         // SS.BNN.SS
 470         input = spaces + wordChar + nsm + nsm + spaces;
 471         twoFindIndexes(input, matcher, 2, 5);
 472         // SSN.BB.SS
 473         input = spaces + nsm + wordChar + wordChar + spaces;
 474         twoFindIndexes(input, matcher, 3, 5);
 475         // SS.BNB.SS
 476         input = spaces + wordChar + nsm + wordChar + spaces;
 477         twoFindIndexes(input, matcher, 2, 5);
 478         // SSNNSS
 479         input = spaces + nsm + nsm + spaces;
 480         matcher.reset(input);
 481         if (matcher.find())
 482             failCount++;
 483         // SSN.BBN.SS
 484         input = spaces + nsm + wordChar + wordChar + nsm + spaces;
 485         twoFindIndexes(input, matcher, 3, 6);
 486 
 487         report("Unicode word boundary");
 488     }
 489 
 490     private static void twoFindIndexes(String input, Matcher matcher, int a,
 491                                        int b) throws Exception
 492     {
 493         matcher.reset(input);
 494         matcher.find();
 495         if (matcher.start() != a)
 496             failCount++;
 497         matcher.find();
 498         if (matcher.start() != b)
 499             failCount++;
 500     }
 501 
 502     // This test is for 6284152
 503     static void check(String regex, String input, String[] expected) {
 504         List<String> result = new ArrayList<String>();
 505         Pattern p = Pattern.compile(regex);
 506         Matcher m = p.matcher(input);
 507         while (m.find()) {
 508             result.add(m.group());
 509         }
 510         if (!Arrays.asList(expected).equals(result))
 511             failCount++;
 512     }
 513 
 514     private static void lookbehindTest() throws Exception {
 515         //Positive
 516         check("(?<=%.{0,5})foo\\d",
 517               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 518               new String[]{"foo1", "foo2", "foo3"});
 519 
 520         //boundary at end of the lookbehind sub-regex should work consistently
 521         //with the boundary just after the lookbehind sub-regex
 522         check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
 523         check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
 524         check("(?<!abc )\\bfoo", "abc foo", new String[0]);
 525         check("(?<!abc \\b)foo", "abc foo", new String[0]);
 526 
 527         //Negative
 528         check("(?<!%.{0,5})foo\\d",
 529               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 530               new String[] {"foo4", "foo5"});
 531 
 532         //Positive greedy
 533         check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
 534 
 535         //Positive reluctant
 536         check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
 537 
 538         //supplementary
 539         check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 540               new String[] {"fo\ud800\udc00o"});
 541         check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 542               new String[] {"fo\ud800\udc00o"});
 543         check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
 544               new String[] {"fo\ud800\udc00o"});
 545         check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
 546               new String[] {"fo\ud800\udc00o"});
 547         report("Lookbehind");
 548     }
 549 
 550     // This test is for 4938995
 551     // Check to see if weak region boundaries are transparent to
 552     // lookahead and lookbehind constructs
 553     private static void boundsTest() throws Exception {
 554         String fullMessage = "catdogcat";
 555         Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
 556         Matcher matcher = pattern.matcher("catdogca");
 557         matcher.useTransparentBounds(true);
 558         if (matcher.find())
 559             failCount++;
 560         matcher.reset("atdogcat");
 561         if (matcher.find())
 562             failCount++;
 563         matcher.reset(fullMessage);
 564         if (!matcher.find())
 565             failCount++;
 566         matcher.reset(fullMessage);
 567         matcher.region(0,9);
 568         if (!matcher.find())
 569             failCount++;
 570         matcher.reset(fullMessage);
 571         matcher.region(0,6);
 572         if (!matcher.find())
 573             failCount++;
 574         matcher.reset(fullMessage);
 575         matcher.region(3,6);
 576         if (!matcher.find())
 577             failCount++;
 578         matcher.useTransparentBounds(false);
 579         if (matcher.find())
 580             failCount++;
 581 
 582         // Negative lookahead/lookbehind
 583         pattern = Pattern.compile("(?<!cat)dog(?!cat)");
 584         matcher = pattern.matcher("dogcat");
 585         matcher.useTransparentBounds(true);
 586         matcher.region(0,3);
 587         if (matcher.find())
 588             failCount++;
 589         matcher.reset("catdog");
 590         matcher.region(3,6);
 591         if (matcher.find())
 592             failCount++;
 593         matcher.useTransparentBounds(false);
 594         matcher.reset("dogcat");
 595         matcher.region(0,3);
 596         if (!matcher.find())
 597             failCount++;
 598         matcher.reset("catdog");
 599         matcher.region(3,6);
 600         if (!matcher.find())
 601             failCount++;
 602 
 603         report("Region bounds transparency");
 604     }
 605 
 606     // This test is for 4945394
 607     private static void findFromTest() throws Exception {
 608         String message = "This is 40 $0 message.";
 609         Pattern pat = Pattern.compile("\\$0");
 610         Matcher match = pat.matcher(message);
 611         if (!match.find())
 612             failCount++;
 613         if (match.find())
 614             failCount++;
 615         if (match.find())
 616             failCount++;
 617         report("Check for alternating find");
 618     }
 619 
 620     // This test is for 4872664 and 4892980
 621     private static void negatedCharClassTest() throws Exception {
 622         Pattern pattern = Pattern.compile("[^>]");
 623         Matcher matcher = pattern.matcher("\u203A");
 624         if (!matcher.matches())
 625             failCount++;
 626         pattern = Pattern.compile("[^fr]");
 627         matcher = pattern.matcher("a");
 628         if (!matcher.find())
 629             failCount++;
 630         matcher.reset("\u203A");
 631         if (!matcher.find())
 632             failCount++;
 633         String s = "for";
 634         String result[] = s.split("[^fr]");
 635         if (!result[0].equals("f"))
 636             failCount++;
 637         if (!result[1].equals("r"))
 638             failCount++;
 639         s = "f\u203Ar";
 640         result = s.split("[^fr]");
 641         if (!result[0].equals("f"))
 642             failCount++;
 643         if (!result[1].equals("r"))
 644             failCount++;
 645 
 646         // Test adding to bits, subtracting a node, then adding to bits again
 647         pattern = Pattern.compile("[^f\u203Ar]");
 648         matcher = pattern.matcher("a");
 649         if (!matcher.find())
 650             failCount++;
 651         matcher.reset("f");
 652         if (matcher.find())
 653             failCount++;
 654         matcher.reset("\u203A");
 655         if (matcher.find())
 656             failCount++;
 657         matcher.reset("r");
 658         if (matcher.find())
 659             failCount++;
 660         matcher.reset("\u203B");
 661         if (!matcher.find())
 662             failCount++;
 663 
 664         // Test subtracting a node, adding to bits, subtracting again
 665         pattern = Pattern.compile("[^\u203Ar\u203B]");
 666         matcher = pattern.matcher("a");
 667         if (!matcher.find())
 668             failCount++;
 669         matcher.reset("\u203A");
 670         if (matcher.find())
 671             failCount++;
 672         matcher.reset("r");
 673         if (matcher.find())
 674             failCount++;
 675         matcher.reset("\u203B");
 676         if (matcher.find())
 677             failCount++;
 678         matcher.reset("\u203C");
 679         if (!matcher.find())
 680             failCount++;
 681 
 682         report("Negated Character Class");
 683     }
 684 
 685     // This test is for 4628291
 686     private static void toStringTest() throws Exception {
 687         Pattern pattern = Pattern.compile("b+");
 688         if (pattern.toString() != "b+")
 689             failCount++;
 690         Matcher matcher = pattern.matcher("aaabbbccc");
 691         String matcherString = matcher.toString(); // unspecified
 692         matcher.find();
 693         matcherString = matcher.toString(); // unspecified
 694         matcher.region(0,3);
 695         matcherString = matcher.toString(); // unspecified
 696         matcher.reset();
 697         matcherString = matcher.toString(); // unspecified
 698         report("toString");
 699     }
 700 
 701     // This test is for 4808962
 702     private static void literalPatternTest() throws Exception {
 703         int flags = Pattern.LITERAL;
 704 
 705         Pattern pattern = Pattern.compile("abc\\t$^", flags);
 706         check(pattern, "abc\\t$^", true);
 707 
 708         pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
 709         check(pattern, "abc\\t$^", true);
 710 
 711         pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
 712         check(pattern, "\\Qa^$bcabc\\E", true);
 713         check(pattern, "a^$bcabc", false);
 714 
 715         pattern = Pattern.compile("\\\\Q\\\\E");
 716         check(pattern, "\\Q\\E", true);
 717 
 718         pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
 719         check(pattern, "abcefg\\Q\\Ehij", true);
 720 
 721         pattern = Pattern.compile("\\\\\\Q\\\\E");
 722         check(pattern, "\\\\\\\\", true);
 723 
 724         pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
 725         check(pattern, "\\Qa^$bcabc\\E", true);
 726         check(pattern, "a^$bcabc", false);
 727 
 728         pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
 729         check(pattern, "\\Qabc\\Edef", true);
 730         check(pattern, "abcdef", false);
 731 
 732         pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
 733         check(pattern, "abc\\Edef", true);
 734         check(pattern, "abcdef", false);
 735 
 736         pattern = Pattern.compile(Pattern.quote("\\E"));
 737         check(pattern, "\\E", true);
 738 
 739         pattern = Pattern.compile("((((abc.+?:)", flags);
 740         check(pattern, "((((abc.+?:)", true);
 741 
 742         flags |= Pattern.MULTILINE;
 743 
 744         pattern = Pattern.compile("^cat$", flags);
 745         check(pattern, "abc^cat$def", true);
 746         check(pattern, "cat", false);
 747 
 748         flags |= Pattern.CASE_INSENSITIVE;
 749 
 750         pattern = Pattern.compile("abcdef", flags);
 751         check(pattern, "ABCDEF", true);
 752         check(pattern, "AbCdEf", true);
 753 
 754         flags |= Pattern.DOTALL;
 755 
 756         pattern = Pattern.compile("a...b", flags);
 757         check(pattern, "A...b", true);
 758         check(pattern, "Axxxb", false);
 759 
 760         flags |= Pattern.CANON_EQ;
 761 
 762         Pattern p = Pattern.compile("testa\u030a", flags);
 763         check(pattern, "testa\u030a", false);
 764         check(pattern, "test\u00e5", false);
 765 
 766         // Supplementary character test
 767         flags = Pattern.LITERAL;
 768 
 769         pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
 770         check(pattern, toSupplementaries("abc\\t$^"), true);
 771 
 772         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
 773         check(pattern, toSupplementaries("abc\\t$^"), true);
 774 
 775         pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
 776         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 777         check(pattern, toSupplementaries("a^$bcabc"), false);
 778 
 779         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
 780         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 781         check(pattern, toSupplementaries("a^$bcabc"), false);
 782 
 783         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
 784         check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
 785         check(pattern, toSupplementaries("abcdef"), false);
 786 
 787         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
 788         check(pattern, toSupplementaries("abc\\Edef"), true);
 789         check(pattern, toSupplementaries("abcdef"), false);
 790 
 791         pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
 792         check(pattern, toSupplementaries("((((abc.+?:)"), true);
 793 
 794         flags |= Pattern.MULTILINE;
 795 
 796         pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
 797         check(pattern, toSupplementaries("abc^cat$def"), true);
 798         check(pattern, toSupplementaries("cat"), false);
 799 
 800         flags |= Pattern.DOTALL;
 801 
 802         // note: this is case-sensitive.
 803         pattern = Pattern.compile(toSupplementaries("a...b"), flags);
 804         check(pattern, toSupplementaries("a...b"), true);
 805         check(pattern, toSupplementaries("axxxb"), false);
 806 
 807         flags |= Pattern.CANON_EQ;
 808 
 809         String t = toSupplementaries("test");
 810         p = Pattern.compile(t + "a\u030a", flags);
 811         check(pattern, t + "a\u030a", false);
 812         check(pattern, t + "\u00e5", false);
 813 
 814         report("Literal pattern");
 815     }
 816 
 817     // This test is for 4803179
 818     // This test is also for 4808962, replacement parts
 819     private static void literalReplacementTest() throws Exception {
 820         int flags = Pattern.LITERAL;
 821 
 822         Pattern pattern = Pattern.compile("abc", flags);
 823         Matcher matcher = pattern.matcher("zzzabczzz");
 824         String replaceTest = "$0";
 825         String result = matcher.replaceAll(replaceTest);
 826         if (!result.equals("zzzabczzz"))
 827             failCount++;
 828 
 829         matcher.reset();
 830         String literalReplacement = matcher.quoteReplacement(replaceTest);
 831         result = matcher.replaceAll(literalReplacement);
 832         if (!result.equals("zzz$0zzz"))
 833             failCount++;
 834 
 835         matcher.reset();
 836         replaceTest = "\\t$\\$";
 837         literalReplacement = matcher.quoteReplacement(replaceTest);
 838         result = matcher.replaceAll(literalReplacement);
 839         if (!result.equals("zzz\\t$\\$zzz"))
 840             failCount++;
 841 
 842         // Supplementary character test
 843         pattern = Pattern.compile(toSupplementaries("abc"), flags);
 844         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
 845         replaceTest = "$0";
 846         result = matcher.replaceAll(replaceTest);
 847         if (!result.equals(toSupplementaries("zzzabczzz")))
 848             failCount++;
 849 
 850         matcher.reset();
 851         literalReplacement = matcher.quoteReplacement(replaceTest);
 852         result = matcher.replaceAll(literalReplacement);
 853         if (!result.equals(toSupplementaries("zzz$0zzz")))
 854             failCount++;
 855 
 856         matcher.reset();
 857         replaceTest = "\\t$\\$";
 858         literalReplacement = matcher.quoteReplacement(replaceTest);
 859         result = matcher.replaceAll(literalReplacement);
 860         if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
 861             failCount++;
 862 
 863         // IAE should be thrown if backslash or '$' is the last character
 864         // in replacement string
 865         try {
 866             "\uac00".replaceAll("\uac00", "$");
 867             failCount++;
 868         } catch (IllegalArgumentException iie) {
 869         } catch (Exception e) {
 870             failCount++;
 871         }
 872         try {
 873             "\uac00".replaceAll("\uac00", "\\");
 874             failCount++;
 875         } catch (IllegalArgumentException iie) {
 876         } catch (Exception e) {
 877             failCount++;
 878         }
 879         report("Literal replacement");
 880     }
 881 
 882     // This test is for 4757029
 883     private static void regionTest() throws Exception {
 884         Pattern pattern = Pattern.compile("abc");
 885         Matcher matcher = pattern.matcher("abcdefabc");
 886 
 887         matcher.region(0,9);
 888         if (!matcher.find())
 889             failCount++;
 890         if (!matcher.find())
 891             failCount++;
 892         matcher.region(0,3);
 893         if (!matcher.find())
 894            failCount++;
 895         matcher.region(3,6);
 896         if (matcher.find())
 897            failCount++;
 898         matcher.region(0,2);
 899         if (matcher.find())
 900            failCount++;
 901 
 902         expectRegionFail(matcher, 1, -1);
 903         expectRegionFail(matcher, -1, -1);
 904         expectRegionFail(matcher, -1, 1);
 905         expectRegionFail(matcher, 5, 3);
 906         expectRegionFail(matcher, 5, 12);
 907         expectRegionFail(matcher, 12, 12);
 908 
 909         pattern = Pattern.compile("^abc$");
 910         matcher = pattern.matcher("zzzabczzz");
 911         matcher.region(0,9);
 912         if (matcher.find())
 913             failCount++;
 914         matcher.region(3,6);
 915         if (!matcher.find())
 916            failCount++;
 917         matcher.region(3,6);
 918         matcher.useAnchoringBounds(false);
 919         if (matcher.find())
 920            failCount++;
 921 
 922         // Supplementary character test
 923         pattern = Pattern.compile(toSupplementaries("abc"));
 924         matcher = pattern.matcher(toSupplementaries("abcdefabc"));
 925         matcher.region(0,9*2);
 926         if (!matcher.find())
 927             failCount++;
 928         if (!matcher.find())
 929             failCount++;
 930         matcher.region(0,3*2);
 931         if (!matcher.find())
 932            failCount++;
 933         matcher.region(1,3*2);
 934         if (matcher.find())
 935            failCount++;
 936         matcher.region(3*2,6*2);
 937         if (matcher.find())
 938            failCount++;
 939         matcher.region(0,2*2);
 940         if (matcher.find())
 941            failCount++;
 942         matcher.region(0,2*2+1);
 943         if (matcher.find())
 944            failCount++;
 945 
 946         expectRegionFail(matcher, 1*2, -1);
 947         expectRegionFail(matcher, -1, -1);
 948         expectRegionFail(matcher, -1, 1*2);
 949         expectRegionFail(matcher, 5*2, 3*2);
 950         expectRegionFail(matcher, 5*2, 12*2);
 951         expectRegionFail(matcher, 12*2, 12*2);
 952 
 953         pattern = Pattern.compile(toSupplementaries("^abc$"));
 954         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
 955         matcher.region(0,9*2);
 956         if (matcher.find())
 957             failCount++;
 958         matcher.region(3*2,6*2);
 959         if (!matcher.find())
 960            failCount++;
 961         matcher.region(3*2+1,6*2);
 962         if (matcher.find())
 963            failCount++;
 964         matcher.region(3*2,6*2-1);
 965         if (matcher.find())
 966            failCount++;
 967         matcher.region(3*2,6*2);
 968         matcher.useAnchoringBounds(false);
 969         if (matcher.find())
 970            failCount++;
 971         report("Regions");
 972     }
 973 
 974     private static void expectRegionFail(Matcher matcher, int index1,
 975                                          int index2)
 976     {
 977         try {
 978             matcher.region(index1, index2);
 979             failCount++;
 980         } catch (IndexOutOfBoundsException ioobe) {
 981             // Correct result
 982         } catch (IllegalStateException ise) {
 983             // Correct result
 984         }
 985     }
 986 
 987     // This test is for 4803197
 988     private static void escapedSegmentTest() throws Exception {
 989 
 990         Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
 991         check(pattern, "dir1\\dir2", true);
 992 
 993         pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
 994         check(pattern, "dir1\\dir2\\", true);
 995 
 996         pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
 997         check(pattern, "dir1\\dir2\\", true);
 998 
 999         // Supplementary character test
1000         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
1001         check(pattern, toSupplementaries("dir1\\dir2"), true);
1002 
1003         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
1004         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1005 
1006         pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1007         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1008 
1009         report("Escaped segment");
1010     }
1011 
1012     // This test is for 4792284
1013     private static void nonCaptureRepetitionTest() throws Exception {
1014         String input = "abcdefgh;";
1015 
1016         String[] patterns = new String[] {
1017             "(?:\\w{4})+;",
1018             "(?:\\w{8})*;",
1019             "(?:\\w{2}){2,4};",
1020             "(?:\\w{4}){2,};",   // only matches the
1021             ".*?(?:\\w{5})+;",   //     specified minimum
1022             ".*?(?:\\w{9})*;",   //     number of reps - OK
1023             "(?:\\w{4})+?;",     // lazy repetition - OK
1024             "(?:\\w{4})++;",     // possessive repetition - OK
1025             "(?:\\w{2,}?)+;",    // non-deterministic - OK
1026             "(\\w{4})+;",        // capturing group - OK
1027         };
1028 
1029         for (int i = 0; i < patterns.length; i++) {
1030             // Check find()
1031             check(patterns[i], 0, input, input, true);
1032             // Check matches()
1033             Pattern p = Pattern.compile(patterns[i]);
1034             Matcher m = p.matcher(input);
1035 
1036             if (m.matches()) {
1037                 if (!m.group(0).equals(input))
1038                     failCount++;
1039             } else {
1040                 failCount++;
1041             }
1042         }
1043 
1044         report("Non capturing repetition");
1045     }
1046 
1047     // This test is for 6358731
1048     private static void notCapturedGroupCurlyMatchTest() throws Exception {
1049         Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1050         Matcher matcher = pattern.matcher("abcd");
1051         if (!matcher.matches() ||
1052              matcher.group(1) != null ||
1053              !matcher.group(2).equals("abcd")) {
1054             failCount++;
1055         }
1056         report("Not captured GroupCurly");
1057     }
1058 
1059     // This test is for 4706545
1060     private static void javaCharClassTest() throws Exception {
1061         for (int i=0; i<1000; i++) {
1062             char c = (char)generator.nextInt();
1063             check("{javaLowerCase}", c, Character.isLowerCase(c));
1064             check("{javaUpperCase}", c, Character.isUpperCase(c));
1065             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1066             check("{javaTitleCase}", c, Character.isTitleCase(c));
1067             check("{javaDigit}", c, Character.isDigit(c));
1068             check("{javaDefined}", c, Character.isDefined(c));
1069             check("{javaLetter}", c, Character.isLetter(c));
1070             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1071             check("{javaJavaIdentifierStart}", c,
1072                   Character.isJavaIdentifierStart(c));
1073             check("{javaJavaIdentifierPart}", c,
1074                   Character.isJavaIdentifierPart(c));
1075             check("{javaUnicodeIdentifierStart}", c,
1076                   Character.isUnicodeIdentifierStart(c));
1077             check("{javaUnicodeIdentifierPart}", c,
1078                   Character.isUnicodeIdentifierPart(c));
1079             check("{javaIdentifierIgnorable}", c,
1080                   Character.isIdentifierIgnorable(c));
1081             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1082             check("{javaWhitespace}", c, Character.isWhitespace(c));
1083             check("{javaISOControl}", c, Character.isISOControl(c));
1084             check("{javaMirrored}", c, Character.isMirrored(c));
1085 
1086         }
1087 
1088         // Supplementary character test
1089         for (int i=0; i<1000; i++) {
1090             int c = generator.nextInt(Character.MAX_CODE_POINT
1091                                       - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1092                         + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1093             check("{javaLowerCase}", c, Character.isLowerCase(c));
1094             check("{javaUpperCase}", c, Character.isUpperCase(c));
1095             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1096             check("{javaTitleCase}", c, Character.isTitleCase(c));
1097             check("{javaDigit}", c, Character.isDigit(c));
1098             check("{javaDefined}", c, Character.isDefined(c));
1099             check("{javaLetter}", c, Character.isLetter(c));
1100             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1101             check("{javaJavaIdentifierStart}", c,
1102                   Character.isJavaIdentifierStart(c));
1103             check("{javaJavaIdentifierPart}", c,
1104                   Character.isJavaIdentifierPart(c));
1105             check("{javaUnicodeIdentifierStart}", c,
1106                   Character.isUnicodeIdentifierStart(c));
1107             check("{javaUnicodeIdentifierPart}", c,
1108                   Character.isUnicodeIdentifierPart(c));
1109             check("{javaIdentifierIgnorable}", c,
1110                   Character.isIdentifierIgnorable(c));
1111             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1112             check("{javaWhitespace}", c, Character.isWhitespace(c));
1113             check("{javaISOControl}", c, Character.isISOControl(c));
1114             check("{javaMirrored}", c, Character.isMirrored(c));
1115         }
1116 
1117         report("Java character classes");
1118     }
1119 
1120     // This test is for 4523620
1121     /*
1122     private static void numOccurrencesTest() throws Exception {
1123         Pattern pattern = Pattern.compile("aaa");
1124 
1125         if (pattern.numOccurrences("aaaaaa", false) != 2)
1126             failCount++;
1127         if (pattern.numOccurrences("aaaaaa", true) != 4)
1128             failCount++;
1129 
1130         pattern = Pattern.compile("^");
1131         if (pattern.numOccurrences("aaaaaa", false) != 1)
1132             failCount++;
1133         if (pattern.numOccurrences("aaaaaa", true) != 1)
1134             failCount++;
1135 
1136         report("Number of Occurrences");
1137     }
1138     */
1139 
1140     // This test is for 4776374
1141     private static void caretBetweenTerminatorsTest() throws Exception {
1142         int flags1 = Pattern.DOTALL;
1143         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1144         int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1145         int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1146 
1147         check("^....", flags1, "test\ntest", "test", true);
1148         check(".....^", flags1, "test\ntest", "test", false);
1149         check(".....^", flags1, "test\n", "test", false);
1150         check("....^", flags1, "test\r\n", "test", false);
1151 
1152         check("^....", flags2, "test\ntest", "test", true);
1153         check("....^", flags2, "test\ntest", "test", false);
1154         check(".....^", flags2, "test\n", "test", false);
1155         check("....^", flags2, "test\r\n", "test", false);
1156 
1157         check("^....", flags3, "test\ntest", "test", true);
1158         check(".....^", flags3, "test\ntest", "test\n", true);
1159         check(".....^", flags3, "test\u0085test", "test\u0085", false);
1160         check(".....^", flags3, "test\n", "test", false);
1161         check(".....^", flags3, "test\r\n", "test", false);
1162         check("......^", flags3, "test\r\ntest", "test\r\n", true);
1163 
1164         check("^....", flags4, "test\ntest", "test", true);
1165         check(".....^", flags3, "test\ntest", "test\n", true);
1166         check(".....^", flags4, "test\u0085test", "test\u0085", true);
1167         check(".....^", flags4, "test\n", "test\n", false);
1168         check(".....^", flags4, "test\r\n", "test\r", false);
1169 
1170         // Supplementary character test
1171         String t = toSupplementaries("test");
1172         check("^....", flags1, t+"\n"+t, t, true);
1173         check(".....^", flags1, t+"\n"+t, t, false);
1174         check(".....^", flags1, t+"\n", t, false);
1175         check("....^", flags1, t+"\r\n", t, false);
1176 
1177         check("^....", flags2, t+"\n"+t, t, true);
1178         check("....^", flags2, t+"\n"+t, t, false);
1179         check(".....^", flags2, t+"\n", t, false);
1180         check("....^", flags2, t+"\r\n", t, false);
1181 
1182         check("^....", flags3, t+"\n"+t, t, true);
1183         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1184         check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1185         check(".....^", flags3, t+"\n", t, false);
1186         check(".....^", flags3, t+"\r\n", t, false);
1187         check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1188 
1189         check("^....", flags4, t+"\n"+t, t, true);
1190         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1191         check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1192         check(".....^", flags4, t+"\n", t+"\n", false);
1193         check(".....^", flags4, t+"\r\n", t+"\r", false);
1194 
1195         report("Caret between terminators");
1196     }
1197 
1198     // This test is for 4727935
1199     private static void dollarAtEndTest() throws Exception {
1200         int flags1 = Pattern.DOTALL;
1201         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1202         int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1203 
1204         check("....$", flags1, "test\n", "test", true);
1205         check("....$", flags1, "test\r\n", "test", true);
1206         check(".....$", flags1, "test\n", "test\n", true);
1207         check(".....$", flags1, "test\u0085", "test\u0085", true);
1208         check("....$", flags1, "test\u0085", "test", true);
1209 
1210         check("....$", flags2, "test\n", "test", true);
1211         check(".....$", flags2, "test\n", "test\n", true);
1212         check(".....$", flags2, "test\u0085", "test\u0085", true);
1213         check("....$", flags2, "test\u0085", "est\u0085", true);
1214 
1215         check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1216         check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1217         check("....$blah", flags3, "test\nblah", "!!!!", false);
1218         check(".....$blah", flags3, "test\nblah", "!!!!", false);
1219 
1220         // Supplementary character test
1221         String t = toSupplementaries("test");
1222         String b = toSupplementaries("blah");
1223         check("....$", flags1, t+"\n", t, true);
1224         check("....$", flags1, t+"\r\n", t, true);
1225         check(".....$", flags1, t+"\n", t+"\n", true);
1226         check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1227         check("....$", flags1, t+"\u0085", t, true);
1228 
1229         check("....$", flags2, t+"\n", t, true);
1230         check(".....$", flags2, t+"\n", t+"\n", true);
1231         check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1232         check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1233 
1234         check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1235         check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1236         check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1237         check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1238 
1239         report("Dollar at End");
1240     }
1241 
1242     // This test is for 4711773
1243     private static void multilineDollarTest() throws Exception {
1244         Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1245         Matcher matcher = findCR.matcher("first bit\nsecond bit");
1246         matcher.find();
1247         if (matcher.start(0) != 9)
1248             failCount++;
1249         matcher.find();
1250         if (matcher.start(0) != 20)
1251             failCount++;
1252 
1253         // Supplementary character test
1254         matcher = findCR.matcher(toSupplementaries("first  bit\n second  bit")); // double BMP chars
1255         matcher.find();
1256         if (matcher.start(0) != 9*2)
1257             failCount++;
1258         matcher.find();
1259         if (matcher.start(0) != 20*2)
1260             failCount++;
1261 
1262         report("Multiline Dollar");
1263     }
1264 
1265     private static void reluctantRepetitionTest() throws Exception {
1266         Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1267         check(p, "1 word word word 2", true);
1268         check(p, "1 wor wo w 2", true);
1269         check(p, "1 word word 2", true);
1270         check(p, "1 word 2", true);
1271         check(p, "1 wo w w 2", true);
1272         check(p, "1 wo w 2", true);
1273         check(p, "1 wor w 2", true);
1274 
1275         p = Pattern.compile("([a-z])+?c");
1276         Matcher m = p.matcher("ababcdefdec");
1277         check(m, "ababc");
1278 
1279         // Supplementary character test
1280         p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1281         m = p.matcher(toSupplementaries("ababcdefdec"));
1282         check(m, toSupplementaries("ababc"));
1283 
1284         report("Reluctant Repetition");
1285     }
1286 
1287     private static void serializeTest() throws Exception {
1288         String patternStr = "(b)";
1289         String matchStr = "b";
1290         Pattern pattern = Pattern.compile(patternStr);
1291         ByteArrayOutputStream baos = new ByteArrayOutputStream();
1292         ObjectOutputStream oos = new ObjectOutputStream(baos);
1293         oos.writeObject(pattern);
1294         oos.close();
1295         ObjectInputStream ois = new ObjectInputStream(
1296             new ByteArrayInputStream(baos.toByteArray()));
1297         Pattern serializedPattern = (Pattern)ois.readObject();
1298         ois.close();
1299         Matcher matcher = serializedPattern.matcher(matchStr);
1300         if (!matcher.matches())
1301             failCount++;
1302         if (matcher.groupCount() != 1)
1303             failCount++;
1304 
1305         report("Serialization");
1306     }
1307 
1308     private static void gTest() {
1309         Pattern pattern = Pattern.compile("\\G\\w");
1310         Matcher matcher = pattern.matcher("abc#x#x");
1311         matcher.find();
1312         matcher.find();
1313         matcher.find();
1314         if (matcher.find())
1315             failCount++;
1316 
1317         pattern = Pattern.compile("\\GA*");
1318         matcher = pattern.matcher("1A2AA3");
1319         matcher.find();
1320         if (matcher.find())
1321             failCount++;
1322 
1323         pattern = Pattern.compile("\\GA*");
1324         matcher = pattern.matcher("1A2AA3");
1325         if (!matcher.find(1))
1326             failCount++;
1327         matcher.find();
1328         if (matcher.find())
1329             failCount++;
1330 
1331         report("\\G");
1332     }
1333 
1334     private static void zTest() {
1335         Pattern pattern = Pattern.compile("foo\\Z");
1336         // Positives
1337         check(pattern, "foo\u0085", true);
1338         check(pattern, "foo\u2028", true);
1339         check(pattern, "foo\u2029", true);
1340         check(pattern, "foo\n", true);
1341         check(pattern, "foo\r", true);
1342         check(pattern, "foo\r\n", true);
1343         // Negatives
1344         check(pattern, "fooo", false);
1345         check(pattern, "foo\n\r", false);
1346 
1347         pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1348         // Positives
1349         check(pattern, "foo", true);
1350         check(pattern, "foo\n", true);
1351         // Negatives
1352         check(pattern, "foo\r", false);
1353         check(pattern, "foo\u0085", false);
1354         check(pattern, "foo\u2028", false);
1355         check(pattern, "foo\u2029", false);
1356 
1357         report("\\Z");
1358     }
1359 
1360     private static void replaceFirstTest() {
1361         Pattern pattern = Pattern.compile("(ab)(c*)");
1362         Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1363         if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1364             failCount++;
1365 
1366         matcher.reset("zzzabccczzzabcczzzabccczzz");
1367         if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1368             failCount++;
1369 
1370         matcher.reset("zzzabccczzzabcczzzabccczzz");
1371         String result = matcher.replaceFirst("$1");
1372         if (!result.equals("zzzabzzzabcczzzabccczzz"))
1373             failCount++;
1374 
1375         matcher.reset("zzzabccczzzabcczzzabccczzz");
1376         result = matcher.replaceFirst("$2");
1377         if (!result.equals("zzzccczzzabcczzzabccczzz"))
1378             failCount++;
1379 
1380         pattern = Pattern.compile("a*");
1381         matcher = pattern.matcher("aaaaaaaaaa");
1382         if (!matcher.replaceFirst("test").equals("test"))
1383             failCount++;
1384 
1385         pattern = Pattern.compile("a+");
1386         matcher = pattern.matcher("zzzaaaaaaaaaa");
1387         if (!matcher.replaceFirst("test").equals("zzztest"))
1388             failCount++;
1389 
1390         // Supplementary character test
1391         pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1392         matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1393         if (!matcher.replaceFirst(toSupplementaries("test"))
1394                 .equals(toSupplementaries("testzzzabcczzzabccc")))
1395             failCount++;
1396 
1397         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1398         if (!matcher.replaceFirst(toSupplementaries("test")).
1399             equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1400             failCount++;
1401 
1402         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1403         result = matcher.replaceFirst("$1");
1404         if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1405             failCount++;
1406 
1407         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1408         result = matcher.replaceFirst("$2");
1409         if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1410             failCount++;
1411 
1412         pattern = Pattern.compile(toSupplementaries("a*"));
1413         matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1414         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1415             failCount++;
1416 
1417         pattern = Pattern.compile(toSupplementaries("a+"));
1418         matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1419         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1420             failCount++;
1421 
1422         report("Replace First");
1423     }
1424 
1425     private static void unixLinesTest() {
1426         Pattern pattern = Pattern.compile(".*");
1427         Matcher matcher = pattern.matcher("aa\u2028blah");
1428         matcher.find();
1429         if (!matcher.group(0).equals("aa"))
1430             failCount++;
1431 
1432         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1433         matcher = pattern.matcher("aa\u2028blah");
1434         matcher.find();
1435         if (!matcher.group(0).equals("aa\u2028blah"))
1436             failCount++;
1437 
1438         pattern = Pattern.compile("[az]$",
1439                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1440         matcher = pattern.matcher("aa\u2028zz");
1441         check(matcher, "a\u2028", false);
1442 
1443         // Supplementary character test
1444         pattern = Pattern.compile(".*");
1445         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1446         matcher.find();
1447         if (!matcher.group(0).equals(toSupplementaries("aa")))
1448             failCount++;
1449 
1450         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1451         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1452         matcher.find();
1453         if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1454             failCount++;
1455 
1456         pattern = Pattern.compile(toSupplementaries("[az]$"),
1457                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1458         matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1459         check(matcher, toSupplementaries("a\u2028"), false);
1460 
1461         report("Unix Lines");
1462     }
1463 
1464     private static void commentsTest() {
1465         int flags = Pattern.COMMENTS;
1466 
1467         Pattern pattern = Pattern.compile("aa \\# aa", flags);
1468         Matcher matcher = pattern.matcher("aa#aa");
1469         if (!matcher.matches())
1470             failCount++;
1471 
1472         pattern = Pattern.compile("aa  # blah", flags);
1473         matcher = pattern.matcher("aa");
1474         if (!matcher.matches())
1475             failCount++;
1476 
1477         pattern = Pattern.compile("aa blah", flags);
1478         matcher = pattern.matcher("aablah");
1479         if (!matcher.matches())
1480              failCount++;
1481 
1482         pattern = Pattern.compile("aa  # blah blech  ", flags);
1483         matcher = pattern.matcher("aa");
1484         if (!matcher.matches())
1485             failCount++;
1486 
1487         pattern = Pattern.compile("aa  # blah\n  ", flags);
1488         matcher = pattern.matcher("aa");
1489         if (!matcher.matches())
1490             failCount++;
1491 
1492         pattern = Pattern.compile("aa  # blah\nbc # blech", flags);
1493         matcher = pattern.matcher("aabc");
1494         if (!matcher.matches())
1495              failCount++;
1496 
1497         pattern = Pattern.compile("aa  # blah\nbc# blech", flags);
1498         matcher = pattern.matcher("aabc");
1499         if (!matcher.matches())
1500              failCount++;
1501 
1502         pattern = Pattern.compile("aa  # blah\nbc\\# blech", flags);
1503         matcher = pattern.matcher("aabc#blech");
1504         if (!matcher.matches())
1505              failCount++;
1506 
1507         // Supplementary character test
1508         pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1509         matcher = pattern.matcher(toSupplementaries("aa#aa"));
1510         if (!matcher.matches())
1511             failCount++;
1512 
1513         pattern = Pattern.compile(toSupplementaries("aa  # blah"), flags);
1514         matcher = pattern.matcher(toSupplementaries("aa"));
1515         if (!matcher.matches())
1516             failCount++;
1517 
1518         pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1519         matcher = pattern.matcher(toSupplementaries("aablah"));
1520         if (!matcher.matches())
1521              failCount++;
1522 
1523         pattern = Pattern.compile(toSupplementaries("aa  # blah blech  "), flags);
1524         matcher = pattern.matcher(toSupplementaries("aa"));
1525         if (!matcher.matches())
1526             failCount++;
1527 
1528         pattern = Pattern.compile(toSupplementaries("aa  # blah\n  "), flags);
1529         matcher = pattern.matcher(toSupplementaries("aa"));
1530         if (!matcher.matches())
1531             failCount++;
1532 
1533         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc # blech"), flags);
1534         matcher = pattern.matcher(toSupplementaries("aabc"));
1535         if (!matcher.matches())
1536              failCount++;
1537 
1538         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc# blech"), flags);
1539         matcher = pattern.matcher(toSupplementaries("aabc"));
1540         if (!matcher.matches())
1541              failCount++;
1542 
1543         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc\\# blech"), flags);
1544         matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1545         if (!matcher.matches())
1546              failCount++;
1547 
1548         report("Comments");
1549     }
1550 
1551     private static void caseFoldingTest() { // bug 4504687
1552         int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1553         Pattern pattern = Pattern.compile("aa", flags);
1554         Matcher matcher = pattern.matcher("ab");
1555         if (matcher.matches())
1556             failCount++;
1557 
1558         pattern = Pattern.compile("aA", flags);
1559         matcher = pattern.matcher("ab");
1560         if (matcher.matches())
1561             failCount++;
1562 
1563         pattern = Pattern.compile("aa", flags);
1564         matcher = pattern.matcher("aB");
1565         if (matcher.matches())
1566             failCount++;
1567         matcher = pattern.matcher("Ab");
1568         if (matcher.matches())
1569             failCount++;
1570 
1571         // ASCII               "a"
1572         // Latin-1 Supplement  "a" + grave
1573         // Cyrillic            "a"
1574         String[] patterns = new String[] {
1575             //single
1576             "a", "\u00e0", "\u0430",
1577             //slice
1578             "ab", "\u00e0\u00e1", "\u0430\u0431",
1579             //class single
1580             "[a]", "[\u00e0]", "[\u0430]",
1581             //class range
1582             "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1583             //back reference
1584             "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1585         };
1586 
1587         String[] texts = new String[] {
1588             "A", "\u00c0", "\u0410",
1589             "AB", "\u00c0\u00c1", "\u0410\u0411",
1590             "A", "\u00c0", "\u0410",
1591             "B", "\u00c2", "\u0411",
1592             "aA", "\u00e0\u00c0", "\u0430\u0410"
1593         };
1594 
1595         boolean[] expected = new boolean[] {
1596             true, false, false,
1597             true, false, false,
1598             true, false, false,
1599             true, false, false,
1600             true, false, false
1601         };
1602 
1603         flags = Pattern.CASE_INSENSITIVE;
1604         for (int i = 0; i < patterns.length; i++) {
1605             pattern = Pattern.compile(patterns[i], flags);
1606             matcher = pattern.matcher(texts[i]);
1607             if (matcher.matches() != expected[i]) {
1608                 System.out.println("<1> Failed at " + i);
1609                 failCount++;
1610             }
1611         }
1612 
1613         flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1614         for (int i = 0; i < patterns.length; i++) {
1615             pattern = Pattern.compile(patterns[i], flags);
1616             matcher = pattern.matcher(texts[i]);
1617             if (!matcher.matches()) {
1618                 System.out.println("<2> Failed at " + i);
1619                 failCount++;
1620             }
1621         }
1622         // flag unicode_case alone should do nothing
1623         flags = Pattern.UNICODE_CASE;
1624         for (int i = 0; i < patterns.length; i++) {
1625             pattern = Pattern.compile(patterns[i], flags);
1626             matcher = pattern.matcher(texts[i]);
1627             if (matcher.matches()) {
1628                 System.out.println("<3> Failed at " + i);
1629                 failCount++;
1630             }
1631         }
1632 
1633         // Special cases: i, I, u+0131 and u+0130
1634         flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1635         pattern = Pattern.compile("[h-j]+", flags);
1636         if (!pattern.matcher("\u0131\u0130").matches())
1637             failCount++;
1638         report("Case Folding");
1639     }
1640 
1641     private static void appendTest() {
1642         Pattern pattern = Pattern.compile("(ab)(cd)");
1643         Matcher matcher = pattern.matcher("abcd");
1644         String result = matcher.replaceAll("$2$1");
1645         if (!result.equals("cdab"))
1646             failCount++;
1647 
1648         String  s1 = "Swap all: first = 123, second = 456";
1649         String  s2 = "Swap one: first = 123, second = 456";
1650         String  r  = "$3$2$1";
1651         pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1652         matcher = pattern.matcher(s1);
1653 
1654         result = matcher.replaceAll(r);
1655         if (!result.equals("Swap all: 123 = first, 456 = second"))
1656             failCount++;
1657 
1658         matcher = pattern.matcher(s2);
1659 
1660         if (matcher.find()) {
1661             StringBuffer sb = new StringBuffer();
1662             matcher.appendReplacement(sb, r);
1663             matcher.appendTail(sb);
1664             result = sb.toString();
1665             if (!result.equals("Swap one: 123 = first, second = 456"))
1666                 failCount++;
1667         }
1668 
1669         // Supplementary character test
1670         pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1671         matcher = pattern.matcher(toSupplementaries("abcd"));
1672         result = matcher.replaceAll("$2$1");
1673         if (!result.equals(toSupplementaries("cdab")))
1674             failCount++;
1675 
1676         s1 = toSupplementaries("Swap all: first = 123, second = 456");
1677         s2 = toSupplementaries("Swap one: first = 123, second = 456");
1678         r  = toSupplementaries("$3$2$1");
1679         pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1680         matcher = pattern.matcher(s1);
1681 
1682         result = matcher.replaceAll(r);
1683         if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1684             failCount++;
1685 
1686         matcher = pattern.matcher(s2);
1687 
1688         if (matcher.find()) {
1689             StringBuffer sb = new StringBuffer();
1690             matcher.appendReplacement(sb, r);
1691             matcher.appendTail(sb);
1692             result = sb.toString();
1693             if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1694                 failCount++;
1695         }
1696         report("Append");
1697     }
1698 
1699     private static void splitTest() {
1700         Pattern pattern = Pattern.compile(":");
1701         String[] result = pattern.split("foo:and:boo", 2);
1702         if (!result[0].equals("foo"))
1703             failCount++;
1704         if (!result[1].equals("and:boo"))
1705             failCount++;
1706         // Supplementary character test
1707         Pattern patternX = Pattern.compile(toSupplementaries("X"));
1708         result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1709         if (!result[0].equals(toSupplementaries("foo")))
1710             failCount++;
1711         if (!result[1].equals(toSupplementaries("andXboo")))
1712             failCount++;
1713 
1714         CharBuffer cb = CharBuffer.allocate(100);
1715         cb.put("foo:and:boo");
1716         cb.flip();
1717         result = pattern.split(cb);
1718         if (!result[0].equals("foo"))
1719             failCount++;
1720         if (!result[1].equals("and"))
1721             failCount++;
1722         if (!result[2].equals("boo"))
1723             failCount++;
1724 
1725         // Supplementary character test
1726         CharBuffer cbs = CharBuffer.allocate(100);
1727         cbs.put(toSupplementaries("fooXandXboo"));
1728         cbs.flip();
1729         result = patternX.split(cbs);
1730         if (!result[0].equals(toSupplementaries("foo")))
1731             failCount++;
1732         if (!result[1].equals(toSupplementaries("and")))
1733             failCount++;
1734         if (!result[2].equals(toSupplementaries("boo")))
1735             failCount++;
1736 
1737         String source = "0123456789";
1738         for (int limit=-2; limit<3; limit++) {
1739             for (int x=0; x<10; x++) {
1740                 result = source.split(Integer.toString(x), limit);
1741                 int expectedLength = limit < 1 ? 2 : limit;
1742 
1743                 if ((limit == 0) && (x == 9)) {
1744                     // expected dropping of ""
1745                     if (result.length != 1)
1746                         failCount++;
1747                     if (!result[0].equals("012345678")) {
1748                         failCount++;
1749                     }
1750                 } else {
1751                     if (result.length != expectedLength) {
1752                         failCount++;
1753                     }
1754                     if (!result[0].equals(source.substring(0,x))) {
1755                         if (limit != 1) {
1756                             failCount++;
1757                         } else {
1758                             if (!result[0].equals(source.substring(0,10))) {
1759                                 failCount++;
1760                             }
1761                         }
1762                     }
1763                     if (expectedLength > 1) { // Check segment 2
1764                         if (!result[1].equals(source.substring(x+1,10)))
1765                             failCount++;
1766                     }
1767                 }
1768             }
1769         }
1770         // Check the case for no match found
1771         for (int limit=-2; limit<3; limit++) {
1772             result = source.split("e", limit);
1773             if (result.length != 1)
1774                 failCount++;
1775             if (!result[0].equals(source))
1776                 failCount++;
1777         }
1778         // Check the case for limit == 0, source = "";
1779         source = "";
1780         result = source.split("e", 0);
1781         if (result.length != 1)
1782             failCount++;
1783         if (!result[0].equals(source))
1784             failCount++;
1785 
1786         report("Split");
1787     }
1788 
1789     private static void negationTest() {
1790         Pattern pattern = Pattern.compile("[\\[@^]+");
1791         Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1792         if (!matcher.find())
1793             failCount++;
1794         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1795             failCount++;
1796         pattern = Pattern.compile("[@\\[^]+");
1797         matcher = pattern.matcher("@@@@[[[[^^^^");
1798         if (!matcher.find())
1799             failCount++;
1800         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1801             failCount++;
1802         pattern = Pattern.compile("[@\\[^@]+");
1803         matcher = pattern.matcher("@@@@[[[[^^^^");
1804         if (!matcher.find())
1805             failCount++;
1806         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1807             failCount++;
1808 
1809         pattern = Pattern.compile("\\)");
1810         matcher = pattern.matcher("xxx)xxx");
1811         if (!matcher.find())
1812             failCount++;
1813 
1814         report("Negation");
1815     }
1816 
1817     private static void ampersandTest() {
1818         Pattern pattern = Pattern.compile("[&@]+");
1819         check(pattern, "@@@@&&&&", true);
1820 
1821         pattern = Pattern.compile("[@&]+");
1822         check(pattern, "@@@@&&&&", true);
1823 
1824         pattern = Pattern.compile("[@\\&]+");
1825         check(pattern, "@@@@&&&&", true);
1826 
1827         report("Ampersand");
1828     }
1829 
1830     private static void octalTest() throws Exception {
1831         Pattern pattern = Pattern.compile("\\u0007");
1832         Matcher matcher = pattern.matcher("\u0007");
1833         if (!matcher.matches())
1834             failCount++;
1835         pattern = Pattern.compile("\\07");
1836         matcher = pattern.matcher("\u0007");
1837         if (!matcher.matches())
1838             failCount++;
1839         pattern = Pattern.compile("\\007");
1840         matcher = pattern.matcher("\u0007");
1841         if (!matcher.matches())
1842             failCount++;
1843         pattern = Pattern.compile("\\0007");
1844         matcher = pattern.matcher("\u0007");
1845         if (!matcher.matches())
1846             failCount++;
1847         pattern = Pattern.compile("\\040");
1848         matcher = pattern.matcher("\u0020");
1849         if (!matcher.matches())
1850             failCount++;
1851         pattern = Pattern.compile("\\0403");
1852         matcher = pattern.matcher("\u00203");
1853         if (!matcher.matches())
1854             failCount++;
1855         pattern = Pattern.compile("\\0103");
1856         matcher = pattern.matcher("\u0043");
1857         if (!matcher.matches())
1858             failCount++;
1859 
1860         report("Octal");
1861     }
1862 
1863     private static void longPatternTest() throws Exception {
1864         try {
1865             Pattern pattern = Pattern.compile(
1866                 "a 32-character-long pattern xxxx");
1867             pattern = Pattern.compile("a 33-character-long pattern xxxxx");
1868             pattern = Pattern.compile("a thirty four character long regex");
1869             StringBuffer patternToBe = new StringBuffer(101);
1870             for (int i=0; i<100; i++)
1871                 patternToBe.append((char)(97 + i%26));
1872             pattern = Pattern.compile(patternToBe.toString());
1873         } catch (PatternSyntaxException e) {
1874             failCount++;
1875         }
1876 
1877         // Supplementary character test
1878         try {
1879             Pattern pattern = Pattern.compile(
1880                 toSupplementaries("a 32-character-long pattern xxxx"));
1881             pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
1882             pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
1883             StringBuffer patternToBe = new StringBuffer(101*2);
1884             for (int i=0; i<100; i++)
1885                 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
1886                                                      + 97 + i%26));
1887             pattern = Pattern.compile(patternToBe.toString());
1888         } catch (PatternSyntaxException e) {
1889             failCount++;
1890         }
1891         report("LongPattern");
1892     }
1893 
1894     private static void group0Test() throws Exception {
1895         Pattern pattern = Pattern.compile("(tes)ting");
1896         Matcher matcher = pattern.matcher("testing");
1897         check(matcher, "testing");
1898 
1899         matcher.reset("testing");
1900         if (matcher.lookingAt()) {
1901             if (!matcher.group(0).equals("testing"))
1902                 failCount++;
1903         } else {
1904             failCount++;
1905         }
1906 
1907         matcher.reset("testing");
1908         if (matcher.matches()) {
1909             if (!matcher.group(0).equals("testing"))
1910                 failCount++;
1911         } else {
1912             failCount++;
1913         }
1914 
1915         pattern = Pattern.compile("(tes)ting");
1916         matcher = pattern.matcher("testing");
1917         if (matcher.lookingAt()) {
1918             if (!matcher.group(0).equals("testing"))
1919                 failCount++;
1920         } else {
1921             failCount++;
1922         }
1923 
1924         pattern = Pattern.compile("^(tes)ting");
1925         matcher = pattern.matcher("testing");
1926         if (matcher.matches()) {
1927             if (!matcher.group(0).equals("testing"))
1928                 failCount++;
1929         } else {
1930             failCount++;
1931         }
1932 
1933         // Supplementary character test
1934         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1935         matcher = pattern.matcher(toSupplementaries("testing"));
1936         check(matcher, toSupplementaries("testing"));
1937 
1938         matcher.reset(toSupplementaries("testing"));
1939         if (matcher.lookingAt()) {
1940             if (!matcher.group(0).equals(toSupplementaries("testing")))
1941                 failCount++;
1942         } else {
1943             failCount++;
1944         }
1945 
1946         matcher.reset(toSupplementaries("testing"));
1947         if (matcher.matches()) {
1948             if (!matcher.group(0).equals(toSupplementaries("testing")))
1949                 failCount++;
1950         } else {
1951             failCount++;
1952         }
1953 
1954         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1955         matcher = pattern.matcher(toSupplementaries("testing"));
1956         if (matcher.lookingAt()) {
1957             if (!matcher.group(0).equals(toSupplementaries("testing")))
1958                 failCount++;
1959         } else {
1960             failCount++;
1961         }
1962 
1963         pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
1964         matcher = pattern.matcher(toSupplementaries("testing"));
1965         if (matcher.matches()) {
1966             if (!matcher.group(0).equals(toSupplementaries("testing")))
1967                 failCount++;
1968         } else {
1969             failCount++;
1970         }
1971 
1972         report("Group0");
1973     }
1974 
1975     private static void findIntTest() throws Exception {
1976         Pattern p = Pattern.compile("blah");
1977         Matcher m = p.matcher("zzzzblahzzzzzblah");
1978         boolean result = m.find(2);
1979         if (!result)
1980             failCount++;
1981 
1982         p = Pattern.compile("$");
1983         m = p.matcher("1234567890");
1984         result = m.find(10);
1985         if (!result)
1986             failCount++;
1987         try {
1988             result = m.find(11);
1989             failCount++;
1990         } catch (IndexOutOfBoundsException e) {
1991             // correct result
1992         }
1993 
1994         // Supplementary character test
1995         p = Pattern.compile(toSupplementaries("blah"));
1996         m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
1997         result = m.find(2);
1998         if (!result)
1999             failCount++;
2000 
2001         report("FindInt");
2002     }
2003 
2004     private static void emptyPatternTest() throws Exception {
2005         Pattern p = Pattern.compile("");
2006         Matcher m = p.matcher("foo");
2007 
2008         // Should find empty pattern at beginning of input
2009         boolean result = m.find();
2010         if (result != true)
2011             failCount++;
2012         if (m.start() != 0)
2013             failCount++;
2014 
2015         // Should not match entire input if input is not empty
2016         m.reset();
2017         result = m.matches();
2018         if (result == true)
2019             failCount++;
2020 
2021         try {
2022             m.start(0);
2023             failCount++;
2024         } catch (IllegalStateException e) {
2025             // Correct result
2026         }
2027 
2028         // Should match entire input if input is empty
2029         m.reset("");
2030         result = m.matches();
2031         if (result != true)
2032             failCount++;
2033 
2034         result = Pattern.matches("", "");
2035         if (result != true)
2036             failCount++;
2037 
2038         result = Pattern.matches("", "foo");
2039         if (result == true)
2040             failCount++;
2041         report("EmptyPattern");
2042     }
2043 
2044     private static void charClassTest() throws Exception {
2045         Pattern pattern = Pattern.compile("blah[ab]]blech");
2046         check(pattern, "blahb]blech", true);
2047 
2048         pattern = Pattern.compile("[abc[def]]");
2049         check(pattern, "b", true);
2050 
2051         // Supplementary character tests
2052         pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2053         check(pattern, toSupplementaries("blahb]blech"), true);
2054 
2055         pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2056         check(pattern, toSupplementaries("b"), true);
2057 
2058         try {
2059             // u00ff when UNICODE_CASE
2060             pattern = Pattern.compile("[ab\u00ffcd]",
2061                                       Pattern.CASE_INSENSITIVE|
2062                                       Pattern.UNICODE_CASE);
2063             check(pattern, "ab\u00ffcd", true);
2064             check(pattern, "Ab\u0178Cd", true);
2065 
2066             // u00b5 when UNICODE_CASE
2067             pattern = Pattern.compile("[ab\u00b5cd]",
2068                                       Pattern.CASE_INSENSITIVE|
2069                                       Pattern.UNICODE_CASE);
2070             check(pattern, "ab\u00b5cd", true);
2071             check(pattern, "Ab\u039cCd", true);
2072         } catch (Exception e) { failCount++; }
2073 
2074         /* Special cases
2075            (1)LatinSmallLetterLongS u+017f
2076            (2)LatinSmallLetterDotlessI u+0131
2077            (3)LatineCapitalLetterIWithDotAbove u+0130
2078            (4)KelvinSign u+212a
2079            (5)AngstromSign u+212b
2080         */
2081         int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2082         pattern = Pattern.compile("[sik\u00c5]+", flags);
2083         if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2084             failCount++;
2085 
2086         report("CharClass");
2087     }
2088 
2089     private static void caretTest() throws Exception {
2090         Pattern pattern = Pattern.compile("\\w*");
2091         Matcher matcher = pattern.matcher("a#bc#def##g");
2092         check(matcher, "a");
2093         check(matcher, "");
2094         check(matcher, "bc");
2095         check(matcher, "");
2096         check(matcher, "def");
2097         check(matcher, "");
2098         check(matcher, "");
2099         check(matcher, "g");
2100         check(matcher, "");
2101         if (matcher.find())
2102             failCount++;
2103 
2104         pattern = Pattern.compile("^\\w*");
2105         matcher = pattern.matcher("a#bc#def##g");
2106         check(matcher, "a");
2107         if (matcher.find())
2108             failCount++;
2109 
2110         pattern = Pattern.compile("\\w");
2111         matcher = pattern.matcher("abc##x");
2112         check(matcher, "a");
2113         check(matcher, "b");
2114         check(matcher, "c");
2115         check(matcher, "x");
2116         if (matcher.find())
2117             failCount++;
2118 
2119         pattern = Pattern.compile("^\\w");
2120         matcher = pattern.matcher("abc##x");
2121         check(matcher, "a");
2122         if (matcher.find())
2123             failCount++;
2124 
2125         pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2126         matcher = pattern.matcher("abcdef-ghi\njklmno");
2127         check(matcher, "abc");
2128         if (matcher.find())
2129             failCount++;
2130 
2131         pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2132         matcher = pattern.matcher("abcdef-ghi\njklmno");
2133         check(matcher, "abc");
2134         check(matcher, "jkl");
2135         if (matcher.find())
2136             failCount++;
2137 
2138         pattern = Pattern.compile("^", Pattern.MULTILINE);
2139         matcher = pattern.matcher("this is some text");
2140         String result = matcher.replaceAll("X");
2141         if (!result.equals("Xthis is some text"))
2142             failCount++;
2143 
2144         pattern = Pattern.compile("^");
2145         matcher = pattern.matcher("this is some text");
2146         result = matcher.replaceAll("X");
2147         if (!result.equals("Xthis is some text"))
2148             failCount++;
2149 
2150         pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2151         matcher = pattern.matcher("this is some text\n");
2152         result = matcher.replaceAll("X");
2153         if (!result.equals("Xthis is some text\n"))
2154             failCount++;
2155 
2156         report("Caret");
2157     }
2158 
2159     private static void groupCaptureTest() throws Exception {
2160         // Independent group
2161         Pattern pattern = Pattern.compile("x+(?>y+)z+");
2162         Matcher matcher = pattern.matcher("xxxyyyzzz");
2163         matcher.find();
2164         try {
2165             String blah = matcher.group(1);
2166             failCount++;
2167         } catch (IndexOutOfBoundsException ioobe) {
2168             // Good result
2169         }
2170         // Pure group
2171         pattern = Pattern.compile("x+(?:y+)z+");
2172         matcher = pattern.matcher("xxxyyyzzz");
2173         matcher.find();
2174         try {
2175             String blah = matcher.group(1);
2176             failCount++;
2177         } catch (IndexOutOfBoundsException ioobe) {
2178             // Good result
2179         }
2180 
2181         // Supplementary character tests
2182         // Independent group
2183         pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2184         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2185         matcher.find();
2186         try {
2187             String blah = matcher.group(1);
2188             failCount++;
2189         } catch (IndexOutOfBoundsException ioobe) {
2190             // Good result
2191         }
2192         // Pure group
2193         pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2194         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2195         matcher.find();
2196         try {
2197             String blah = matcher.group(1);
2198             failCount++;
2199         } catch (IndexOutOfBoundsException ioobe) {
2200             // Good result
2201         }
2202 
2203         report("GroupCapture");
2204     }
2205 
2206     private static void backRefTest() throws Exception {
2207         Pattern pattern = Pattern.compile("(a*)bc\\1");
2208         check(pattern, "zzzaabcazzz", true);
2209 
2210         pattern = Pattern.compile("(a*)bc\\1");
2211         check(pattern, "zzzaabcaazzz", true);
2212 
2213         pattern = Pattern.compile("(abc)(def)\\1");
2214         check(pattern, "abcdefabc", true);
2215 
2216         pattern = Pattern.compile("(abc)(def)\\3");
2217         check(pattern, "abcdefabc", false);
2218 
2219         try {
2220             for (int i = 1; i < 10; i++) {
2221                 // Make sure backref 1-9 are always accepted
2222                 pattern = Pattern.compile("abcdef\\" + i);
2223                 // and fail to match if the target group does not exit
2224                 check(pattern, "abcdef", false);
2225             }
2226         } catch(PatternSyntaxException e) {
2227             failCount++;
2228         }
2229 
2230         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2231         check(pattern, "abcdefghija", false);
2232         check(pattern, "abcdefghija1", true);
2233 
2234         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2235         check(pattern, "abcdefghijkk", true);
2236 
2237         pattern = Pattern.compile("(a)bcdefghij\\11");
2238         check(pattern, "abcdefghija1", true);
2239 
2240         // Supplementary character tests
2241         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2242         check(pattern, toSupplementaries("zzzaabcazzz"), true);
2243 
2244         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2245         check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2246 
2247         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2248         check(pattern, toSupplementaries("abcdefabc"), true);
2249 
2250         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2251         check(pattern, toSupplementaries("abcdefabc"), false);
2252 
2253         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2254         check(pattern, toSupplementaries("abcdefghija"), false);
2255         check(pattern, toSupplementaries("abcdefghija1"), true);
2256 
2257         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2258         check(pattern, toSupplementaries("abcdefghijkk"), true);
2259 
2260         report("BackRef");
2261     }
2262 
2263     /**
2264      * Unicode Technical Report #18, section 2.6 End of Line
2265      * There is no empty line to be matched in the sequence \u000D\u000A
2266      * but there is an empty line in the sequence \u000A\u000D.
2267      */
2268     private static void anchorTest() throws Exception {
2269         Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2270         Matcher m = p.matcher("blah1\r\nblah2");
2271         m.find();
2272         m.find();
2273         if (!m.group().equals("blah2"))
2274             failCount++;
2275 
2276         m.reset("blah1\n\rblah2");
2277         m.find();
2278         m.find();
2279         m.find();
2280         if (!m.group().equals("blah2"))
2281             failCount++;
2282 
2283         // Test behavior of $ with \r\n at end of input
2284         p = Pattern.compile(".+$");
2285         m = p.matcher("blah1\r\n");
2286         if (!m.find())
2287             failCount++;
2288        if (!m.group().equals("blah1"))
2289             failCount++;
2290         if (m.find())
2291             failCount++;
2292 
2293         // Test behavior of $ with \r\n at end of input in multiline
2294         p = Pattern.compile(".+$", Pattern.MULTILINE);
2295         m = p.matcher("blah1\r\n");
2296         if (!m.find())
2297             failCount++;
2298         if (m.find())
2299             failCount++;
2300 
2301         // Test for $ recognition of \u0085 for bug 4527731
2302         p = Pattern.compile(".+$", Pattern.MULTILINE);
2303         m = p.matcher("blah1\u0085");
2304         if (!m.find())
2305             failCount++;
2306 
2307         // Supplementary character test
2308         p = Pattern.compile("^.*$", Pattern.MULTILINE);
2309         m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2310         m.find();
2311         m.find();
2312         if (!m.group().equals(toSupplementaries("blah2")))
2313             failCount++;
2314 
2315         m.reset(toSupplementaries("blah1\n\rblah2"));
2316         m.find();
2317         m.find();
2318         m.find();
2319         if (!m.group().equals(toSupplementaries("blah2")))
2320             failCount++;
2321 
2322         // Test behavior of $ with \r\n at end of input
2323         p = Pattern.compile(".+$");
2324         m = p.matcher(toSupplementaries("blah1\r\n"));
2325         if (!m.find())
2326             failCount++;
2327         if (!m.group().equals(toSupplementaries("blah1")))
2328             failCount++;
2329         if (m.find())
2330             failCount++;
2331 
2332         // Test behavior of $ with \r\n at end of input in multiline
2333         p = Pattern.compile(".+$", Pattern.MULTILINE);
2334         m = p.matcher(toSupplementaries("blah1\r\n"));
2335         if (!m.find())
2336             failCount++;
2337         if (m.find())
2338             failCount++;
2339 
2340         // Test for $ recognition of \u0085 for bug 4527731
2341         p = Pattern.compile(".+$", Pattern.MULTILINE);
2342         m = p.matcher(toSupplementaries("blah1\u0085"));
2343         if (!m.find())
2344             failCount++;
2345 
2346         report("Anchors");
2347     }
2348 
2349     /**
2350      * A basic sanity test of Matcher.lookingAt().
2351      */
2352     private static void lookingAtTest() throws Exception {
2353         Pattern p = Pattern.compile("(ab)(c*)");
2354         Matcher m = p.matcher("abccczzzabcczzzabccc");
2355 
2356         if (!m.lookingAt())
2357             failCount++;
2358 
2359         if (!m.group().equals(m.group(0)))
2360             failCount++;
2361 
2362         m = p.matcher("zzzabccczzzabcczzzabccczzz");
2363         if (m.lookingAt())
2364             failCount++;
2365 
2366         // Supplementary character test
2367         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2368         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2369 
2370         if (!m.lookingAt())
2371             failCount++;
2372 
2373         if (!m.group().equals(m.group(0)))
2374             failCount++;
2375 
2376         m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2377         if (m.lookingAt())
2378             failCount++;
2379 
2380         report("Looking At");
2381     }
2382 
2383     /**
2384      * A basic sanity test of Matcher.matches().
2385      */
2386     private static void matchesTest() throws Exception {
2387         // matches()
2388         Pattern p = Pattern.compile("ulb(c*)");
2389         Matcher m = p.matcher("ulbcccccc");
2390         if (!m.matches())
2391             failCount++;
2392 
2393         // find() but not matches()
2394         m.reset("zzzulbcccccc");
2395         if (m.matches())
2396             failCount++;
2397 
2398         // lookingAt() but not matches()
2399         m.reset("ulbccccccdef");
2400         if (m.matches())
2401             failCount++;
2402 
2403         // matches()
2404         p = Pattern.compile("a|ad");
2405         m = p.matcher("ad");
2406         if (!m.matches())
2407             failCount++;
2408 
2409         // Supplementary character test
2410         // matches()
2411         p = Pattern.compile(toSupplementaries("ulb(c*)"));
2412         m = p.matcher(toSupplementaries("ulbcccccc"));
2413         if (!m.matches())
2414             failCount++;
2415 
2416         // find() but not matches()
2417         m.reset(toSupplementaries("zzzulbcccccc"));
2418         if (m.matches())
2419             failCount++;
2420 
2421         // lookingAt() but not matches()
2422         m.reset(toSupplementaries("ulbccccccdef"));
2423         if (m.matches())
2424             failCount++;
2425 
2426         // matches()
2427         p = Pattern.compile(toSupplementaries("a|ad"));
2428         m = p.matcher(toSupplementaries("ad"));
2429         if (!m.matches())
2430             failCount++;
2431 
2432         report("Matches");
2433     }
2434 
2435     /**
2436      * A basic sanity test of Pattern.matches().
2437      */
2438     private static void patternMatchesTest() throws Exception {
2439         // matches()
2440         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2441                              toSupplementaries("ulbcccccc")))
2442             failCount++;
2443 
2444         // find() but not matches()
2445         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2446                             toSupplementaries("zzzulbcccccc")))
2447             failCount++;
2448 
2449         // lookingAt() but not matches()
2450         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2451                             toSupplementaries("ulbccccccdef")))
2452             failCount++;
2453 
2454         // Supplementary character test
2455         // matches()
2456         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2457                              toSupplementaries("ulbcccccc")))
2458             failCount++;
2459 
2460         // find() but not matches()
2461         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2462                             toSupplementaries("zzzulbcccccc")))
2463             failCount++;
2464 
2465         // lookingAt() but not matches()
2466         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2467                             toSupplementaries("ulbccccccdef")))
2468             failCount++;
2469 
2470         report("Pattern Matches");
2471     }
2472 
2473     /**
2474      * Canonical equivalence testing. Tests the ability of the engine
2475      * to match sequences that are not explicitly specified in the
2476      * pattern when they are considered equivalent by the Unicode Standard.
2477      */
2478     private static void ceTest() throws Exception {
2479         // Decomposed char outside char classes
2480         Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2481         Matcher m = p.matcher("test\u00e5");
2482         if (!m.matches())
2483             failCount++;
2484 
2485         m.reset("testa\u030a");
2486         if (!m.matches())
2487             failCount++;
2488 
2489         // Composed char outside char classes
2490         p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2491         m = p.matcher("test\u00e5");
2492         if (!m.matches())
2493             failCount++;
2494 
2495         m.reset("testa\u030a");
2496         if (!m.find())
2497             failCount++;
2498 
2499         // Decomposed char inside a char class
2500         p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2501         m = p.matcher("test\u00e5");
2502         if (!m.find())
2503             failCount++;
2504 
2505         m.reset("testa\u030a");
2506         if (!m.find())
2507             failCount++;
2508 
2509         // Composed char inside a char class
2510         p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2511         m = p.matcher("test\u00e5");
2512         if (!m.find())
2513             failCount++;
2514 
2515         m.reset("testa\u0300");
2516         if (!m.find())
2517             failCount++;
2518 
2519         m.reset("testa\u030a");
2520         if (!m.find())
2521             failCount++;
2522 
2523         // Marks that cannot legally change order and be equivalent
2524         p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2525         check(p, "testa\u0308\u0300", true);
2526         check(p, "testa\u0300\u0308", false);
2527 
2528         // Marks that can legally change order and be equivalent
2529         p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2530         check(p, "testa\u0308\u0323", true);
2531         check(p, "testa\u0323\u0308", true);
2532 
2533         // Test all equivalences of the sequence a\u0308\u0323\u0300
2534         p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2535         check(p, "testa\u0308\u0323\u0300", true);
2536         check(p, "testa\u0323\u0308\u0300", true);
2537         check(p, "testa\u0308\u0300\u0323", true);
2538         check(p, "test\u00e4\u0323\u0300", true);
2539         check(p, "test\u00e4\u0300\u0323", true);
2540 
2541         /*
2542          * The following canonical equivalence tests don't work. Bug id: 4916384.
2543          *
2544         // Decomposed hangul (jamos)
2545         p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2546         m = p.matcher("\u1100\u1161");
2547         if (!m.matches())
2548             failCount++;
2549 
2550         m.reset("\uac00");
2551         if (!m.matches())
2552             failCount++;
2553 
2554         // Composed hangul
2555         p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2556         m = p.matcher("\u1100\u1161");
2557         if (!m.matches())
2558             failCount++;
2559 
2560         m.reset("\uac00");
2561         if (!m.matches())
2562             failCount++;
2563 
2564         // Decomposed supplementary outside char classes
2565         p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2566         m = p.matcher("test\ud834\uddc0");
2567         if (!m.matches())
2568             failCount++;
2569 
2570         m.reset("test\ud834\uddbc\ud834\udd6f");
2571         if (!m.matches())
2572             failCount++;
2573 
2574         // Composed supplementary outside char classes
2575         p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2576         m.reset("test\ud834\uddbc\ud834\udd6f");
2577         if (!m.matches())
2578             failCount++;
2579 
2580         m = p.matcher("test\ud834\uddc0");
2581         if (!m.matches())
2582             failCount++;
2583 
2584         */
2585 
2586         report("Canonical Equivalence");
2587     }
2588 
2589     /**
2590      * A basic sanity test of Matcher.replaceAll().
2591      */
2592     private static void globalSubstitute() throws Exception {
2593         // Global substitution with a literal
2594         Pattern p = Pattern.compile("(ab)(c*)");
2595         Matcher m = p.matcher("abccczzzabcczzzabccc");
2596         if (!m.replaceAll("test").equals("testzzztestzzztest"))
2597             failCount++;
2598 
2599         m.reset("zzzabccczzzabcczzzabccczzz");
2600         if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2601             failCount++;
2602 
2603         // Global substitution with groups
2604         m.reset("zzzabccczzzabcczzzabccczzz");
2605         String result = m.replaceAll("$1");
2606         if (!result.equals("zzzabzzzabzzzabzzz"))
2607             failCount++;
2608 
2609         // Supplementary character test
2610         // Global substitution with a literal
2611         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2612         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2613         if (!m.replaceAll(toSupplementaries("test")).
2614             equals(toSupplementaries("testzzztestzzztest")))
2615             failCount++;
2616 
2617         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2618         if (!m.replaceAll(toSupplementaries("test")).
2619             equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2620             failCount++;
2621 
2622         // Global substitution with groups
2623         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2624         result = m.replaceAll("$1");
2625         if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2626             failCount++;
2627 
2628         report("Global Substitution");
2629     }
2630 
2631     /**
2632      * Tests the usage of Matcher.appendReplacement() with literal
2633      * and group substitutions.
2634      */
2635     private static void stringbufferSubstitute() throws Exception {
2636         // SB substitution with literal
2637         String blah = "zzzblahzzz";
2638         Pattern p = Pattern.compile("blah");
2639         Matcher m = p.matcher(blah);
2640         StringBuffer result = new StringBuffer();
2641         try {
2642             m.appendReplacement(result, "blech");
2643             failCount++;
2644         } catch (IllegalStateException e) {
2645         }
2646         m.find();
2647         m.appendReplacement(result, "blech");
2648         if (!result.toString().equals("zzzblech"))
2649             failCount++;
2650 
2651         m.appendTail(result);
2652         if (!result.toString().equals("zzzblechzzz"))
2653             failCount++;
2654 
2655         // SB substitution with groups
2656         blah = "zzzabcdzzz";
2657         p = Pattern.compile("(ab)(cd)*");
2658         m = p.matcher(blah);
2659         result = new StringBuffer();
2660         try {
2661             m.appendReplacement(result, "$1");
2662             failCount++;
2663         } catch (IllegalStateException e) {
2664         }
2665         m.find();
2666         m.appendReplacement(result, "$1");
2667         if (!result.toString().equals("zzzab"))
2668             failCount++;
2669 
2670         m.appendTail(result);
2671         if (!result.toString().equals("zzzabzzz"))
2672             failCount++;
2673 
2674         // SB substitution with 3 groups
2675         blah = "zzzabcdcdefzzz";
2676         p = Pattern.compile("(ab)(cd)*(ef)");
2677         m = p.matcher(blah);
2678         result = new StringBuffer();
2679         try {
2680             m.appendReplacement(result, "$1w$2w$3");
2681             failCount++;
2682         } catch (IllegalStateException e) {
2683         }
2684         m.find();
2685         m.appendReplacement(result, "$1w$2w$3");
2686         if (!result.toString().equals("zzzabwcdwef"))
2687             failCount++;
2688 
2689         m.appendTail(result);
2690         if (!result.toString().equals("zzzabwcdwefzzz"))
2691             failCount++;
2692 
2693         // SB substitution with groups and three matches
2694         // skipping middle match
2695         blah = "zzzabcdzzzabcddzzzabcdzzz";
2696         p = Pattern.compile("(ab)(cd*)");
2697         m = p.matcher(blah);
2698         result = new StringBuffer();
2699         try {
2700             m.appendReplacement(result, "$1");
2701             failCount++;
2702         } catch (IllegalStateException e) {
2703         }
2704         m.find();
2705         m.appendReplacement(result, "$1");
2706         if (!result.toString().equals("zzzab"))
2707             failCount++;
2708 
2709         m.find();
2710         m.find();
2711         m.appendReplacement(result, "$2");
2712         if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2713             failCount++;
2714 
2715         m.appendTail(result);
2716         if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2717             failCount++;
2718 
2719         // Check to make sure escaped $ is ignored
2720         blah = "zzzabcdcdefzzz";
2721         p = Pattern.compile("(ab)(cd)*(ef)");
2722         m = p.matcher(blah);
2723         result = new StringBuffer();
2724         m.find();
2725         m.appendReplacement(result, "$1w\\$2w$3");
2726         if (!result.toString().equals("zzzabw$2wef"))
2727             failCount++;
2728 
2729         m.appendTail(result);
2730         if (!result.toString().equals("zzzabw$2wefzzz"))
2731             failCount++;
2732 
2733         // Check to make sure a reference to nonexistent group causes error
2734         blah = "zzzabcdcdefzzz";
2735         p = Pattern.compile("(ab)(cd)*(ef)");
2736         m = p.matcher(blah);
2737         result = new StringBuffer();
2738         m.find();
2739         try {
2740             m.appendReplacement(result, "$1w$5w$3");
2741             failCount++;
2742         } catch (IndexOutOfBoundsException ioobe) {
2743             // Correct result
2744         }
2745 
2746         // Check double digit group references
2747         blah = "zzz123456789101112zzz";
2748         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2749         m = p.matcher(blah);
2750         result = new StringBuffer();
2751         m.find();
2752         m.appendReplacement(result, "$1w$11w$3");
2753         if (!result.toString().equals("zzz1w11w3"))
2754             failCount++;
2755 
2756         // Check to make sure it backs off $15 to $1 if only three groups
2757         blah = "zzzabcdcdefzzz";
2758         p = Pattern.compile("(ab)(cd)*(ef)");
2759         m = p.matcher(blah);
2760         result = new StringBuffer();
2761         m.find();
2762         m.appendReplacement(result, "$1w$15w$3");
2763         if (!result.toString().equals("zzzabwab5wef"))
2764             failCount++;
2765 
2766 
2767         // Supplementary character test
2768         // SB substitution with literal
2769         blah = toSupplementaries("zzzblahzzz");
2770         p = Pattern.compile(toSupplementaries("blah"));
2771         m = p.matcher(blah);
2772         result = new StringBuffer();
2773         try {
2774             m.appendReplacement(result, toSupplementaries("blech"));
2775             failCount++;
2776         } catch (IllegalStateException e) {
2777         }
2778         m.find();
2779         m.appendReplacement(result, toSupplementaries("blech"));
2780         if (!result.toString().equals(toSupplementaries("zzzblech")))
2781             failCount++;
2782 
2783         m.appendTail(result);
2784         if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2785             failCount++;
2786 
2787         // SB substitution with groups
2788         blah = toSupplementaries("zzzabcdzzz");
2789         p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2790         m = p.matcher(blah);
2791         result = new StringBuffer();
2792         try {
2793             m.appendReplacement(result, "$1");
2794             failCount++;
2795         } catch (IllegalStateException e) {
2796         }
2797         m.find();
2798         m.appendReplacement(result, "$1");
2799         if (!result.toString().equals(toSupplementaries("zzzab")))
2800             failCount++;
2801 
2802         m.appendTail(result);
2803         if (!result.toString().equals(toSupplementaries("zzzabzzz")))
2804             failCount++;
2805 
2806         // SB substitution with 3 groups
2807         blah = toSupplementaries("zzzabcdcdefzzz");
2808         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2809         m = p.matcher(blah);
2810         result = new StringBuffer();
2811         try {
2812             m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2813             failCount++;
2814         } catch (IllegalStateException e) {
2815         }
2816         m.find();
2817         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2818         if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
2819             failCount++;
2820 
2821         m.appendTail(result);
2822         if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
2823             failCount++;
2824 
2825         // SB substitution with groups and three matches
2826         // skipping middle match
2827         blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2828         p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2829         m = p.matcher(blah);
2830         result = new StringBuffer();
2831         try {
2832             m.appendReplacement(result, "$1");
2833             failCount++;
2834         } catch (IllegalStateException e) {
2835         }
2836         m.find();
2837         m.appendReplacement(result, "$1");
2838         if (!result.toString().equals(toSupplementaries("zzzab")))
2839             failCount++;
2840 
2841         m.find();
2842         m.find();
2843         m.appendReplacement(result, "$2");
2844         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
2845             failCount++;
2846 
2847         m.appendTail(result);
2848         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
2849             failCount++;
2850 
2851         // Check to make sure escaped $ is ignored
2852         blah = toSupplementaries("zzzabcdcdefzzz");
2853         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2854         m = p.matcher(blah);
2855         result = new StringBuffer();
2856         m.find();
2857         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2858         if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
2859             failCount++;
2860 
2861         m.appendTail(result);
2862         if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
2863             failCount++;
2864 
2865         // Check to make sure a reference to nonexistent group causes error
2866         blah = toSupplementaries("zzzabcdcdefzzz");
2867         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2868         m = p.matcher(blah);
2869         result = new StringBuffer();
2870         m.find();
2871         try {
2872             m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
2873             failCount++;
2874         } catch (IndexOutOfBoundsException ioobe) {
2875             // Correct result
2876         }
2877 
2878         // Check double digit group references
2879         blah = toSupplementaries("zzz123456789101112zzz");
2880         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2881         m = p.matcher(blah);
2882         result = new StringBuffer();
2883         m.find();
2884         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
2885         if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
2886             failCount++;
2887 
2888         // Check to make sure it backs off $15 to $1 if only three groups
2889         blah = toSupplementaries("zzzabcdcdefzzz");
2890         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2891         m = p.matcher(blah);
2892         result = new StringBuffer();
2893         m.find();
2894         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
2895         if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
2896             failCount++;
2897 
2898         // Check nothing has been appended into the output buffer if
2899         // the replacement string triggers IllegalArgumentException.
2900         p = Pattern.compile("(abc)");
2901         m = p.matcher("abcd");
2902         result = new StringBuffer();
2903         m.find();
2904         try {
2905             m.appendReplacement(result, ("xyz$g"));
2906             failCount++;
2907         } catch (IllegalArgumentException iae) {
2908             if (result.length() != 0)
2909                 failCount++;
2910         }
2911 
2912         report("SB Substitution");
2913     }
2914 
2915     /*
2916      * 5 groups of characters are created to make a substitution string.
2917      * A base string will be created including random lead chars, the
2918      * substitution string, and random trailing chars.
2919      * A pattern containing the 5 groups is searched for and replaced with:
2920      * random group + random string + random group.
2921      * The results are checked for correctness.
2922      */
2923     private static void substitutionBasher() {
2924         for (int runs = 0; runs<1000; runs++) {
2925             // Create a base string to work in
2926             int leadingChars = generator.nextInt(10);
2927             StringBuffer baseBuffer = new StringBuffer(100);
2928             String leadingString = getRandomAlphaString(leadingChars);
2929             baseBuffer.append(leadingString);
2930 
2931             // Create 5 groups of random number of random chars
2932             // Create the string to substitute
2933             // Create the pattern string to search for
2934             StringBuffer bufferToSub = new StringBuffer(25);
2935             StringBuffer bufferToPat = new StringBuffer(50);
2936             String[] groups = new String[5];
2937             for(int i=0; i<5; i++) {
2938                 int aGroupSize = generator.nextInt(5)+1;
2939                 groups[i] = getRandomAlphaString(aGroupSize);
2940                 bufferToSub.append(groups[i]);
2941                 bufferToPat.append('(');
2942                 bufferToPat.append(groups[i]);
2943                 bufferToPat.append(')');
2944             }
2945             String stringToSub = bufferToSub.toString();
2946             String pattern = bufferToPat.toString();
2947 
2948             // Place sub string into working string at random index
2949             baseBuffer.append(stringToSub);
2950 
2951             // Append random chars to end
2952             int trailingChars = generator.nextInt(10);
2953             String trailingString = getRandomAlphaString(trailingChars);
2954             baseBuffer.append(trailingString);
2955             String baseString = baseBuffer.toString();
2956 
2957             // Create test pattern and matcher
2958             Pattern p = Pattern.compile(pattern);
2959             Matcher m = p.matcher(baseString);
2960 
2961             // Reject candidate if pattern happens to start early
2962             m.find();
2963             if (m.start() < leadingChars)
2964                 continue;
2965 
2966             // Reject candidate if more than one match
2967             if (m.find())
2968                 continue;
2969 
2970             // Construct a replacement string with :
2971             // random group + random string + random group
2972             StringBuffer bufferToRep = new StringBuffer();
2973             int groupIndex1 = generator.nextInt(5);
2974             bufferToRep.append("$" + (groupIndex1 + 1));
2975             String randomMidString = getRandomAlphaString(5);
2976             bufferToRep.append(randomMidString);
2977             int groupIndex2 = generator.nextInt(5);
2978             bufferToRep.append("$" + (groupIndex2 + 1));
2979             String replacement = bufferToRep.toString();
2980 
2981             // Do the replacement
2982             String result = m.replaceAll(replacement);
2983 
2984             // Construct expected result
2985             StringBuffer bufferToRes = new StringBuffer();
2986             bufferToRes.append(leadingString);
2987             bufferToRes.append(groups[groupIndex1]);
2988             bufferToRes.append(randomMidString);
2989             bufferToRes.append(groups[groupIndex2]);
2990             bufferToRes.append(trailingString);
2991             String expectedResult = bufferToRes.toString();
2992 
2993             // Check results
2994             if (!result.equals(expectedResult))
2995                 failCount++;
2996         }
2997 
2998         report("Substitution Basher");
2999     }
3000 
3001     /**
3002      * Checks the handling of some escape sequences that the Pattern
3003      * class should process instead of the java compiler. These are
     * not in the file because the escapes should be processed
3005      * by the Pattern class when the regex is compiled.
3006      */
3007     private static void escapes() throws Exception {
3008         Pattern p = Pattern.compile("\\043");
3009         Matcher m = p.matcher("#");
3010         if (!m.find())
3011             failCount++;
3012 
3013         p = Pattern.compile("\\x23");
3014         m = p.matcher("#");
3015         if (!m.find())
3016             failCount++;
3017 
3018         p = Pattern.compile("\\u0023");
3019         m = p.matcher("#");
3020         if (!m.find())
3021             failCount++;
3022 
3023         report("Escape sequences");
3024     }
3025 
3026     /**
3027      * Checks the handling of blank input situations. These
3028      * tests are incompatible with my test file format.
3029      */
3030     private static void blankInput() throws Exception {
3031         Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3032         Matcher m = p.matcher("");
3033         if (m.find())
3034             failCount++;
3035 
3036         p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3037         m = p.matcher("");
3038         if (!m.find())
3039             failCount++;
3040 
3041         p = Pattern.compile("abc");
3042         m = p.matcher("");
3043         if (m.find())
3044             failCount++;
3045 
3046         p = Pattern.compile("a*");
3047         m = p.matcher("");
3048         if (!m.find())
3049             failCount++;
3050 
3051         report("Blank input");
3052     }
3053 
3054     /**
3055      * Tests the Boyer-Moore pattern matching of a character sequence
3056      * on randomly generated patterns.
3057      */
3058     private static void bm() throws Exception {
3059         doBnM('a');
3060         report("Boyer Moore (ASCII)");
3061 
3062         doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3063         report("Boyer Moore (Supplementary)");
3064     }
3065 
3066     private static void doBnM(int baseCharacter) throws Exception {
3067         int achar=0;
3068 
3069         for (int i=0; i<100; i++) {
3070             // Create a short pattern to search for
3071             int patternLength = generator.nextInt(7) + 4;
3072             StringBuffer patternBuffer = new StringBuffer(patternLength);
3073             for (int x=0; x<patternLength; x++) {
3074                 int ch = baseCharacter + generator.nextInt(26);
3075                 if (Character.isSupplementaryCodePoint(ch)) {
3076                     patternBuffer.append(Character.toChars(ch));
3077                 } else {
3078                     patternBuffer.append((char)ch);
3079                 }
3080             }
3081             String pattern =  patternBuffer.toString();
3082             Pattern p = Pattern.compile(pattern);
3083 
3084             // Create a buffer with random ASCII chars that does
3085             // not match the sample
3086             String toSearch = null;
3087             StringBuffer s = null;
3088             Matcher m = p.matcher("");
3089             do {
3090                 s = new StringBuffer(100);
3091                 for (int x=0; x<100; x++) {
3092                     int ch = baseCharacter + generator.nextInt(26);
3093                     if (Character.isSupplementaryCodePoint(ch)) {
3094                         s.append(Character.toChars(ch));
3095                     } else {
3096                         s.append((char)ch);
3097                     }
3098                 }
3099                 toSearch = s.toString();
3100                 m.reset(toSearch);
3101             } while (m.find());
3102 
3103             // Insert the pattern at a random spot
3104             int insertIndex = generator.nextInt(99);
3105             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3106                 insertIndex++;
3107             s = s.insert(insertIndex, pattern);
3108             toSearch = s.toString();
3109 
3110             // Make sure that the pattern is found
3111             m.reset(toSearch);
3112             if (!m.find())
3113                 failCount++;
3114 
3115             // Make sure that the match text is the pattern
3116             if (!m.group().equals(pattern))
3117                 failCount++;
3118 
3119             // Make sure match occured at insertion point
3120             if (m.start() != insertIndex)
3121                 failCount++;
3122         }
3123     }
3124 
3125     /**
3126      * Tests the matching of slices on randomly generated patterns.
3127      * The Boyer-Moore optimization is not done on these patterns
3128      * because it uses unicode case folding.
3129      */
3130     private static void slice() throws Exception {
3131         doSlice(Character.MAX_VALUE);
3132         report("Slice");
3133 
3134         doSlice(Character.MAX_CODE_POINT);
3135         report("Slice (Supplementary)");
3136     }
3137 
3138     private static void doSlice(int maxCharacter) throws Exception {
3139         Random generator = new Random();
3140         int achar=0;
3141 
3142         for (int i=0; i<100; i++) {
3143             // Create a short pattern to search for
3144             int patternLength = generator.nextInt(7) + 4;
3145             StringBuffer patternBuffer = new StringBuffer(patternLength);
3146             for (int x=0; x<patternLength; x++) {
3147                 int randomChar = 0;
3148                 while (!Character.isLetterOrDigit(randomChar))
3149                     randomChar = generator.nextInt(maxCharacter);
3150                 if (Character.isSupplementaryCodePoint(randomChar)) {
3151                     patternBuffer.append(Character.toChars(randomChar));
3152                 } else {
3153                     patternBuffer.append((char) randomChar);
3154                 }
3155             }
3156             String pattern =  patternBuffer.toString();
3157             Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3158 
3159             // Create a buffer with random chars that does not match the sample
3160             String toSearch = null;
3161             StringBuffer s = null;
3162             Matcher m = p.matcher("");
3163             do {
3164                 s = new StringBuffer(100);
3165                 for (int x=0; x<100; x++) {
3166                     int randomChar = 0;
3167                     while (!Character.isLetterOrDigit(randomChar))
3168                         randomChar = generator.nextInt(maxCharacter);
3169                     if (Character.isSupplementaryCodePoint(randomChar)) {
3170                         s.append(Character.toChars(randomChar));
3171                     } else {
3172                         s.append((char) randomChar);
3173                     }
3174                 }
3175                 toSearch = s.toString();
3176                 m.reset(toSearch);
3177             } while (m.find());
3178 
3179             // Insert the pattern at a random spot
3180             int insertIndex = generator.nextInt(99);
3181             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3182                 insertIndex++;
3183             s = s.insert(insertIndex, pattern);
3184             toSearch = s.toString();
3185 
3186             // Make sure that the pattern is found
3187             m.reset(toSearch);
3188             if (!m.find())
3189                 failCount++;
3190 
3191             // Make sure that the match text is the pattern
3192             if (!m.group().equals(pattern))
3193                 failCount++;
3194 
3195             // Make sure match occured at insertion point
3196             if (m.start() != insertIndex)
3197                 failCount++;
3198         }
3199     }
3200 
3201     private static void explainFailure(String pattern, String data,
3202                                        String expected, String actual) {
3203         System.err.println("----------------------------------------");
3204         System.err.println("Pattern = "+pattern);
3205         System.err.println("Data = "+data);
3206         System.err.println("Expected = " + expected);
3207         System.err.println("Actual   = " + actual);
3208     }
3209 
3210     private static void explainFailure(String pattern, String data,
3211                                        Throwable t) {
3212         System.err.println("----------------------------------------");
3213         System.err.println("Pattern = "+pattern);
3214         System.err.println("Data = "+data);
3215         t.printStackTrace(System.err);
3216     }
3217 
3218     // Testing examples from a file
3219 
3220     /**
3221      * Goes through the file "TestCases.txt" and creates many patterns
3222      * described in the file, matching the patterns against input lines in
3223      * the file, and comparing the results against the correct results
3224      * also found in the file. The file format is described in comments
3225      * at the head of the file.
3226      */
3227     private static void processFile(String fileName) throws Exception {
3228         File testCases = new File(System.getProperty("test.src", "."),
3229                                   fileName);
3230         FileInputStream in = new FileInputStream(testCases);
3231         BufferedReader r = new BufferedReader(new InputStreamReader(in));
3232 
3233         // Process next test case.
3234         String aLine;
3235         while((aLine = r.readLine()) != null) {
3236             // Read a line for pattern
3237             String patternString = grabLine(r);
3238             Pattern p = null;
3239             try {
3240                 p = compileTestPattern(patternString);
3241             } catch (PatternSyntaxException e) {
3242                 String dataString = grabLine(r);
3243                 String expectedResult = grabLine(r);
3244                 if (expectedResult.startsWith("error"))
3245                     continue;
3246                 explainFailure(patternString, dataString, e);
3247                 failCount++;
3248                 continue;
3249             }
3250 
3251             // Read a line for input string
3252             String dataString = grabLine(r);
3253             Matcher m = p.matcher(dataString);
3254             StringBuffer result = new StringBuffer();
3255 
3256             // Check for IllegalStateExceptions before a match
3257             failCount += preMatchInvariants(m);
3258 
3259             boolean found = m.find();
3260 
3261             if (found)
3262                 failCount += postTrueMatchInvariants(m);
3263             else
3264                 failCount += postFalseMatchInvariants(m);
3265 
3266             if (found) {
3267                 result.append("true ");
3268                 result.append(m.group(0) + " ");
3269             } else {
3270                 result.append("false ");
3271             }
3272 
3273             result.append(m.groupCount());
3274 
3275             if (found) {
3276                 for (int i=1; i<m.groupCount()+1; i++)
3277                     if (m.group(i) != null)
3278                         result.append(" " +m.group(i));
3279             }
3280 
3281             // Read a line for the expected result
3282             String expectedResult = grabLine(r);
3283 
3284             if (!result.toString().equals(expectedResult)) {
3285                 explainFailure(patternString, dataString, expectedResult, result.toString());
3286                 failCount++;
3287             }
3288         }
3289 
3290         report(fileName);
3291     }
3292 
3293     private static int preMatchInvariants(Matcher m) {
3294         int failCount = 0;
3295         try {
3296             m.start();
3297             failCount++;
3298         } catch (IllegalStateException ise) {}
3299         try {
3300             m.end();
3301             failCount++;
3302         } catch (IllegalStateException ise) {}
3303         try {
3304             m.group();
3305             failCount++;
3306         } catch (IllegalStateException ise) {}
3307         return failCount;
3308     }
3309 
3310     private static int postFalseMatchInvariants(Matcher m) {
3311         int failCount = 0;
3312         try {
3313             m.group();
3314             failCount++;
3315         } catch (IllegalStateException ise) {}
3316         try {
3317             m.start();
3318             failCount++;
3319         } catch (IllegalStateException ise) {}
3320         try {
3321             m.end();
3322             failCount++;
3323         } catch (IllegalStateException ise) {}
3324         return failCount;
3325     }
3326 
3327     private static int postTrueMatchInvariants(Matcher m) {
3328         int failCount = 0;
3329         //assert(m.start() = m.start(0);
3330         if (m.start() != m.start(0))
3331             failCount++;
3332         //assert(m.end() = m.end(0);
3333         if (m.start() != m.start(0))
3334             failCount++;
3335         //assert(m.group() = m.group(0);
3336         if (!m.group().equals(m.group(0)))
3337             failCount++;
3338         try {
3339             m.group(50);
3340             failCount++;
3341         } catch (IndexOutOfBoundsException ise) {}
3342 
3343         return failCount;
3344     }
3345 
3346     private static Pattern compileTestPattern(String patternString) {
3347         if (!patternString.startsWith("'")) {
3348             return Pattern.compile(patternString);
3349         }
3350 
3351         int break1 = patternString.lastIndexOf("'");
3352         String flagString = patternString.substring(
3353                                           break1+1, patternString.length());
3354         patternString = patternString.substring(1, break1);
3355 
3356         if (flagString.equals("i"))
3357             return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3358 
3359         if (flagString.equals("m"))
3360             return Pattern.compile(patternString, Pattern.MULTILINE);
3361 
3362         return Pattern.compile(patternString);
3363     }
3364 
3365     /**
3366      * Reads a line from the input file. Keeps reading lines until a non
3367      * empty non comment line is read. If the line contains a \n then
3368      * these two characters are replaced by a newline char. If a \\uxxxx
3369      * sequence is read then the sequence is replaced by the unicode char.
3370      */
3371     private static String grabLine(BufferedReader r) throws Exception {
3372         int index = 0;
3373         String line = r.readLine();
3374         while (line.startsWith("//") || line.length() < 1)
3375             line = r.readLine();
3376         while ((index = line.indexOf("\\n")) != -1) {
3377             StringBuffer temp = new StringBuffer(line);
3378             temp.replace(index, index+2, "\n");
3379             line = temp.toString();
3380         }
3381         while ((index = line.indexOf("\\u")) != -1) {
3382             StringBuffer temp = new StringBuffer(line);
3383             String value = temp.substring(index+2, index+6);
3384             char aChar = (char)Integer.parseInt(value, 16);
3385             String unicodeChar = "" + aChar;
3386             temp.replace(index, index+6, unicodeChar);
3387             line = temp.toString();
3388         }
3389 
3390         return line;
3391     }
3392 
3393     private static void check(Pattern p, String s, String g, String expected) {
3394         Matcher m = p.matcher(s);
3395         m.find();
3396         if (!m.group(g).equals(expected) ||
3397             s.charAt(m.start(g)) != expected.charAt(0) ||
3398             s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1))
3399             failCount++;
3400     }
3401 
3402     private static void checkReplaceFirst(String p, String s, String r, String expected)
3403     {
3404         if (!expected.equals(Pattern.compile(p)
3405                                     .matcher(s)
3406                                     .replaceFirst(r)))
3407             failCount++;
3408     }
3409 
3410     private static void checkReplaceAll(String p, String s, String r, String expected)
3411     {
3412         if (!expected.equals(Pattern.compile(p)
3413                                     .matcher(s)
3414                                     .replaceAll(r)))
3415             failCount++;
3416     }
3417 
3418     private static void checkExpectedFail(String p) {
3419         try {
3420             Pattern.compile(p);
3421         } catch (PatternSyntaxException pse) {
3422             //pse.printStackTrace();
3423             return;
3424         }
3425         failCount++;
3426     }
3427 
3428     private static void checkExpectedIAE(Matcher m, String g) {
3429         m.find();
3430         try {
3431             m.group(g);
3432         } catch (IllegalArgumentException x) {
3433             //iae.printStackTrace();
3434             try {
3435                 m.start(g);
3436             } catch (IllegalArgumentException xx) {
3437                 try {
3438                     m.start(g);
3439                 } catch (IllegalArgumentException xxx) {
3440                     return;
3441                 }
3442             }
3443         }
3444         failCount++;
3445     }
3446 
3447     private static void checkExpectedNPE(Matcher m) {
3448         m.find();
3449         try {
3450             m.group(null);
3451         } catch (NullPointerException x) {
3452             try {
3453                 m.start(null);
3454             } catch (NullPointerException xx) {
3455                 try {
3456                     m.end(null);
3457                 } catch (NullPointerException xxx) {
3458                     return;
3459                 }
3460             }
3461         }
3462         failCount++;
3463     }
3464 
3465     private static void namedGroupCaptureTest() throws Exception {
3466         check(Pattern.compile("x+(?<gname>y+)z+"),
3467               "xxxyyyzzz",
3468               "gname",
3469               "yyy");
3470 
3471         check(Pattern.compile("x+(?<gname8>y+)z+"),
3472               "xxxyyyzzz",
3473               "gname8",
3474               "yyy");
3475 
3476         //backref
3477         Pattern pattern = Pattern.compile("(a*)bc\\1");
3478         check(pattern, "zzzaabcazzz", true);  // found "abca"
3479 
3480         check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
3481               "zzzaabcaazzz", true);
3482 
3483         check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
3484               "abcdefabc", true);
3485 
3486         check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
3487               "abcdefghijkk", true);
3488 
3489         // Supplementary character tests
3490         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3491               toSupplementaries("zzzaabcazzz"), true);
3492 
3493         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3494               toSupplementaries("zzzaabcaazzz"), true);
3495 
3496         check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
3497               toSupplementaries("abcdefabc"), true);
3498 
3499         check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
3500                               "(?<gname>" +
3501                               toSupplementaries("k)") + "\\k<gname>"),
3502               toSupplementaries("abcdefghijkk"), true);
3503 
3504         check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
3505               "xxxyyyzzzyyy",
3506               "gname",
3507               "yyy");
3508 
3509         //replaceFirst/All
3510         checkReplaceFirst("(?<gn>ab)(c*)",
3511                           "abccczzzabcczzzabccc",
3512                           "${gn}",
3513                           "abzzzabcczzzabccc");
3514 
3515         checkReplaceAll("(?<gn>ab)(c*)",
3516                         "abccczzzabcczzzabccc",
3517                         "${gn}",
3518                         "abzzzabzzzab");
3519 
3520 
3521         checkReplaceFirst("(?<gn>ab)(c*)",
3522                           "zzzabccczzzabcczzzabccczzz",
3523                           "${gn}",
3524                           "zzzabzzzabcczzzabccczzz");
3525 
3526         checkReplaceAll("(?<gn>ab)(c*)",
3527                         "zzzabccczzzabcczzzabccczzz",
3528                         "${gn}",
3529                         "zzzabzzzabzzzabzzz");
3530 
3531         checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
3532                           "zzzabccczzzabcczzzabccczzz",
3533                           "${gn2}",
3534                           "zzzccczzzabcczzzabccczzz");
3535 
3536         checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
3537                         "zzzabccczzzabcczzzabccczzz",
3538                         "${gn2}",
3539                         "zzzccczzzcczzzccczzz");
3540 
3541         //toSupplementaries("(ab)(c*)"));
3542         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3543                            ")(?<gn2>" + toSupplementaries("c") + "*)",
3544                           toSupplementaries("abccczzzabcczzzabccc"),
3545                           "${gn1}",
3546                           toSupplementaries("abzzzabcczzzabccc"));
3547 
3548 
3549         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3550                         ")(?<gn2>" + toSupplementaries("c") + "*)",
3551                         toSupplementaries("abccczzzabcczzzabccc"),
3552                         "${gn1}",
3553                         toSupplementaries("abzzzabzzzab"));
3554 
3555         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3556                            ")(?<gn2>" + toSupplementaries("c") + "*)",
3557                           toSupplementaries("abccczzzabcczzzabccc"),
3558                           "${gn2}",
3559                           toSupplementaries("ccczzzabcczzzabccc"));
3560 
3561 
3562         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3563                         ")(?<gn2>" + toSupplementaries("c") + "*)",
3564                         toSupplementaries("abccczzzabcczzzabccc"),
3565                         "${gn2}",
3566                         toSupplementaries("ccczzzcczzzccc"));
3567 
3568         checkReplaceFirst("(?<dog>Dog)AndCat",
3569                           "zzzDogAndCatzzzDogAndCatzzz",
3570                           "${dog}",
3571                           "zzzDogzzzDogAndCatzzz");
3572 
3573 
3574         checkReplaceAll("(?<dog>Dog)AndCat",
3575                           "zzzDogAndCatzzzDogAndCatzzz",
3576                           "${dog}",
3577                           "zzzDogzzzDogzzz");
3578 
3579         // backref in Matcher & String
3580         if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
3581             !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
3582             failCount++;
3583 
3584         // negative
3585         checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
3586         checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
3587         checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
3588         checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
3589         checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
3590         checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3591                          "gnameX");
3592         checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
3593         report("NamedGroupCapture");
3594     }
3595 
3596     // This is for bug 6969132
3597     private static void nonBmpClassComplementTest() throws Exception {
3598         Pattern p = Pattern.compile("\\P{Lu}");
3599         Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3600         if (m.find() && m.start() == 1)
3601             failCount++;
3602 
3603         // from a unicode category
3604         p = Pattern.compile("\\P{Lu}");
3605         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3606         if (m.find())
3607             failCount++;
3608         if (!m.hitEnd())
3609             failCount++;
3610 
3611         // block
3612         p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
3613         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3614         if (m.find() && m.start() == 1)
3615             failCount++;
3616 
3617         report("NonBmpClassComplement");
3618     }
3619 
3620     private static void unicodePropertiesTest() throws Exception {
3621         // different forms
3622         if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
3623             !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
3624             !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
3625             !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
3626             !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
3627             !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
3628             !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
3629             !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
3630             !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
3631             !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
3632             failCount++;
3633 
3634         Matcher common  = Pattern.compile("\\p{script=Common}").matcher("");
3635         Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
3636         Matcher lastSM  = common;
3637         Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
3638 
3639         Matcher latin  = Pattern.compile("\\p{block=basic_latin}").matcher("");
3640         Matcher greek  = Pattern.compile("\\p{InGreek}").matcher("");
3641         Matcher lastBM = latin;
3642         Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
3643 
3644         for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
3645             if (cp >= 0x30000 && (cp & 0x70) == 0){
3646                 continue;  // only pick couple code points, they are the same
3647             }
3648 
3649             // Unicode Script
3650             Character.UnicodeScript script = Character.UnicodeScript.of(cp);
3651             Matcher m;
3652             String str = new String(Character.toChars(cp));
3653             if (script == lastScript) {
3654                  m = lastSM;
3655                  m.reset(str);
3656             } else {
3657                  m  = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
3658             }
3659             if (!m.matches()) {
3660                 failCount++;
3661             }
3662             Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
3663             other.reset(str);
3664             if (other.matches()) {
3665                 failCount++;
3666             }
3667             lastSM = m;
3668             lastScript = script;
3669 
3670             // Unicode Block
3671             Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
3672             if (block == null) {
3673                 //System.out.printf("Not a Block: cp=%x%n", cp);
3674                 continue;
3675             }
3676             if (block == lastBlock) {
3677                  m = lastBM;
3678                  m.reset(str);
3679             } else {
3680                  m  = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
3681             }
3682             if (!m.matches()) {
3683                 failCount++;
3684             }
3685             other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
3686             other.reset(str);
3687             if (other.matches()) {
3688                 failCount++;
3689             }
3690             lastBM = m;
3691             lastBlock = block;
3692         }
3693         report("unicodeProperties");
3694     }
3695 
3696     private static void unicodeHexNotationTest() throws Exception {
3697 
3698         // negative
3699         checkExpectedFail("\\x{-23}");
3700         checkExpectedFail("\\x{110000}");
3701         checkExpectedFail("\\x{}");
3702         checkExpectedFail("\\x{AB[ef]");
3703 
3704         // codepoint
3705         check("^\\x{1033c}$",              "\uD800\uDF3C", true);
3706         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
3707         check("^\\x{D800}\\x{DF3c}+$",     "\uD800\uDF3C", false);
3708         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
3709 
3710         // in class
3711         check("^[\\x{D800}\\x{DF3c}]+$",   "\uD800\uDF3C", false);
3712         check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
3713         check("^[\\x{D800}\\x{DF3C}]+$",   "\uD800\uDF3C", false);
3714         check("^[\\x{DF3C}\\x{D800}]+$",   "\uD800\uDF3C", false);
3715         check("^[\\x{D800}\\x{DF3C}]+$",   "\uDF3C\uD800", true);
3716         check("^[\\x{DF3C}\\x{D800}]+$",   "\uDF3C\uD800", true);
3717 
3718         for (int cp = 0; cp <= 0x10FFFF; cp++) {
3719              String s = "A" + new String(Character.toChars(cp)) + "B";
3720              String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
3721                                              : String.format("\\u%04x\\u%04x",
3722                                                (int) Character.toChars(cp)[0],
3723                                                (int) Character.toChars(cp)[1]);
3724              String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
3725              if (!Pattern.matches("A" + hexUTF16 + "B", s))
3726                  failCount++;
3727              if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
3728                  failCount++;
3729              if (!Pattern.matches("A" + hexCodePoint + "B", s))
3730                  failCount++;
3731              if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
3732                  failCount++;
3733          }
3734          report("unicodeHexNotation");
3735     }
3736 
3737     private static void unicodeClassesTest() throws Exception {
3738 
3739         Matcher lower  = Pattern.compile("\\p{Lower}").matcher("");
3740         Matcher upper  = Pattern.compile("\\p{Upper}").matcher("");
3741         Matcher ASCII  = Pattern.compile("\\p{ASCII}").matcher("");
3742         Matcher alpha  = Pattern.compile("\\p{Alpha}").matcher("");
3743         Matcher digit  = Pattern.compile("\\p{Digit}").matcher("");
3744         Matcher alnum  = Pattern.compile("\\p{Alnum}").matcher("");
3745         Matcher punct  = Pattern.compile("\\p{Punct}").matcher("");
3746         Matcher graph  = Pattern.compile("\\p{Graph}").matcher("");
3747         Matcher print  = Pattern.compile("\\p{Print}").matcher("");
3748         Matcher blank  = Pattern.compile("\\p{Blank}").matcher("");
3749         Matcher cntrl  = Pattern.compile("\\p{Cntrl}").matcher("");
3750         Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
3751         Matcher space  = Pattern.compile("\\p{Space}").matcher("");
3752         Matcher bound  = Pattern.compile("\\b").matcher("");
3753         Matcher word   = Pattern.compile("\\w++").matcher("");
3754         // UNICODE_CHARACTER_CLASS
3755         Matcher lowerU  = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3756         Matcher upperU  = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3757         Matcher ASCIIU  = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3758         Matcher alphaU  = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3759         Matcher digitU  = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3760         Matcher alnumU  = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3761         Matcher punctU  = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3762         Matcher graphU  = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3763         Matcher printU  = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3764         Matcher blankU  = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3765         Matcher cntrlU  = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3766         Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3767         Matcher spaceU  = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3768         Matcher boundU  = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3769         Matcher wordU   = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3770         // embedded flag (?U)
3771         Matcher lowerEU  = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3772         Matcher graphEU  = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3773         Matcher wordEU   = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3774 
3775         Matcher bwb    = Pattern.compile("\\b\\w\\b").matcher("");
3776         Matcher bwbU   = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3777         Matcher bwbEU  = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3778         // properties
3779         Matcher lowerP  = Pattern.compile("\\p{IsLowerCase}").matcher("");
3780         Matcher upperP  = Pattern.compile("\\p{IsUpperCase}").matcher("");
3781         Matcher titleP  = Pattern.compile("\\p{IsTitleCase}").matcher("");
3782         Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
3783         Matcher alphaP  = Pattern.compile("\\p{IsAlphabetic}").matcher("");
3784         Matcher ideogP  = Pattern.compile("\\p{IsIdeographic}").matcher("");
3785         Matcher cntrlP  = Pattern.compile("\\p{IsControl}").matcher("");
3786         Matcher spaceP  = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
3787         Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
3788         Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
3789         Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
3790 
3791         // javaMethod
3792         Matcher lowerJ  = Pattern.compile("\\p{javaLowerCase}").matcher("");
3793         Matcher upperJ  = Pattern.compile("\\p{javaUpperCase}").matcher("");
3794         Matcher alphaJ  = Pattern.compile("\\p{javaAlphabetic}").matcher("");
3795         Matcher ideogJ  = Pattern.compile("\\p{javaIdeographic}").matcher("");
3796 
3797         for (int cp = 1; cp < 0x30000; cp++) {
3798             String str = new String(Character.toChars(cp));
3799             int type = Character.getType(cp);
3800             if (// lower
3801                 POSIX_ASCII.isLower(cp)   != lower.reset(str).matches()  ||
3802                 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
3803                 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
3804                 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
3805                 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
3806                 // upper
3807                 POSIX_ASCII.isUpper(cp)   != upper.reset(str).matches()  ||
3808                 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
3809                 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
3810                 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
3811                 // alpha
3812                 POSIX_ASCII.isAlpha(cp)   != alpha.reset(str).matches()  ||
3813                 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
3814                 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
3815                 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
3816                 // digit
3817                 POSIX_ASCII.isDigit(cp)   != digit.reset(str).matches()  ||
3818                 Character.isDigit(cp)     != digitU.reset(str).matches() ||
3819                 // alnum
3820                 POSIX_ASCII.isAlnum(cp)   != alnum.reset(str).matches()  ||
3821                 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
3822                 // punct
3823                 POSIX_ASCII.isPunct(cp)   != punct.reset(str).matches()  ||
3824                 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
3825                 // graph
3826                 POSIX_ASCII.isGraph(cp)   != graph.reset(str).matches()  ||
3827                 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
3828                 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
3829                 // blank
3830                 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
3831                                           != blank.reset(str).matches()  ||
3832                 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
3833                 // print
3834                 POSIX_ASCII.isPrint(cp)   != print.reset(str).matches()  ||
3835                 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
3836                 // cntrl
3837                 POSIX_ASCII.isCntrl(cp)   != cntrl.reset(str).matches()  ||
3838                 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
3839                 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
3840                 // hexdigit
3841                 POSIX_ASCII.isHexDigit(cp)   != xdigit.reset(str).matches()  ||
3842                 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
3843                 // space
3844                 POSIX_ASCII.isSpace(cp)   != space.reset(str).matches()  ||
3845                 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
3846                 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
3847                 // word
3848                 POSIX_ASCII.isWord(cp)   != word.reset(str).matches()  ||
3849                 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
3850                 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
3851                 // bwordb
3852                 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
3853                 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
3854                 // properties
3855                 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
3856                 Character.isLetter(cp)    != letterP.reset(str).matches()||
3857                 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
3858                 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
3859                 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
3860                 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
3861                 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches())
3862                 failCount++;
3863         }
3864 
3865         // bounds/word align
3866         twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
3867         if (!bwbU.reset("\u0180sherman\u0400").matches())
3868             failCount++;
3869         twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
3870         if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
3871             failCount++;
3872         twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
3873         if (!bwbU.reset("\u0724\u0739\u0724").matches())
3874             failCount++;
3875         if (!bwbEU.reset("\u0724\u0739\u0724").matches())
3876             failCount++;
3877         report("unicodePredefinedClasses");
3878     }
3879 
3880     private static void horizontalAndVerticalWSTest() throws Exception {
3881         String hws = new String (new char[] {
3882                                      0x09, 0x20, 0xa0, 0x1680, 0x180e,
3883                                      0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
3884                                      0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
3885                                      0x202f, 0x205f, 0x3000 });
3886         String vws = new String (new char[] {
3887                                      0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
3888         if (!Pattern.compile("\\h+").matcher(hws).matches() ||
3889             !Pattern.compile("[\\h]+").matcher(hws).matches())
3890             failCount++;
3891         if (Pattern.compile("\\H").matcher(hws).find() ||
3892             Pattern.compile("[\\H]").matcher(hws).find())
3893             failCount++;
3894         if (!Pattern.compile("\\v+").matcher(vws).matches() ||
3895             !Pattern.compile("[\\v]+").matcher(vws).matches())
3896             failCount++;
3897         if (Pattern.compile("\\V").matcher(vws).find() ||
3898             Pattern.compile("[\\V]").matcher(vws).find())
3899             failCount++;
3900         String prefix = "abcd";
3901         String suffix = "efgh";
3902         String ng = "A";
3903         for (int i = 0; i < hws.length(); i++) {
3904             String c = String.valueOf(hws.charAt(i));
3905             Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
3906             if (!m.find() || !c.equals(m.group()))
3907                 failCount++;
3908             m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
3909             if (!m.find() || !c.equals(m.group()))
3910                 failCount++;
3911 
3912             m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
3913             if (!m.find() || !ng.equals(m.group()))
3914                 failCount++;
3915             m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
3916             if (!m.find() || !ng.equals(m.group()))
3917                 failCount++;
3918         }
3919         for (int i = 0; i < vws.length(); i++) {
3920             String c = String.valueOf(vws.charAt(i));
3921             Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
3922             if (!m.find() || !c.equals(m.group()))
3923                 failCount++;
3924             m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
3925             if (!m.find() || !c.equals(m.group()))
3926                 failCount++;
3927 
3928             m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
3929             if (!m.find() || !ng.equals(m.group()))
3930                 failCount++;
3931             m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
3932             if (!m.find() || !ng.equals(m.group()))
3933                 failCount++;
3934         }
3935         // \v in range is interpreted as 0x0B. This is the undocumented behavior
3936         if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
3937             failCount++;
3938         report("horizontalAndVerticalWSTest");
3939     }
3940 
3941     private static void linebreakTest() throws Exception {
3942         String linebreaks = new String (new char[] {
3943             0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
3944         String crnl = "\r\n";
3945         if (!Pattern.compile("\\R+").matcher(linebreaks).matches() ||
3946             !Pattern.compile("\\R").matcher(crnl).matches() ||
3947             Pattern.compile("\\R\\R").matcher(crnl).matches())
3948             failCount++;
3949         report("linebreakTest");
3950     }
3951 
3952     // #7189363
3953     private static void branchTest() throws Exception {
3954         if (!Pattern.compile("(a)?bc|d").matcher("d").find() ||     // greedy
3955             !Pattern.compile("(a)+bc|d").matcher("d").find() ||
3956             !Pattern.compile("(a)*bc|d").matcher("d").find() ||
3957             !Pattern.compile("(a)??bc|d").matcher("d").find() ||    // reluctant
3958             !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
3959             !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
3960             !Pattern.compile("(a)?+bc|d").matcher("d").find() ||    // possessive
3961             !Pattern.compile("(a)++bc|d").matcher("d").find() ||
3962             !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
3963             !Pattern.compile("(a)?bc|d").matcher("d").matches() ||  // greedy
3964             !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
3965             !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
3966             !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
3967             !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
3968             !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
3969             !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
3970             !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
3971             !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
3972             !Pattern.compile("(a)?bc|de").matcher("de").find() ||   // others
3973             !Pattern.compile("(a)??bc|de").matcher("de").find() ||
3974             !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
3975             !Pattern.compile("(a)??bc|de").matcher("de").matches())
3976             failCount++;
3977         report("branchTest");
3978     }
3979 
3980     // This test is for 8007395
3981     private static void groupCurlyNotFoundSuppTest() throws Exception {
3982         String input = "test this as \ud83d\ude0d";
3983         for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
3984                                           "test(.)*(@[a-zA-Z.]+)",
3985                                           "test([^B])+(@[a-zA-Z.]+)",
3986                                           "test([^B])*(@[a-zA-Z.]+)",
3987                                           "test(\\P{IsControl})+(@[a-zA-Z.]+)",
3988                                           "test(\\P{IsControl})*(@[a-zA-Z.]+)",
3989                                         }) {
3990             Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
3991                                .matcher(input);
3992             try {
3993                 if (m.find()) {
3994                     failCount++;
3995                 }
3996             } catch (Exception x) {
3997                 failCount++;
3998             }
3999         }
4000         report("GroupCurly NotFoundSupp");
4001     }
4002 
4003     // This test is for 8023647
4004     private static void groupCurlyBackoffTest() throws Exception {
4005         if (!"abc1c".matches("(\\w)+1\\1") ||
4006             "abc11".matches("(\\w)+1\\1")) {
4007             failCount++;
4008         }
4009         report("GroupCurly backoff");
4010     }
4011 
4012     // This test is for 8012646
4013     private static void patternAsPredicate() throws Exception {
4014         Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
4015 
4016         if (p.test("")) {
4017             failCount++;
4018         }
4019         if (!p.test("word")) {
4020             failCount++;
4021         }
4022         if (p.test("1234")) {
4023             failCount++;
4024         }
4025         report("Pattern.asPredicate");
4026     }
4027 }