1 /*
   2  * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /**
  25  * @test
  26  * @summary tests RegExp framework
  27  * @author Mike McCloskey
  28  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
  29  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
  30  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  31  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  32  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  33  * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
  34  * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647
  35  */
  36 
  37 import java.util.regex.*;
  38 import java.util.Random;
  39 import java.io.*;
  40 import java.util.*;
  41 import java.nio.CharBuffer;
  42 import java.util.function.Predicate;
  43 
  44 /**
  45  * This is a test class created to check the operation of
  46  * the Pattern and Matcher classes.
  47  */
  48 public class RegExTest {
  49 
  50     private static Random generator = new Random();
  51     private static boolean failure = false;
  52     private static int failCount = 0;
  53     private static String firstFailure = null;
  54 
  55     /**
  56      * Main to interpret arguments and run several tests.
  57      *
  58      */
  59     public static void main(String[] args) throws Exception {
  60         // Most of the tests are in a file
  61         processFile("TestCases.txt");
  62         //processFile("PerlCases.txt");
  63         processFile("BMPTestCases.txt");
  64         processFile("SupplementaryTestCases.txt");
  65 
  66         // These test many randomly generated char patterns
  67         bm();
  68         slice();
  69 
  70         // These are hard to put into the file
  71         escapes();
  72         blankInput();
  73 
  74         // Substitition tests on randomly generated sequences
  75         globalSubstitute();
  76         stringbufferSubstitute();
  77         substitutionBasher();
  78 
  79         // Canonical Equivalence
  80         ceTest();
  81 
  82         // Anchors
  83         anchorTest();
  84 
  85         // boolean match calls
  86         matchesTest();
  87         lookingAtTest();
  88 
  89         // Pattern API
  90         patternMatchesTest();
  91 
  92         // Misc
  93         lookbehindTest();
  94         nullArgumentTest();
  95         backRefTest();
  96         groupCaptureTest();
  97         caretTest();
  98         charClassTest();
  99         emptyPatternTest();
 100         findIntTest();
 101         group0Test();
 102         longPatternTest();
 103         octalTest();
 104         ampersandTest();
 105         negationTest();
 106         splitTest();
 107         appendTest();
 108         caseFoldingTest();
 109         commentsTest();
 110         unixLinesTest();
 111         replaceFirstTest();
 112         gTest();
 113         zTest();
 114         serializeTest();
 115         reluctantRepetitionTest();
 116         multilineDollarTest();
 117         dollarAtEndTest();
 118         caretBetweenTerminatorsTest();
 119         // This RFE rejected in Tiger numOccurrencesTest();
 120         javaCharClassTest();
 121         nonCaptureRepetitionTest();
 122         notCapturedGroupCurlyMatchTest();
 123         escapedSegmentTest();
 124         literalPatternTest();
 125         literalReplacementTest();
 126         regionTest();
 127         toStringTest();
 128         negatedCharClassTest();
 129         findFromTest();
 130         boundsTest();
 131         unicodeWordBoundsTest();
 132         caretAtEndTest();
 133         wordSearchTest();
 134         hitEndTest();
 135         toMatchResultTest();
 136         surrogatesInClassTest();
 137         removeQEQuotingTest();
 138         namedGroupCaptureTest();
 139         nonBmpClassComplementTest();
 140         unicodePropertiesTest();
 141         unicodeHexNotationTest();
 142         unicodeClassesTest();
 143         horizontalAndVerticalWSTest();
 144         linebreakTest();
 145         branchTest();
 146         groupCurlyNotFoundSuppTest();
 147         groupCurlyBackoffTest();
 148         patternAsPredicate();
 149         if (failure) {
 150             throw new
 151                 RuntimeException("RegExTest failed, 1st failure: " +
 152                                  firstFailure);
 153         } else {
 154             System.err.println("OKAY: All tests passed.");
 155         }
 156     }
 157 
 158     // Utility functions
 159 
 160     private static String getRandomAlphaString(int length) {
 161         StringBuffer buf = new StringBuffer(length);
 162         for (int i=0; i<length; i++) {
 163             char randChar = (char)(97 + generator.nextInt(26));
 164             buf.append(randChar);
 165         }
 166         return buf.toString();
 167     }
 168 
 169     private static void check(Matcher m, String expected) {
 170         m.find();
 171         if (!m.group().equals(expected))
 172             failCount++;
 173     }
 174 
 175     private static void check(Matcher m, String result, boolean expected) {
 176         m.find();
 177         if (m.group().equals(result) != expected)
 178             failCount++;
 179     }
 180 
 181     private static void check(Pattern p, String s, boolean expected) {
 182         if (p.matcher(s).find() != expected)
 183             failCount++;
 184     }
 185 
 186     private static void check(String p, String s, boolean expected) {
 187         Matcher matcher = Pattern.compile(p).matcher(s);
 188         if (matcher.find() != expected)
 189             failCount++;
 190     }
 191 
 192     private static void check(String p, char c, boolean expected) {
 193         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 194         Pattern pattern = Pattern.compile(propertyPattern);
 195         char[] ca = new char[1]; ca[0] = c;
 196         Matcher matcher = pattern.matcher(new String(ca));
 197         if (!matcher.find())
 198             failCount++;
 199     }
 200 
 201     private static void check(String p, int codePoint, boolean expected) {
 202         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 203         Pattern pattern = Pattern.compile(propertyPattern);
 204         char[] ca = Character.toChars(codePoint);
 205         Matcher matcher = pattern.matcher(new String(ca));
 206         if (!matcher.find())
 207             failCount++;
 208     }
 209 
 210     private static void check(String p, int flag, String input, String s,
 211                               boolean expected)
 212     {
 213         Pattern pattern = Pattern.compile(p, flag);
 214         Matcher matcher = pattern.matcher(input);
 215         if (expected)
 216             check(matcher, s, expected);
 217         else
 218             check(pattern, input, false);
 219     }
 220 
 221     private static void report(String testName) {
 222         int spacesToAdd = 30 - testName.length();
 223         StringBuffer paddedNameBuffer = new StringBuffer(testName);
 224         for (int i=0; i<spacesToAdd; i++)
 225             paddedNameBuffer.append(" ");
 226         String paddedName = paddedNameBuffer.toString();
 227         System.err.println(paddedName + ": " +
 228                            (failCount==0 ? "Passed":"Failed("+failCount+")"));
 229         if (failCount > 0) {
 230             failure = true;
 231 
 232             if (firstFailure == null) {
 233                 firstFailure = testName;
 234             }
 235         }
 236 
 237         failCount = 0;
 238     }
 239 
 240     /**
 241      * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
 242      * supplementary characters. This method does NOT fully take care
 243      * of the regex syntax.
 244      */
 245     private static String toSupplementaries(String s) {
 246         int length = s.length();
 247         StringBuffer sb = new StringBuffer(length * 2);
 248 
 249         for (int i = 0; i < length; ) {
 250             char c = s.charAt(i++);
 251             if (c == '\\') {
 252                 sb.append(c);
 253                 if (i < length) {
 254                     c = s.charAt(i++);
 255                     sb.append(c);
 256                     if (c == 'u') {
 257                         // assume no syntax error
 258                         sb.append(s.charAt(i++));
 259                         sb.append(s.charAt(i++));
 260                         sb.append(s.charAt(i++));
 261                         sb.append(s.charAt(i++));
 262                     }
 263                 }
 264             } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
 265                 sb.append('\ud800').append((char)('\udc00'+c));
 266             } else {
 267                 sb.append(c);
 268             }
 269         }
 270         return sb.toString();
 271     }
 272 
 273     // Regular expression tests
 274 
 275     // This is for bug 6178785
 276     // Test if an expected NPE gets thrown when passing in a null argument
 277     private static boolean check(Runnable test) {
 278         try {
 279             test.run();
 280             failCount++;
 281             return false;
 282         } catch (NullPointerException npe) {
 283             return true;
 284         }
 285     }
 286 
 287     private static void nullArgumentTest() {
 288         check(new Runnable() { public void run() { Pattern.compile(null); }});
 289         check(new Runnable() { public void run() { Pattern.matches(null, null); }});
 290         check(new Runnable() { public void run() { Pattern.matches("xyz", null);}});
 291         check(new Runnable() { public void run() { Pattern.quote(null);}});
 292         check(new Runnable() { public void run() { Pattern.compile("xyz").split(null);}});
 293         check(new Runnable() { public void run() { Pattern.compile("xyz").matcher(null);}});
 294 
 295         final Matcher m = Pattern.compile("xyz").matcher("xyz");
 296         m.matches();
 297         check(new Runnable() { public void run() { m.appendTail(null);}});
 298         check(new Runnable() { public void run() { m.replaceAll(null);}});
 299         check(new Runnable() { public void run() { m.replaceFirst(null);}});
 300         check(new Runnable() { public void run() { m.appendReplacement(null, null);}});
 301         check(new Runnable() { public void run() { m.reset(null);}});
 302         check(new Runnable() { public void run() { Matcher.quoteReplacement(null);}});
 303         //check(new Runnable() { public void run() { m.usePattern(null);}});
 304 
 305         report("Null Argument");
 306     }
 307 
    // This is for bug 6635133
 309     // Test if surrogate pair in Unicode escapes can be handled correctly.
 310     private static void surrogatesInClassTest() throws Exception {
 311         Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
 312         Matcher matcher = pattern.matcher("\ud834\udd22");
 313         if (!matcher.find())
 314             failCount++;
 315 
 316         report("Surrogate pair in Unicode escape");
 317     }
 318 
    // This is for bug 6990617
 320     // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
 321     // char encoding is only 2 or 3 digits instead of 4 and the first quoted
 322     // char is an octal digit.
 323     private static void removeQEQuotingTest() throws Exception {
 324         Pattern pattern =
 325             Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
 326         Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
 327         if (!matcher.find())
 328             failCount++;
 329 
 330         report("Remove Q/E Quoting");
 331     }
 332 
 333     // This is for bug 4988891
 334     // Test toMatchResult to see that it is a copy of the Matcher
 335     // that is not affected by subsequent operations on the original
 336     private static void toMatchResultTest() throws Exception {
 337         Pattern pattern = Pattern.compile("squid");
 338         Matcher matcher = pattern.matcher(
 339             "agiantsquidofdestinyasmallsquidoffate");
 340         matcher.find();
 341         int matcherStart1 = matcher.start();
 342         MatchResult mr = matcher.toMatchResult();
 343         if (mr == matcher)
 344             failCount++;
 345         int resultStart1 = mr.start();
 346         if (matcherStart1 != resultStart1)
 347             failCount++;
 348         matcher.find();
 349         int matcherStart2 = matcher.start();
 350         int resultStart2 = mr.start();
 351         if (matcherStart2 == resultStart2)
 352             failCount++;
 353         if (resultStart1 != resultStart2)
 354             failCount++;
 355         MatchResult mr2 = matcher.toMatchResult();
 356         if (mr == mr2)
 357             failCount++;
 358         if (mr2.start() != matcherStart2)
 359             failCount++;
 360         report("toMatchResult is a copy");
 361     }
 362 
 363     // This is for bug 5013885
 364     // Must test a slice to see if it reports hitEnd correctly
 365     private static void hitEndTest() throws Exception {
 366         // Basic test of Slice node
 367         Pattern p = Pattern.compile("^squidattack");
 368         Matcher m = p.matcher("squack");
 369         m.find();
 370         if (m.hitEnd())
 371             failCount++;
 372         m.reset("squid");
 373         m.find();
 374         if (!m.hitEnd())
 375             failCount++;
 376 
 377         // Test Slice, SliceA and SliceU nodes
 378         for (int i=0; i<3; i++) {
 379             int flags = 0;
 380             if (i==1) flags = Pattern.CASE_INSENSITIVE;
 381             if (i==2) flags = Pattern.UNICODE_CASE;
 382             p = Pattern.compile("^abc", flags);
 383             m = p.matcher("ad");
 384             m.find();
 385             if (m.hitEnd())
 386                 failCount++;
 387             m.reset("ab");
 388             m.find();
 389             if (!m.hitEnd())
 390                 failCount++;
 391         }
 392 
 393         // Test Boyer-Moore node
 394         p = Pattern.compile("catattack");
 395         m = p.matcher("attack");
 396         m.find();
 397         if (!m.hitEnd())
 398             failCount++;
 399 
 400         p = Pattern.compile("catattack");
 401         m = p.matcher("attackattackattackcatatta");
 402         m.find();
 403         if (!m.hitEnd())
 404             failCount++;
 405         report("hitEnd from a Slice");
 406     }
 407 
 408     // This is for bug 4997476
 409     // It is weird code submitted by customer demonstrating a regression
 410     private static void wordSearchTest() throws Exception {
 411         String testString = new String("word1 word2 word3");
 412         Pattern p = Pattern.compile("\\b");
 413         Matcher m = p.matcher(testString);
 414         int position = 0;
 415         int start = 0;
 416         while (m.find(position)) {
 417             start = m.start();
 418             if (start == testString.length())
 419                 break;
 420             if (m.find(start+1)) {
 421                 position = m.start();
 422             } else {
 423                 position = testString.length();
 424             }
 425             if (testString.substring(start, position).equals(" "))
 426                 continue;
 427             if (!testString.substring(start, position-1).startsWith("word"))
 428                 failCount++;
 429         }
 430         report("Customer word search");
 431     }
 432 
 433     // This is for bug 4994840
 434     private static void caretAtEndTest() throws Exception {
 435         // Problem only occurs with multiline patterns
 436         // containing a beginning-of-line caret "^" followed
 437         // by an expression that also matches the empty string.
 438         Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
 439         Matcher matcher = pattern.matcher("\r");
 440         matcher.find();
 441         matcher.find();
 442         report("Caret at end");
 443     }
 444 
 445     // This test is for 4979006
 446     // Check to see if word boundary construct properly handles unicode
 447     // non spacing marks
 448     private static void unicodeWordBoundsTest() throws Exception {
 449         String spaces = "  ";
 450         String wordChar = "a";
 451         String nsm = "\u030a";
 452 
 453         assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
 454 
 455         Pattern pattern = Pattern.compile("\\b");
 456         Matcher matcher = pattern.matcher("");
 457         // S=other B=word character N=non spacing mark .=word boundary
 458         // SS.BB.SS
 459         String input = spaces + wordChar + wordChar + spaces;
 460         twoFindIndexes(input, matcher, 2, 4);
 461         // SS.BBN.SS
 462         input = spaces + wordChar +wordChar + nsm + spaces;
 463         twoFindIndexes(input, matcher, 2, 5);
 464         // SS.BN.SS
 465         input = spaces + wordChar + nsm + spaces;
 466         twoFindIndexes(input, matcher, 2, 4);
 467         // SS.BNN.SS
 468         input = spaces + wordChar + nsm + nsm + spaces;
 469         twoFindIndexes(input, matcher, 2, 5);
 470         // SSN.BB.SS
 471         input = spaces + nsm + wordChar + wordChar + spaces;
 472         twoFindIndexes(input, matcher, 3, 5);
 473         // SS.BNB.SS
 474         input = spaces + wordChar + nsm + wordChar + spaces;
 475         twoFindIndexes(input, matcher, 2, 5);
 476         // SSNNSS
 477         input = spaces + nsm + nsm + spaces;
 478         matcher.reset(input);
 479         if (matcher.find())
 480             failCount++;
 481         // SSN.BBN.SS
 482         input = spaces + nsm + wordChar + wordChar + nsm + spaces;
 483         twoFindIndexes(input, matcher, 3, 6);
 484 
 485         report("Unicode word boundary");
 486     }
 487 
 488     private static void twoFindIndexes(String input, Matcher matcher, int a,
 489                                        int b) throws Exception
 490     {
 491         matcher.reset(input);
 492         matcher.find();
 493         if (matcher.start() != a)
 494             failCount++;
 495         matcher.find();
 496         if (matcher.start() != b)
 497             failCount++;
 498     }
 499 
 500     // This test is for 6284152
 501     static void check(String regex, String input, String[] expected) {
 502         List<String> result = new ArrayList<String>();
 503         Pattern p = Pattern.compile(regex);
 504         Matcher m = p.matcher(input);
 505         while (m.find()) {
 506             result.add(m.group());
 507         }
 508         if (!Arrays.asList(expected).equals(result))
 509             failCount++;
 510     }
 511 
 512     private static void lookbehindTest() throws Exception {
 513         //Positive
 514         check("(?<=%.{0,5})foo\\d",
 515               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 516               new String[]{"foo1", "foo2", "foo3"});
 517 
 518         //boundary at end of the lookbehind sub-regex should work consistently
 519         //with the boundary just after the lookbehind sub-regex
 520         check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
 521         check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
 522         check("(?<!abc )\\bfoo", "abc foo", new String[0]);
 523         check("(?<!abc \\b)foo", "abc foo", new String[0]);
 524 
 525         //Negative
 526         check("(?<!%.{0,5})foo\\d",
 527               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 528               new String[] {"foo4", "foo5"});
 529 
 530         //Positive greedy
 531         check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
 532 
 533         //Positive reluctant
 534         check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
 535 
 536         //supplementary
 537         check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 538               new String[] {"fo\ud800\udc00o"});
 539         check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 540               new String[] {"fo\ud800\udc00o"});
 541         check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
 542               new String[] {"fo\ud800\udc00o"});
 543         check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
 544               new String[] {"fo\ud800\udc00o"});
 545         report("Lookbehind");
 546     }
 547 
 548     // This test is for 4938995
 549     // Check to see if weak region boundaries are transparent to
 550     // lookahead and lookbehind constructs
 551     private static void boundsTest() throws Exception {
 552         String fullMessage = "catdogcat";
 553         Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
 554         Matcher matcher = pattern.matcher("catdogca");
 555         matcher.useTransparentBounds(true);
 556         if (matcher.find())
 557             failCount++;
 558         matcher.reset("atdogcat");
 559         if (matcher.find())
 560             failCount++;
 561         matcher.reset(fullMessage);
 562         if (!matcher.find())
 563             failCount++;
 564         matcher.reset(fullMessage);
 565         matcher.region(0,9);
 566         if (!matcher.find())
 567             failCount++;
 568         matcher.reset(fullMessage);
 569         matcher.region(0,6);
 570         if (!matcher.find())
 571             failCount++;
 572         matcher.reset(fullMessage);
 573         matcher.region(3,6);
 574         if (!matcher.find())
 575             failCount++;
 576         matcher.useTransparentBounds(false);
 577         if (matcher.find())
 578             failCount++;
 579 
 580         // Negative lookahead/lookbehind
 581         pattern = Pattern.compile("(?<!cat)dog(?!cat)");
 582         matcher = pattern.matcher("dogcat");
 583         matcher.useTransparentBounds(true);
 584         matcher.region(0,3);
 585         if (matcher.find())
 586             failCount++;
 587         matcher.reset("catdog");
 588         matcher.region(3,6);
 589         if (matcher.find())
 590             failCount++;
 591         matcher.useTransparentBounds(false);
 592         matcher.reset("dogcat");
 593         matcher.region(0,3);
 594         if (!matcher.find())
 595             failCount++;
 596         matcher.reset("catdog");
 597         matcher.region(3,6);
 598         if (!matcher.find())
 599             failCount++;
 600 
 601         report("Region bounds transparency");
 602     }
 603 
 604     // This test is for 4945394
 605     private static void findFromTest() throws Exception {
 606         String message = "This is 40 $0 message.";
 607         Pattern pat = Pattern.compile("\\$0");
 608         Matcher match = pat.matcher(message);
 609         if (!match.find())
 610             failCount++;
 611         if (match.find())
 612             failCount++;
 613         if (match.find())
 614             failCount++;
 615         report("Check for alternating find");
 616     }
 617 
 618     // This test is for 4872664 and 4892980
 619     private static void negatedCharClassTest() throws Exception {
 620         Pattern pattern = Pattern.compile("[^>]");
 621         Matcher matcher = pattern.matcher("\u203A");
 622         if (!matcher.matches())
 623             failCount++;
 624         pattern = Pattern.compile("[^fr]");
 625         matcher = pattern.matcher("a");
 626         if (!matcher.find())
 627             failCount++;
 628         matcher.reset("\u203A");
 629         if (!matcher.find())
 630             failCount++;
 631         String s = "for";
 632         String result[] = s.split("[^fr]");
 633         if (!result[0].equals("f"))
 634             failCount++;
 635         if (!result[1].equals("r"))
 636             failCount++;
 637         s = "f\u203Ar";
 638         result = s.split("[^fr]");
 639         if (!result[0].equals("f"))
 640             failCount++;
 641         if (!result[1].equals("r"))
 642             failCount++;
 643 
 644         // Test adding to bits, subtracting a node, then adding to bits again
 645         pattern = Pattern.compile("[^f\u203Ar]");
 646         matcher = pattern.matcher("a");
 647         if (!matcher.find())
 648             failCount++;
 649         matcher.reset("f");
 650         if (matcher.find())
 651             failCount++;
 652         matcher.reset("\u203A");
 653         if (matcher.find())
 654             failCount++;
 655         matcher.reset("r");
 656         if (matcher.find())
 657             failCount++;
 658         matcher.reset("\u203B");
 659         if (!matcher.find())
 660             failCount++;
 661 
 662         // Test subtracting a node, adding to bits, subtracting again
 663         pattern = Pattern.compile("[^\u203Ar\u203B]");
 664         matcher = pattern.matcher("a");
 665         if (!matcher.find())
 666             failCount++;
 667         matcher.reset("\u203A");
 668         if (matcher.find())
 669             failCount++;
 670         matcher.reset("r");
 671         if (matcher.find())
 672             failCount++;
 673         matcher.reset("\u203B");
 674         if (matcher.find())
 675             failCount++;
 676         matcher.reset("\u203C");
 677         if (!matcher.find())
 678             failCount++;
 679 
 680         report("Negated Character Class");
 681     }
 682 
 683     // This test is for 4628291
 684     private static void toStringTest() throws Exception {
 685         Pattern pattern = Pattern.compile("b+");
 686         if (pattern.toString() != "b+")
 687             failCount++;
 688         Matcher matcher = pattern.matcher("aaabbbccc");
 689         String matcherString = matcher.toString(); // unspecified
 690         matcher.find();
 691         matcherString = matcher.toString(); // unspecified
 692         matcher.region(0,3);
 693         matcherString = matcher.toString(); // unspecified
 694         matcher.reset();
 695         matcherString = matcher.toString(); // unspecified
 696         report("toString");
 697     }
 698 
 699     // This test is for 4808962
 700     private static void literalPatternTest() throws Exception {
 701         int flags = Pattern.LITERAL;
 702 
 703         Pattern pattern = Pattern.compile("abc\\t$^", flags);
 704         check(pattern, "abc\\t$^", true);
 705 
 706         pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
 707         check(pattern, "abc\\t$^", true);
 708 
 709         pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
 710         check(pattern, "\\Qa^$bcabc\\E", true);
 711         check(pattern, "a^$bcabc", false);
 712 
 713         pattern = Pattern.compile("\\\\Q\\\\E");
 714         check(pattern, "\\Q\\E", true);
 715 
 716         pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
 717         check(pattern, "abcefg\\Q\\Ehij", true);
 718 
 719         pattern = Pattern.compile("\\\\\\Q\\\\E");
 720         check(pattern, "\\\\\\\\", true);
 721 
 722         pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
 723         check(pattern, "\\Qa^$bcabc\\E", true);
 724         check(pattern, "a^$bcabc", false);
 725 
 726         pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
 727         check(pattern, "\\Qabc\\Edef", true);
 728         check(pattern, "abcdef", false);
 729 
 730         pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
 731         check(pattern, "abc\\Edef", true);
 732         check(pattern, "abcdef", false);
 733 
 734         pattern = Pattern.compile(Pattern.quote("\\E"));
 735         check(pattern, "\\E", true);
 736 
 737         pattern = Pattern.compile("((((abc.+?:)", flags);
 738         check(pattern, "((((abc.+?:)", true);
 739 
 740         flags |= Pattern.MULTILINE;
 741 
 742         pattern = Pattern.compile("^cat$", flags);
 743         check(pattern, "abc^cat$def", true);
 744         check(pattern, "cat", false);
 745 
 746         flags |= Pattern.CASE_INSENSITIVE;
 747 
 748         pattern = Pattern.compile("abcdef", flags);
 749         check(pattern, "ABCDEF", true);
 750         check(pattern, "AbCdEf", true);
 751 
 752         flags |= Pattern.DOTALL;
 753 
 754         pattern = Pattern.compile("a...b", flags);
 755         check(pattern, "A...b", true);
 756         check(pattern, "Axxxb", false);
 757 
 758         flags |= Pattern.CANON_EQ;
 759 
 760         Pattern p = Pattern.compile("testa\u030a", flags);
 761         check(pattern, "testa\u030a", false);
 762         check(pattern, "test\u00e5", false);
 763 
 764         // Supplementary character test
 765         flags = Pattern.LITERAL;
 766 
 767         pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
 768         check(pattern, toSupplementaries("abc\\t$^"), true);
 769 
 770         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
 771         check(pattern, toSupplementaries("abc\\t$^"), true);
 772 
 773         pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
 774         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 775         check(pattern, toSupplementaries("a^$bcabc"), false);
 776 
 777         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
 778         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 779         check(pattern, toSupplementaries("a^$bcabc"), false);
 780 
 781         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
 782         check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
 783         check(pattern, toSupplementaries("abcdef"), false);
 784 
 785         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
 786         check(pattern, toSupplementaries("abc\\Edef"), true);
 787         check(pattern, toSupplementaries("abcdef"), false);
 788 
 789         pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
 790         check(pattern, toSupplementaries("((((abc.+?:)"), true);
 791 
 792         flags |= Pattern.MULTILINE;
 793 
 794         pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
 795         check(pattern, toSupplementaries("abc^cat$def"), true);
 796         check(pattern, toSupplementaries("cat"), false);
 797 
 798         flags |= Pattern.DOTALL;
 799 
 800         // note: this is case-sensitive.
 801         pattern = Pattern.compile(toSupplementaries("a...b"), flags);
 802         check(pattern, toSupplementaries("a...b"), true);
 803         check(pattern, toSupplementaries("axxxb"), false);
 804 
 805         flags |= Pattern.CANON_EQ;
 806 
 807         String t = toSupplementaries("test");
 808         p = Pattern.compile(t + "a\u030a", flags);
 809         check(pattern, t + "a\u030a", false);
 810         check(pattern, t + "\u00e5", false);
 811 
 812         report("Literal pattern");
 813     }
 814 
 815     // This test is for 4803179
 816     // This test is also for 4808962, replacement parts
 817     private static void literalReplacementTest() throws Exception {
 818         int flags = Pattern.LITERAL;
 819 
 820         Pattern pattern = Pattern.compile("abc", flags);
 821         Matcher matcher = pattern.matcher("zzzabczzz");
 822         String replaceTest = "$0";
 823         String result = matcher.replaceAll(replaceTest);
 824         if (!result.equals("zzzabczzz"))
 825             failCount++;
 826 
 827         matcher.reset();
 828         String literalReplacement = matcher.quoteReplacement(replaceTest);
 829         result = matcher.replaceAll(literalReplacement);
 830         if (!result.equals("zzz$0zzz"))
 831             failCount++;
 832 
 833         matcher.reset();
 834         replaceTest = "\\t$\\$";
 835         literalReplacement = matcher.quoteReplacement(replaceTest);
 836         result = matcher.replaceAll(literalReplacement);
 837         if (!result.equals("zzz\\t$\\$zzz"))
 838             failCount++;
 839 
 840         // Supplementary character test
 841         pattern = Pattern.compile(toSupplementaries("abc"), flags);
 842         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
 843         replaceTest = "$0";
 844         result = matcher.replaceAll(replaceTest);
 845         if (!result.equals(toSupplementaries("zzzabczzz")))
 846             failCount++;
 847 
 848         matcher.reset();
 849         literalReplacement = matcher.quoteReplacement(replaceTest);
 850         result = matcher.replaceAll(literalReplacement);
 851         if (!result.equals(toSupplementaries("zzz$0zzz")))
 852             failCount++;
 853 
 854         matcher.reset();
 855         replaceTest = "\\t$\\$";
 856         literalReplacement = matcher.quoteReplacement(replaceTest);
 857         result = matcher.replaceAll(literalReplacement);
 858         if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
 859             failCount++;
 860 
 861         // IAE should be thrown if backslash or '$' is the last character
 862         // in replacement string
 863         try {
 864             "\uac00".replaceAll("\uac00", "$");
 865             failCount++;
 866         } catch (IllegalArgumentException iie) {
 867         } catch (Exception e) {
 868             failCount++;
 869         }
 870         try {
 871             "\uac00".replaceAll("\uac00", "\\");
 872             failCount++;
 873         } catch (IllegalArgumentException iie) {
 874         } catch (Exception e) {
 875             failCount++;
 876         }
 877         report("Literal replacement");
 878     }
 879 
 880     // This test is for 4757029
 881     private static void regionTest() throws Exception {
 882         Pattern pattern = Pattern.compile("abc");
 883         Matcher matcher = pattern.matcher("abcdefabc");
 884 
 885         matcher.region(0,9);
 886         if (!matcher.find())
 887             failCount++;
 888         if (!matcher.find())
 889             failCount++;
 890         matcher.region(0,3);
 891         if (!matcher.find())
 892            failCount++;
 893         matcher.region(3,6);
 894         if (matcher.find())
 895            failCount++;
 896         matcher.region(0,2);
 897         if (matcher.find())
 898            failCount++;
 899 
 900         expectRegionFail(matcher, 1, -1);
 901         expectRegionFail(matcher, -1, -1);
 902         expectRegionFail(matcher, -1, 1);
 903         expectRegionFail(matcher, 5, 3);
 904         expectRegionFail(matcher, 5, 12);
 905         expectRegionFail(matcher, 12, 12);
 906 
 907         pattern = Pattern.compile("^abc$");
 908         matcher = pattern.matcher("zzzabczzz");
 909         matcher.region(0,9);
 910         if (matcher.find())
 911             failCount++;
 912         matcher.region(3,6);
 913         if (!matcher.find())
 914            failCount++;
 915         matcher.region(3,6);
 916         matcher.useAnchoringBounds(false);
 917         if (matcher.find())
 918            failCount++;
 919 
 920         // Supplementary character test
 921         pattern = Pattern.compile(toSupplementaries("abc"));
 922         matcher = pattern.matcher(toSupplementaries("abcdefabc"));
 923         matcher.region(0,9*2);
 924         if (!matcher.find())
 925             failCount++;
 926         if (!matcher.find())
 927             failCount++;
 928         matcher.region(0,3*2);
 929         if (!matcher.find())
 930            failCount++;
 931         matcher.region(1,3*2);
 932         if (matcher.find())
 933            failCount++;
 934         matcher.region(3*2,6*2);
 935         if (matcher.find())
 936            failCount++;
 937         matcher.region(0,2*2);
 938         if (matcher.find())
 939            failCount++;
 940         matcher.region(0,2*2+1);
 941         if (matcher.find())
 942            failCount++;
 943 
 944         expectRegionFail(matcher, 1*2, -1);
 945         expectRegionFail(matcher, -1, -1);
 946         expectRegionFail(matcher, -1, 1*2);
 947         expectRegionFail(matcher, 5*2, 3*2);
 948         expectRegionFail(matcher, 5*2, 12*2);
 949         expectRegionFail(matcher, 12*2, 12*2);
 950 
 951         pattern = Pattern.compile(toSupplementaries("^abc$"));
 952         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
 953         matcher.region(0,9*2);
 954         if (matcher.find())
 955             failCount++;
 956         matcher.region(3*2,6*2);
 957         if (!matcher.find())
 958            failCount++;
 959         matcher.region(3*2+1,6*2);
 960         if (matcher.find())
 961            failCount++;
 962         matcher.region(3*2,6*2-1);
 963         if (matcher.find())
 964            failCount++;
 965         matcher.region(3*2,6*2);
 966         matcher.useAnchoringBounds(false);
 967         if (matcher.find())
 968            failCount++;
 969         report("Regions");
 970     }
 971 
 972     private static void expectRegionFail(Matcher matcher, int index1,
 973                                          int index2)
 974     {
 975         try {
 976             matcher.region(index1, index2);
 977             failCount++;
 978         } catch (IndexOutOfBoundsException ioobe) {
 979             // Correct result
 980         } catch (IllegalStateException ise) {
 981             // Correct result
 982         }
 983     }
 984 
 985     // This test is for 4803197
 986     private static void escapedSegmentTest() throws Exception {
 987 
 988         Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
 989         check(pattern, "dir1\\dir2", true);
 990 
 991         pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
 992         check(pattern, "dir1\\dir2\\", true);
 993 
 994         pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
 995         check(pattern, "dir1\\dir2\\", true);
 996 
 997         // Supplementary character test
 998         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
 999         check(pattern, toSupplementaries("dir1\\dir2"), true);
1000 
1001         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
1002         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1003 
1004         pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1005         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1006 
1007         report("Escaped segment");
1008     }
1009 
1010     // This test is for 4792284
1011     private static void nonCaptureRepetitionTest() throws Exception {
1012         String input = "abcdefgh;";
1013 
1014         String[] patterns = new String[] {
1015             "(?:\\w{4})+;",
1016             "(?:\\w{8})*;",
1017             "(?:\\w{2}){2,4};",
1018             "(?:\\w{4}){2,};",   // only matches the
1019             ".*?(?:\\w{5})+;",   //     specified minimum
1020             ".*?(?:\\w{9})*;",   //     number of reps - OK
1021             "(?:\\w{4})+?;",     // lazy repetition - OK
1022             "(?:\\w{4})++;",     // possessive repetition - OK
1023             "(?:\\w{2,}?)+;",    // non-deterministic - OK
1024             "(\\w{4})+;",        // capturing group - OK
1025         };
1026 
1027         for (int i = 0; i < patterns.length; i++) {
1028             // Check find()
1029             check(patterns[i], 0, input, input, true);
1030             // Check matches()
1031             Pattern p = Pattern.compile(patterns[i]);
1032             Matcher m = p.matcher(input);
1033 
1034             if (m.matches()) {
1035                 if (!m.group(0).equals(input))
1036                     failCount++;
1037             } else {
1038                 failCount++;
1039             }
1040         }
1041 
1042         report("Non capturing repetition");
1043     }
1044 
1045     // This test is for 6358731
1046     private static void notCapturedGroupCurlyMatchTest() throws Exception {
1047         Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1048         Matcher matcher = pattern.matcher("abcd");
1049         if (!matcher.matches() ||
1050              matcher.group(1) != null ||
1051              !matcher.group(2).equals("abcd")) {
1052             failCount++;
1053         }
1054         report("Not captured GroupCurly");
1055     }
1056 
1057     // This test is for 4706545
1058     private static void javaCharClassTest() throws Exception {
1059         for (int i=0; i<1000; i++) {
1060             char c = (char)generator.nextInt();
1061             check("{javaLowerCase}", c, Character.isLowerCase(c));
1062             check("{javaUpperCase}", c, Character.isUpperCase(c));
1063             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1064             check("{javaTitleCase}", c, Character.isTitleCase(c));
1065             check("{javaDigit}", c, Character.isDigit(c));
1066             check("{javaDefined}", c, Character.isDefined(c));
1067             check("{javaLetter}", c, Character.isLetter(c));
1068             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1069             check("{javaJavaIdentifierStart}", c,
1070                   Character.isJavaIdentifierStart(c));
1071             check("{javaJavaIdentifierPart}", c,
1072                   Character.isJavaIdentifierPart(c));
1073             check("{javaUnicodeIdentifierStart}", c,
1074                   Character.isUnicodeIdentifierStart(c));
1075             check("{javaUnicodeIdentifierPart}", c,
1076                   Character.isUnicodeIdentifierPart(c));
1077             check("{javaIdentifierIgnorable}", c,
1078                   Character.isIdentifierIgnorable(c));
1079             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1080             check("{javaWhitespace}", c, Character.isWhitespace(c));
1081             check("{javaISOControl}", c, Character.isISOControl(c));
1082             check("{javaMirrored}", c, Character.isMirrored(c));
1083 
1084         }
1085 
1086         // Supplementary character test
1087         for (int i=0; i<1000; i++) {
1088             int c = generator.nextInt(Character.MAX_CODE_POINT
1089                                       - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1090                         + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1091             check("{javaLowerCase}", c, Character.isLowerCase(c));
1092             check("{javaUpperCase}", c, Character.isUpperCase(c));
1093             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1094             check("{javaTitleCase}", c, Character.isTitleCase(c));
1095             check("{javaDigit}", c, Character.isDigit(c));
1096             check("{javaDefined}", c, Character.isDefined(c));
1097             check("{javaLetter}", c, Character.isLetter(c));
1098             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1099             check("{javaJavaIdentifierStart}", c,
1100                   Character.isJavaIdentifierStart(c));
1101             check("{javaJavaIdentifierPart}", c,
1102                   Character.isJavaIdentifierPart(c));
1103             check("{javaUnicodeIdentifierStart}", c,
1104                   Character.isUnicodeIdentifierStart(c));
1105             check("{javaUnicodeIdentifierPart}", c,
1106                   Character.isUnicodeIdentifierPart(c));
1107             check("{javaIdentifierIgnorable}", c,
1108                   Character.isIdentifierIgnorable(c));
1109             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1110             check("{javaWhitespace}", c, Character.isWhitespace(c));
1111             check("{javaISOControl}", c, Character.isISOControl(c));
1112             check("{javaMirrored}", c, Character.isMirrored(c));
1113         }
1114 
1115         report("Java character classes");
1116     }
1117 
1118     // This test is for 4523620
1119     /*
1120     private static void numOccurrencesTest() throws Exception {
1121         Pattern pattern = Pattern.compile("aaa");
1122 
1123         if (pattern.numOccurrences("aaaaaa", false) != 2)
1124             failCount++;
1125         if (pattern.numOccurrences("aaaaaa", true) != 4)
1126             failCount++;
1127 
1128         pattern = Pattern.compile("^");
1129         if (pattern.numOccurrences("aaaaaa", false) != 1)
1130             failCount++;
1131         if (pattern.numOccurrences("aaaaaa", true) != 1)
1132             failCount++;
1133 
1134         report("Number of Occurrences");
1135     }
1136     */
1137 
1138     // This test is for 4776374
1139     private static void caretBetweenTerminatorsTest() throws Exception {
1140         int flags1 = Pattern.DOTALL;
1141         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1142         int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1143         int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1144 
1145         check("^....", flags1, "test\ntest", "test", true);
1146         check(".....^", flags1, "test\ntest", "test", false);
1147         check(".....^", flags1, "test\n", "test", false);
1148         check("....^", flags1, "test\r\n", "test", false);
1149 
1150         check("^....", flags2, "test\ntest", "test", true);
1151         check("....^", flags2, "test\ntest", "test", false);
1152         check(".....^", flags2, "test\n", "test", false);
1153         check("....^", flags2, "test\r\n", "test", false);
1154 
1155         check("^....", flags3, "test\ntest", "test", true);
1156         check(".....^", flags3, "test\ntest", "test\n", true);
1157         check(".....^", flags3, "test\u0085test", "test\u0085", false);
1158         check(".....^", flags3, "test\n", "test", false);
1159         check(".....^", flags3, "test\r\n", "test", false);
1160         check("......^", flags3, "test\r\ntest", "test\r\n", true);
1161 
1162         check("^....", flags4, "test\ntest", "test", true);
1163         check(".....^", flags3, "test\ntest", "test\n", true);
1164         check(".....^", flags4, "test\u0085test", "test\u0085", true);
1165         check(".....^", flags4, "test\n", "test\n", false);
1166         check(".....^", flags4, "test\r\n", "test\r", false);
1167 
1168         // Supplementary character test
1169         String t = toSupplementaries("test");
1170         check("^....", flags1, t+"\n"+t, t, true);
1171         check(".....^", flags1, t+"\n"+t, t, false);
1172         check(".....^", flags1, t+"\n", t, false);
1173         check("....^", flags1, t+"\r\n", t, false);
1174 
1175         check("^....", flags2, t+"\n"+t, t, true);
1176         check("....^", flags2, t+"\n"+t, t, false);
1177         check(".....^", flags2, t+"\n", t, false);
1178         check("....^", flags2, t+"\r\n", t, false);
1179 
1180         check("^....", flags3, t+"\n"+t, t, true);
1181         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1182         check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1183         check(".....^", flags3, t+"\n", t, false);
1184         check(".....^", flags3, t+"\r\n", t, false);
1185         check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1186 
1187         check("^....", flags4, t+"\n"+t, t, true);
1188         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1189         check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1190         check(".....^", flags4, t+"\n", t+"\n", false);
1191         check(".....^", flags4, t+"\r\n", t+"\r", false);
1192 
1193         report("Caret between terminators");
1194     }
1195 
1196     // This test is for 4727935
1197     private static void dollarAtEndTest() throws Exception {
1198         int flags1 = Pattern.DOTALL;
1199         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1200         int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1201 
1202         check("....$", flags1, "test\n", "test", true);
1203         check("....$", flags1, "test\r\n", "test", true);
1204         check(".....$", flags1, "test\n", "test\n", true);
1205         check(".....$", flags1, "test\u0085", "test\u0085", true);
1206         check("....$", flags1, "test\u0085", "test", true);
1207 
1208         check("....$", flags2, "test\n", "test", true);
1209         check(".....$", flags2, "test\n", "test\n", true);
1210         check(".....$", flags2, "test\u0085", "test\u0085", true);
1211         check("....$", flags2, "test\u0085", "est\u0085", true);
1212 
1213         check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1214         check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1215         check("....$blah", flags3, "test\nblah", "!!!!", false);
1216         check(".....$blah", flags3, "test\nblah", "!!!!", false);
1217 
1218         // Supplementary character test
1219         String t = toSupplementaries("test");
1220         String b = toSupplementaries("blah");
1221         check("....$", flags1, t+"\n", t, true);
1222         check("....$", flags1, t+"\r\n", t, true);
1223         check(".....$", flags1, t+"\n", t+"\n", true);
1224         check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1225         check("....$", flags1, t+"\u0085", t, true);
1226 
1227         check("....$", flags2, t+"\n", t, true);
1228         check(".....$", flags2, t+"\n", t+"\n", true);
1229         check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1230         check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1231 
1232         check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1233         check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1234         check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1235         check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1236 
1237         report("Dollar at End");
1238     }
1239 
1240     // This test is for 4711773
1241     private static void multilineDollarTest() throws Exception {
1242         Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1243         Matcher matcher = findCR.matcher("first bit\nsecond bit");
1244         matcher.find();
1245         if (matcher.start(0) != 9)
1246             failCount++;
1247         matcher.find();
1248         if (matcher.start(0) != 20)
1249             failCount++;
1250 
1251         // Supplementary character test
1252         matcher = findCR.matcher(toSupplementaries("first  bit\n second  bit")); // double BMP chars
1253         matcher.find();
1254         if (matcher.start(0) != 9*2)
1255             failCount++;
1256         matcher.find();
1257         if (matcher.start(0) != 20*2)
1258             failCount++;
1259 
1260         report("Multiline Dollar");
1261     }
1262 
1263     private static void reluctantRepetitionTest() throws Exception {
1264         Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1265         check(p, "1 word word word 2", true);
1266         check(p, "1 wor wo w 2", true);
1267         check(p, "1 word word 2", true);
1268         check(p, "1 word 2", true);
1269         check(p, "1 wo w w 2", true);
1270         check(p, "1 wo w 2", true);
1271         check(p, "1 wor w 2", true);
1272 
1273         p = Pattern.compile("([a-z])+?c");
1274         Matcher m = p.matcher("ababcdefdec");
1275         check(m, "ababc");
1276 
1277         // Supplementary character test
1278         p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1279         m = p.matcher(toSupplementaries("ababcdefdec"));
1280         check(m, toSupplementaries("ababc"));
1281 
1282         report("Reluctant Repetition");
1283     }
1284 
1285     private static void serializeTest() throws Exception {
1286         String patternStr = "(b)";
1287         String matchStr = "b";
1288         Pattern pattern = Pattern.compile(patternStr);
1289         ByteArrayOutputStream baos = new ByteArrayOutputStream();
1290         ObjectOutputStream oos = new ObjectOutputStream(baos);
1291         oos.writeObject(pattern);
1292         oos.close();
1293         ObjectInputStream ois = new ObjectInputStream(
1294             new ByteArrayInputStream(baos.toByteArray()));
1295         Pattern serializedPattern = (Pattern)ois.readObject();
1296         ois.close();
1297         Matcher matcher = serializedPattern.matcher(matchStr);
1298         if (!matcher.matches())
1299             failCount++;
1300         if (matcher.groupCount() != 1)
1301             failCount++;
1302 
1303         report("Serialization");
1304     }
1305 
1306     private static void gTest() {
1307         Pattern pattern = Pattern.compile("\\G\\w");
1308         Matcher matcher = pattern.matcher("abc#x#x");
1309         matcher.find();
1310         matcher.find();
1311         matcher.find();
1312         if (matcher.find())
1313             failCount++;
1314 
1315         pattern = Pattern.compile("\\GA*");
1316         matcher = pattern.matcher("1A2AA3");
1317         matcher.find();
1318         if (matcher.find())
1319             failCount++;
1320 
1321         pattern = Pattern.compile("\\GA*");
1322         matcher = pattern.matcher("1A2AA3");
1323         if (!matcher.find(1))
1324             failCount++;
1325         matcher.find();
1326         if (matcher.find())
1327             failCount++;
1328 
1329         report("\\G");
1330     }
1331 
1332     private static void zTest() {
1333         Pattern pattern = Pattern.compile("foo\\Z");
1334         // Positives
1335         check(pattern, "foo\u0085", true);
1336         check(pattern, "foo\u2028", true);
1337         check(pattern, "foo\u2029", true);
1338         check(pattern, "foo\n", true);
1339         check(pattern, "foo\r", true);
1340         check(pattern, "foo\r\n", true);
1341         // Negatives
1342         check(pattern, "fooo", false);
1343         check(pattern, "foo\n\r", false);
1344 
1345         pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1346         // Positives
1347         check(pattern, "foo", true);
1348         check(pattern, "foo\n", true);
1349         // Negatives
1350         check(pattern, "foo\r", false);
1351         check(pattern, "foo\u0085", false);
1352         check(pattern, "foo\u2028", false);
1353         check(pattern, "foo\u2029", false);
1354 
1355         report("\\Z");
1356     }
1357 
1358     private static void replaceFirstTest() {
1359         Pattern pattern = Pattern.compile("(ab)(c*)");
1360         Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1361         if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1362             failCount++;
1363 
1364         matcher.reset("zzzabccczzzabcczzzabccczzz");
1365         if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1366             failCount++;
1367 
1368         matcher.reset("zzzabccczzzabcczzzabccczzz");
1369         String result = matcher.replaceFirst("$1");
1370         if (!result.equals("zzzabzzzabcczzzabccczzz"))
1371             failCount++;
1372 
1373         matcher.reset("zzzabccczzzabcczzzabccczzz");
1374         result = matcher.replaceFirst("$2");
1375         if (!result.equals("zzzccczzzabcczzzabccczzz"))
1376             failCount++;
1377 
1378         pattern = Pattern.compile("a*");
1379         matcher = pattern.matcher("aaaaaaaaaa");
1380         if (!matcher.replaceFirst("test").equals("test"))
1381             failCount++;
1382 
1383         pattern = Pattern.compile("a+");
1384         matcher = pattern.matcher("zzzaaaaaaaaaa");
1385         if (!matcher.replaceFirst("test").equals("zzztest"))
1386             failCount++;
1387 
1388         // Supplementary character test
1389         pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1390         matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1391         if (!matcher.replaceFirst(toSupplementaries("test"))
1392                 .equals(toSupplementaries("testzzzabcczzzabccc")))
1393             failCount++;
1394 
1395         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1396         if (!matcher.replaceFirst(toSupplementaries("test")).
1397             equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1398             failCount++;
1399 
1400         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1401         result = matcher.replaceFirst("$1");
1402         if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1403             failCount++;
1404 
1405         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1406         result = matcher.replaceFirst("$2");
1407         if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1408             failCount++;
1409 
1410         pattern = Pattern.compile(toSupplementaries("a*"));
1411         matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1412         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1413             failCount++;
1414 
1415         pattern = Pattern.compile(toSupplementaries("a+"));
1416         matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1417         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1418             failCount++;
1419 
1420         report("Replace First");
1421     }
1422 
1423     private static void unixLinesTest() {
1424         Pattern pattern = Pattern.compile(".*");
1425         Matcher matcher = pattern.matcher("aa\u2028blah");
1426         matcher.find();
1427         if (!matcher.group(0).equals("aa"))
1428             failCount++;
1429 
1430         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1431         matcher = pattern.matcher("aa\u2028blah");
1432         matcher.find();
1433         if (!matcher.group(0).equals("aa\u2028blah"))
1434             failCount++;
1435 
1436         pattern = Pattern.compile("[az]$",
1437                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1438         matcher = pattern.matcher("aa\u2028zz");
1439         check(matcher, "a\u2028", false);
1440 
1441         // Supplementary character test
1442         pattern = Pattern.compile(".*");
1443         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1444         matcher.find();
1445         if (!matcher.group(0).equals(toSupplementaries("aa")))
1446             failCount++;
1447 
1448         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1449         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1450         matcher.find();
1451         if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1452             failCount++;
1453 
1454         pattern = Pattern.compile(toSupplementaries("[az]$"),
1455                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1456         matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1457         check(matcher, toSupplementaries("a\u2028"), false);
1458 
1459         report("Unix Lines");
1460     }
1461 
1462     private static void commentsTest() {
1463         int flags = Pattern.COMMENTS;
1464 
1465         Pattern pattern = Pattern.compile("aa \\# aa", flags);
1466         Matcher matcher = pattern.matcher("aa#aa");
1467         if (!matcher.matches())
1468             failCount++;
1469 
1470         pattern = Pattern.compile("aa  # blah", flags);
1471         matcher = pattern.matcher("aa");
1472         if (!matcher.matches())
1473             failCount++;
1474 
1475         pattern = Pattern.compile("aa blah", flags);
1476         matcher = pattern.matcher("aablah");
1477         if (!matcher.matches())
1478              failCount++;
1479 
1480         pattern = Pattern.compile("aa  # blah blech  ", flags);
1481         matcher = pattern.matcher("aa");
1482         if (!matcher.matches())
1483             failCount++;
1484 
1485         pattern = Pattern.compile("aa  # blah\n  ", flags);
1486         matcher = pattern.matcher("aa");
1487         if (!matcher.matches())
1488             failCount++;
1489 
1490         pattern = Pattern.compile("aa  # blah\nbc # blech", flags);
1491         matcher = pattern.matcher("aabc");
1492         if (!matcher.matches())
1493              failCount++;
1494 
1495         pattern = Pattern.compile("aa  # blah\nbc# blech", flags);
1496         matcher = pattern.matcher("aabc");
1497         if (!matcher.matches())
1498              failCount++;
1499 
1500         pattern = Pattern.compile("aa  # blah\nbc\\# blech", flags);
1501         matcher = pattern.matcher("aabc#blech");
1502         if (!matcher.matches())
1503              failCount++;
1504 
1505         // Supplementary character test
1506         pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1507         matcher = pattern.matcher(toSupplementaries("aa#aa"));
1508         if (!matcher.matches())
1509             failCount++;
1510 
1511         pattern = Pattern.compile(toSupplementaries("aa  # blah"), flags);
1512         matcher = pattern.matcher(toSupplementaries("aa"));
1513         if (!matcher.matches())
1514             failCount++;
1515 
1516         pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1517         matcher = pattern.matcher(toSupplementaries("aablah"));
1518         if (!matcher.matches())
1519              failCount++;
1520 
1521         pattern = Pattern.compile(toSupplementaries("aa  # blah blech  "), flags);
1522         matcher = pattern.matcher(toSupplementaries("aa"));
1523         if (!matcher.matches())
1524             failCount++;
1525 
1526         pattern = Pattern.compile(toSupplementaries("aa  # blah\n  "), flags);
1527         matcher = pattern.matcher(toSupplementaries("aa"));
1528         if (!matcher.matches())
1529             failCount++;
1530 
1531         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc # blech"), flags);
1532         matcher = pattern.matcher(toSupplementaries("aabc"));
1533         if (!matcher.matches())
1534              failCount++;
1535 
1536         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc# blech"), flags);
1537         matcher = pattern.matcher(toSupplementaries("aabc"));
1538         if (!matcher.matches())
1539              failCount++;
1540 
1541         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc\\# blech"), flags);
1542         matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1543         if (!matcher.matches())
1544              failCount++;
1545 
1546         report("Comments");
1547     }
1548 
1549     private static void caseFoldingTest() { // bug 4504687
1550         int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1551         Pattern pattern = Pattern.compile("aa", flags);
1552         Matcher matcher = pattern.matcher("ab");
1553         if (matcher.matches())
1554             failCount++;
1555 
1556         pattern = Pattern.compile("aA", flags);
1557         matcher = pattern.matcher("ab");
1558         if (matcher.matches())
1559             failCount++;
1560 
1561         pattern = Pattern.compile("aa", flags);
1562         matcher = pattern.matcher("aB");
1563         if (matcher.matches())
1564             failCount++;
1565         matcher = pattern.matcher("Ab");
1566         if (matcher.matches())
1567             failCount++;
1568 
1569         // ASCII               "a"
1570         // Latin-1 Supplement  "a" + grave
1571         // Cyrillic            "a"
1572         String[] patterns = new String[] {
1573             //single
1574             "a", "\u00e0", "\u0430",
1575             //slice
1576             "ab", "\u00e0\u00e1", "\u0430\u0431",
1577             //class single
1578             "[a]", "[\u00e0]", "[\u0430]",
1579             //class range
1580             "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1581             //back reference
1582             "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1583         };
1584 
1585         String[] texts = new String[] {
1586             "A", "\u00c0", "\u0410",
1587             "AB", "\u00c0\u00c1", "\u0410\u0411",
1588             "A", "\u00c0", "\u0410",
1589             "B", "\u00c2", "\u0411",
1590             "aA", "\u00e0\u00c0", "\u0430\u0410"
1591         };
1592 
1593         boolean[] expected = new boolean[] {
1594             true, false, false,
1595             true, false, false,
1596             true, false, false,
1597             true, false, false,
1598             true, false, false
1599         };
1600 
1601         flags = Pattern.CASE_INSENSITIVE;
1602         for (int i = 0; i < patterns.length; i++) {
1603             pattern = Pattern.compile(patterns[i], flags);
1604             matcher = pattern.matcher(texts[i]);
1605             if (matcher.matches() != expected[i]) {
1606                 System.out.println("<1> Failed at " + i);
1607                 failCount++;
1608             }
1609         }
1610 
1611         flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1612         for (int i = 0; i < patterns.length; i++) {
1613             pattern = Pattern.compile(patterns[i], flags);
1614             matcher = pattern.matcher(texts[i]);
1615             if (!matcher.matches()) {
1616                 System.out.println("<2> Failed at " + i);
1617                 failCount++;
1618             }
1619         }
1620         // flag unicode_case alone should do nothing
1621         flags = Pattern.UNICODE_CASE;
1622         for (int i = 0; i < patterns.length; i++) {
1623             pattern = Pattern.compile(patterns[i], flags);
1624             matcher = pattern.matcher(texts[i]);
1625             if (matcher.matches()) {
1626                 System.out.println("<3> Failed at " + i);
1627                 failCount++;
1628             }
1629         }
1630 
1631         // Special cases: i, I, u+0131 and u+0130
1632         flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1633         pattern = Pattern.compile("[h-j]+", flags);
1634         if (!pattern.matcher("\u0131\u0130").matches())
1635             failCount++;
1636         report("Case Folding");
1637     }
1638 
1639     private static void appendTest() {
1640         Pattern pattern = Pattern.compile("(ab)(cd)");
1641         Matcher matcher = pattern.matcher("abcd");
1642         String result = matcher.replaceAll("$2$1");
1643         if (!result.equals("cdab"))
1644             failCount++;
1645 
1646         String  s1 = "Swap all: first = 123, second = 456";
1647         String  s2 = "Swap one: first = 123, second = 456";
1648         String  r  = "$3$2$1";
1649         pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1650         matcher = pattern.matcher(s1);
1651 
1652         result = matcher.replaceAll(r);
1653         if (!result.equals("Swap all: 123 = first, 456 = second"))
1654             failCount++;
1655 
1656         matcher = pattern.matcher(s2);
1657 
1658         if (matcher.find()) {
1659             StringBuffer sb = new StringBuffer();
1660             matcher.appendReplacement(sb, r);
1661             matcher.appendTail(sb);
1662             result = sb.toString();
1663             if (!result.equals("Swap one: 123 = first, second = 456"))
1664                 failCount++;
1665         }
1666 
1667         // Supplementary character test
1668         pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1669         matcher = pattern.matcher(toSupplementaries("abcd"));
1670         result = matcher.replaceAll("$2$1");
1671         if (!result.equals(toSupplementaries("cdab")))
1672             failCount++;
1673 
1674         s1 = toSupplementaries("Swap all: first = 123, second = 456");
1675         s2 = toSupplementaries("Swap one: first = 123, second = 456");
1676         r  = toSupplementaries("$3$2$1");
1677         pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1678         matcher = pattern.matcher(s1);
1679 
1680         result = matcher.replaceAll(r);
1681         if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1682             failCount++;
1683 
1684         matcher = pattern.matcher(s2);
1685 
1686         if (matcher.find()) {
1687             StringBuffer sb = new StringBuffer();
1688             matcher.appendReplacement(sb, r);
1689             matcher.appendTail(sb);
1690             result = sb.toString();
1691             if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1692                 failCount++;
1693         }
1694         report("Append");
1695     }
1696 
1697     private static void splitTest() {
1698         Pattern pattern = Pattern.compile(":");
1699         String[] result = pattern.split("foo:and:boo", 2);
1700         if (!result[0].equals("foo"))
1701             failCount++;
1702         if (!result[1].equals("and:boo"))
1703             failCount++;
1704         // Supplementary character test
1705         Pattern patternX = Pattern.compile(toSupplementaries("X"));
1706         result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1707         if (!result[0].equals(toSupplementaries("foo")))
1708             failCount++;
1709         if (!result[1].equals(toSupplementaries("andXboo")))
1710             failCount++;
1711 
1712         CharBuffer cb = CharBuffer.allocate(100);
1713         cb.put("foo:and:boo");
1714         cb.flip();
1715         result = pattern.split(cb);
1716         if (!result[0].equals("foo"))
1717             failCount++;
1718         if (!result[1].equals("and"))
1719             failCount++;
1720         if (!result[2].equals("boo"))
1721             failCount++;
1722 
1723         // Supplementary character test
1724         CharBuffer cbs = CharBuffer.allocate(100);
1725         cbs.put(toSupplementaries("fooXandXboo"));
1726         cbs.flip();
1727         result = patternX.split(cbs);
1728         if (!result[0].equals(toSupplementaries("foo")))
1729             failCount++;
1730         if (!result[1].equals(toSupplementaries("and")))
1731             failCount++;
1732         if (!result[2].equals(toSupplementaries("boo")))
1733             failCount++;
1734 
1735         String source = "0123456789";
1736         for (int limit=-2; limit<3; limit++) {
1737             for (int x=0; x<10; x++) {
1738                 result = source.split(Integer.toString(x), limit);
1739                 int expectedLength = limit < 1 ? 2 : limit;
1740 
1741                 if ((limit == 0) && (x == 9)) {
1742                     // expected dropping of ""
1743                     if (result.length != 1)
1744                         failCount++;
1745                     if (!result[0].equals("012345678")) {
1746                         failCount++;
1747                     }
1748                 } else {
1749                     if (result.length != expectedLength) {
1750                         failCount++;
1751                     }
1752                     if (!result[0].equals(source.substring(0,x))) {
1753                         if (limit != 1) {
1754                             failCount++;
1755                         } else {
1756                             if (!result[0].equals(source.substring(0,10))) {
1757                                 failCount++;
1758                             }
1759                         }
1760                     }
1761                     if (expectedLength > 1) { // Check segment 2
1762                         if (!result[1].equals(source.substring(x+1,10)))
1763                             failCount++;
1764                     }
1765                 }
1766             }
1767         }
1768         // Check the case for no match found
1769         for (int limit=-2; limit<3; limit++) {
1770             result = source.split("e", limit);
1771             if (result.length != 1)
1772                 failCount++;
1773             if (!result[0].equals(source))
1774                 failCount++;
1775         }
1776         // Check the case for limit == 0, source = "";
1777         source = "";
1778         result = source.split("e", 0);
1779         if (result.length != 1)
1780             failCount++;
1781         if (!result[0].equals(source))
1782             failCount++;
1783 
1784         report("Split");
1785     }
1786 
1787     private static void negationTest() {
1788         Pattern pattern = Pattern.compile("[\\[@^]+");
1789         Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1790         if (!matcher.find())
1791             failCount++;
1792         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1793             failCount++;
1794         pattern = Pattern.compile("[@\\[^]+");
1795         matcher = pattern.matcher("@@@@[[[[^^^^");
1796         if (!matcher.find())
1797             failCount++;
1798         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1799             failCount++;
1800         pattern = Pattern.compile("[@\\[^@]+");
1801         matcher = pattern.matcher("@@@@[[[[^^^^");
1802         if (!matcher.find())
1803             failCount++;
1804         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1805             failCount++;
1806 
1807         pattern = Pattern.compile("\\)");
1808         matcher = pattern.matcher("xxx)xxx");
1809         if (!matcher.find())
1810             failCount++;
1811 
1812         report("Negation");
1813     }
1814 
1815     private static void ampersandTest() {
1816         Pattern pattern = Pattern.compile("[&@]+");
1817         check(pattern, "@@@@&&&&", true);
1818 
1819         pattern = Pattern.compile("[@&]+");
1820         check(pattern, "@@@@&&&&", true);
1821 
1822         pattern = Pattern.compile("[@\\&]+");
1823         check(pattern, "@@@@&&&&", true);
1824 
1825         report("Ampersand");
1826     }
1827 
1828     private static void octalTest() throws Exception {
1829         Pattern pattern = Pattern.compile("\\u0007");
1830         Matcher matcher = pattern.matcher("\u0007");
1831         if (!matcher.matches())
1832             failCount++;
1833         pattern = Pattern.compile("\\07");
1834         matcher = pattern.matcher("\u0007");
1835         if (!matcher.matches())
1836             failCount++;
1837         pattern = Pattern.compile("\\007");
1838         matcher = pattern.matcher("\u0007");
1839         if (!matcher.matches())
1840             failCount++;
1841         pattern = Pattern.compile("\\0007");
1842         matcher = pattern.matcher("\u0007");
1843         if (!matcher.matches())
1844             failCount++;
1845         pattern = Pattern.compile("\\040");
1846         matcher = pattern.matcher("\u0020");
1847         if (!matcher.matches())
1848             failCount++;
1849         pattern = Pattern.compile("\\0403");
1850         matcher = pattern.matcher("\u00203");
1851         if (!matcher.matches())
1852             failCount++;
1853         pattern = Pattern.compile("\\0103");
1854         matcher = pattern.matcher("\u0043");
1855         if (!matcher.matches())
1856             failCount++;
1857 
1858         report("Octal");
1859     }
1860 
1861     private static void longPatternTest() throws Exception {
1862         try {
1863             Pattern pattern = Pattern.compile(
1864                 "a 32-character-long pattern xxxx");
1865             pattern = Pattern.compile("a 33-character-long pattern xxxxx");
1866             pattern = Pattern.compile("a thirty four character long regex");
1867             StringBuffer patternToBe = new StringBuffer(101);
1868             for (int i=0; i<100; i++)
1869                 patternToBe.append((char)(97 + i%26));
1870             pattern = Pattern.compile(patternToBe.toString());
1871         } catch (PatternSyntaxException e) {
1872             failCount++;
1873         }
1874 
1875         // Supplementary character test
1876         try {
1877             Pattern pattern = Pattern.compile(
1878                 toSupplementaries("a 32-character-long pattern xxxx"));
1879             pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
1880             pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
1881             StringBuffer patternToBe = new StringBuffer(101*2);
1882             for (int i=0; i<100; i++)
1883                 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
1884                                                      + 97 + i%26));
1885             pattern = Pattern.compile(patternToBe.toString());
1886         } catch (PatternSyntaxException e) {
1887             failCount++;
1888         }
1889         report("LongPattern");
1890     }
1891 
1892     private static void group0Test() throws Exception {
1893         Pattern pattern = Pattern.compile("(tes)ting");
1894         Matcher matcher = pattern.matcher("testing");
1895         check(matcher, "testing");
1896 
1897         matcher.reset("testing");
1898         if (matcher.lookingAt()) {
1899             if (!matcher.group(0).equals("testing"))
1900                 failCount++;
1901         } else {
1902             failCount++;
1903         }
1904 
1905         matcher.reset("testing");
1906         if (matcher.matches()) {
1907             if (!matcher.group(0).equals("testing"))
1908                 failCount++;
1909         } else {
1910             failCount++;
1911         }
1912 
1913         pattern = Pattern.compile("(tes)ting");
1914         matcher = pattern.matcher("testing");
1915         if (matcher.lookingAt()) {
1916             if (!matcher.group(0).equals("testing"))
1917                 failCount++;
1918         } else {
1919             failCount++;
1920         }
1921 
1922         pattern = Pattern.compile("^(tes)ting");
1923         matcher = pattern.matcher("testing");
1924         if (matcher.matches()) {
1925             if (!matcher.group(0).equals("testing"))
1926                 failCount++;
1927         } else {
1928             failCount++;
1929         }
1930 
1931         // Supplementary character test
1932         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1933         matcher = pattern.matcher(toSupplementaries("testing"));
1934         check(matcher, toSupplementaries("testing"));
1935 
1936         matcher.reset(toSupplementaries("testing"));
1937         if (matcher.lookingAt()) {
1938             if (!matcher.group(0).equals(toSupplementaries("testing")))
1939                 failCount++;
1940         } else {
1941             failCount++;
1942         }
1943 
1944         matcher.reset(toSupplementaries("testing"));
1945         if (matcher.matches()) {
1946             if (!matcher.group(0).equals(toSupplementaries("testing")))
1947                 failCount++;
1948         } else {
1949             failCount++;
1950         }
1951 
1952         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
1953         matcher = pattern.matcher(toSupplementaries("testing"));
1954         if (matcher.lookingAt()) {
1955             if (!matcher.group(0).equals(toSupplementaries("testing")))
1956                 failCount++;
1957         } else {
1958             failCount++;
1959         }
1960 
1961         pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
1962         matcher = pattern.matcher(toSupplementaries("testing"));
1963         if (matcher.matches()) {
1964             if (!matcher.group(0).equals(toSupplementaries("testing")))
1965                 failCount++;
1966         } else {
1967             failCount++;
1968         }
1969 
1970         report("Group0");
1971     }
1972 
1973     private static void findIntTest() throws Exception {
1974         Pattern p = Pattern.compile("blah");
1975         Matcher m = p.matcher("zzzzblahzzzzzblah");
1976         boolean result = m.find(2);
1977         if (!result)
1978             failCount++;
1979 
1980         p = Pattern.compile("$");
1981         m = p.matcher("1234567890");
1982         result = m.find(10);
1983         if (!result)
1984             failCount++;
1985         try {
1986             result = m.find(11);
1987             failCount++;
1988         } catch (IndexOutOfBoundsException e) {
1989             // correct result
1990         }
1991 
1992         // Supplementary character test
1993         p = Pattern.compile(toSupplementaries("blah"));
1994         m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
1995         result = m.find(2);
1996         if (!result)
1997             failCount++;
1998 
1999         report("FindInt");
2000     }
2001 
2002     private static void emptyPatternTest() throws Exception {
2003         Pattern p = Pattern.compile("");
2004         Matcher m = p.matcher("foo");
2005 
2006         // Should find empty pattern at beginning of input
2007         boolean result = m.find();
2008         if (result != true)
2009             failCount++;
2010         if (m.start() != 0)
2011             failCount++;
2012 
2013         // Should not match entire input if input is not empty
2014         m.reset();
2015         result = m.matches();
2016         if (result == true)
2017             failCount++;
2018 
2019         try {
2020             m.start(0);
2021             failCount++;
2022         } catch (IllegalStateException e) {
2023             // Correct result
2024         }
2025 
2026         // Should match entire input if input is empty
2027         m.reset("");
2028         result = m.matches();
2029         if (result != true)
2030             failCount++;
2031 
2032         result = Pattern.matches("", "");
2033         if (result != true)
2034             failCount++;
2035 
2036         result = Pattern.matches("", "foo");
2037         if (result == true)
2038             failCount++;
2039         report("EmptyPattern");
2040     }
2041 
2042     private static void charClassTest() throws Exception {
2043         Pattern pattern = Pattern.compile("blah[ab]]blech");
2044         check(pattern, "blahb]blech", true);
2045 
2046         pattern = Pattern.compile("[abc[def]]");
2047         check(pattern, "b", true);
2048 
2049         // Supplementary character tests
2050         pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2051         check(pattern, toSupplementaries("blahb]blech"), true);
2052 
2053         pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2054         check(pattern, toSupplementaries("b"), true);
2055 
2056         try {
2057             // u00ff when UNICODE_CASE
2058             pattern = Pattern.compile("[ab\u00ffcd]",
2059                                       Pattern.CASE_INSENSITIVE|
2060                                       Pattern.UNICODE_CASE);
2061             check(pattern, "ab\u00ffcd", true);
2062             check(pattern, "Ab\u0178Cd", true);
2063 
2064             // u00b5 when UNICODE_CASE
2065             pattern = Pattern.compile("[ab\u00b5cd]",
2066                                       Pattern.CASE_INSENSITIVE|
2067                                       Pattern.UNICODE_CASE);
2068             check(pattern, "ab\u00b5cd", true);
2069             check(pattern, "Ab\u039cCd", true);
2070         } catch (Exception e) { failCount++; }
2071 
2072         /* Special cases
2073            (1)LatinSmallLetterLongS u+017f
2074            (2)LatinSmallLetterDotlessI u+0131
2075            (3)LatineCapitalLetterIWithDotAbove u+0130
2076            (4)KelvinSign u+212a
2077            (5)AngstromSign u+212b
2078         */
2079         int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2080         pattern = Pattern.compile("[sik\u00c5]+", flags);
2081         if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2082             failCount++;
2083 
2084         report("CharClass");
2085     }
2086 
2087     private static void caretTest() throws Exception {
2088         Pattern pattern = Pattern.compile("\\w*");
2089         Matcher matcher = pattern.matcher("a#bc#def##g");
2090         check(matcher, "a");
2091         check(matcher, "");
2092         check(matcher, "bc");
2093         check(matcher, "");
2094         check(matcher, "def");
2095         check(matcher, "");
2096         check(matcher, "");
2097         check(matcher, "g");
2098         check(matcher, "");
2099         if (matcher.find())
2100             failCount++;
2101 
2102         pattern = Pattern.compile("^\\w*");
2103         matcher = pattern.matcher("a#bc#def##g");
2104         check(matcher, "a");
2105         if (matcher.find())
2106             failCount++;
2107 
2108         pattern = Pattern.compile("\\w");
2109         matcher = pattern.matcher("abc##x");
2110         check(matcher, "a");
2111         check(matcher, "b");
2112         check(matcher, "c");
2113         check(matcher, "x");
2114         if (matcher.find())
2115             failCount++;
2116 
2117         pattern = Pattern.compile("^\\w");
2118         matcher = pattern.matcher("abc##x");
2119         check(matcher, "a");
2120         if (matcher.find())
2121             failCount++;
2122 
2123         pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2124         matcher = pattern.matcher("abcdef-ghi\njklmno");
2125         check(matcher, "abc");
2126         if (matcher.find())
2127             failCount++;
2128 
2129         pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2130         matcher = pattern.matcher("abcdef-ghi\njklmno");
2131         check(matcher, "abc");
2132         check(matcher, "jkl");
2133         if (matcher.find())
2134             failCount++;
2135 
2136         pattern = Pattern.compile("^", Pattern.MULTILINE);
2137         matcher = pattern.matcher("this is some text");
2138         String result = matcher.replaceAll("X");
2139         if (!result.equals("Xthis is some text"))
2140             failCount++;
2141 
2142         pattern = Pattern.compile("^");
2143         matcher = pattern.matcher("this is some text");
2144         result = matcher.replaceAll("X");
2145         if (!result.equals("Xthis is some text"))
2146             failCount++;
2147 
2148         pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2149         matcher = pattern.matcher("this is some text\n");
2150         result = matcher.replaceAll("X");
2151         if (!result.equals("Xthis is some text\n"))
2152             failCount++;
2153 
2154         report("Caret");
2155     }
2156 
2157     private static void groupCaptureTest() throws Exception {
2158         // Independent group
2159         Pattern pattern = Pattern.compile("x+(?>y+)z+");
2160         Matcher matcher = pattern.matcher("xxxyyyzzz");
2161         matcher.find();
2162         try {
2163             String blah = matcher.group(1);
2164             failCount++;
2165         } catch (IndexOutOfBoundsException ioobe) {
2166             // Good result
2167         }
2168         // Pure group
2169         pattern = Pattern.compile("x+(?:y+)z+");
2170         matcher = pattern.matcher("xxxyyyzzz");
2171         matcher.find();
2172         try {
2173             String blah = matcher.group(1);
2174             failCount++;
2175         } catch (IndexOutOfBoundsException ioobe) {
2176             // Good result
2177         }
2178 
2179         // Supplementary character tests
2180         // Independent group
2181         pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2182         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2183         matcher.find();
2184         try {
2185             String blah = matcher.group(1);
2186             failCount++;
2187         } catch (IndexOutOfBoundsException ioobe) {
2188             // Good result
2189         }
2190         // Pure group
2191         pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2192         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2193         matcher.find();
2194         try {
2195             String blah = matcher.group(1);
2196             failCount++;
2197         } catch (IndexOutOfBoundsException ioobe) {
2198             // Good result
2199         }
2200 
2201         report("GroupCapture");
2202     }
2203 
2204     private static void backRefTest() throws Exception {
2205         Pattern pattern = Pattern.compile("(a*)bc\\1");
2206         check(pattern, "zzzaabcazzz", true);
2207 
2208         pattern = Pattern.compile("(a*)bc\\1");
2209         check(pattern, "zzzaabcaazzz", true);
2210 
2211         pattern = Pattern.compile("(abc)(def)\\1");
2212         check(pattern, "abcdefabc", true);
2213 
2214         pattern = Pattern.compile("(abc)(def)\\3");
2215         check(pattern, "abcdefabc", false);
2216 
2217         try {
2218             for (int i = 1; i < 10; i++) {
2219                 // Make sure backref 1-9 are always accepted
2220                 pattern = Pattern.compile("abcdef\\" + i);
2221                 // and fail to match if the target group does not exit
2222                 check(pattern, "abcdef", false);
2223             }
2224         } catch(PatternSyntaxException e) {
2225             failCount++;
2226         }
2227 
2228         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2229         check(pattern, "abcdefghija", false);
2230         check(pattern, "abcdefghija1", true);
2231 
2232         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2233         check(pattern, "abcdefghijkk", true);
2234 
2235         pattern = Pattern.compile("(a)bcdefghij\\11");
2236         check(pattern, "abcdefghija1", true);
2237 
2238         // Supplementary character tests
2239         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2240         check(pattern, toSupplementaries("zzzaabcazzz"), true);
2241 
2242         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2243         check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2244 
2245         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2246         check(pattern, toSupplementaries("abcdefabc"), true);
2247 
2248         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2249         check(pattern, toSupplementaries("abcdefabc"), false);
2250 
2251         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2252         check(pattern, toSupplementaries("abcdefghija"), false);
2253         check(pattern, toSupplementaries("abcdefghija1"), true);
2254 
2255         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2256         check(pattern, toSupplementaries("abcdefghijkk"), true);
2257 
2258         report("BackRef");
2259     }
2260 
2261     /**
2262      * Unicode Technical Report #18, section 2.6 End of Line
2263      * There is no empty line to be matched in the sequence \u000D\u000A
2264      * but there is an empty line in the sequence \u000A\u000D.
2265      */
2266     private static void anchorTest() throws Exception {
2267         Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2268         Matcher m = p.matcher("blah1\r\nblah2");
2269         m.find();
2270         m.find();
2271         if (!m.group().equals("blah2"))
2272             failCount++;
2273 
2274         m.reset("blah1\n\rblah2");
2275         m.find();
2276         m.find();
2277         m.find();
2278         if (!m.group().equals("blah2"))
2279             failCount++;
2280 
2281         // Test behavior of $ with \r\n at end of input
2282         p = Pattern.compile(".+$");
2283         m = p.matcher("blah1\r\n");
2284         if (!m.find())
2285             failCount++;
2286        if (!m.group().equals("blah1"))
2287             failCount++;
2288         if (m.find())
2289             failCount++;
2290 
2291         // Test behavior of $ with \r\n at end of input in multiline
2292         p = Pattern.compile(".+$", Pattern.MULTILINE);
2293         m = p.matcher("blah1\r\n");
2294         if (!m.find())
2295             failCount++;
2296         if (m.find())
2297             failCount++;
2298 
2299         // Test for $ recognition of \u0085 for bug 4527731
2300         p = Pattern.compile(".+$", Pattern.MULTILINE);
2301         m = p.matcher("blah1\u0085");
2302         if (!m.find())
2303             failCount++;
2304 
2305         // Supplementary character test
2306         p = Pattern.compile("^.*$", Pattern.MULTILINE);
2307         m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2308         m.find();
2309         m.find();
2310         if (!m.group().equals(toSupplementaries("blah2")))
2311             failCount++;
2312 
2313         m.reset(toSupplementaries("blah1\n\rblah2"));
2314         m.find();
2315         m.find();
2316         m.find();
2317         if (!m.group().equals(toSupplementaries("blah2")))
2318             failCount++;
2319 
2320         // Test behavior of $ with \r\n at end of input
2321         p = Pattern.compile(".+$");
2322         m = p.matcher(toSupplementaries("blah1\r\n"));
2323         if (!m.find())
2324             failCount++;
2325         if (!m.group().equals(toSupplementaries("blah1")))
2326             failCount++;
2327         if (m.find())
2328             failCount++;
2329 
2330         // Test behavior of $ with \r\n at end of input in multiline
2331         p = Pattern.compile(".+$", Pattern.MULTILINE);
2332         m = p.matcher(toSupplementaries("blah1\r\n"));
2333         if (!m.find())
2334             failCount++;
2335         if (m.find())
2336             failCount++;
2337 
2338         // Test for $ recognition of \u0085 for bug 4527731
2339         p = Pattern.compile(".+$", Pattern.MULTILINE);
2340         m = p.matcher(toSupplementaries("blah1\u0085"));
2341         if (!m.find())
2342             failCount++;
2343 
2344         report("Anchors");
2345     }
2346 
2347     /**
2348      * A basic sanity test of Matcher.lookingAt().
2349      */
2350     private static void lookingAtTest() throws Exception {
2351         Pattern p = Pattern.compile("(ab)(c*)");
2352         Matcher m = p.matcher("abccczzzabcczzzabccc");
2353 
2354         if (!m.lookingAt())
2355             failCount++;
2356 
2357         if (!m.group().equals(m.group(0)))
2358             failCount++;
2359 
2360         m = p.matcher("zzzabccczzzabcczzzabccczzz");
2361         if (m.lookingAt())
2362             failCount++;
2363 
2364         // Supplementary character test
2365         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2366         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2367 
2368         if (!m.lookingAt())
2369             failCount++;
2370 
2371         if (!m.group().equals(m.group(0)))
2372             failCount++;
2373 
2374         m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2375         if (m.lookingAt())
2376             failCount++;
2377 
2378         report("Looking At");
2379     }
2380 
2381     /**
2382      * A basic sanity test of Matcher.matches().
2383      */
2384     private static void matchesTest() throws Exception {
2385         // matches()
2386         Pattern p = Pattern.compile("ulb(c*)");
2387         Matcher m = p.matcher("ulbcccccc");
2388         if (!m.matches())
2389             failCount++;
2390 
2391         // find() but not matches()
2392         m.reset("zzzulbcccccc");
2393         if (m.matches())
2394             failCount++;
2395 
2396         // lookingAt() but not matches()
2397         m.reset("ulbccccccdef");
2398         if (m.matches())
2399             failCount++;
2400 
2401         // matches()
2402         p = Pattern.compile("a|ad");
2403         m = p.matcher("ad");
2404         if (!m.matches())
2405             failCount++;
2406 
2407         // Supplementary character test
2408         // matches()
2409         p = Pattern.compile(toSupplementaries("ulb(c*)"));
2410         m = p.matcher(toSupplementaries("ulbcccccc"));
2411         if (!m.matches())
2412             failCount++;
2413 
2414         // find() but not matches()
2415         m.reset(toSupplementaries("zzzulbcccccc"));
2416         if (m.matches())
2417             failCount++;
2418 
2419         // lookingAt() but not matches()
2420         m.reset(toSupplementaries("ulbccccccdef"));
2421         if (m.matches())
2422             failCount++;
2423 
2424         // matches()
2425         p = Pattern.compile(toSupplementaries("a|ad"));
2426         m = p.matcher(toSupplementaries("ad"));
2427         if (!m.matches())
2428             failCount++;
2429 
2430         report("Matches");
2431     }
2432 
2433     /**
2434      * A basic sanity test of Pattern.matches().
2435      */
2436     private static void patternMatchesTest() throws Exception {
2437         // matches()
2438         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2439                              toSupplementaries("ulbcccccc")))
2440             failCount++;
2441 
2442         // find() but not matches()
2443         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2444                             toSupplementaries("zzzulbcccccc")))
2445             failCount++;
2446 
2447         // lookingAt() but not matches()
2448         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2449                             toSupplementaries("ulbccccccdef")))
2450             failCount++;
2451 
2452         // Supplementary character test
2453         // matches()
2454         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2455                              toSupplementaries("ulbcccccc")))
2456             failCount++;
2457 
2458         // find() but not matches()
2459         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2460                             toSupplementaries("zzzulbcccccc")))
2461             failCount++;
2462 
2463         // lookingAt() but not matches()
2464         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2465                             toSupplementaries("ulbccccccdef")))
2466             failCount++;
2467 
2468         report("Pattern Matches");
2469     }
2470 
2471     /**
2472      * Canonical equivalence testing. Tests the ability of the engine
2473      * to match sequences that are not explicitly specified in the
2474      * pattern when they are considered equivalent by the Unicode Standard.
2475      */
2476     private static void ceTest() throws Exception {
2477         // Decomposed char outside char classes
2478         Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2479         Matcher m = p.matcher("test\u00e5");
2480         if (!m.matches())
2481             failCount++;
2482 
2483         m.reset("testa\u030a");
2484         if (!m.matches())
2485             failCount++;
2486 
2487         // Composed char outside char classes
2488         p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2489         m = p.matcher("test\u00e5");
2490         if (!m.matches())
2491             failCount++;
2492 
2493         m.reset("testa\u030a");
2494         if (!m.find())
2495             failCount++;
2496 
2497         // Decomposed char inside a char class
2498         p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2499         m = p.matcher("test\u00e5");
2500         if (!m.find())
2501             failCount++;
2502 
2503         m.reset("testa\u030a");
2504         if (!m.find())
2505             failCount++;
2506 
2507         // Composed char inside a char class
2508         p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2509         m = p.matcher("test\u00e5");
2510         if (!m.find())
2511             failCount++;
2512 
2513         m.reset("testa\u0300");
2514         if (!m.find())
2515             failCount++;
2516 
2517         m.reset("testa\u030a");
2518         if (!m.find())
2519             failCount++;
2520 
2521         // Marks that cannot legally change order and be equivalent
2522         p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2523         check(p, "testa\u0308\u0300", true);
2524         check(p, "testa\u0300\u0308", false);
2525 
2526         // Marks that can legally change order and be equivalent
2527         p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2528         check(p, "testa\u0308\u0323", true);
2529         check(p, "testa\u0323\u0308", true);
2530 
2531         // Test all equivalences of the sequence a\u0308\u0323\u0300
2532         p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2533         check(p, "testa\u0308\u0323\u0300", true);
2534         check(p, "testa\u0323\u0308\u0300", true);
2535         check(p, "testa\u0308\u0300\u0323", true);
2536         check(p, "test\u00e4\u0323\u0300", true);
2537         check(p, "test\u00e4\u0300\u0323", true);
2538 
2539         /*
2540          * The following canonical equivalence tests don't work. Bug id: 4916384.
2541          *
2542         // Decomposed hangul (jamos)
2543         p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2544         m = p.matcher("\u1100\u1161");
2545         if (!m.matches())
2546             failCount++;
2547 
2548         m.reset("\uac00");
2549         if (!m.matches())
2550             failCount++;
2551 
2552         // Composed hangul
2553         p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2554         m = p.matcher("\u1100\u1161");
2555         if (!m.matches())
2556             failCount++;
2557 
2558         m.reset("\uac00");
2559         if (!m.matches())
2560             failCount++;
2561 
2562         // Decomposed supplementary outside char classes
2563         p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2564         m = p.matcher("test\ud834\uddc0");
2565         if (!m.matches())
2566             failCount++;
2567 
2568         m.reset("test\ud834\uddbc\ud834\udd6f");
2569         if (!m.matches())
2570             failCount++;
2571 
2572         // Composed supplementary outside char classes
2573         p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2574         m.reset("test\ud834\uddbc\ud834\udd6f");
2575         if (!m.matches())
2576             failCount++;
2577 
2578         m = p.matcher("test\ud834\uddc0");
2579         if (!m.matches())
2580             failCount++;
2581 
2582         */
2583 
2584         report("Canonical Equivalence");
2585     }
2586 
2587     /**
2588      * A basic sanity test of Matcher.replaceAll().
2589      */
2590     private static void globalSubstitute() throws Exception {
2591         // Global substitution with a literal
2592         Pattern p = Pattern.compile("(ab)(c*)");
2593         Matcher m = p.matcher("abccczzzabcczzzabccc");
2594         if (!m.replaceAll("test").equals("testzzztestzzztest"))
2595             failCount++;
2596 
2597         m.reset("zzzabccczzzabcczzzabccczzz");
2598         if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2599             failCount++;
2600 
2601         // Global substitution with groups
2602         m.reset("zzzabccczzzabcczzzabccczzz");
2603         String result = m.replaceAll("$1");
2604         if (!result.equals("zzzabzzzabzzzabzzz"))
2605             failCount++;
2606 
2607         // Supplementary character test
2608         // Global substitution with a literal
2609         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2610         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2611         if (!m.replaceAll(toSupplementaries("test")).
2612             equals(toSupplementaries("testzzztestzzztest")))
2613             failCount++;
2614 
2615         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2616         if (!m.replaceAll(toSupplementaries("test")).
2617             equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2618             failCount++;
2619 
2620         // Global substitution with groups
2621         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2622         result = m.replaceAll("$1");
2623         if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2624             failCount++;
2625 
2626         report("Global Substitution");
2627     }
2628 
2629     /**
2630      * Tests the usage of Matcher.appendReplacement() with literal
2631      * and group substitutions.
2632      */
2633     private static void stringbufferSubstitute() throws Exception {
2634         // SB substitution with literal
2635         String blah = "zzzblahzzz";
2636         Pattern p = Pattern.compile("blah");
2637         Matcher m = p.matcher(blah);
2638         StringBuffer result = new StringBuffer();
2639         try {
2640             m.appendReplacement(result, "blech");
2641             failCount++;
2642         } catch (IllegalStateException e) {
2643         }
2644         m.find();
2645         m.appendReplacement(result, "blech");
2646         if (!result.toString().equals("zzzblech"))
2647             failCount++;
2648 
2649         m.appendTail(result);
2650         if (!result.toString().equals("zzzblechzzz"))
2651             failCount++;
2652 
2653         // SB substitution with groups
2654         blah = "zzzabcdzzz";
2655         p = Pattern.compile("(ab)(cd)*");
2656         m = p.matcher(blah);
2657         result = new StringBuffer();
2658         try {
2659             m.appendReplacement(result, "$1");
2660             failCount++;
2661         } catch (IllegalStateException e) {
2662         }
2663         m.find();
2664         m.appendReplacement(result, "$1");
2665         if (!result.toString().equals("zzzab"))
2666             failCount++;
2667 
2668         m.appendTail(result);
2669         if (!result.toString().equals("zzzabzzz"))
2670             failCount++;
2671 
2672         // SB substitution with 3 groups
2673         blah = "zzzabcdcdefzzz";
2674         p = Pattern.compile("(ab)(cd)*(ef)");
2675         m = p.matcher(blah);
2676         result = new StringBuffer();
2677         try {
2678             m.appendReplacement(result, "$1w$2w$3");
2679             failCount++;
2680         } catch (IllegalStateException e) {
2681         }
2682         m.find();
2683         m.appendReplacement(result, "$1w$2w$3");
2684         if (!result.toString().equals("zzzabwcdwef"))
2685             failCount++;
2686 
2687         m.appendTail(result);
2688         if (!result.toString().equals("zzzabwcdwefzzz"))
2689             failCount++;
2690 
2691         // SB substitution with groups and three matches
2692         // skipping middle match
2693         blah = "zzzabcdzzzabcddzzzabcdzzz";
2694         p = Pattern.compile("(ab)(cd*)");
2695         m = p.matcher(blah);
2696         result = new StringBuffer();
2697         try {
2698             m.appendReplacement(result, "$1");
2699             failCount++;
2700         } catch (IllegalStateException e) {
2701         }
2702         m.find();
2703         m.appendReplacement(result, "$1");
2704         if (!result.toString().equals("zzzab"))
2705             failCount++;
2706 
2707         m.find();
2708         m.find();
2709         m.appendReplacement(result, "$2");
2710         if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2711             failCount++;
2712 
2713         m.appendTail(result);
2714         if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2715             failCount++;
2716 
2717         // Check to make sure escaped $ is ignored
2718         blah = "zzzabcdcdefzzz";
2719         p = Pattern.compile("(ab)(cd)*(ef)");
2720         m = p.matcher(blah);
2721         result = new StringBuffer();
2722         m.find();
2723         m.appendReplacement(result, "$1w\\$2w$3");
2724         if (!result.toString().equals("zzzabw$2wef"))
2725             failCount++;
2726 
2727         m.appendTail(result);
2728         if (!result.toString().equals("zzzabw$2wefzzz"))
2729             failCount++;
2730 
2731         // Check to make sure a reference to nonexistent group causes error
2732         blah = "zzzabcdcdefzzz";
2733         p = Pattern.compile("(ab)(cd)*(ef)");
2734         m = p.matcher(blah);
2735         result = new StringBuffer();
2736         m.find();
2737         try {
2738             m.appendReplacement(result, "$1w$5w$3");
2739             failCount++;
2740         } catch (IndexOutOfBoundsException ioobe) {
2741             // Correct result
2742         }
2743 
2744         // Check double digit group references
2745         blah = "zzz123456789101112zzz";
2746         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2747         m = p.matcher(blah);
2748         result = new StringBuffer();
2749         m.find();
2750         m.appendReplacement(result, "$1w$11w$3");
2751         if (!result.toString().equals("zzz1w11w3"))
2752             failCount++;
2753 
2754         // Check to make sure it backs off $15 to $1 if only three groups
2755         blah = "zzzabcdcdefzzz";
2756         p = Pattern.compile("(ab)(cd)*(ef)");
2757         m = p.matcher(blah);
2758         result = new StringBuffer();
2759         m.find();
2760         m.appendReplacement(result, "$1w$15w$3");
2761         if (!result.toString().equals("zzzabwab5wef"))
2762             failCount++;
2763 
2764 
2765         // Supplementary character test
2766         // SB substitution with literal
2767         blah = toSupplementaries("zzzblahzzz");
2768         p = Pattern.compile(toSupplementaries("blah"));
2769         m = p.matcher(blah);
2770         result = new StringBuffer();
2771         try {
2772             m.appendReplacement(result, toSupplementaries("blech"));
2773             failCount++;
2774         } catch (IllegalStateException e) {
2775         }
2776         m.find();
2777         m.appendReplacement(result, toSupplementaries("blech"));
2778         if (!result.toString().equals(toSupplementaries("zzzblech")))
2779             failCount++;
2780 
2781         m.appendTail(result);
2782         if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2783             failCount++;
2784 
2785         // SB substitution with groups
2786         blah = toSupplementaries("zzzabcdzzz");
2787         p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2788         m = p.matcher(blah);
2789         result = new StringBuffer();
2790         try {
2791             m.appendReplacement(result, "$1");
2792             failCount++;
2793         } catch (IllegalStateException e) {
2794         }
2795         m.find();
2796         m.appendReplacement(result, "$1");
2797         if (!result.toString().equals(toSupplementaries("zzzab")))
2798             failCount++;
2799 
2800         m.appendTail(result);
2801         if (!result.toString().equals(toSupplementaries("zzzabzzz")))
2802             failCount++;
2803 
2804         // SB substitution with 3 groups
2805         blah = toSupplementaries("zzzabcdcdefzzz");
2806         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2807         m = p.matcher(blah);
2808         result = new StringBuffer();
2809         try {
2810             m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2811             failCount++;
2812         } catch (IllegalStateException e) {
2813         }
2814         m.find();
2815         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2816         if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
2817             failCount++;
2818 
2819         m.appendTail(result);
2820         if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
2821             failCount++;
2822 
2823         // SB substitution with groups and three matches
2824         // skipping middle match
2825         blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2826         p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2827         m = p.matcher(blah);
2828         result = new StringBuffer();
2829         try {
2830             m.appendReplacement(result, "$1");
2831             failCount++;
2832         } catch (IllegalStateException e) {
2833         }
2834         m.find();
2835         m.appendReplacement(result, "$1");
2836         if (!result.toString().equals(toSupplementaries("zzzab")))
2837             failCount++;
2838 
2839         m.find();
2840         m.find();
2841         m.appendReplacement(result, "$2");
2842         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
2843             failCount++;
2844 
2845         m.appendTail(result);
2846         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
2847             failCount++;
2848 
2849         // Check to make sure escaped $ is ignored
2850         blah = toSupplementaries("zzzabcdcdefzzz");
2851         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2852         m = p.matcher(blah);
2853         result = new StringBuffer();
2854         m.find();
2855         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2856         if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
2857             failCount++;
2858 
2859         m.appendTail(result);
2860         if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
2861             failCount++;
2862 
2863         // Check to make sure a reference to nonexistent group causes error
2864         blah = toSupplementaries("zzzabcdcdefzzz");
2865         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2866         m = p.matcher(blah);
2867         result = new StringBuffer();
2868         m.find();
2869         try {
2870             m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
2871             failCount++;
2872         } catch (IndexOutOfBoundsException ioobe) {
2873             // Correct result
2874         }
2875 
2876         // Check double digit group references
2877         blah = toSupplementaries("zzz123456789101112zzz");
2878         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2879         m = p.matcher(blah);
2880         result = new StringBuffer();
2881         m.find();
2882         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
2883         if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
2884             failCount++;
2885 
2886         // Check to make sure it backs off $15 to $1 if only three groups
2887         blah = toSupplementaries("zzzabcdcdefzzz");
2888         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2889         m = p.matcher(blah);
2890         result = new StringBuffer();
2891         m.find();
2892         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
2893         if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
2894             failCount++;
2895 
2896         // Check nothing has been appended into the output buffer if
2897         // the replacement string triggers IllegalArgumentException.
2898         p = Pattern.compile("(abc)");
2899         m = p.matcher("abcd");
2900         result = new StringBuffer();
2901         m.find();
2902         try {
2903             m.appendReplacement(result, ("xyz$g"));
2904             failCount++;
2905         } catch (IllegalArgumentException iae) {
2906             if (result.length() != 0)
2907                 failCount++;
2908         }
2909 
2910         report("SB Substitution");
2911     }
2912 
2913     /*
2914      * 5 groups of characters are created to make a substitution string.
2915      * A base string will be created including random lead chars, the
2916      * substitution string, and random trailing chars.
2917      * A pattern containing the 5 groups is searched for and replaced with:
2918      * random group + random string + random group.
2919      * The results are checked for correctness.
2920      */
2921     private static void substitutionBasher() {
2922         for (int runs = 0; runs<1000; runs++) {
2923             // Create a base string to work in
2924             int leadingChars = generator.nextInt(10);
2925             StringBuffer baseBuffer = new StringBuffer(100);
2926             String leadingString = getRandomAlphaString(leadingChars);
2927             baseBuffer.append(leadingString);
2928 
2929             // Create 5 groups of random number of random chars
2930             // Create the string to substitute
2931             // Create the pattern string to search for
2932             StringBuffer bufferToSub = new StringBuffer(25);
2933             StringBuffer bufferToPat = new StringBuffer(50);
2934             String[] groups = new String[5];
2935             for(int i=0; i<5; i++) {
2936                 int aGroupSize = generator.nextInt(5)+1;
2937                 groups[i] = getRandomAlphaString(aGroupSize);
2938                 bufferToSub.append(groups[i]);
2939                 bufferToPat.append('(');
2940                 bufferToPat.append(groups[i]);
2941                 bufferToPat.append(')');
2942             }
2943             String stringToSub = bufferToSub.toString();
2944             String pattern = bufferToPat.toString();
2945 
2946             // Place sub string into working string at random index
2947             baseBuffer.append(stringToSub);
2948 
2949             // Append random chars to end
2950             int trailingChars = generator.nextInt(10);
2951             String trailingString = getRandomAlphaString(trailingChars);
2952             baseBuffer.append(trailingString);
2953             String baseString = baseBuffer.toString();
2954 
2955             // Create test pattern and matcher
2956             Pattern p = Pattern.compile(pattern);
2957             Matcher m = p.matcher(baseString);
2958 
2959             // Reject candidate if pattern happens to start early
2960             m.find();
2961             if (m.start() < leadingChars)
2962                 continue;
2963 
2964             // Reject candidate if more than one match
2965             if (m.find())
2966                 continue;
2967 
2968             // Construct a replacement string with :
2969             // random group + random string + random group
2970             StringBuffer bufferToRep = new StringBuffer();
2971             int groupIndex1 = generator.nextInt(5);
2972             bufferToRep.append("$" + (groupIndex1 + 1));
2973             String randomMidString = getRandomAlphaString(5);
2974             bufferToRep.append(randomMidString);
2975             int groupIndex2 = generator.nextInt(5);
2976             bufferToRep.append("$" + (groupIndex2 + 1));
2977             String replacement = bufferToRep.toString();
2978 
2979             // Do the replacement
2980             String result = m.replaceAll(replacement);
2981 
2982             // Construct expected result
2983             StringBuffer bufferToRes = new StringBuffer();
2984             bufferToRes.append(leadingString);
2985             bufferToRes.append(groups[groupIndex1]);
2986             bufferToRes.append(randomMidString);
2987             bufferToRes.append(groups[groupIndex2]);
2988             bufferToRes.append(trailingString);
2989             String expectedResult = bufferToRes.toString();
2990 
2991             // Check results
2992             if (!result.equals(expectedResult))
2993                 failCount++;
2994         }
2995 
2996         report("Substitution Basher");
2997     }
2998 
2999     /**
3000      * Checks the handling of some escape sequences that the Pattern
3001      * class should process instead of the java compiler. These are
     * not in the file because the escapes should be processed
3003      * by the Pattern class when the regex is compiled.
3004      */
3005     private static void escapes() throws Exception {
3006         Pattern p = Pattern.compile("\\043");
3007         Matcher m = p.matcher("#");
3008         if (!m.find())
3009             failCount++;
3010 
3011         p = Pattern.compile("\\x23");
3012         m = p.matcher("#");
3013         if (!m.find())
3014             failCount++;
3015 
3016         p = Pattern.compile("\\u0023");
3017         m = p.matcher("#");
3018         if (!m.find())
3019             failCount++;
3020 
3021         report("Escape sequences");
3022     }
3023 
3024     /**
3025      * Checks the handling of blank input situations. These
3026      * tests are incompatible with my test file format.
3027      */
3028     private static void blankInput() throws Exception {
3029         Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3030         Matcher m = p.matcher("");
3031         if (m.find())
3032             failCount++;
3033 
3034         p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3035         m = p.matcher("");
3036         if (!m.find())
3037             failCount++;
3038 
3039         p = Pattern.compile("abc");
3040         m = p.matcher("");
3041         if (m.find())
3042             failCount++;
3043 
3044         p = Pattern.compile("a*");
3045         m = p.matcher("");
3046         if (!m.find())
3047             failCount++;
3048 
3049         report("Blank input");
3050     }
3051 
3052     /**
3053      * Tests the Boyer-Moore pattern matching of a character sequence
3054      * on randomly generated patterns.
3055      */
3056     private static void bm() throws Exception {
3057         doBnM('a');
3058         report("Boyer Moore (ASCII)");
3059 
3060         doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3061         report("Boyer Moore (Supplementary)");
3062     }
3063 
3064     private static void doBnM(int baseCharacter) throws Exception {
3065         int achar=0;
3066 
3067         for (int i=0; i<100; i++) {
3068             // Create a short pattern to search for
3069             int patternLength = generator.nextInt(7) + 4;
3070             StringBuffer patternBuffer = new StringBuffer(patternLength);
3071             for (int x=0; x<patternLength; x++) {
3072                 int ch = baseCharacter + generator.nextInt(26);
3073                 if (Character.isSupplementaryCodePoint(ch)) {
3074                     patternBuffer.append(Character.toChars(ch));
3075                 } else {
3076                     patternBuffer.append((char)ch);
3077                 }
3078             }
3079             String pattern =  patternBuffer.toString();
3080             Pattern p = Pattern.compile(pattern);
3081 
3082             // Create a buffer with random ASCII chars that does
3083             // not match the sample
3084             String toSearch = null;
3085             StringBuffer s = null;
3086             Matcher m = p.matcher("");
3087             do {
3088                 s = new StringBuffer(100);
3089                 for (int x=0; x<100; x++) {
3090                     int ch = baseCharacter + generator.nextInt(26);
3091                     if (Character.isSupplementaryCodePoint(ch)) {
3092                         s.append(Character.toChars(ch));
3093                     } else {
3094                         s.append((char)ch);
3095                     }
3096                 }
3097                 toSearch = s.toString();
3098                 m.reset(toSearch);
3099             } while (m.find());
3100 
3101             // Insert the pattern at a random spot
3102             int insertIndex = generator.nextInt(99);
3103             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3104                 insertIndex++;
3105             s = s.insert(insertIndex, pattern);
3106             toSearch = s.toString();
3107 
3108             // Make sure that the pattern is found
3109             m.reset(toSearch);
3110             if (!m.find())
3111                 failCount++;
3112 
3113             // Make sure that the match text is the pattern
3114             if (!m.group().equals(pattern))
3115                 failCount++;
3116 
3117             // Make sure match occured at insertion point
3118             if (m.start() != insertIndex)
3119                 failCount++;
3120         }
3121     }
3122 
3123     /**
3124      * Tests the matching of slices on randomly generated patterns.
3125      * The Boyer-Moore optimization is not done on these patterns
3126      * because it uses unicode case folding.
3127      */
3128     private static void slice() throws Exception {
3129         doSlice(Character.MAX_VALUE);
3130         report("Slice");
3131 
3132         doSlice(Character.MAX_CODE_POINT);
3133         report("Slice (Supplementary)");
3134     }
3135 
3136     private static void doSlice(int maxCharacter) throws Exception {
3137         Random generator = new Random();
3138         int achar=0;
3139 
3140         for (int i=0; i<100; i++) {
3141             // Create a short pattern to search for
3142             int patternLength = generator.nextInt(7) + 4;
3143             StringBuffer patternBuffer = new StringBuffer(patternLength);
3144             for (int x=0; x<patternLength; x++) {
3145                 int randomChar = 0;
3146                 while (!Character.isLetterOrDigit(randomChar))
3147                     randomChar = generator.nextInt(maxCharacter);
3148                 if (Character.isSupplementaryCodePoint(randomChar)) {
3149                     patternBuffer.append(Character.toChars(randomChar));
3150                 } else {
3151                     patternBuffer.append((char) randomChar);
3152                 }
3153             }
3154             String pattern =  patternBuffer.toString();
3155             Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3156 
3157             // Create a buffer with random chars that does not match the sample
3158             String toSearch = null;
3159             StringBuffer s = null;
3160             Matcher m = p.matcher("");
3161             do {
3162                 s = new StringBuffer(100);
3163                 for (int x=0; x<100; x++) {
3164                     int randomChar = 0;
3165                     while (!Character.isLetterOrDigit(randomChar))
3166                         randomChar = generator.nextInt(maxCharacter);
3167                     if (Character.isSupplementaryCodePoint(randomChar)) {
3168                         s.append(Character.toChars(randomChar));
3169                     } else {
3170                         s.append((char) randomChar);
3171                     }
3172                 }
3173                 toSearch = s.toString();
3174                 m.reset(toSearch);
3175             } while (m.find());
3176 
3177             // Insert the pattern at a random spot
3178             int insertIndex = generator.nextInt(99);
3179             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3180                 insertIndex++;
3181             s = s.insert(insertIndex, pattern);
3182             toSearch = s.toString();
3183 
3184             // Make sure that the pattern is found
3185             m.reset(toSearch);
3186             if (!m.find())
3187                 failCount++;
3188 
3189             // Make sure that the match text is the pattern
3190             if (!m.group().equals(pattern))
3191                 failCount++;
3192 
3193             // Make sure match occured at insertion point
3194             if (m.start() != insertIndex)
3195                 failCount++;
3196         }
3197     }
3198 
3199     private static void explainFailure(String pattern, String data,
3200                                        String expected, String actual) {
3201         System.err.println("----------------------------------------");
3202         System.err.println("Pattern = "+pattern);
3203         System.err.println("Data = "+data);
3204         System.err.println("Expected = " + expected);
3205         System.err.println("Actual   = " + actual);
3206     }
3207 
3208     private static void explainFailure(String pattern, String data,
3209                                        Throwable t) {
3210         System.err.println("----------------------------------------");
3211         System.err.println("Pattern = "+pattern);
3212         System.err.println("Data = "+data);
3213         t.printStackTrace(System.err);
3214     }
3215 
3216     // Testing examples from a file
3217 
3218     /**
3219      * Goes through the file "TestCases.txt" and creates many patterns
3220      * described in the file, matching the patterns against input lines in
3221      * the file, and comparing the results against the correct results
3222      * also found in the file. The file format is described in comments
3223      * at the head of the file.
3224      */
3225     private static void processFile(String fileName) throws Exception {
3226         File testCases = new File(System.getProperty("test.src", "."),
3227                                   fileName);
3228         FileInputStream in = new FileInputStream(testCases);
3229         BufferedReader r = new BufferedReader(new InputStreamReader(in));
3230 
3231         // Process next test case.
3232         String aLine;
3233         while((aLine = r.readLine()) != null) {
3234             // Read a line for pattern
3235             String patternString = grabLine(r);
3236             Pattern p = null;
3237             try {
3238                 p = compileTestPattern(patternString);
3239             } catch (PatternSyntaxException e) {
3240                 String dataString = grabLine(r);
3241                 String expectedResult = grabLine(r);
3242                 if (expectedResult.startsWith("error"))
3243                     continue;
3244                 explainFailure(patternString, dataString, e);
3245                 failCount++;
3246                 continue;
3247             }
3248 
3249             // Read a line for input string
3250             String dataString = grabLine(r);
3251             Matcher m = p.matcher(dataString);
3252             StringBuffer result = new StringBuffer();
3253 
3254             // Check for IllegalStateExceptions before a match
3255             failCount += preMatchInvariants(m);
3256 
3257             boolean found = m.find();
3258 
3259             if (found)
3260                 failCount += postTrueMatchInvariants(m);
3261             else
3262                 failCount += postFalseMatchInvariants(m);
3263 
3264             if (found) {
3265                 result.append("true ");
3266                 result.append(m.group(0) + " ");
3267             } else {
3268                 result.append("false ");
3269             }
3270 
3271             result.append(m.groupCount());
3272 
3273             if (found) {
3274                 for (int i=1; i<m.groupCount()+1; i++)
3275                     if (m.group(i) != null)
3276                         result.append(" " +m.group(i));
3277             }
3278 
3279             // Read a line for the expected result
3280             String expectedResult = grabLine(r);
3281 
3282             if (!result.toString().equals(expectedResult)) {
3283                 explainFailure(patternString, dataString, expectedResult, result.toString());
3284                 failCount++;
3285             }
3286         }
3287 
3288         report(fileName);
3289     }
3290 
3291     private static int preMatchInvariants(Matcher m) {
3292         int failCount = 0;
3293         try {
3294             m.start();
3295             failCount++;
3296         } catch (IllegalStateException ise) {}
3297         try {
3298             m.end();
3299             failCount++;
3300         } catch (IllegalStateException ise) {}
3301         try {
3302             m.group();
3303             failCount++;
3304         } catch (IllegalStateException ise) {}
3305         return failCount;
3306     }
3307 
3308     private static int postFalseMatchInvariants(Matcher m) {
3309         int failCount = 0;
3310         try {
3311             m.group();
3312             failCount++;
3313         } catch (IllegalStateException ise) {}
3314         try {
3315             m.start();
3316             failCount++;
3317         } catch (IllegalStateException ise) {}
3318         try {
3319             m.end();
3320             failCount++;
3321         } catch (IllegalStateException ise) {}
3322         return failCount;
3323     }
3324 
3325     private static int postTrueMatchInvariants(Matcher m) {
3326         int failCount = 0;
3327         //assert(m.start() = m.start(0);
3328         if (m.start() != m.start(0))
3329             failCount++;
3330         //assert(m.end() = m.end(0);
3331         if (m.start() != m.start(0))
3332             failCount++;
3333         //assert(m.group() = m.group(0);
3334         if (!m.group().equals(m.group(0)))
3335             failCount++;
3336         try {
3337             m.group(50);
3338             failCount++;
3339         } catch (IndexOutOfBoundsException ise) {}
3340 
3341         return failCount;
3342     }
3343 
3344     private static Pattern compileTestPattern(String patternString) {
3345         if (!patternString.startsWith("'")) {
3346             return Pattern.compile(patternString);
3347         }
3348 
3349         int break1 = patternString.lastIndexOf("'");
3350         String flagString = patternString.substring(
3351                                           break1+1, patternString.length());
3352         patternString = patternString.substring(1, break1);
3353 
3354         if (flagString.equals("i"))
3355             return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3356 
3357         if (flagString.equals("m"))
3358             return Pattern.compile(patternString, Pattern.MULTILINE);
3359 
3360         return Pattern.compile(patternString);
3361     }
3362 
3363     /**
3364      * Reads a line from the input file. Keeps reading lines until a non
3365      * empty non comment line is read. If the line contains a \n then
3366      * these two characters are replaced by a newline char. If a \\uxxxx
3367      * sequence is read then the sequence is replaced by the unicode char.
3368      */
3369     private static String grabLine(BufferedReader r) throws Exception {
3370         int index = 0;
3371         String line = r.readLine();
3372         while (line.startsWith("//") || line.length() < 1)
3373             line = r.readLine();
3374         while ((index = line.indexOf("\\n")) != -1) {
3375             StringBuffer temp = new StringBuffer(line);
3376             temp.replace(index, index+2, "\n");
3377             line = temp.toString();
3378         }
3379         while ((index = line.indexOf("\\u")) != -1) {
3380             StringBuffer temp = new StringBuffer(line);
3381             String value = temp.substring(index+2, index+6);
3382             char aChar = (char)Integer.parseInt(value, 16);
3383             String unicodeChar = "" + aChar;
3384             temp.replace(index, index+6, unicodeChar);
3385             line = temp.toString();
3386         }
3387 
3388         return line;
3389     }
3390 
3391     private static void check(Pattern p, String s, String g, String expected) {
3392         Matcher m = p.matcher(s);
3393         m.find();
3394         if (!m.group(g).equals(expected) ||
3395             s.charAt(m.start(g)) != expected.charAt(0) ||
3396             s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1))
3397             failCount++;
3398     }
3399 
3400     private static void checkReplaceFirst(String p, String s, String r, String expected)
3401     {
3402         if (!expected.equals(Pattern.compile(p)
3403                                     .matcher(s)
3404                                     .replaceFirst(r)))
3405             failCount++;
3406     }
3407 
3408     private static void checkReplaceAll(String p, String s, String r, String expected)
3409     {
3410         if (!expected.equals(Pattern.compile(p)
3411                                     .matcher(s)
3412                                     .replaceAll(r)))
3413             failCount++;
3414     }
3415 
3416     private static void checkExpectedFail(String p) {
3417         try {
3418             Pattern.compile(p);
3419         } catch (PatternSyntaxException pse) {
3420             //pse.printStackTrace();
3421             return;
3422         }
3423         failCount++;
3424     }
3425 
3426     private static void checkExpectedIAE(Matcher m, String g) {
3427         m.find();
3428         try {
3429             m.group(g);
3430         } catch (IllegalArgumentException x) {
3431             //iae.printStackTrace();
3432             try {
3433                 m.start(g);
3434             } catch (IllegalArgumentException xx) {
3435                 try {
3436                     m.start(g);
3437                 } catch (IllegalArgumentException xxx) {
3438                     return;
3439                 }
3440             }
3441         }
3442         failCount++;
3443     }
3444 
3445     private static void checkExpectedNPE(Matcher m) {
3446         m.find();
3447         try {
3448             m.group(null);
3449         } catch (NullPointerException x) {
3450             try {
3451                 m.start(null);
3452             } catch (NullPointerException xx) {
3453                 try {
3454                     m.end(null);
3455                 } catch (NullPointerException xxx) {
3456                     return;
3457                 }
3458             }
3459         }
3460         failCount++;
3461     }
3462 
3463     private static void namedGroupCaptureTest() throws Exception {
3464         check(Pattern.compile("x+(?<gname>y+)z+"),
3465               "xxxyyyzzz",
3466               "gname",
3467               "yyy");
3468 
3469         check(Pattern.compile("x+(?<gname8>y+)z+"),
3470               "xxxyyyzzz",
3471               "gname8",
3472               "yyy");
3473 
3474         //backref
3475         Pattern pattern = Pattern.compile("(a*)bc\\1");
3476         check(pattern, "zzzaabcazzz", true);  // found "abca"
3477 
3478         check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
3479               "zzzaabcaazzz", true);
3480 
3481         check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
3482               "abcdefabc", true);
3483 
3484         check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
3485               "abcdefghijkk", true);
3486 
3487         // Supplementary character tests
3488         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3489               toSupplementaries("zzzaabcazzz"), true);
3490 
3491         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3492               toSupplementaries("zzzaabcaazzz"), true);
3493 
3494         check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
3495               toSupplementaries("abcdefabc"), true);
3496 
3497         check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
3498                               "(?<gname>" +
3499                               toSupplementaries("k)") + "\\k<gname>"),
3500               toSupplementaries("abcdefghijkk"), true);
3501 
3502         check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
3503               "xxxyyyzzzyyy",
3504               "gname",
3505               "yyy");
3506 
3507         //replaceFirst/All
3508         checkReplaceFirst("(?<gn>ab)(c*)",
3509                           "abccczzzabcczzzabccc",
3510                           "${gn}",
3511                           "abzzzabcczzzabccc");
3512 
3513         checkReplaceAll("(?<gn>ab)(c*)",
3514                         "abccczzzabcczzzabccc",
3515                         "${gn}",
3516                         "abzzzabzzzab");
3517 
3518 
3519         checkReplaceFirst("(?<gn>ab)(c*)",
3520                           "zzzabccczzzabcczzzabccczzz",
3521                           "${gn}",
3522                           "zzzabzzzabcczzzabccczzz");
3523 
3524         checkReplaceAll("(?<gn>ab)(c*)",
3525                         "zzzabccczzzabcczzzabccczzz",
3526                         "${gn}",
3527                         "zzzabzzzabzzzabzzz");
3528 
3529         checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
3530                           "zzzabccczzzabcczzzabccczzz",
3531                           "${gn2}",
3532                           "zzzccczzzabcczzzabccczzz");
3533 
3534         checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
3535                         "zzzabccczzzabcczzzabccczzz",
3536                         "${gn2}",
3537                         "zzzccczzzcczzzccczzz");
3538 
3539         //toSupplementaries("(ab)(c*)"));
3540         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3541                            ")(?<gn2>" + toSupplementaries("c") + "*)",
3542                           toSupplementaries("abccczzzabcczzzabccc"),
3543                           "${gn1}",
3544                           toSupplementaries("abzzzabcczzzabccc"));
3545 
3546 
3547         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3548                         ")(?<gn2>" + toSupplementaries("c") + "*)",
3549                         toSupplementaries("abccczzzabcczzzabccc"),
3550                         "${gn1}",
3551                         toSupplementaries("abzzzabzzzab"));
3552 
3553         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
3554                            ")(?<gn2>" + toSupplementaries("c") + "*)",
3555                           toSupplementaries("abccczzzabcczzzabccc"),
3556                           "${gn2}",
3557                           toSupplementaries("ccczzzabcczzzabccc"));
3558 
3559 
3560         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
3561                         ")(?<gn2>" + toSupplementaries("c") + "*)",
3562                         toSupplementaries("abccczzzabcczzzabccc"),
3563                         "${gn2}",
3564                         toSupplementaries("ccczzzcczzzccc"));
3565 
3566         checkReplaceFirst("(?<dog>Dog)AndCat",
3567                           "zzzDogAndCatzzzDogAndCatzzz",
3568                           "${dog}",
3569                           "zzzDogzzzDogAndCatzzz");
3570 
3571 
3572         checkReplaceAll("(?<dog>Dog)AndCat",
3573                           "zzzDogAndCatzzzDogAndCatzzz",
3574                           "${dog}",
3575                           "zzzDogzzzDogzzz");
3576 
3577         // backref in Matcher & String
3578         if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
3579             !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
3580             failCount++;
3581 
3582         // negative
3583         checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
3584         checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
3585         checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
3586         checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
3587         checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
3588         checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3589                          "gnameX");
3590         checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
3591         report("NamedGroupCapture");
3592     }
3593 
3594     // This is for bug 6969132
3595     private static void nonBmpClassComplementTest() throws Exception {
3596         Pattern p = Pattern.compile("\\P{Lu}");
3597         Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3598         if (m.find() && m.start() == 1)
3599             failCount++;
3600 
3601         // from a unicode category
3602         p = Pattern.compile("\\P{Lu}");
3603         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3604         if (m.find())
3605             failCount++;
3606         if (!m.hitEnd())
3607             failCount++;
3608 
3609         // block
3610         p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
3611         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3612         if (m.find() && m.start() == 1)
3613             failCount++;
3614 
3615         report("NonBmpClassComplement");
3616     }
3617 
3618     private static void unicodePropertiesTest() throws Exception {
3619         // different forms
3620         if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
3621             !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
3622             !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
3623             !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
3624             !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
3625             !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
3626             !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
3627             !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
3628             !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
3629             !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
3630             failCount++;
3631 
3632         Matcher common  = Pattern.compile("\\p{script=Common}").matcher("");
3633         Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
3634         Matcher lastSM  = common;
3635         Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
3636 
3637         Matcher latin  = Pattern.compile("\\p{block=basic_latin}").matcher("");
3638         Matcher greek  = Pattern.compile("\\p{InGreek}").matcher("");
3639         Matcher lastBM = latin;
3640         Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
3641 
3642         for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
3643             if (cp >= 0x30000 && (cp & 0x70) == 0){
3644                 continue;  // only pick couple code points, they are the same
3645             }
3646 
3647             // Unicode Script
3648             Character.UnicodeScript script = Character.UnicodeScript.of(cp);
3649             Matcher m;
3650             String str = new String(Character.toChars(cp));
3651             if (script == lastScript) {
3652                  m = lastSM;
3653                  m.reset(str);
3654             } else {
3655                  m  = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
3656             }
3657             if (!m.matches()) {
3658                 failCount++;
3659             }
3660             Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
3661             other.reset(str);
3662             if (other.matches()) {
3663                 failCount++;
3664             }
3665             lastSM = m;
3666             lastScript = script;
3667 
3668             // Unicode Block
3669             Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
3670             if (block == null) {
3671                 //System.out.printf("Not a Block: cp=%x%n", cp);
3672                 continue;
3673             }
3674             if (block == lastBlock) {
3675                  m = lastBM;
3676                  m.reset(str);
3677             } else {
3678                  m  = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
3679             }
3680             if (!m.matches()) {
3681                 failCount++;
3682             }
3683             other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
3684             other.reset(str);
3685             if (other.matches()) {
3686                 failCount++;
3687             }
3688             lastBM = m;
3689             lastBlock = block;
3690         }
3691         report("unicodeProperties");
3692     }
3693 
3694     private static void unicodeHexNotationTest() throws Exception {
3695 
3696         // negative
3697         checkExpectedFail("\\x{-23}");
3698         checkExpectedFail("\\x{110000}");
3699         checkExpectedFail("\\x{}");
3700         checkExpectedFail("\\x{AB[ef]");
3701 
3702         // codepoint
3703         check("^\\x{1033c}$",              "\uD800\uDF3C", true);
3704         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
3705         check("^\\x{D800}\\x{DF3c}+$",     "\uD800\uDF3C", false);
3706         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
3707 
3708         // in class
3709         check("^[\\x{D800}\\x{DF3c}]+$",   "\uD800\uDF3C", false);
3710         check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
3711         check("^[\\x{D800}\\x{DF3C}]+$",   "\uD800\uDF3C", false);
3712         check("^[\\x{DF3C}\\x{D800}]+$",   "\uD800\uDF3C", false);
3713         check("^[\\x{D800}\\x{DF3C}]+$",   "\uDF3C\uD800", true);
3714         check("^[\\x{DF3C}\\x{D800}]+$",   "\uDF3C\uD800", true);
3715 
3716         for (int cp = 0; cp <= 0x10FFFF; cp++) {
3717              String s = "A" + new String(Character.toChars(cp)) + "B";
3718              String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
3719                                              : String.format("\\u%04x\\u%04x",
3720                                                (int) Character.toChars(cp)[0],
3721                                                (int) Character.toChars(cp)[1]);
3722              String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
3723              if (!Pattern.matches("A" + hexUTF16 + "B", s))
3724                  failCount++;
3725              if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
3726                  failCount++;
3727              if (!Pattern.matches("A" + hexCodePoint + "B", s))
3728                  failCount++;
3729              if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
3730                  failCount++;
3731          }
3732          report("unicodeHexNotation");
3733     }
3734 
    /**
     * Compares the predefined character classes against reference
     * predicates for every code point from 1 up to (not including)
     * 0x30000. Each class is compiled in several variants: default mode,
     * with the UNICODE_CHARACTER_CLASS flag, with the embedded (?U) flag,
     * as an "Is..." binary property and as a "java..." method class.
     * All comparisons for one code point form a single short-circuit
     * condition, so at most one failure is counted per code point.
     * A few fixed word-boundary samples follow the loop. Results are
     * reported under "unicodePredefinedClasses".
     *
     * NOTE(review): POSIX_ASCII and POSIX_Unicode are helpers defined
     * outside this method; they are presumably the reference
     * implementations of the ASCII-only and Unicode-aware definitions --
     * confirm against their source.
     */
    private static void unicodeClassesTest() throws Exception {

        // default mode (no flags)
        Matcher lower  = Pattern.compile("\\p{Lower}").matcher("");
        Matcher upper  = Pattern.compile("\\p{Upper}").matcher("");
        Matcher ASCII  = Pattern.compile("\\p{ASCII}").matcher("");
        Matcher alpha  = Pattern.compile("\\p{Alpha}").matcher("");
        Matcher digit  = Pattern.compile("\\p{Digit}").matcher("");
        Matcher alnum  = Pattern.compile("\\p{Alnum}").matcher("");
        Matcher punct  = Pattern.compile("\\p{Punct}").matcher("");
        Matcher graph  = Pattern.compile("\\p{Graph}").matcher("");
        Matcher print  = Pattern.compile("\\p{Print}").matcher("");
        Matcher blank  = Pattern.compile("\\p{Blank}").matcher("");
        Matcher cntrl  = Pattern.compile("\\p{Cntrl}").matcher("");
        Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
        Matcher space  = Pattern.compile("\\p{Space}").matcher("");
        Matcher bound  = Pattern.compile("\\b").matcher("");
        Matcher word   = Pattern.compile("\\w++").matcher("");
        // UNICODE_CHARACTER_CLASS
        Matcher lowerU  = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher upperU  = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher ASCIIU  = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher alphaU  = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher digitU  = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher alnumU  = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher punctU  = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher graphU  = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher printU  = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher blankU  = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher cntrlU  = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher spaceU  = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher boundU  = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher wordU   = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        // embedded flag (?U)
        // NOTE(review): these also pass UNICODE_CHARACTER_CLASS, so the
        // embedded flag is not exercised on its own here.
        Matcher lowerEU  = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher graphEU  = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher wordEU   = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");

        // a word wrapped in word boundaries
        Matcher bwb    = Pattern.compile("\\b\\w\\b").matcher("");
        Matcher bwbU   = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        Matcher bwbEU  = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
        // properties
        Matcher lowerP  = Pattern.compile("\\p{IsLowerCase}").matcher("");
        Matcher upperP  = Pattern.compile("\\p{IsUpperCase}").matcher("");
        Matcher titleP  = Pattern.compile("\\p{IsTitleCase}").matcher("");
        Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
        Matcher alphaP  = Pattern.compile("\\p{IsAlphabetic}").matcher("");
        Matcher ideogP  = Pattern.compile("\\p{IsIdeographic}").matcher("");
        Matcher cntrlP  = Pattern.compile("\\p{IsControl}").matcher("");
        Matcher spaceP  = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
        Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
        Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
        Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");

        // javaMethod
        Matcher lowerJ  = Pattern.compile("\\p{javaLowerCase}").matcher("");
        Matcher upperJ  = Pattern.compile("\\p{javaUpperCase}").matcher("");
        Matcher alphaJ  = Pattern.compile("\\p{javaAlphabetic}").matcher("");
        Matcher ideogJ  = Pattern.compile("\\p{javaIdeographic}").matcher("");

        // Each operand resets one matcher to the current code point and
        // compares matches() with the reference predicate; the first
        // disagreement ends evaluation of the whole condition.
        for (int cp = 1; cp < 0x30000; cp++) {
            String str = new String(Character.toChars(cp));
            int type = Character.getType(cp);
            if (// lower
                POSIX_ASCII.isLower(cp)   != lower.reset(str).matches()  ||
                Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
                Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
                Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
                Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
                // upper
                POSIX_ASCII.isUpper(cp)   != upper.reset(str).matches()  ||
                POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
                Character.isUpperCase(cp) != upperP.reset(str).matches() ||
                Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
                // alpha
                POSIX_ASCII.isAlpha(cp)   != alpha.reset(str).matches()  ||
                POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
                Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
                Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
                // digit
                POSIX_ASCII.isDigit(cp)   != digit.reset(str).matches()  ||
                Character.isDigit(cp)     != digitU.reset(str).matches() ||
                // alnum
                POSIX_ASCII.isAlnum(cp)   != alnum.reset(str).matches()  ||
                POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
                // punct
                POSIX_ASCII.isPunct(cp)   != punct.reset(str).matches()  ||
                POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
                // graph
                POSIX_ASCII.isGraph(cp)   != graph.reset(str).matches()  ||
                POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
                POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
                // blank
                POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
                                          != blank.reset(str).matches()  ||
                POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
                // print
                POSIX_ASCII.isPrint(cp)   != print.reset(str).matches()  ||
                POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
                // cntrl
                POSIX_ASCII.isCntrl(cp)   != cntrl.reset(str).matches()  ||
                POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
                (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
                // hexdigit
                POSIX_ASCII.isHexDigit(cp)   != xdigit.reset(str).matches()  ||
                POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
                // space
                POSIX_ASCII.isSpace(cp)   != space.reset(str).matches()  ||
                POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
                POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
                // word
                POSIX_ASCII.isWord(cp)   != word.reset(str).matches()  ||
                POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
                POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
                // bwordb
                POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
                POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
                // properties
                Character.isTitleCase(cp) != titleP.reset(str).matches() ||
                Character.isLetter(cp)    != letterP.reset(str).matches()||
                Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
                Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
                // "==" on purpose: IsAssigned must be the opposite of UNASSIGNED
                (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
                POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
                POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches())
                failCount++;
        }

        // bounds/word align
        twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
        if (!bwbU.reset("\u0180sherman\u0400").matches())
            failCount++;
        twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
        if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
            failCount++;
        twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
        if (!bwbU.reset("\u0724\u0739\u0724").matches())
            failCount++;
        if (!bwbEU.reset("\u0724\u0739\u0724").matches())
            failCount++;
        report("unicodePredefinedClasses");
    }
3877 
3878     private static void horizontalAndVerticalWSTest() throws Exception {
3879         String hws = new String (new char[] {
3880                                      0x09, 0x20, 0xa0, 0x1680, 0x180e,
3881                                      0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
3882                                      0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
3883                                      0x202f, 0x205f, 0x3000 });
3884         String vws = new String (new char[] {
3885                                      0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
3886         if (!Pattern.compile("\\h+").matcher(hws).matches() ||
3887             !Pattern.compile("[\\h]+").matcher(hws).matches())
3888             failCount++;
3889         if (Pattern.compile("\\H").matcher(hws).find() ||
3890             Pattern.compile("[\\H]").matcher(hws).find())
3891             failCount++;
3892         if (!Pattern.compile("\\v+").matcher(vws).matches() ||
3893             !Pattern.compile("[\\v]+").matcher(vws).matches())
3894             failCount++;
3895         if (Pattern.compile("\\V").matcher(vws).find() ||
3896             Pattern.compile("[\\V]").matcher(vws).find())
3897             failCount++;
3898         String prefix = "abcd";
3899         String suffix = "efgh";
3900         String ng = "A";
3901         for (int i = 0; i < hws.length(); i++) {
3902             String c = String.valueOf(hws.charAt(i));
3903             Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
3904             if (!m.find() || !c.equals(m.group()))
3905                 failCount++;
3906             m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
3907             if (!m.find() || !c.equals(m.group()))
3908                 failCount++;
3909 
3910             m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
3911             if (!m.find() || !ng.equals(m.group()))
3912                 failCount++;
3913             m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
3914             if (!m.find() || !ng.equals(m.group()))
3915                 failCount++;
3916         }
3917         for (int i = 0; i < vws.length(); i++) {
3918             String c = String.valueOf(vws.charAt(i));
3919             Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
3920             if (!m.find() || !c.equals(m.group()))
3921                 failCount++;
3922             m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
3923             if (!m.find() || !c.equals(m.group()))
3924                 failCount++;
3925 
3926             m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
3927             if (!m.find() || !ng.equals(m.group()))
3928                 failCount++;
3929             m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
3930             if (!m.find() || !ng.equals(m.group()))
3931                 failCount++;
3932         }
3933         // \v in range is interpreted as 0x0B. This is the undocumented behavior
3934         if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
3935             failCount++;
3936         report("horizontalAndVerticalWSTest");
3937     }
3938 
3939     private static void linebreakTest() throws Exception {
3940         String linebreaks = new String (new char[] {
3941             0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
3942         String crnl = "\r\n";
3943         if (!Pattern.compile("\\R+").matcher(linebreaks).matches() ||
3944             !Pattern.compile("\\R").matcher(crnl).matches() ||
3945             Pattern.compile("\\R\\R").matcher(crnl).matches())
3946             failCount++;
3947         report("linebreakTest");
3948     }
3949 
3950     // #7189363
3951     private static void branchTest() throws Exception {
3952         if (!Pattern.compile("(a)?bc|d").matcher("d").find() ||     // greedy
3953             !Pattern.compile("(a)+bc|d").matcher("d").find() ||
3954             !Pattern.compile("(a)*bc|d").matcher("d").find() ||
3955             !Pattern.compile("(a)??bc|d").matcher("d").find() ||    // reluctant
3956             !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
3957             !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
3958             !Pattern.compile("(a)?+bc|d").matcher("d").find() ||    // possessive
3959             !Pattern.compile("(a)++bc|d").matcher("d").find() ||
3960             !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
3961             !Pattern.compile("(a)?bc|d").matcher("d").matches() ||  // greedy
3962             !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
3963             !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
3964             !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
3965             !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
3966             !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
3967             !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
3968             !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
3969             !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
3970             !Pattern.compile("(a)?bc|de").matcher("de").find() ||   // others
3971             !Pattern.compile("(a)??bc|de").matcher("de").find() ||
3972             !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
3973             !Pattern.compile("(a)??bc|de").matcher("de").matches())
3974             failCount++;
3975         report("branchTest");
3976     }
3977 
3978     // This test is for 8007395
3979     private static void groupCurlyNotFoundSuppTest() throws Exception {
3980         String input = "test this as \ud83d\ude0d";
3981         for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
3982                                           "test(.)*(@[a-zA-Z.]+)",
3983                                           "test([^B])+(@[a-zA-Z.]+)",
3984                                           "test([^B])*(@[a-zA-Z.]+)",
3985                                           "test(\\P{IsControl})+(@[a-zA-Z.]+)",
3986                                           "test(\\P{IsControl})*(@[a-zA-Z.]+)",
3987                                         }) {
3988             Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
3989                                .matcher(input);
3990             try {
3991                 if (m.find()) {
3992                     failCount++;
3993                 }
3994             } catch (Exception x) {
3995                 failCount++;
3996             }
3997         }
3998         report("GroupCurly NotFoundSupp");
3999     }
4000 
4001     // This test is for 8023647
4002     private static void groupCurlyBackoffTest() throws Exception {
4003         if (!"abc1c".matches("(\\w)+1\\1") ||
4004             "abc11".matches("(\\w)+1\\1")) {
4005             failCount++;
4006         }
4007         report("GroupCurly backoff");
4008     }
4009 
4010     // This test is for 8012646
4011     private static void patternAsPredicate() throws Exception {
4012         Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
4013 
4014         if (p.test("")) {
4015             failCount++;
4016         }
4017         if (!p.test("word")) {
4018             failCount++;
4019         }
4020         if (p.test("1234")) {
4021             failCount++;
4022         }
4023         report("Pattern.asPredicate");
4024     }
4025 }