1 /*
   2  * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /**
  25  * @test
  26  * @summary tests RegExp framework (use -Dseed=X to set PRNG seed)
  27  * @author Mike McCloskey
  28  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
  29  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
  30  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  31  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  32  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  33  * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
  34  * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
  35  * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819



  36  * @library /lib/testlibrary
  37  * @build jdk.testlibrary.*
  38  * @run main RegExTest
  39  * @key randomness
  40  */
  41 
  42 import java.util.function.Function;
  43 import java.util.regex.*;
  44 import java.util.Random;
  45 import java.util.Scanner;
  46 import java.io.*;
  47 import java.nio.file.*;
  48 import java.util.*;
  49 import java.nio.CharBuffer;
  50 import java.util.function.Predicate;
  51 import jdk.testlibrary.RandomFactory;
  52 
  53 /**
  54  * This is a test class created to check the operation of
  55  * the Pattern and Matcher classes.
  56  */
  57 public class RegExTest {
  58 
  59     private static Random generator = RandomFactory.getRandom();
  60     private static boolean failure = false;
  61     private static int failCount = 0;
  62     private static String firstFailure = null;
  63 
  64     /**
  65      * Main to interpret arguments and run several tests.
  66      *
  67      */
  68     public static void main(String[] args) throws Exception {
  69         // Most of the tests are in a file
  70         processFile("TestCases.txt");
  71         //processFile("PerlCases.txt");
  72         processFile("BMPTestCases.txt");
  73         processFile("SupplementaryTestCases.txt");
  74 
  75         // These test many randomly generated char patterns
  76         bm();
  77         slice();
  78 
  79         // These are hard to put into the file
  80         escapes();
  81         blankInput();
  82 
  83         // Substitition tests on randomly generated sequences
  84         globalSubstitute();
  85         stringbufferSubstitute();
  86         stringbuilderSubstitute();
  87 
  88         substitutionBasher();
  89         substitutionBasher2();
  90 
  91         // Canonical Equivalence
  92         ceTest();
  93 
  94         // Anchors
  95         anchorTest();
  96 
  97         // boolean match calls
  98         matchesTest();
  99         lookingAtTest();
 100 
 101         // Pattern API
 102         patternMatchesTest();
 103 
 104         // Misc
 105         lookbehindTest();
 106         nullArgumentTest();
 107         backRefTest();
 108         groupCaptureTest();
 109         caretTest();
 110         charClassTest();
 111         emptyPatternTest();
 112         findIntTest();
 113         group0Test();
 114         longPatternTest();
 115         octalTest();
 116         ampersandTest();
 117         negationTest();
 118         splitTest();
 119         appendTest();
 120         caseFoldingTest();
 121         commentsTest();
 122         unixLinesTest();
 123         replaceFirstTest();
 124         gTest();
 125         zTest();
 126         serializeTest();
 127         reluctantRepetitionTest();
 128         multilineDollarTest();
 129         dollarAtEndTest();
 130         caretBetweenTerminatorsTest();
 131         // This RFE rejected in Tiger numOccurrencesTest();
 132         javaCharClassTest();
 133         nonCaptureRepetitionTest();
 134         notCapturedGroupCurlyMatchTest();
 135         escapedSegmentTest();
 136         literalPatternTest();
 137         literalReplacementTest();
 138         regionTest();
 139         toStringTest();
 140         negatedCharClassTest();
 141         findFromTest();
 142         boundsTest();
 143         unicodeWordBoundsTest();
 144         caretAtEndTest();
 145         wordSearchTest();
 146         hitEndTest();
 147         toMatchResultTest();
 148         toMatchResultTest2();
 149         surrogatesInClassTest();
 150         removeQEQuotingTest();
 151         namedGroupCaptureTest();
 152         nonBmpClassComplementTest();
 153         unicodePropertiesTest();
 154         unicodeHexNotationTest();
 155         unicodeClassesTest();
 156         unicodeCharacterNameTest();
 157         horizontalAndVerticalWSTest();
 158         linebreakTest();
 159         branchTest();
 160         groupCurlyNotFoundSuppTest();
 161         groupCurlyBackoffTest();
 162         patternAsPredicate();
 163         invalidFlags();
 164         grapheme();

 165 
 166         if (failure) {
 167             throw new
 168                 RuntimeException("RegExTest failed, 1st failure: " +
 169                                  firstFailure);
 170         } else {
 171             System.err.println("OKAY: All tests passed.");
 172         }
 173     }
 174 
 175     // Utility functions
 176 
 177     private static String getRandomAlphaString(int length) {
 178         StringBuffer buf = new StringBuffer(length);
 179         for (int i=0; i<length; i++) {
 180             char randChar = (char)(97 + generator.nextInt(26));
 181             buf.append(randChar);
 182         }
 183         return buf.toString();
 184     }
 185 
 186     private static void check(Matcher m, String expected) {
 187         m.find();
 188         if (!m.group().equals(expected))
 189             failCount++;
 190     }
 191 
 192     private static void check(Matcher m, String result, boolean expected) {
 193         m.find();
 194         if (m.group().equals(result) != expected)
 195             failCount++;
 196     }
 197 
 198     private static void check(Pattern p, String s, boolean expected) {
 199         if (p.matcher(s).find() != expected)
 200             failCount++;
 201     }
 202 
 203     private static void check(String p, String s, boolean expected) {
 204         Matcher matcher = Pattern.compile(p).matcher(s);
 205         if (matcher.find() != expected)
 206             failCount++;
 207     }
 208 
 209     private static void check(String p, char c, boolean expected) {
 210         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 211         Pattern pattern = Pattern.compile(propertyPattern);
 212         char[] ca = new char[1]; ca[0] = c;
 213         Matcher matcher = pattern.matcher(new String(ca));
 214         if (!matcher.find())
 215             failCount++;
 216     }
 217 
 218     private static void check(String p, int codePoint, boolean expected) {
 219         String propertyPattern = expected ? "\\p" + p : "\\P" + p;
 220         Pattern pattern = Pattern.compile(propertyPattern);
 221         char[] ca = Character.toChars(codePoint);
 222         Matcher matcher = pattern.matcher(new String(ca));
 223         if (!matcher.find())
 224             failCount++;
 225     }
 226 
 227     private static void check(String p, int flag, String input, String s,
 228                               boolean expected)
 229     {
 230         Pattern pattern = Pattern.compile(p, flag);
 231         Matcher matcher = pattern.matcher(input);
 232         if (expected)
 233             check(matcher, s, expected);
 234         else
 235             check(pattern, input, false);
 236     }
 237 
 238     private static void report(String testName) {
 239         int spacesToAdd = 30 - testName.length();
 240         StringBuffer paddedNameBuffer = new StringBuffer(testName);
 241         for (int i=0; i<spacesToAdd; i++)
 242             paddedNameBuffer.append(" ");
 243         String paddedName = paddedNameBuffer.toString();
 244         System.err.println(paddedName + ": " +
 245                            (failCount==0 ? "Passed":"Failed("+failCount+")"));
 246         if (failCount > 0) {
 247             failure = true;
 248 
 249             if (firstFailure == null) {
 250                 firstFailure = testName;
 251             }
 252         }
 253 
 254         failCount = 0;
 255     }
 256 
 257     /**
 258      * Converts ASCII alphabet characters [A-Za-z] in the given 's' to
 259      * supplementary characters. This method does NOT fully take care
 260      * of the regex syntax.
 261      */
 262     private static String toSupplementaries(String s) {
 263         int length = s.length();
 264         StringBuffer sb = new StringBuffer(length * 2);
 265 
 266         for (int i = 0; i < length; ) {
 267             char c = s.charAt(i++);
 268             if (c == '\\') {
 269                 sb.append(c);
 270                 if (i < length) {
 271                     c = s.charAt(i++);
 272                     sb.append(c);
 273                     if (c == 'u') {
 274                         // assume no syntax error
 275                         sb.append(s.charAt(i++));
 276                         sb.append(s.charAt(i++));
 277                         sb.append(s.charAt(i++));
 278                         sb.append(s.charAt(i++));
 279                     }
 280                 }
 281             } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
 282                 sb.append('\ud800').append((char)('\udc00'+c));
 283             } else {
 284                 sb.append(c);
 285             }
 286         }
 287         return sb.toString();
 288     }
 289 
 290     // Regular expression tests
 291 
 292     // This is for bug 6178785
 293     // Test if an expected NPE gets thrown when passing in a null argument
 294     private static boolean check(Runnable test) {
 295         try {
 296             test.run();
 297             failCount++;
 298             return false;
 299         } catch (NullPointerException npe) {
 300             return true;
 301         }
 302     }
 303 
 304     private static void nullArgumentTest() {
 305         check(() -> Pattern.compile(null));
 306         check(() -> Pattern.matches(null, null));
 307         check(() -> Pattern.matches("xyz", null));
 308         check(() -> Pattern.quote(null));
 309         check(() -> Pattern.compile("xyz").split(null));
 310         check(() -> Pattern.compile("xyz").matcher(null));
 311 
 312         final Matcher m = Pattern.compile("xyz").matcher("xyz");
 313         m.matches();
 314         check(() -> m.appendTail((StringBuffer) null));
 315         check(() -> m.appendTail((StringBuilder)null));
 316         check(() -> m.replaceAll((String) null));
 317         check(() -> m.replaceAll((Function<MatchResult, String>)null));
 318         check(() -> m.replaceFirst((String)null));
 319         check(() -> m.replaceFirst((Function<MatchResult, String>) null));
 320         check(() -> m.appendReplacement((StringBuffer)null, null));
 321         check(() -> m.appendReplacement((StringBuilder)null, null));
 322         check(() -> m.reset(null));
 323         check(() -> Matcher.quoteReplacement(null));
 324         //check(() -> m.usePattern(null));
 325 
 326         report("Null Argument");
 327     }
 328 
 329     // This is for bug 6635133
 330     // Test if surrogate pair in Unicode escapes can be handled correctly.
 331     private static void surrogatesInClassTest() throws Exception {
 332         Pattern pattern = Pattern.compile("[\\ud834\\udd21-\\ud834\\udd24]");
 333         Matcher matcher = pattern.matcher("\ud834\udd22");
 334         if (!matcher.find())
 335             failCount++;
 336 
 337         report("Surrogate pair in Unicode escape");
 338     }
 339 
 340     // This is for bug 6990617
 341     // Test if Pattern.RemoveQEQuoting works correctly if the octal unicode
 342     // char encoding is only 2 or 3 digits instead of 4 and the first quoted
 343     // char is an octal digit.
 344     private static void removeQEQuotingTest() throws Exception {
 345         Pattern pattern =
 346             Pattern.compile("\\011\\Q1sometext\\E\\011\\Q2sometext\\E");
 347         Matcher matcher = pattern.matcher("\t1sometext\t2sometext");
 348         if (!matcher.find())
 349             failCount++;
 350 
 351         report("Remove Q/E Quoting");
 352     }
 353 
 354     // This is for bug 4988891
 355     // Test toMatchResult to see that it is a copy of the Matcher
 356     // that is not affected by subsequent operations on the original
 357     private static void toMatchResultTest() throws Exception {
 358         Pattern pattern = Pattern.compile("squid");
 359         Matcher matcher = pattern.matcher(
 360             "agiantsquidofdestinyasmallsquidoffate");
 361         matcher.find();
 362         int matcherStart1 = matcher.start();
 363         MatchResult mr = matcher.toMatchResult();
 364         if (mr == matcher)
 365             failCount++;
 366         int resultStart1 = mr.start();
 367         if (matcherStart1 != resultStart1)
 368             failCount++;
 369         matcher.find();
 370         int matcherStart2 = matcher.start();
 371         int resultStart2 = mr.start();
 372         if (matcherStart2 == resultStart2)
 373             failCount++;
 374         if (resultStart1 != resultStart2)
 375             failCount++;
 376         MatchResult mr2 = matcher.toMatchResult();
 377         if (mr == mr2)
 378             failCount++;
 379         if (mr2.start() != matcherStart2)
 380             failCount++;
 381         report("toMatchResult is a copy");
 382     }
 383 
 384     private static void checkExpectedISE(Runnable test) {
 385         try {
 386             test.run();
 387             failCount++;
 388         } catch (IllegalStateException x) {
 389         } catch (IndexOutOfBoundsException xx) {
 390             failCount++;
 391         }
 392     }
 393 
 394     private static void checkExpectedIOOE(Runnable test) {
 395         try {
 396             test.run();
 397             failCount++;
 398         } catch (IndexOutOfBoundsException x) {}
 399     }
 400 
 401     // This is for bug 8074678
 402     // Test that the result of toMatchResult throws ISE if no match is available
 403     private static void toMatchResultTest2() throws Exception {
 404         Matcher matcher = Pattern.compile("nomatch").matcher("hello world");
 405         matcher.find();
 406         MatchResult mr = matcher.toMatchResult();
 407 
 408         checkExpectedISE(() -> mr.start());
 409         checkExpectedISE(() -> mr.start(2));
 410         checkExpectedISE(() -> mr.end());
 411         checkExpectedISE(() -> mr.end(2));
 412         checkExpectedISE(() -> mr.group());
 413         checkExpectedISE(() -> mr.group(2));
 414 
 415         matcher = Pattern.compile("(match)").matcher("there is a match");
 416         matcher.find();
 417         MatchResult mr2 = matcher.toMatchResult();
 418         checkExpectedIOOE(() -> mr2.start(2));
 419         checkExpectedIOOE(() -> mr2.end(2));
 420         checkExpectedIOOE(() -> mr2.group(2));
 421 
 422         report("toMatchResult2 appropriate exceptions");
 423     }
 424 
 425     // This is for bug 5013885
 426     // Must test a slice to see if it reports hitEnd correctly
 427     private static void hitEndTest() throws Exception {
 428         // Basic test of Slice node
 429         Pattern p = Pattern.compile("^squidattack");
 430         Matcher m = p.matcher("squack");
 431         m.find();
 432         if (m.hitEnd())
 433             failCount++;
 434         m.reset("squid");
 435         m.find();
 436         if (!m.hitEnd())
 437             failCount++;
 438 
 439         // Test Slice, SliceA and SliceU nodes
 440         for (int i=0; i<3; i++) {
 441             int flags = 0;
 442             if (i==1) flags = Pattern.CASE_INSENSITIVE;
 443             if (i==2) flags = Pattern.UNICODE_CASE;
 444             p = Pattern.compile("^abc", flags);
 445             m = p.matcher("ad");
 446             m.find();
 447             if (m.hitEnd())
 448                 failCount++;
 449             m.reset("ab");
 450             m.find();
 451             if (!m.hitEnd())
 452                 failCount++;
 453         }
 454 
 455         // Test Boyer-Moore node
 456         p = Pattern.compile("catattack");
 457         m = p.matcher("attack");
 458         m.find();
 459         if (!m.hitEnd())
 460             failCount++;
 461 
 462         p = Pattern.compile("catattack");
 463         m = p.matcher("attackattackattackcatatta");
 464         m.find();
 465         if (!m.hitEnd())
 466             failCount++;
 467         report("hitEnd from a Slice");
 468     }
 469 
 470     // This is for bug 4997476
 471     // It is weird code submitted by customer demonstrating a regression
 472     private static void wordSearchTest() throws Exception {
 473         String testString = new String("word1 word2 word3");
 474         Pattern p = Pattern.compile("\\b");
 475         Matcher m = p.matcher(testString);
 476         int position = 0;
 477         int start = 0;
 478         while (m.find(position)) {
 479             start = m.start();
 480             if (start == testString.length())
 481                 break;
 482             if (m.find(start+1)) {
 483                 position = m.start();
 484             } else {
 485                 position = testString.length();
 486             }
 487             if (testString.substring(start, position).equals(" "))
 488                 continue;
 489             if (!testString.substring(start, position-1).startsWith("word"))
 490                 failCount++;
 491         }
 492         report("Customer word search");
 493     }
 494 
 495     // This is for bug 4994840
 496     private static void caretAtEndTest() throws Exception {
 497         // Problem only occurs with multiline patterns
 498         // containing a beginning-of-line caret "^" followed
 499         // by an expression that also matches the empty string.
 500         Pattern pattern = Pattern.compile("^x?", Pattern.MULTILINE);
 501         Matcher matcher = pattern.matcher("\r");
 502         matcher.find();
 503         matcher.find();
 504         report("Caret at end");
 505     }
 506 
 507     // This test is for 4979006
 508     // Check to see if word boundary construct properly handles unicode
 509     // non spacing marks
 510     private static void unicodeWordBoundsTest() throws Exception {
 511         String spaces = "  ";
 512         String wordChar = "a";
 513         String nsm = "\u030a";
 514 
 515         assert (Character.getType('\u030a') == Character.NON_SPACING_MARK);
 516 
 517         Pattern pattern = Pattern.compile("\\b");
 518         Matcher matcher = pattern.matcher("");
 519         // S=other B=word character N=non spacing mark .=word boundary
 520         // SS.BB.SS
 521         String input = spaces + wordChar + wordChar + spaces;
 522         twoFindIndexes(input, matcher, 2, 4);
 523         // SS.BBN.SS
 524         input = spaces + wordChar +wordChar + nsm + spaces;
 525         twoFindIndexes(input, matcher, 2, 5);
 526         // SS.BN.SS
 527         input = spaces + wordChar + nsm + spaces;
 528         twoFindIndexes(input, matcher, 2, 4);
 529         // SS.BNN.SS
 530         input = spaces + wordChar + nsm + nsm + spaces;
 531         twoFindIndexes(input, matcher, 2, 5);
 532         // SSN.BB.SS
 533         input = spaces + nsm + wordChar + wordChar + spaces;
 534         twoFindIndexes(input, matcher, 3, 5);
 535         // SS.BNB.SS
 536         input = spaces + wordChar + nsm + wordChar + spaces;
 537         twoFindIndexes(input, matcher, 2, 5);
 538         // SSNNSS
 539         input = spaces + nsm + nsm + spaces;
 540         matcher.reset(input);
 541         if (matcher.find())
 542             failCount++;
 543         // SSN.BBN.SS
 544         input = spaces + nsm + wordChar + wordChar + nsm + spaces;
 545         twoFindIndexes(input, matcher, 3, 6);
 546 
 547         report("Unicode word boundary");
 548     }
 549 
 550     private static void twoFindIndexes(String input, Matcher matcher, int a,
 551                                        int b) throws Exception
 552     {
 553         matcher.reset(input);
 554         matcher.find();
 555         if (matcher.start() != a)
 556             failCount++;
 557         matcher.find();
 558         if (matcher.start() != b)
 559             failCount++;
 560     }
 561 
 562     // This test is for 6284152
 563     static void check(String regex, String input, String[] expected) {
 564         List<String> result = new ArrayList<String>();
 565         Pattern p = Pattern.compile(regex);
 566         Matcher m = p.matcher(input);
 567         while (m.find()) {
 568             result.add(m.group());
 569         }
 570         if (!Arrays.asList(expected).equals(result))
 571             failCount++;
 572     }
 573 
 574     private static void lookbehindTest() throws Exception {
 575         //Positive
 576         check("(?<=%.{0,5})foo\\d",
 577               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 578               new String[]{"foo1", "foo2", "foo3"});
 579 
 580         //boundary at end of the lookbehind sub-regex should work consistently
 581         //with the boundary just after the lookbehind sub-regex
 582         check("(?<=.*\\b)foo", "abcd foo", new String[]{"foo"});
 583         check("(?<=.*)\\bfoo", "abcd foo", new String[]{"foo"});
 584         check("(?<!abc )\\bfoo", "abc foo", new String[0]);
 585         check("(?<!abc \\b)foo", "abc foo", new String[0]);
 586 
 587         //Negative
 588         check("(?<!%.{0,5})foo\\d",
 589               "%foo1\n%bar foo2\n%bar  foo3\n%blahblah foo4\nfoo5",
 590               new String[] {"foo4", "foo5"});
 591 
 592         //Positive greedy
 593         check("(?<=%b{1,4})foo", "%bbbbfoo", new String[] {"foo"});
 594 
 595         //Positive reluctant
 596         check("(?<=%b{1,4}?)foo", "%bbbbfoo", new String[] {"foo"});
 597 
 598         //supplementary
 599         check("(?<=%b{1,4})fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 600               new String[] {"fo\ud800\udc00o"});
 601         check("(?<=%b{1,4}?)fo\ud800\udc00o", "%bbbbfo\ud800\udc00o",
 602               new String[] {"fo\ud800\udc00o"});
 603         check("(?<!%b{1,4})fo\ud800\udc00o", "%afo\ud800\udc00o",
 604               new String[] {"fo\ud800\udc00o"});
 605         check("(?<!%b{1,4}?)fo\ud800\udc00o", "%afo\ud800\udc00o",
 606               new String[] {"fo\ud800\udc00o"});
 607         report("Lookbehind");
 608     }
 609 
 610     // This test is for 4938995
 611     // Check to see if weak region boundaries are transparent to
 612     // lookahead and lookbehind constructs
 613     private static void boundsTest() throws Exception {
 614         String fullMessage = "catdogcat";
 615         Pattern pattern = Pattern.compile("(?<=cat)dog(?=cat)");
 616         Matcher matcher = pattern.matcher("catdogca");
 617         matcher.useTransparentBounds(true);
 618         if (matcher.find())
 619             failCount++;
 620         matcher.reset("atdogcat");
 621         if (matcher.find())
 622             failCount++;
 623         matcher.reset(fullMessage);
 624         if (!matcher.find())
 625             failCount++;
 626         matcher.reset(fullMessage);
 627         matcher.region(0,9);
 628         if (!matcher.find())
 629             failCount++;
 630         matcher.reset(fullMessage);
 631         matcher.region(0,6);
 632         if (!matcher.find())
 633             failCount++;
 634         matcher.reset(fullMessage);
 635         matcher.region(3,6);
 636         if (!matcher.find())
 637             failCount++;
 638         matcher.useTransparentBounds(false);
 639         if (matcher.find())
 640             failCount++;
 641 
 642         // Negative lookahead/lookbehind
 643         pattern = Pattern.compile("(?<!cat)dog(?!cat)");
 644         matcher = pattern.matcher("dogcat");
 645         matcher.useTransparentBounds(true);
 646         matcher.region(0,3);
 647         if (matcher.find())
 648             failCount++;
 649         matcher.reset("catdog");
 650         matcher.region(3,6);
 651         if (matcher.find())
 652             failCount++;
 653         matcher.useTransparentBounds(false);
 654         matcher.reset("dogcat");
 655         matcher.region(0,3);
 656         if (!matcher.find())
 657             failCount++;
 658         matcher.reset("catdog");
 659         matcher.region(3,6);
 660         if (!matcher.find())
 661             failCount++;
 662 
 663         report("Region bounds transparency");
 664     }
 665 
 666     // This test is for 4945394
 667     private static void findFromTest() throws Exception {
 668         String message = "This is 40 $0 message.";
 669         Pattern pat = Pattern.compile("\\$0");
 670         Matcher match = pat.matcher(message);
 671         if (!match.find())
 672             failCount++;
 673         if (match.find())
 674             failCount++;
 675         if (match.find())
 676             failCount++;
 677         report("Check for alternating find");
 678     }
 679 
 680     // This test is for 4872664 and 4892980
 681     private static void negatedCharClassTest() throws Exception {
 682         Pattern pattern = Pattern.compile("[^>]");
 683         Matcher matcher = pattern.matcher("\u203A");
 684         if (!matcher.matches())
 685             failCount++;
 686         pattern = Pattern.compile("[^fr]");
 687         matcher = pattern.matcher("a");
 688         if (!matcher.find())
 689             failCount++;
 690         matcher.reset("\u203A");
 691         if (!matcher.find())
 692             failCount++;
 693         String s = "for";
 694         String result[] = s.split("[^fr]");
 695         if (!result[0].equals("f"))
 696             failCount++;
 697         if (!result[1].equals("r"))
 698             failCount++;
 699         s = "f\u203Ar";
 700         result = s.split("[^fr]");
 701         if (!result[0].equals("f"))
 702             failCount++;
 703         if (!result[1].equals("r"))
 704             failCount++;
 705 
 706         // Test adding to bits, subtracting a node, then adding to bits again
 707         pattern = Pattern.compile("[^f\u203Ar]");
 708         matcher = pattern.matcher("a");
 709         if (!matcher.find())
 710             failCount++;
 711         matcher.reset("f");
 712         if (matcher.find())
 713             failCount++;
 714         matcher.reset("\u203A");
 715         if (matcher.find())
 716             failCount++;
 717         matcher.reset("r");
 718         if (matcher.find())
 719             failCount++;
 720         matcher.reset("\u203B");
 721         if (!matcher.find())
 722             failCount++;
 723 
 724         // Test subtracting a node, adding to bits, subtracting again
 725         pattern = Pattern.compile("[^\u203Ar\u203B]");
 726         matcher = pattern.matcher("a");
 727         if (!matcher.find())
 728             failCount++;
 729         matcher.reset("\u203A");
 730         if (matcher.find())
 731             failCount++;
 732         matcher.reset("r");
 733         if (matcher.find())
 734             failCount++;
 735         matcher.reset("\u203B");
 736         if (matcher.find())
 737             failCount++;
 738         matcher.reset("\u203C");
 739         if (!matcher.find())
 740             failCount++;
 741 
 742         report("Negated Character Class");
 743     }
 744 
 745     // This test is for 4628291
 746     private static void toStringTest() throws Exception {
 747         Pattern pattern = Pattern.compile("b+");
 748         if (pattern.toString() != "b+")
 749             failCount++;
 750         Matcher matcher = pattern.matcher("aaabbbccc");
 751         String matcherString = matcher.toString(); // unspecified
 752         matcher.find();
 753         matcherString = matcher.toString(); // unspecified
 754         matcher.region(0,3);
 755         matcherString = matcher.toString(); // unspecified
 756         matcher.reset();
 757         matcherString = matcher.toString(); // unspecified
 758         report("toString");
 759     }
 760 
 761     // This test is for 4808962
 762     private static void literalPatternTest() throws Exception {
 763         int flags = Pattern.LITERAL;
 764 
 765         Pattern pattern = Pattern.compile("abc\\t$^", flags);
 766         check(pattern, "abc\\t$^", true);
 767 
 768         pattern = Pattern.compile(Pattern.quote("abc\\t$^"));
 769         check(pattern, "abc\\t$^", true);
 770 
 771         pattern = Pattern.compile("\\Qa^$bcabc\\E", flags);
 772         check(pattern, "\\Qa^$bcabc\\E", true);
 773         check(pattern, "a^$bcabc", false);
 774 
 775         pattern = Pattern.compile("\\\\Q\\\\E");
 776         check(pattern, "\\Q\\E", true);
 777 
 778         pattern = Pattern.compile("\\Qabc\\Eefg\\\\Q\\\\Ehij");
 779         check(pattern, "abcefg\\Q\\Ehij", true);
 780 
 781         pattern = Pattern.compile("\\\\\\Q\\\\E");
 782         check(pattern, "\\\\\\\\", true);
 783 
 784         pattern = Pattern.compile(Pattern.quote("\\Qa^$bcabc\\E"));
 785         check(pattern, "\\Qa^$bcabc\\E", true);
 786         check(pattern, "a^$bcabc", false);
 787 
 788         pattern = Pattern.compile(Pattern.quote("\\Qabc\\Edef"));
 789         check(pattern, "\\Qabc\\Edef", true);
 790         check(pattern, "abcdef", false);
 791 
 792         pattern = Pattern.compile(Pattern.quote("abc\\Edef"));
 793         check(pattern, "abc\\Edef", true);
 794         check(pattern, "abcdef", false);
 795 
 796         pattern = Pattern.compile(Pattern.quote("\\E"));
 797         check(pattern, "\\E", true);
 798 
 799         pattern = Pattern.compile("((((abc.+?:)", flags);
 800         check(pattern, "((((abc.+?:)", true);
 801 
 802         flags |= Pattern.MULTILINE;
 803 
 804         pattern = Pattern.compile("^cat$", flags);
 805         check(pattern, "abc^cat$def", true);
 806         check(pattern, "cat", false);
 807 
 808         flags |= Pattern.CASE_INSENSITIVE;
 809 
 810         pattern = Pattern.compile("abcdef", flags);
 811         check(pattern, "ABCDEF", true);
 812         check(pattern, "AbCdEf", true);
 813 
 814         flags |= Pattern.DOTALL;
 815 
 816         pattern = Pattern.compile("a...b", flags);
 817         check(pattern, "A...b", true);
 818         check(pattern, "Axxxb", false);
 819 
 820         flags |= Pattern.CANON_EQ;
 821 
 822         Pattern p = Pattern.compile("testa\u030a", flags);
 823         check(pattern, "testa\u030a", false);
 824         check(pattern, "test\u00e5", false);
 825 
 826         // Supplementary character test
 827         flags = Pattern.LITERAL;
 828 
 829         pattern = Pattern.compile(toSupplementaries("abc\\t$^"), flags);
 830         check(pattern, toSupplementaries("abc\\t$^"), true);
 831 
 832         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\t$^")));
 833         check(pattern, toSupplementaries("abc\\t$^"), true);
 834 
 835         pattern = Pattern.compile(toSupplementaries("\\Qa^$bcabc\\E"), flags);
 836         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 837         check(pattern, toSupplementaries("a^$bcabc"), false);
 838 
 839         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qa^$bcabc\\E")));
 840         check(pattern, toSupplementaries("\\Qa^$bcabc\\E"), true);
 841         check(pattern, toSupplementaries("a^$bcabc"), false);
 842 
 843         pattern = Pattern.compile(Pattern.quote(toSupplementaries("\\Qabc\\Edef")));
 844         check(pattern, toSupplementaries("\\Qabc\\Edef"), true);
 845         check(pattern, toSupplementaries("abcdef"), false);
 846 
 847         pattern = Pattern.compile(Pattern.quote(toSupplementaries("abc\\Edef")));
 848         check(pattern, toSupplementaries("abc\\Edef"), true);
 849         check(pattern, toSupplementaries("abcdef"), false);
 850 
 851         pattern = Pattern.compile(toSupplementaries("((((abc.+?:)"), flags);
 852         check(pattern, toSupplementaries("((((abc.+?:)"), true);
 853 
 854         flags |= Pattern.MULTILINE;
 855 
 856         pattern = Pattern.compile(toSupplementaries("^cat$"), flags);
 857         check(pattern, toSupplementaries("abc^cat$def"), true);
 858         check(pattern, toSupplementaries("cat"), false);
 859 
 860         flags |= Pattern.DOTALL;
 861 
 862         // note: this is case-sensitive.
 863         pattern = Pattern.compile(toSupplementaries("a...b"), flags);
 864         check(pattern, toSupplementaries("a...b"), true);
 865         check(pattern, toSupplementaries("axxxb"), false);
 866 
 867         flags |= Pattern.CANON_EQ;
 868 
 869         String t = toSupplementaries("test");
 870         p = Pattern.compile(t + "a\u030a", flags);
 871         check(pattern, t + "a\u030a", false);
 872         check(pattern, t + "\u00e5", false);
 873 
 874         report("Literal pattern");
 875     }
 876 
 877     // This test is for 4803179
 878     // This test is also for 4808962, replacement parts
 879     private static void literalReplacementTest() throws Exception {
 880         int flags = Pattern.LITERAL;
 881 
 882         Pattern pattern = Pattern.compile("abc", flags);
 883         Matcher matcher = pattern.matcher("zzzabczzz");
 884         String replaceTest = "$0";
 885         String result = matcher.replaceAll(replaceTest);
 886         if (!result.equals("zzzabczzz"))
 887             failCount++;
 888 
 889         matcher.reset();
 890         String literalReplacement = matcher.quoteReplacement(replaceTest);
 891         result = matcher.replaceAll(literalReplacement);
 892         if (!result.equals("zzz$0zzz"))
 893             failCount++;
 894 
 895         matcher.reset();
 896         replaceTest = "\\t$\\$";
 897         literalReplacement = matcher.quoteReplacement(replaceTest);
 898         result = matcher.replaceAll(literalReplacement);
 899         if (!result.equals("zzz\\t$\\$zzz"))
 900             failCount++;
 901 
 902         // Supplementary character test
 903         pattern = Pattern.compile(toSupplementaries("abc"), flags);
 904         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
 905         replaceTest = "$0";
 906         result = matcher.replaceAll(replaceTest);
 907         if (!result.equals(toSupplementaries("zzzabczzz")))
 908             failCount++;
 909 
 910         matcher.reset();
 911         literalReplacement = matcher.quoteReplacement(replaceTest);
 912         result = matcher.replaceAll(literalReplacement);
 913         if (!result.equals(toSupplementaries("zzz$0zzz")))
 914             failCount++;
 915 
 916         matcher.reset();
 917         replaceTest = "\\t$\\$";
 918         literalReplacement = matcher.quoteReplacement(replaceTest);
 919         result = matcher.replaceAll(literalReplacement);
 920         if (!result.equals(toSupplementaries("zzz\\t$\\$zzz")))
 921             failCount++;
 922 
 923         // IAE should be thrown if backslash or '$' is the last character
 924         // in replacement string
 925         try {
 926             "\uac00".replaceAll("\uac00", "$");
 927             failCount++;
 928         } catch (IllegalArgumentException iie) {
 929         } catch (Exception e) {
 930             failCount++;
 931         }
 932         try {
 933             "\uac00".replaceAll("\uac00", "\\");
 934             failCount++;
 935         } catch (IllegalArgumentException iie) {
 936         } catch (Exception e) {
 937             failCount++;
 938         }
 939         report("Literal replacement");
 940     }
 941 
 942     // This test is for 4757029
 943     private static void regionTest() throws Exception {
 944         Pattern pattern = Pattern.compile("abc");
 945         Matcher matcher = pattern.matcher("abcdefabc");
 946 
 947         matcher.region(0,9);
 948         if (!matcher.find())
 949             failCount++;
 950         if (!matcher.find())
 951             failCount++;
 952         matcher.region(0,3);
 953         if (!matcher.find())
 954            failCount++;
 955         matcher.region(3,6);
 956         if (matcher.find())
 957            failCount++;
 958         matcher.region(0,2);
 959         if (matcher.find())
 960            failCount++;
 961 
 962         expectRegionFail(matcher, 1, -1);
 963         expectRegionFail(matcher, -1, -1);
 964         expectRegionFail(matcher, -1, 1);
 965         expectRegionFail(matcher, 5, 3);
 966         expectRegionFail(matcher, 5, 12);
 967         expectRegionFail(matcher, 12, 12);
 968 
 969         pattern = Pattern.compile("^abc$");
 970         matcher = pattern.matcher("zzzabczzz");
 971         matcher.region(0,9);
 972         if (matcher.find())
 973             failCount++;
 974         matcher.region(3,6);
 975         if (!matcher.find())
 976            failCount++;
 977         matcher.region(3,6);
 978         matcher.useAnchoringBounds(false);
 979         if (matcher.find())
 980            failCount++;
 981 
 982         // Supplementary character test
 983         pattern = Pattern.compile(toSupplementaries("abc"));
 984         matcher = pattern.matcher(toSupplementaries("abcdefabc"));
 985         matcher.region(0,9*2);
 986         if (!matcher.find())
 987             failCount++;
 988         if (!matcher.find())
 989             failCount++;
 990         matcher.region(0,3*2);
 991         if (!matcher.find())
 992            failCount++;
 993         matcher.region(1,3*2);
 994         if (matcher.find())
 995            failCount++;
 996         matcher.region(3*2,6*2);
 997         if (matcher.find())
 998            failCount++;
 999         matcher.region(0,2*2);
1000         if (matcher.find())
1001            failCount++;
1002         matcher.region(0,2*2+1);
1003         if (matcher.find())
1004            failCount++;
1005 
1006         expectRegionFail(matcher, 1*2, -1);
1007         expectRegionFail(matcher, -1, -1);
1008         expectRegionFail(matcher, -1, 1*2);
1009         expectRegionFail(matcher, 5*2, 3*2);
1010         expectRegionFail(matcher, 5*2, 12*2);
1011         expectRegionFail(matcher, 12*2, 12*2);
1012 
1013         pattern = Pattern.compile(toSupplementaries("^abc$"));
1014         matcher = pattern.matcher(toSupplementaries("zzzabczzz"));
1015         matcher.region(0,9*2);
1016         if (matcher.find())
1017             failCount++;
1018         matcher.region(3*2,6*2);
1019         if (!matcher.find())
1020            failCount++;
1021         matcher.region(3*2+1,6*2);
1022         if (matcher.find())
1023            failCount++;
1024         matcher.region(3*2,6*2-1);
1025         if (matcher.find())
1026            failCount++;
1027         matcher.region(3*2,6*2);
1028         matcher.useAnchoringBounds(false);
1029         if (matcher.find())
1030            failCount++;
1031         report("Regions");
1032     }
1033 
1034     private static void expectRegionFail(Matcher matcher, int index1,
1035                                          int index2)
1036     {
1037         try {
1038             matcher.region(index1, index2);
1039             failCount++;
1040         } catch (IndexOutOfBoundsException ioobe) {
1041             // Correct result
1042         } catch (IllegalStateException ise) {
1043             // Correct result
1044         }
1045     }
1046 
1047     // This test is for 4803197
1048     private static void escapedSegmentTest() throws Exception {
1049 
1050         Pattern pattern = Pattern.compile("\\Qdir1\\dir2\\E");
1051         check(pattern, "dir1\\dir2", true);
1052 
1053         pattern = Pattern.compile("\\Qdir1\\dir2\\\\E");
1054         check(pattern, "dir1\\dir2\\", true);
1055 
1056         pattern = Pattern.compile("(\\Qdir1\\dir2\\\\E)");
1057         check(pattern, "dir1\\dir2\\", true);
1058 
1059         // Supplementary character test
1060         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2\\E"));
1061         check(pattern, toSupplementaries("dir1\\dir2"), true);
1062 
1063         pattern = Pattern.compile(toSupplementaries("\\Qdir1\\dir2")+"\\\\E");
1064         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1065 
1066         pattern = Pattern.compile(toSupplementaries("(\\Qdir1\\dir2")+"\\\\E)");
1067         check(pattern, toSupplementaries("dir1\\dir2\\"), true);
1068 
1069         report("Escaped segment");
1070     }
1071 
1072     // This test is for 4792284
1073     private static void nonCaptureRepetitionTest() throws Exception {
1074         String input = "abcdefgh;";
1075 
1076         String[] patterns = new String[] {
1077             "(?:\\w{4})+;",
1078             "(?:\\w{8})*;",
1079             "(?:\\w{2}){2,4};",
1080             "(?:\\w{4}){2,};",   // only matches the
1081             ".*?(?:\\w{5})+;",   //     specified minimum
1082             ".*?(?:\\w{9})*;",   //     number of reps - OK
1083             "(?:\\w{4})+?;",     // lazy repetition - OK
1084             "(?:\\w{4})++;",     // possessive repetition - OK
1085             "(?:\\w{2,}?)+;",    // non-deterministic - OK
1086             "(\\w{4})+;",        // capturing group - OK
1087         };
1088 
1089         for (int i = 0; i < patterns.length; i++) {
1090             // Check find()
1091             check(patterns[i], 0, input, input, true);
1092             // Check matches()
1093             Pattern p = Pattern.compile(patterns[i]);
1094             Matcher m = p.matcher(input);
1095 
1096             if (m.matches()) {
1097                 if (!m.group(0).equals(input))
1098                     failCount++;
1099             } else {
1100                 failCount++;
1101             }
1102         }
1103 
1104         report("Non capturing repetition");
1105     }
1106 
1107     // This test is for 6358731
1108     private static void notCapturedGroupCurlyMatchTest() throws Exception {
1109         Pattern pattern = Pattern.compile("(abc)+|(abcd)+");
1110         Matcher matcher = pattern.matcher("abcd");
1111         if (!matcher.matches() ||
1112              matcher.group(1) != null ||
1113              !matcher.group(2).equals("abcd")) {
1114             failCount++;
1115         }
1116         report("Not captured GroupCurly");
1117     }
1118 
1119     // This test is for 4706545
1120     private static void javaCharClassTest() throws Exception {
1121         for (int i=0; i<1000; i++) {
1122             char c = (char)generator.nextInt();
1123             check("{javaLowerCase}", c, Character.isLowerCase(c));
1124             check("{javaUpperCase}", c, Character.isUpperCase(c));
1125             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1126             check("{javaTitleCase}", c, Character.isTitleCase(c));
1127             check("{javaDigit}", c, Character.isDigit(c));
1128             check("{javaDefined}", c, Character.isDefined(c));
1129             check("{javaLetter}", c, Character.isLetter(c));
1130             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1131             check("{javaJavaIdentifierStart}", c,
1132                   Character.isJavaIdentifierStart(c));
1133             check("{javaJavaIdentifierPart}", c,
1134                   Character.isJavaIdentifierPart(c));
1135             check("{javaUnicodeIdentifierStart}", c,
1136                   Character.isUnicodeIdentifierStart(c));
1137             check("{javaUnicodeIdentifierPart}", c,
1138                   Character.isUnicodeIdentifierPart(c));
1139             check("{javaIdentifierIgnorable}", c,
1140                   Character.isIdentifierIgnorable(c));
1141             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1142             check("{javaWhitespace}", c, Character.isWhitespace(c));
1143             check("{javaISOControl}", c, Character.isISOControl(c));
1144             check("{javaMirrored}", c, Character.isMirrored(c));
1145 
1146         }
1147 
1148         // Supplementary character test
1149         for (int i=0; i<1000; i++) {
1150             int c = generator.nextInt(Character.MAX_CODE_POINT
1151                                       - Character.MIN_SUPPLEMENTARY_CODE_POINT)
1152                         + Character.MIN_SUPPLEMENTARY_CODE_POINT;
1153             check("{javaLowerCase}", c, Character.isLowerCase(c));
1154             check("{javaUpperCase}", c, Character.isUpperCase(c));
1155             check("{javaUpperCase}+", c, Character.isUpperCase(c));
1156             check("{javaTitleCase}", c, Character.isTitleCase(c));
1157             check("{javaDigit}", c, Character.isDigit(c));
1158             check("{javaDefined}", c, Character.isDefined(c));
1159             check("{javaLetter}", c, Character.isLetter(c));
1160             check("{javaLetterOrDigit}", c, Character.isLetterOrDigit(c));
1161             check("{javaJavaIdentifierStart}", c,
1162                   Character.isJavaIdentifierStart(c));
1163             check("{javaJavaIdentifierPart}", c,
1164                   Character.isJavaIdentifierPart(c));
1165             check("{javaUnicodeIdentifierStart}", c,
1166                   Character.isUnicodeIdentifierStart(c));
1167             check("{javaUnicodeIdentifierPart}", c,
1168                   Character.isUnicodeIdentifierPart(c));
1169             check("{javaIdentifierIgnorable}", c,
1170                   Character.isIdentifierIgnorable(c));
1171             check("{javaSpaceChar}", c, Character.isSpaceChar(c));
1172             check("{javaWhitespace}", c, Character.isWhitespace(c));
1173             check("{javaISOControl}", c, Character.isISOControl(c));
1174             check("{javaMirrored}", c, Character.isMirrored(c));
1175         }
1176 
1177         report("Java character classes");
1178     }
1179 
1180     // This test is for 4523620
1181     /*
1182     private static void numOccurrencesTest() throws Exception {
1183         Pattern pattern = Pattern.compile("aaa");
1184 
1185         if (pattern.numOccurrences("aaaaaa", false) != 2)
1186             failCount++;
1187         if (pattern.numOccurrences("aaaaaa", true) != 4)
1188             failCount++;
1189 
1190         pattern = Pattern.compile("^");
1191         if (pattern.numOccurrences("aaaaaa", false) != 1)
1192             failCount++;
1193         if (pattern.numOccurrences("aaaaaa", true) != 1)
1194             failCount++;
1195 
1196         report("Number of Occurrences");
1197     }
1198     */
1199 
1200     // This test is for 4776374
1201     private static void caretBetweenTerminatorsTest() throws Exception {
1202         int flags1 = Pattern.DOTALL;
1203         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1204         int flags3 = Pattern.DOTALL | Pattern.UNIX_LINES | Pattern.MULTILINE;
1205         int flags4 = Pattern.DOTALL | Pattern.MULTILINE;
1206 
1207         check("^....", flags1, "test\ntest", "test", true);
1208         check(".....^", flags1, "test\ntest", "test", false);
1209         check(".....^", flags1, "test\n", "test", false);
1210         check("....^", flags1, "test\r\n", "test", false);
1211 
1212         check("^....", flags2, "test\ntest", "test", true);
1213         check("....^", flags2, "test\ntest", "test", false);
1214         check(".....^", flags2, "test\n", "test", false);
1215         check("....^", flags2, "test\r\n", "test", false);
1216 
1217         check("^....", flags3, "test\ntest", "test", true);
1218         check(".....^", flags3, "test\ntest", "test\n", true);
1219         check(".....^", flags3, "test\u0085test", "test\u0085", false);
1220         check(".....^", flags3, "test\n", "test", false);
1221         check(".....^", flags3, "test\r\n", "test", false);
1222         check("......^", flags3, "test\r\ntest", "test\r\n", true);
1223 
1224         check("^....", flags4, "test\ntest", "test", true);
1225         check(".....^", flags3, "test\ntest", "test\n", true);
1226         check(".....^", flags4, "test\u0085test", "test\u0085", true);
1227         check(".....^", flags4, "test\n", "test\n", false);
1228         check(".....^", flags4, "test\r\n", "test\r", false);
1229 
1230         // Supplementary character test
1231         String t = toSupplementaries("test");
1232         check("^....", flags1, t+"\n"+t, t, true);
1233         check(".....^", flags1, t+"\n"+t, t, false);
1234         check(".....^", flags1, t+"\n", t, false);
1235         check("....^", flags1, t+"\r\n", t, false);
1236 
1237         check("^....", flags2, t+"\n"+t, t, true);
1238         check("....^", flags2, t+"\n"+t, t, false);
1239         check(".....^", flags2, t+"\n", t, false);
1240         check("....^", flags2, t+"\r\n", t, false);
1241 
1242         check("^....", flags3, t+"\n"+t, t, true);
1243         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1244         check(".....^", flags3, t+"\u0085"+t, t+"\u0085", false);
1245         check(".....^", flags3, t+"\n", t, false);
1246         check(".....^", flags3, t+"\r\n", t, false);
1247         check("......^", flags3, t+"\r\n"+t, t+"\r\n", true);
1248 
1249         check("^....", flags4, t+"\n"+t, t, true);
1250         check(".....^", flags3, t+"\n"+t, t+"\n", true);
1251         check(".....^", flags4, t+"\u0085"+t, t+"\u0085", true);
1252         check(".....^", flags4, t+"\n", t+"\n", false);
1253         check(".....^", flags4, t+"\r\n", t+"\r", false);
1254 
1255         report("Caret between terminators");
1256     }
1257 
1258     // This test is for 4727935
1259     private static void dollarAtEndTest() throws Exception {
1260         int flags1 = Pattern.DOTALL;
1261         int flags2 = Pattern.DOTALL | Pattern.UNIX_LINES;
1262         int flags3 = Pattern.DOTALL | Pattern.MULTILINE;
1263 
1264         check("....$", flags1, "test\n", "test", true);
1265         check("....$", flags1, "test\r\n", "test", true);
1266         check(".....$", flags1, "test\n", "test\n", true);
1267         check(".....$", flags1, "test\u0085", "test\u0085", true);
1268         check("....$", flags1, "test\u0085", "test", true);
1269 
1270         check("....$", flags2, "test\n", "test", true);
1271         check(".....$", flags2, "test\n", "test\n", true);
1272         check(".....$", flags2, "test\u0085", "test\u0085", true);
1273         check("....$", flags2, "test\u0085", "est\u0085", true);
1274 
1275         check("....$.blah", flags3, "test\nblah", "test\nblah", true);
1276         check(".....$.blah", flags3, "test\n\nblah", "test\n\nblah", true);
1277         check("....$blah", flags3, "test\nblah", "!!!!", false);
1278         check(".....$blah", flags3, "test\nblah", "!!!!", false);
1279 
1280         // Supplementary character test
1281         String t = toSupplementaries("test");
1282         String b = toSupplementaries("blah");
1283         check("....$", flags1, t+"\n", t, true);
1284         check("....$", flags1, t+"\r\n", t, true);
1285         check(".....$", flags1, t+"\n", t+"\n", true);
1286         check(".....$", flags1, t+"\u0085", t+"\u0085", true);
1287         check("....$", flags1, t+"\u0085", t, true);
1288 
1289         check("....$", flags2, t+"\n", t, true);
1290         check(".....$", flags2, t+"\n", t+"\n", true);
1291         check(".....$", flags2, t+"\u0085", t+"\u0085", true);
1292         check("....$", flags2, t+"\u0085", toSupplementaries("est\u0085"), true);
1293 
1294         check("....$."+b, flags3, t+"\n"+b, t+"\n"+b, true);
1295         check(".....$."+b, flags3, t+"\n\n"+b, t+"\n\n"+b, true);
1296         check("....$"+b, flags3, t+"\n"+b, "!!!!", false);
1297         check(".....$"+b, flags3, t+"\n"+b, "!!!!", false);
1298 
1299         report("Dollar at End");
1300     }
1301 
1302     // This test is for 4711773
1303     private static void multilineDollarTest() throws Exception {
1304         Pattern findCR = Pattern.compile("$", Pattern.MULTILINE);
1305         Matcher matcher = findCR.matcher("first bit\nsecond bit");
1306         matcher.find();
1307         if (matcher.start(0) != 9)
1308             failCount++;
1309         matcher.find();
1310         if (matcher.start(0) != 20)
1311             failCount++;
1312 
1313         // Supplementary character test
1314         matcher = findCR.matcher(toSupplementaries("first  bit\n second  bit")); // double BMP chars
1315         matcher.find();
1316         if (matcher.start(0) != 9*2)
1317             failCount++;
1318         matcher.find();
1319         if (matcher.start(0) != 20*2)
1320             failCount++;
1321 
1322         report("Multiline Dollar");
1323     }
1324 
1325     private static void reluctantRepetitionTest() throws Exception {
1326         Pattern p = Pattern.compile("1(\\s\\S+?){1,3}?[\\s,]2");
1327         check(p, "1 word word word 2", true);
1328         check(p, "1 wor wo w 2", true);
1329         check(p, "1 word word 2", true);
1330         check(p, "1 word 2", true);
1331         check(p, "1 wo w w 2", true);
1332         check(p, "1 wo w 2", true);
1333         check(p, "1 wor w 2", true);
1334 
1335         p = Pattern.compile("([a-z])+?c");
1336         Matcher m = p.matcher("ababcdefdec");
1337         check(m, "ababc");
1338 
1339         // Supplementary character test
1340         p = Pattern.compile(toSupplementaries("([a-z])+?c"));
1341         m = p.matcher(toSupplementaries("ababcdefdec"));
1342         check(m, toSupplementaries("ababc"));
1343 
1344         report("Reluctant Repetition");
1345     }
1346 
1347     private static void serializeTest() throws Exception {
1348         String patternStr = "(b)";
1349         String matchStr = "b";
1350         Pattern pattern = Pattern.compile(patternStr);
1351         ByteArrayOutputStream baos = new ByteArrayOutputStream();
1352         ObjectOutputStream oos = new ObjectOutputStream(baos);
1353         oos.writeObject(pattern);
1354         oos.close();
1355         ObjectInputStream ois = new ObjectInputStream(
1356             new ByteArrayInputStream(baos.toByteArray()));
1357         Pattern serializedPattern = (Pattern)ois.readObject();
1358         ois.close();
1359         Matcher matcher = serializedPattern.matcher(matchStr);
1360         if (!matcher.matches())
1361             failCount++;
1362         if (matcher.groupCount() != 1)
1363             failCount++;
1364 
1365         report("Serialization");
1366     }
1367 
1368     private static void gTest() {
1369         Pattern pattern = Pattern.compile("\\G\\w");
1370         Matcher matcher = pattern.matcher("abc#x#x");
1371         matcher.find();
1372         matcher.find();
1373         matcher.find();
1374         if (matcher.find())
1375             failCount++;
1376 
1377         pattern = Pattern.compile("\\GA*");
1378         matcher = pattern.matcher("1A2AA3");
1379         matcher.find();
1380         if (matcher.find())
1381             failCount++;
1382 
1383         pattern = Pattern.compile("\\GA*");
1384         matcher = pattern.matcher("1A2AA3");
1385         if (!matcher.find(1))
1386             failCount++;
1387         matcher.find();
1388         if (matcher.find())
1389             failCount++;
1390 
1391         report("\\G");
1392     }
1393 
1394     private static void zTest() {
1395         Pattern pattern = Pattern.compile("foo\\Z");
1396         // Positives
1397         check(pattern, "foo\u0085", true);
1398         check(pattern, "foo\u2028", true);
1399         check(pattern, "foo\u2029", true);
1400         check(pattern, "foo\n", true);
1401         check(pattern, "foo\r", true);
1402         check(pattern, "foo\r\n", true);
1403         // Negatives
1404         check(pattern, "fooo", false);
1405         check(pattern, "foo\n\r", false);
1406 
1407         pattern = Pattern.compile("foo\\Z", Pattern.UNIX_LINES);
1408         // Positives
1409         check(pattern, "foo", true);
1410         check(pattern, "foo\n", true);
1411         // Negatives
1412         check(pattern, "foo\r", false);
1413         check(pattern, "foo\u0085", false);
1414         check(pattern, "foo\u2028", false);
1415         check(pattern, "foo\u2029", false);
1416 
1417         report("\\Z");
1418     }
1419 
1420     private static void replaceFirstTest() {
1421         Pattern pattern = Pattern.compile("(ab)(c*)");
1422         Matcher matcher = pattern.matcher("abccczzzabcczzzabccc");
1423         if (!matcher.replaceFirst("test").equals("testzzzabcczzzabccc"))
1424             failCount++;
1425 
1426         matcher.reset("zzzabccczzzabcczzzabccczzz");
1427         if (!matcher.replaceFirst("test").equals("zzztestzzzabcczzzabccczzz"))
1428             failCount++;
1429 
1430         matcher.reset("zzzabccczzzabcczzzabccczzz");
1431         String result = matcher.replaceFirst("$1");
1432         if (!result.equals("zzzabzzzabcczzzabccczzz"))
1433             failCount++;
1434 
1435         matcher.reset("zzzabccczzzabcczzzabccczzz");
1436         result = matcher.replaceFirst("$2");
1437         if (!result.equals("zzzccczzzabcczzzabccczzz"))
1438             failCount++;
1439 
1440         pattern = Pattern.compile("a*");
1441         matcher = pattern.matcher("aaaaaaaaaa");
1442         if (!matcher.replaceFirst("test").equals("test"))
1443             failCount++;
1444 
1445         pattern = Pattern.compile("a+");
1446         matcher = pattern.matcher("zzzaaaaaaaaaa");
1447         if (!matcher.replaceFirst("test").equals("zzztest"))
1448             failCount++;
1449 
1450         // Supplementary character test
1451         pattern = Pattern.compile(toSupplementaries("(ab)(c*)"));
1452         matcher = pattern.matcher(toSupplementaries("abccczzzabcczzzabccc"));
1453         if (!matcher.replaceFirst(toSupplementaries("test"))
1454                 .equals(toSupplementaries("testzzzabcczzzabccc")))
1455             failCount++;
1456 
1457         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1458         if (!matcher.replaceFirst(toSupplementaries("test")).
1459             equals(toSupplementaries("zzztestzzzabcczzzabccczzz")))
1460             failCount++;
1461 
1462         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1463         result = matcher.replaceFirst("$1");
1464         if (!result.equals(toSupplementaries("zzzabzzzabcczzzabccczzz")))
1465             failCount++;
1466 
1467         matcher.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
1468         result = matcher.replaceFirst("$2");
1469         if (!result.equals(toSupplementaries("zzzccczzzabcczzzabccczzz")))
1470             failCount++;
1471 
1472         pattern = Pattern.compile(toSupplementaries("a*"));
1473         matcher = pattern.matcher(toSupplementaries("aaaaaaaaaa"));
1474         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("test")))
1475             failCount++;
1476 
1477         pattern = Pattern.compile(toSupplementaries("a+"));
1478         matcher = pattern.matcher(toSupplementaries("zzzaaaaaaaaaa"));
1479         if (!matcher.replaceFirst(toSupplementaries("test")).equals(toSupplementaries("zzztest")))
1480             failCount++;
1481 
1482         report("Replace First");
1483     }
1484 
1485     private static void unixLinesTest() {
1486         Pattern pattern = Pattern.compile(".*");
1487         Matcher matcher = pattern.matcher("aa\u2028blah");
1488         matcher.find();
1489         if (!matcher.group(0).equals("aa"))
1490             failCount++;
1491 
1492         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1493         matcher = pattern.matcher("aa\u2028blah");
1494         matcher.find();
1495         if (!matcher.group(0).equals("aa\u2028blah"))
1496             failCount++;
1497 
1498         pattern = Pattern.compile("[az]$",
1499                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1500         matcher = pattern.matcher("aa\u2028zz");
1501         check(matcher, "a\u2028", false);
1502 
1503         // Supplementary character test
1504         pattern = Pattern.compile(".*");
1505         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1506         matcher.find();
1507         if (!matcher.group(0).equals(toSupplementaries("aa")))
1508             failCount++;
1509 
1510         pattern = Pattern.compile(".*", Pattern.UNIX_LINES);
1511         matcher = pattern.matcher(toSupplementaries("aa\u2028blah"));
1512         matcher.find();
1513         if (!matcher.group(0).equals(toSupplementaries("aa\u2028blah")))
1514             failCount++;
1515 
1516         pattern = Pattern.compile(toSupplementaries("[az]$"),
1517                                   Pattern.MULTILINE | Pattern.UNIX_LINES);
1518         matcher = pattern.matcher(toSupplementaries("aa\u2028zz"));
1519         check(matcher, toSupplementaries("a\u2028"), false);
1520 
1521         report("Unix Lines");
1522     }
1523 
1524     private static void commentsTest() {
1525         int flags = Pattern.COMMENTS;
1526 
1527         Pattern pattern = Pattern.compile("aa \\# aa", flags);
1528         Matcher matcher = pattern.matcher("aa#aa");
1529         if (!matcher.matches())
1530             failCount++;
1531 
1532         pattern = Pattern.compile("aa  # blah", flags);
1533         matcher = pattern.matcher("aa");
1534         if (!matcher.matches())
1535             failCount++;
1536 
1537         pattern = Pattern.compile("aa blah", flags);
1538         matcher = pattern.matcher("aablah");
1539         if (!matcher.matches())
1540              failCount++;
1541 
1542         pattern = Pattern.compile("aa  # blah blech  ", flags);
1543         matcher = pattern.matcher("aa");
1544         if (!matcher.matches())
1545             failCount++;
1546 
1547         pattern = Pattern.compile("aa  # blah\n  ", flags);
1548         matcher = pattern.matcher("aa");
1549         if (!matcher.matches())
1550             failCount++;
1551 
1552         pattern = Pattern.compile("aa  # blah\nbc # blech", flags);
1553         matcher = pattern.matcher("aabc");
1554         if (!matcher.matches())
1555              failCount++;
1556 
1557         pattern = Pattern.compile("aa  # blah\nbc# blech", flags);
1558         matcher = pattern.matcher("aabc");
1559         if (!matcher.matches())
1560              failCount++;
1561 
1562         pattern = Pattern.compile("aa  # blah\nbc\\# blech", flags);
1563         matcher = pattern.matcher("aabc#blech");
1564         if (!matcher.matches())
1565              failCount++;
1566 
1567         // Supplementary character test
1568         pattern = Pattern.compile(toSupplementaries("aa \\# aa"), flags);
1569         matcher = pattern.matcher(toSupplementaries("aa#aa"));
1570         if (!matcher.matches())
1571             failCount++;
1572 
1573         pattern = Pattern.compile(toSupplementaries("aa  # blah"), flags);
1574         matcher = pattern.matcher(toSupplementaries("aa"));
1575         if (!matcher.matches())
1576             failCount++;
1577 
1578         pattern = Pattern.compile(toSupplementaries("aa blah"), flags);
1579         matcher = pattern.matcher(toSupplementaries("aablah"));
1580         if (!matcher.matches())
1581              failCount++;
1582 
1583         pattern = Pattern.compile(toSupplementaries("aa  # blah blech  "), flags);
1584         matcher = pattern.matcher(toSupplementaries("aa"));
1585         if (!matcher.matches())
1586             failCount++;
1587 
1588         pattern = Pattern.compile(toSupplementaries("aa  # blah\n  "), flags);
1589         matcher = pattern.matcher(toSupplementaries("aa"));
1590         if (!matcher.matches())
1591             failCount++;
1592 
1593         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc # blech"), flags);
1594         matcher = pattern.matcher(toSupplementaries("aabc"));
1595         if (!matcher.matches())
1596              failCount++;
1597 
1598         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc# blech"), flags);
1599         matcher = pattern.matcher(toSupplementaries("aabc"));
1600         if (!matcher.matches())
1601              failCount++;
1602 
1603         pattern = Pattern.compile(toSupplementaries("aa  # blah\nbc\\# blech"), flags);
1604         matcher = pattern.matcher(toSupplementaries("aabc#blech"));
1605         if (!matcher.matches())
1606              failCount++;
1607 
1608         report("Comments");
1609     }
1610 
1611     private static void caseFoldingTest() { // bug 4504687
1612         int flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1613         Pattern pattern = Pattern.compile("aa", flags);
1614         Matcher matcher = pattern.matcher("ab");
1615         if (matcher.matches())
1616             failCount++;
1617 
1618         pattern = Pattern.compile("aA", flags);
1619         matcher = pattern.matcher("ab");
1620         if (matcher.matches())
1621             failCount++;
1622 
1623         pattern = Pattern.compile("aa", flags);
1624         matcher = pattern.matcher("aB");
1625         if (matcher.matches())
1626             failCount++;
1627         matcher = pattern.matcher("Ab");
1628         if (matcher.matches())
1629             failCount++;
1630 
1631         // ASCII               "a"
1632         // Latin-1 Supplement  "a" + grave
1633         // Cyrillic            "a"
1634         String[] patterns = new String[] {
1635             //single
1636             "a", "\u00e0", "\u0430",
1637             //slice
1638             "ab", "\u00e0\u00e1", "\u0430\u0431",
1639             //class single
1640             "[a]", "[\u00e0]", "[\u0430]",
1641             //class range
1642             "[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
1643             //back reference
1644             "(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
1645         };
1646 
1647         String[] texts = new String[] {
1648             "A", "\u00c0", "\u0410",
1649             "AB", "\u00c0\u00c1", "\u0410\u0411",
1650             "A", "\u00c0", "\u0410",
1651             "B", "\u00c2", "\u0411",
1652             "aA", "\u00e0\u00c0", "\u0430\u0410"
1653         };
1654 
1655         boolean[] expected = new boolean[] {
1656             true, false, false,
1657             true, false, false,
1658             true, false, false,
1659             true, false, false,
1660             true, false, false
1661         };
1662 
1663         flags = Pattern.CASE_INSENSITIVE;
1664         for (int i = 0; i < patterns.length; i++) {
1665             pattern = Pattern.compile(patterns[i], flags);
1666             matcher = pattern.matcher(texts[i]);
1667             if (matcher.matches() != expected[i]) {
1668                 System.out.println("<1> Failed at " + i);
1669                 failCount++;
1670             }
1671         }
1672 
1673         flags = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
1674         for (int i = 0; i < patterns.length; i++) {
1675             pattern = Pattern.compile(patterns[i], flags);
1676             matcher = pattern.matcher(texts[i]);
1677             if (!matcher.matches()) {
1678                 System.out.println("<2> Failed at " + i);
1679                 failCount++;
1680             }
1681         }
1682         // flag unicode_case alone should do nothing
1683         flags = Pattern.UNICODE_CASE;
1684         for (int i = 0; i < patterns.length; i++) {
1685             pattern = Pattern.compile(patterns[i], flags);
1686             matcher = pattern.matcher(texts[i]);
1687             if (matcher.matches()) {
1688                 System.out.println("<3> Failed at " + i);
1689                 failCount++;
1690             }
1691         }
1692 
1693         // Special cases: i, I, u+0131 and u+0130
1694         flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
1695         pattern = Pattern.compile("[h-j]+", flags);
1696         if (!pattern.matcher("\u0131\u0130").matches())
1697             failCount++;
1698         report("Case Folding");
1699     }
1700 
1701     private static void appendTest() {
1702         Pattern pattern = Pattern.compile("(ab)(cd)");
1703         Matcher matcher = pattern.matcher("abcd");
1704         String result = matcher.replaceAll("$2$1");
1705         if (!result.equals("cdab"))
1706             failCount++;
1707 
1708         String  s1 = "Swap all: first = 123, second = 456";
1709         String  s2 = "Swap one: first = 123, second = 456";
1710         String  r  = "$3$2$1";
1711         pattern = Pattern.compile("([a-z]+)( *= *)([0-9]+)");
1712         matcher = pattern.matcher(s1);
1713 
1714         result = matcher.replaceAll(r);
1715         if (!result.equals("Swap all: 123 = first, 456 = second"))
1716             failCount++;
1717 
1718         matcher = pattern.matcher(s2);
1719 
1720         if (matcher.find()) {
1721             StringBuffer sb = new StringBuffer();
1722             matcher.appendReplacement(sb, r);
1723             matcher.appendTail(sb);
1724             result = sb.toString();
1725             if (!result.equals("Swap one: 123 = first, second = 456"))
1726                 failCount++;
1727         }
1728 
1729         // Supplementary character test
1730         pattern = Pattern.compile(toSupplementaries("(ab)(cd)"));
1731         matcher = pattern.matcher(toSupplementaries("abcd"));
1732         result = matcher.replaceAll("$2$1");
1733         if (!result.equals(toSupplementaries("cdab")))
1734             failCount++;
1735 
1736         s1 = toSupplementaries("Swap all: first = 123, second = 456");
1737         s2 = toSupplementaries("Swap one: first = 123, second = 456");
1738         r  = toSupplementaries("$3$2$1");
1739         pattern = Pattern.compile(toSupplementaries("([a-z]+)( *= *)([0-9]+)"));
1740         matcher = pattern.matcher(s1);
1741 
1742         result = matcher.replaceAll(r);
1743         if (!result.equals(toSupplementaries("Swap all: 123 = first, 456 = second")))
1744             failCount++;
1745 
1746         matcher = pattern.matcher(s2);
1747 
1748         if (matcher.find()) {
1749             StringBuffer sb = new StringBuffer();
1750             matcher.appendReplacement(sb, r);
1751             matcher.appendTail(sb);
1752             result = sb.toString();
1753             if (!result.equals(toSupplementaries("Swap one: 123 = first, second = 456")))
1754                 failCount++;
1755         }
1756         report("Append");
1757     }
1758 
1759     private static void splitTest() {
1760         Pattern pattern = Pattern.compile(":");
1761         String[] result = pattern.split("foo:and:boo", 2);
1762         if (!result[0].equals("foo"))
1763             failCount++;
1764         if (!result[1].equals("and:boo"))
1765             failCount++;
1766         // Supplementary character test
1767         Pattern patternX = Pattern.compile(toSupplementaries("X"));
1768         result = patternX.split(toSupplementaries("fooXandXboo"), 2);
1769         if (!result[0].equals(toSupplementaries("foo")))
1770             failCount++;
1771         if (!result[1].equals(toSupplementaries("andXboo")))
1772             failCount++;
1773 
1774         CharBuffer cb = CharBuffer.allocate(100);
1775         cb.put("foo:and:boo");
1776         cb.flip();
1777         result = pattern.split(cb);
1778         if (!result[0].equals("foo"))
1779             failCount++;
1780         if (!result[1].equals("and"))
1781             failCount++;
1782         if (!result[2].equals("boo"))
1783             failCount++;
1784 
1785         // Supplementary character test
1786         CharBuffer cbs = CharBuffer.allocate(100);
1787         cbs.put(toSupplementaries("fooXandXboo"));
1788         cbs.flip();
1789         result = patternX.split(cbs);
1790         if (!result[0].equals(toSupplementaries("foo")))
1791             failCount++;
1792         if (!result[1].equals(toSupplementaries("and")))
1793             failCount++;
1794         if (!result[2].equals(toSupplementaries("boo")))
1795             failCount++;
1796 
1797         String source = "0123456789";
1798         for (int limit=-2; limit<3; limit++) {
1799             for (int x=0; x<10; x++) {
1800                 result = source.split(Integer.toString(x), limit);
1801                 int expectedLength = limit < 1 ? 2 : limit;
1802 
1803                 if ((limit == 0) && (x == 9)) {
1804                     // expected dropping of ""
1805                     if (result.length != 1)
1806                         failCount++;
1807                     if (!result[0].equals("012345678")) {
1808                         failCount++;
1809                     }
1810                 } else {
1811                     if (result.length != expectedLength) {
1812                         failCount++;
1813                     }
1814                     if (!result[0].equals(source.substring(0,x))) {
1815                         if (limit != 1) {
1816                             failCount++;
1817                         } else {
1818                             if (!result[0].equals(source.substring(0,10))) {
1819                                 failCount++;
1820                             }
1821                         }
1822                     }
1823                     if (expectedLength > 1) { // Check segment 2
1824                         if (!result[1].equals(source.substring(x+1,10)))
1825                             failCount++;
1826                     }
1827                 }
1828             }
1829         }
1830         // Check the case for no match found
1831         for (int limit=-2; limit<3; limit++) {
1832             result = source.split("e", limit);
1833             if (result.length != 1)
1834                 failCount++;
1835             if (!result[0].equals(source))
1836                 failCount++;
1837         }
1838         // Check the case for limit == 0, source = "";
1839         // split() now returns 0-length for empty source "" see #6559590
1840         source = "";
1841         result = source.split("e", 0);
1842         if (result.length != 1)
1843             failCount++;
1844         if (!result[0].equals(source))
1845             failCount++;
1846 
1847         // Check both split() and splitAsStraem(), especially for zero-lenth
1848         // input and zero-lenth match cases
1849         String[][] input = new String[][] {
1850             { " ",           "Abc Efg Hij" },   // normal non-zero-match
1851             { " ",           " Abc Efg Hij" },  // leading empty str for non-zero-match
1852             { " ",           "Abc  Efg Hij" },  // non-zero-match in the middle
1853             { "(?=\\p{Lu})", "AbcEfgHij" },     // no leading empty str for zero-match
1854             { "(?=\\p{Lu})", "AbcEfg" },
1855             { "(?=\\p{Lu})", "Abc" },
1856             { " ",           "" },              // zero-length input
1857             { ".*",          "" },
1858 
1859             // some tests from PatternStreamTest.java
1860             { "4",       "awgqwefg1fefw4vssv1vvv1" },
1861             { "\u00a3a", "afbfq\u00a3abgwgb\u00a3awngnwggw\u00a3a\u00a3ahjrnhneerh" },
1862             { "1",       "awgqwefg1fefw4vssv1vvv1" },
1863             { "1",       "a\u4ebafg1fefw\u4eba4\u9f9cvssv\u9f9c1v\u672c\u672cvv" },
1864             { "\u56da",  "1\u56da23\u56da456\u56da7890" },
1865             { "\u56da",  "1\u56da23\u9f9c\u672c\u672c\u56da456\u56da\u9f9c\u672c7890" },
1866             { "\u56da",  "" },
1867             { "[ \t,:.]","This is,testing: with\tdifferent separators." }, //multiple septs
1868             { "o",       "boo:and:foo" },
1869             { "o",       "booooo:and:fooooo" },
1870             { "o",       "fooooo:" },
1871         };
1872 
1873         String[][] expected = new String[][] {
1874             { "Abc", "Efg", "Hij" },
1875             { "", "Abc", "Efg", "Hij" },
1876             { "Abc", "", "Efg", "Hij" },
1877             { "Abc", "Efg", "Hij" },
1878             { "Abc", "Efg" },
1879             { "Abc" },
1880             { "" },
1881             { "" },
1882 
1883             { "awgqwefg1fefw", "vssv1vvv1" },
1884             { "afbfq", "bgwgb", "wngnwggw", "", "hjrnhneerh" },
1885             { "awgqwefg", "fefw4vssv", "vvv" },
1886             { "a\u4ebafg", "fefw\u4eba4\u9f9cvssv\u9f9c", "v\u672c\u672cvv" },
1887             { "1", "23", "456", "7890" },
1888             { "1", "23\u9f9c\u672c\u672c", "456", "\u9f9c\u672c7890" },
1889             { "" },
1890             { "This", "is", "testing", "", "with", "different", "separators" },
1891             { "b", "", ":and:f" },
1892             { "b", "", "", "", "", ":and:f" },
1893             { "f", "", "", "", "", ":" },
1894         };
1895         for (int i = 0; i < input.length; i++) {
1896             pattern = Pattern.compile(input[i][0]);
1897             if (!Arrays.equals(pattern.split(input[i][1]), expected[i])) {
1898                 failCount++;
1899             }
1900             if (input[i][1].length() > 0 &&  // splitAsStream() return empty resulting
1901                                              // array for zero-length input for now
1902                 !Arrays.equals(pattern.splitAsStream(input[i][1]).toArray(),
1903                                expected[i])) {
1904                 failCount++;
1905             }
1906         }
1907         report("Split");
1908     }
1909 
1910     private static void negationTest() {
1911         Pattern pattern = Pattern.compile("[\\[@^]+");
1912         Matcher matcher = pattern.matcher("@@@@[[[[^^^^");
1913         if (!matcher.find())
1914             failCount++;
1915         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1916             failCount++;
1917         pattern = Pattern.compile("[@\\[^]+");
1918         matcher = pattern.matcher("@@@@[[[[^^^^");
1919         if (!matcher.find())
1920             failCount++;
1921         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1922             failCount++;
1923         pattern = Pattern.compile("[@\\[^@]+");
1924         matcher = pattern.matcher("@@@@[[[[^^^^");
1925         if (!matcher.find())
1926             failCount++;
1927         if (!matcher.group(0).equals("@@@@[[[[^^^^"))
1928             failCount++;
1929 
1930         pattern = Pattern.compile("\\)");
1931         matcher = pattern.matcher("xxx)xxx");
1932         if (!matcher.find())
1933             failCount++;
1934 
1935         report("Negation");
1936     }
1937 
1938     private static void ampersandTest() {
1939         Pattern pattern = Pattern.compile("[&@]+");
1940         check(pattern, "@@@@&&&&", true);
1941 
1942         pattern = Pattern.compile("[@&]+");
1943         check(pattern, "@@@@&&&&", true);
1944 
1945         pattern = Pattern.compile("[@\\&]+");
1946         check(pattern, "@@@@&&&&", true);
1947 
1948         report("Ampersand");
1949     }
1950 
1951     private static void octalTest() throws Exception {
1952         Pattern pattern = Pattern.compile("\\u0007");
1953         Matcher matcher = pattern.matcher("\u0007");
1954         if (!matcher.matches())
1955             failCount++;
1956         pattern = Pattern.compile("\\07");
1957         matcher = pattern.matcher("\u0007");
1958         if (!matcher.matches())
1959             failCount++;
1960         pattern = Pattern.compile("\\007");
1961         matcher = pattern.matcher("\u0007");
1962         if (!matcher.matches())
1963             failCount++;
1964         pattern = Pattern.compile("\\0007");
1965         matcher = pattern.matcher("\u0007");
1966         if (!matcher.matches())
1967             failCount++;
1968         pattern = Pattern.compile("\\040");
1969         matcher = pattern.matcher("\u0020");
1970         if (!matcher.matches())
1971             failCount++;
1972         pattern = Pattern.compile("\\0403");
1973         matcher = pattern.matcher("\u00203");
1974         if (!matcher.matches())
1975             failCount++;
1976         pattern = Pattern.compile("\\0103");
1977         matcher = pattern.matcher("\u0043");
1978         if (!matcher.matches())
1979             failCount++;
1980 
1981         report("Octal");
1982     }
1983 
1984     private static void longPatternTest() throws Exception {
1985         try {
1986             Pattern pattern = Pattern.compile(
1987                 "a 32-character-long pattern xxxx");
1988             pattern = Pattern.compile("a 33-character-long pattern xxxxx");
1989             pattern = Pattern.compile("a thirty four character long regex");
1990             StringBuffer patternToBe = new StringBuffer(101);
1991             for (int i=0; i<100; i++)
1992                 patternToBe.append((char)(97 + i%26));
1993             pattern = Pattern.compile(patternToBe.toString());
1994         } catch (PatternSyntaxException e) {
1995             failCount++;
1996         }
1997 
1998         // Supplementary character test
1999         try {
2000             Pattern pattern = Pattern.compile(
2001                 toSupplementaries("a 32-character-long pattern xxxx"));
2002             pattern = Pattern.compile(toSupplementaries("a 33-character-long pattern xxxxx"));
2003             pattern = Pattern.compile(toSupplementaries("a thirty four character long regex"));
2004             StringBuffer patternToBe = new StringBuffer(101*2);
2005             for (int i=0; i<100; i++)
2006                 patternToBe.append(Character.toChars(Character.MIN_SUPPLEMENTARY_CODE_POINT
2007                                                      + 97 + i%26));
2008             pattern = Pattern.compile(patternToBe.toString());
2009         } catch (PatternSyntaxException e) {
2010             failCount++;
2011         }
2012         report("LongPattern");
2013     }
2014 
2015     private static void group0Test() throws Exception {
2016         Pattern pattern = Pattern.compile("(tes)ting");
2017         Matcher matcher = pattern.matcher("testing");
2018         check(matcher, "testing");
2019 
2020         matcher.reset("testing");
2021         if (matcher.lookingAt()) {
2022             if (!matcher.group(0).equals("testing"))
2023                 failCount++;
2024         } else {
2025             failCount++;
2026         }
2027 
2028         matcher.reset("testing");
2029         if (matcher.matches()) {
2030             if (!matcher.group(0).equals("testing"))
2031                 failCount++;
2032         } else {
2033             failCount++;
2034         }
2035 
2036         pattern = Pattern.compile("(tes)ting");
2037         matcher = pattern.matcher("testing");
2038         if (matcher.lookingAt()) {
2039             if (!matcher.group(0).equals("testing"))
2040                 failCount++;
2041         } else {
2042             failCount++;
2043         }
2044 
2045         pattern = Pattern.compile("^(tes)ting");
2046         matcher = pattern.matcher("testing");
2047         if (matcher.matches()) {
2048             if (!matcher.group(0).equals("testing"))
2049                 failCount++;
2050         } else {
2051             failCount++;
2052         }
2053 
2054         // Supplementary character test
2055         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2056         matcher = pattern.matcher(toSupplementaries("testing"));
2057         check(matcher, toSupplementaries("testing"));
2058 
2059         matcher.reset(toSupplementaries("testing"));
2060         if (matcher.lookingAt()) {
2061             if (!matcher.group(0).equals(toSupplementaries("testing")))
2062                 failCount++;
2063         } else {
2064             failCount++;
2065         }
2066 
2067         matcher.reset(toSupplementaries("testing"));
2068         if (matcher.matches()) {
2069             if (!matcher.group(0).equals(toSupplementaries("testing")))
2070                 failCount++;
2071         } else {
2072             failCount++;
2073         }
2074 
2075         pattern = Pattern.compile(toSupplementaries("(tes)ting"));
2076         matcher = pattern.matcher(toSupplementaries("testing"));
2077         if (matcher.lookingAt()) {
2078             if (!matcher.group(0).equals(toSupplementaries("testing")))
2079                 failCount++;
2080         } else {
2081             failCount++;
2082         }
2083 
2084         pattern = Pattern.compile(toSupplementaries("^(tes)ting"));
2085         matcher = pattern.matcher(toSupplementaries("testing"));
2086         if (matcher.matches()) {
2087             if (!matcher.group(0).equals(toSupplementaries("testing")))
2088                 failCount++;
2089         } else {
2090             failCount++;
2091         }
2092 
2093         report("Group0");
2094     }
2095 
2096     private static void findIntTest() throws Exception {
2097         Pattern p = Pattern.compile("blah");
2098         Matcher m = p.matcher("zzzzblahzzzzzblah");
2099         boolean result = m.find(2);
2100         if (!result)
2101             failCount++;
2102 
2103         p = Pattern.compile("$");
2104         m = p.matcher("1234567890");
2105         result = m.find(10);
2106         if (!result)
2107             failCount++;
2108         try {
2109             result = m.find(11);
2110             failCount++;
2111         } catch (IndexOutOfBoundsException e) {
2112             // correct result
2113         }
2114 
2115         // Supplementary character test
2116         p = Pattern.compile(toSupplementaries("blah"));
2117         m = p.matcher(toSupplementaries("zzzzblahzzzzzblah"));
2118         result = m.find(2);
2119         if (!result)
2120             failCount++;
2121 
2122         report("FindInt");
2123     }
2124 
2125     private static void emptyPatternTest() throws Exception {
2126         Pattern p = Pattern.compile("");
2127         Matcher m = p.matcher("foo");
2128 
2129         // Should find empty pattern at beginning of input
2130         boolean result = m.find();
2131         if (result != true)
2132             failCount++;
2133         if (m.start() != 0)
2134             failCount++;
2135 
2136         // Should not match entire input if input is not empty
2137         m.reset();
2138         result = m.matches();
2139         if (result == true)
2140             failCount++;
2141 
2142         try {
2143             m.start(0);
2144             failCount++;
2145         } catch (IllegalStateException e) {
2146             // Correct result
2147         }
2148 
2149         // Should match entire input if input is empty
2150         m.reset("");
2151         result = m.matches();
2152         if (result != true)
2153             failCount++;
2154 
2155         result = Pattern.matches("", "");
2156         if (result != true)
2157             failCount++;
2158 
2159         result = Pattern.matches("", "foo");
2160         if (result == true)
2161             failCount++;
2162         report("EmptyPattern");
2163     }
2164 
2165     private static void charClassTest() throws Exception {
2166         Pattern pattern = Pattern.compile("blah[ab]]blech");
2167         check(pattern, "blahb]blech", true);
2168 
2169         pattern = Pattern.compile("[abc[def]]");
2170         check(pattern, "b", true);
2171 
2172         // Supplementary character tests
2173         pattern = Pattern.compile(toSupplementaries("blah[ab]]blech"));
2174         check(pattern, toSupplementaries("blahb]blech"), true);
2175 
2176         pattern = Pattern.compile(toSupplementaries("[abc[def]]"));
2177         check(pattern, toSupplementaries("b"), true);
2178 
2179         try {
2180             // u00ff when UNICODE_CASE
2181             pattern = Pattern.compile("[ab\u00ffcd]",
2182                                       Pattern.CASE_INSENSITIVE|
2183                                       Pattern.UNICODE_CASE);
2184             check(pattern, "ab\u00ffcd", true);
2185             check(pattern, "Ab\u0178Cd", true);
2186 
2187             // u00b5 when UNICODE_CASE
2188             pattern = Pattern.compile("[ab\u00b5cd]",
2189                                       Pattern.CASE_INSENSITIVE|
2190                                       Pattern.UNICODE_CASE);
2191             check(pattern, "ab\u00b5cd", true);
2192             check(pattern, "Ab\u039cCd", true);
2193         } catch (Exception e) { failCount++; }
2194 
2195         /* Special cases
2196            (1)LatinSmallLetterLongS u+017f
2197            (2)LatinSmallLetterDotlessI u+0131
2198            (3)LatineCapitalLetterIWithDotAbove u+0130
2199            (4)KelvinSign u+212a
2200            (5)AngstromSign u+212b
2201         */
2202         int flags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
2203         pattern = Pattern.compile("[sik\u00c5]+", flags);
2204         if (!pattern.matcher("\u017f\u0130\u0131\u212a\u212b").matches())
2205             failCount++;
2206 
2207         report("CharClass");
2208     }
2209 
2210     private static void caretTest() throws Exception {
2211         Pattern pattern = Pattern.compile("\\w*");
2212         Matcher matcher = pattern.matcher("a#bc#def##g");
2213         check(matcher, "a");
2214         check(matcher, "");
2215         check(matcher, "bc");
2216         check(matcher, "");
2217         check(matcher, "def");
2218         check(matcher, "");
2219         check(matcher, "");
2220         check(matcher, "g");
2221         check(matcher, "");
2222         if (matcher.find())
2223             failCount++;
2224 
2225         pattern = Pattern.compile("^\\w*");
2226         matcher = pattern.matcher("a#bc#def##g");
2227         check(matcher, "a");
2228         if (matcher.find())
2229             failCount++;
2230 
2231         pattern = Pattern.compile("\\w");
2232         matcher = pattern.matcher("abc##x");
2233         check(matcher, "a");
2234         check(matcher, "b");
2235         check(matcher, "c");
2236         check(matcher, "x");
2237         if (matcher.find())
2238             failCount++;
2239 
2240         pattern = Pattern.compile("^\\w");
2241         matcher = pattern.matcher("abc##x");
2242         check(matcher, "a");
2243         if (matcher.find())
2244             failCount++;
2245 
2246         pattern = Pattern.compile("\\A\\p{Alpha}{3}");
2247         matcher = pattern.matcher("abcdef-ghi\njklmno");
2248         check(matcher, "abc");
2249         if (matcher.find())
2250             failCount++;
2251 
2252         pattern = Pattern.compile("^\\p{Alpha}{3}", Pattern.MULTILINE);
2253         matcher = pattern.matcher("abcdef-ghi\njklmno");
2254         check(matcher, "abc");
2255         check(matcher, "jkl");
2256         if (matcher.find())
2257             failCount++;
2258 
2259         pattern = Pattern.compile("^", Pattern.MULTILINE);
2260         matcher = pattern.matcher("this is some text");
2261         String result = matcher.replaceAll("X");
2262         if (!result.equals("Xthis is some text"))
2263             failCount++;
2264 
2265         pattern = Pattern.compile("^");
2266         matcher = pattern.matcher("this is some text");
2267         result = matcher.replaceAll("X");
2268         if (!result.equals("Xthis is some text"))
2269             failCount++;
2270 
2271         pattern = Pattern.compile("^", Pattern.MULTILINE | Pattern.UNIX_LINES);
2272         matcher = pattern.matcher("this is some text\n");
2273         result = matcher.replaceAll("X");
2274         if (!result.equals("Xthis is some text\n"))
2275             failCount++;
2276 
2277         report("Caret");
2278     }
2279 
2280     private static void groupCaptureTest() throws Exception {
2281         // Independent group
2282         Pattern pattern = Pattern.compile("x+(?>y+)z+");
2283         Matcher matcher = pattern.matcher("xxxyyyzzz");
2284         matcher.find();
2285         try {
2286             String blah = matcher.group(1);
2287             failCount++;
2288         } catch (IndexOutOfBoundsException ioobe) {
2289             // Good result
2290         }
2291         // Pure group
2292         pattern = Pattern.compile("x+(?:y+)z+");
2293         matcher = pattern.matcher("xxxyyyzzz");
2294         matcher.find();
2295         try {
2296             String blah = matcher.group(1);
2297             failCount++;
2298         } catch (IndexOutOfBoundsException ioobe) {
2299             // Good result
2300         }
2301 
2302         // Supplementary character tests
2303         // Independent group
2304         pattern = Pattern.compile(toSupplementaries("x+(?>y+)z+"));
2305         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2306         matcher.find();
2307         try {
2308             String blah = matcher.group(1);
2309             failCount++;
2310         } catch (IndexOutOfBoundsException ioobe) {
2311             // Good result
2312         }
2313         // Pure group
2314         pattern = Pattern.compile(toSupplementaries("x+(?:y+)z+"));
2315         matcher = pattern.matcher(toSupplementaries("xxxyyyzzz"));
2316         matcher.find();
2317         try {
2318             String blah = matcher.group(1);
2319             failCount++;
2320         } catch (IndexOutOfBoundsException ioobe) {
2321             // Good result
2322         }
2323 
2324         report("GroupCapture");
2325     }
2326 
2327     private static void backRefTest() throws Exception {
2328         Pattern pattern = Pattern.compile("(a*)bc\\1");
2329         check(pattern, "zzzaabcazzz", true);
2330 
2331         pattern = Pattern.compile("(a*)bc\\1");
2332         check(pattern, "zzzaabcaazzz", true);
2333 
2334         pattern = Pattern.compile("(abc)(def)\\1");
2335         check(pattern, "abcdefabc", true);
2336 
2337         pattern = Pattern.compile("(abc)(def)\\3");
2338         check(pattern, "abcdefabc", false);
2339 
2340         try {
2341             for (int i = 1; i < 10; i++) {
2342                 // Make sure backref 1-9 are always accepted
2343                 pattern = Pattern.compile("abcdef\\" + i);
2344                 // and fail to match if the target group does not exit
2345                 check(pattern, "abcdef", false);
2346             }
2347         } catch(PatternSyntaxException e) {
2348             failCount++;
2349         }
2350 
2351         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11");
2352         check(pattern, "abcdefghija", false);
2353         check(pattern, "abcdefghija1", true);
2354 
2355         pattern = Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11");
2356         check(pattern, "abcdefghijkk", true);
2357 
2358         pattern = Pattern.compile("(a)bcdefghij\\11");
2359         check(pattern, "abcdefghija1", true);
2360 
2361         // Supplementary character tests
2362         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2363         check(pattern, toSupplementaries("zzzaabcazzz"), true);
2364 
2365         pattern = Pattern.compile(toSupplementaries("(a*)bc\\1"));
2366         check(pattern, toSupplementaries("zzzaabcaazzz"), true);
2367 
2368         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\1"));
2369         check(pattern, toSupplementaries("abcdefabc"), true);
2370 
2371         pattern = Pattern.compile(toSupplementaries("(abc)(def)\\3"));
2372         check(pattern, toSupplementaries("abcdefabc"), false);
2373 
2374         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\11"));
2375         check(pattern, toSupplementaries("abcdefghija"), false);
2376         check(pattern, toSupplementaries("abcdefghija1"), true);
2377 
2378         pattern = Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)\\11"));
2379         check(pattern, toSupplementaries("abcdefghijkk"), true);
2380 
2381         report("BackRef");
2382     }
2383 
2384     /**
2385      * Unicode Technical Report #18, section 2.6 End of Line
2386      * There is no empty line to be matched in the sequence \u000D\u000A
2387      * but there is an empty line in the sequence \u000A\u000D.
2388      */
2389     private static void anchorTest() throws Exception {
2390         Pattern p = Pattern.compile("^.*$", Pattern.MULTILINE);
2391         Matcher m = p.matcher("blah1\r\nblah2");
2392         m.find();
2393         m.find();
2394         if (!m.group().equals("blah2"))
2395             failCount++;
2396 
2397         m.reset("blah1\n\rblah2");
2398         m.find();
2399         m.find();
2400         m.find();
2401         if (!m.group().equals("blah2"))
2402             failCount++;
2403 
2404         // Test behavior of $ with \r\n at end of input
2405         p = Pattern.compile(".+$");
2406         m = p.matcher("blah1\r\n");
2407         if (!m.find())
2408             failCount++;
2409        if (!m.group().equals("blah1"))
2410             failCount++;
2411         if (m.find())
2412             failCount++;
2413 
2414         // Test behavior of $ with \r\n at end of input in multiline
2415         p = Pattern.compile(".+$", Pattern.MULTILINE);
2416         m = p.matcher("blah1\r\n");
2417         if (!m.find())
2418             failCount++;
2419         if (m.find())
2420             failCount++;
2421 
2422         // Test for $ recognition of \u0085 for bug 4527731
2423         p = Pattern.compile(".+$", Pattern.MULTILINE);
2424         m = p.matcher("blah1\u0085");
2425         if (!m.find())
2426             failCount++;
2427 
2428         // Supplementary character test
2429         p = Pattern.compile("^.*$", Pattern.MULTILINE);
2430         m = p.matcher(toSupplementaries("blah1\r\nblah2"));
2431         m.find();
2432         m.find();
2433         if (!m.group().equals(toSupplementaries("blah2")))
2434             failCount++;
2435 
2436         m.reset(toSupplementaries("blah1\n\rblah2"));
2437         m.find();
2438         m.find();
2439         m.find();
2440         if (!m.group().equals(toSupplementaries("blah2")))
2441             failCount++;
2442 
2443         // Test behavior of $ with \r\n at end of input
2444         p = Pattern.compile(".+$");
2445         m = p.matcher(toSupplementaries("blah1\r\n"));
2446         if (!m.find())
2447             failCount++;
2448         if (!m.group().equals(toSupplementaries("blah1")))
2449             failCount++;
2450         if (m.find())
2451             failCount++;
2452 
2453         // Test behavior of $ with \r\n at end of input in multiline
2454         p = Pattern.compile(".+$", Pattern.MULTILINE);
2455         m = p.matcher(toSupplementaries("blah1\r\n"));
2456         if (!m.find())
2457             failCount++;
2458         if (m.find())
2459             failCount++;
2460 
2461         // Test for $ recognition of \u0085 for bug 4527731
2462         p = Pattern.compile(".+$", Pattern.MULTILINE);
2463         m = p.matcher(toSupplementaries("blah1\u0085"));
2464         if (!m.find())
2465             failCount++;
2466 
2467         report("Anchors");
2468     }
2469 
2470     /**
2471      * A basic sanity test of Matcher.lookingAt().
2472      */
2473     private static void lookingAtTest() throws Exception {
2474         Pattern p = Pattern.compile("(ab)(c*)");
2475         Matcher m = p.matcher("abccczzzabcczzzabccc");
2476 
2477         if (!m.lookingAt())
2478             failCount++;
2479 
2480         if (!m.group().equals(m.group(0)))
2481             failCount++;
2482 
2483         m = p.matcher("zzzabccczzzabcczzzabccczzz");
2484         if (m.lookingAt())
2485             failCount++;
2486 
2487         // Supplementary character test
2488         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2489         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2490 
2491         if (!m.lookingAt())
2492             failCount++;
2493 
2494         if (!m.group().equals(m.group(0)))
2495             failCount++;
2496 
2497         m = p.matcher(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2498         if (m.lookingAt())
2499             failCount++;
2500 
2501         report("Looking At");
2502     }
2503 
2504     /**
2505      * A basic sanity test of Matcher.matches().
2506      */
2507     private static void matchesTest() throws Exception {
2508         // matches()
2509         Pattern p = Pattern.compile("ulb(c*)");
2510         Matcher m = p.matcher("ulbcccccc");
2511         if (!m.matches())
2512             failCount++;
2513 
2514         // find() but not matches()
2515         m.reset("zzzulbcccccc");
2516         if (m.matches())
2517             failCount++;
2518 
2519         // lookingAt() but not matches()
2520         m.reset("ulbccccccdef");
2521         if (m.matches())
2522             failCount++;
2523 
2524         // matches()
2525         p = Pattern.compile("a|ad");
2526         m = p.matcher("ad");
2527         if (!m.matches())
2528             failCount++;
2529 
2530         // Supplementary character test
2531         // matches()
2532         p = Pattern.compile(toSupplementaries("ulb(c*)"));
2533         m = p.matcher(toSupplementaries("ulbcccccc"));
2534         if (!m.matches())
2535             failCount++;
2536 
2537         // find() but not matches()
2538         m.reset(toSupplementaries("zzzulbcccccc"));
2539         if (m.matches())
2540             failCount++;
2541 
2542         // lookingAt() but not matches()
2543         m.reset(toSupplementaries("ulbccccccdef"));
2544         if (m.matches())
2545             failCount++;
2546 
2547         // matches()
2548         p = Pattern.compile(toSupplementaries("a|ad"));
2549         m = p.matcher(toSupplementaries("ad"));
2550         if (!m.matches())
2551             failCount++;
2552 
2553         report("Matches");
2554     }
2555 
2556     /**
2557      * A basic sanity test of Pattern.matches().
2558      */
2559     private static void patternMatchesTest() throws Exception {
2560         // matches()
2561         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2562                              toSupplementaries("ulbcccccc")))
2563             failCount++;
2564 
2565         // find() but not matches()
2566         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2567                             toSupplementaries("zzzulbcccccc")))
2568             failCount++;
2569 
2570         // lookingAt() but not matches()
2571         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2572                             toSupplementaries("ulbccccccdef")))
2573             failCount++;
2574 
2575         // Supplementary character test
2576         // matches()
2577         if (!Pattern.matches(toSupplementaries("ulb(c*)"),
2578                              toSupplementaries("ulbcccccc")))
2579             failCount++;
2580 
2581         // find() but not matches()
2582         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2583                             toSupplementaries("zzzulbcccccc")))
2584             failCount++;
2585 
2586         // lookingAt() but not matches()
2587         if (Pattern.matches(toSupplementaries("ulb(c*)"),
2588                             toSupplementaries("ulbccccccdef")))
2589             failCount++;
2590 
2591         report("Pattern Matches");
2592     }
2593 
2594     /**
2595      * Canonical equivalence testing. Tests the ability of the engine
2596      * to match sequences that are not explicitly specified in the
2597      * pattern when they are considered equivalent by the Unicode Standard.
2598      */
2599     private static void ceTest() throws Exception {
2600         // Decomposed char outside char classes
2601         Pattern p = Pattern.compile("testa\u030a", Pattern.CANON_EQ);
2602         Matcher m = p.matcher("test\u00e5");
2603         if (!m.matches())
2604             failCount++;
2605 
2606         m.reset("testa\u030a");
2607         if (!m.matches())
2608             failCount++;
2609 
2610         // Composed char outside char classes
2611         p = Pattern.compile("test\u00e5", Pattern.CANON_EQ);
2612         m = p.matcher("test\u00e5");
2613         if (!m.matches())
2614             failCount++;
2615 
2616         m.reset("testa\u030a");
2617         if (!m.find())
2618             failCount++;
2619 
2620         // Decomposed char inside a char class
2621         p = Pattern.compile("test[abca\u030a]", Pattern.CANON_EQ);
2622         m = p.matcher("test\u00e5");
2623         if (!m.find())
2624             failCount++;
2625 
2626         m.reset("testa\u030a");
2627         if (!m.find())
2628             failCount++;
2629 
2630         // Composed char inside a char class
2631         p = Pattern.compile("test[abc\u00e5def\u00e0]", Pattern.CANON_EQ);
2632         m = p.matcher("test\u00e5");
2633         if (!m.find())
2634             failCount++;
2635 
2636         m.reset("testa\u0300");
2637         if (!m.find())
2638             failCount++;
2639 
2640         m.reset("testa\u030a");
2641         if (!m.find())
2642             failCount++;
2643 
2644         // Marks that cannot legally change order and be equivalent
2645         p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2646         check(p, "testa\u0308\u0300", true);
2647         check(p, "testa\u0300\u0308", false);
2648 
2649         // Marks that can legally change order and be equivalent
2650         p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2651         check(p, "testa\u0308\u0323", true);
2652         check(p, "testa\u0323\u0308", true);
2653 
2654         // Test all equivalences of the sequence a\u0308\u0323\u0300
2655         p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2656         check(p, "testa\u0308\u0323\u0300", true);
2657         check(p, "testa\u0323\u0308\u0300", true);
2658         check(p, "testa\u0308\u0300\u0323", true);
2659         check(p, "test\u00e4\u0323\u0300", true);
2660         check(p, "test\u00e4\u0300\u0323", true);
2661 
2662         /*
2663          * The following canonical equivalence tests don't work. Bug id: 4916384.
2664          *
2665         // Decomposed hangul (jamos)
2666         p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2667         m = p.matcher("\u1100\u1161");
2668         if (!m.matches())
2669             failCount++;
2670 
2671         m.reset("\uac00");
2672         if (!m.matches())
2673             failCount++;
















































2674 
2675         // Composed hangul
2676         p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2677         m = p.matcher("\u1100\u1161");
2678         if (!m.matches())
2679             failCount++;
2680 
2681         m.reset("\uac00");
2682         if (!m.matches())
2683             failCount++;
2684 
2685         // Decomposed supplementary outside char classes
2686         p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2687         m = p.matcher("test\ud834\uddc0");
2688         if (!m.matches())
2689             failCount++;
2690 
2691         m.reset("test\ud834\uddbc\ud834\udd6f");
2692         if (!m.matches())
2693             failCount++;

2694 


2695         // Composed supplementary outside char classes
2696         p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2697         m.reset("test\ud834\uddbc\ud834\udd6f");
2698         if (!m.matches())
2699             failCount++;
2700 
2701         m = p.matcher("test\ud834\uddc0");
2702         if (!m.matches())
2703             failCount++;
2704 
2705         */


2706 

















2707         report("Canonical Equivalence");
2708     }
2709 
2710     /**
2711      * A basic sanity test of Matcher.replaceAll().
2712      */
2713     private static void globalSubstitute() throws Exception {
2714         // Global substitution with a literal
2715         Pattern p = Pattern.compile("(ab)(c*)");
2716         Matcher m = p.matcher("abccczzzabcczzzabccc");
2717         if (!m.replaceAll("test").equals("testzzztestzzztest"))
2718             failCount++;
2719 
2720         m.reset("zzzabccczzzabcczzzabccczzz");
2721         if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2722             failCount++;
2723 
2724         // Global substitution with groups
2725         m.reset("zzzabccczzzabcczzzabccczzz");
2726         String result = m.replaceAll("$1");
2727         if (!result.equals("zzzabzzzabzzzabzzz"))
2728             failCount++;
2729 
2730         // Supplementary character test
2731         // Global substitution with a literal
2732         p = Pattern.compile(toSupplementaries("(ab)(c*)"));
2733         m = p.matcher(toSupplementaries("abccczzzabcczzzabccc"));
2734         if (!m.replaceAll(toSupplementaries("test")).
2735             equals(toSupplementaries("testzzztestzzztest")))
2736             failCount++;
2737 
2738         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2739         if (!m.replaceAll(toSupplementaries("test")).
2740             equals(toSupplementaries("zzztestzzztestzzztestzzz")))
2741             failCount++;
2742 
2743         // Global substitution with groups
2744         m.reset(toSupplementaries("zzzabccczzzabcczzzabccczzz"));
2745         result = m.replaceAll("$1");
2746         if (!result.equals(toSupplementaries("zzzabzzzabzzzabzzz")))
2747             failCount++;
2748 
2749         report("Global Substitution");
2750     }
2751 
2752     /**
2753      * Tests the usage of Matcher.appendReplacement() with literal
2754      * and group substitutions.
2755      */
2756     private static void stringbufferSubstitute() throws Exception {
2757         // SB substitution with literal
2758         String blah = "zzzblahzzz";
2759         Pattern p = Pattern.compile("blah");
2760         Matcher m = p.matcher(blah);
2761         StringBuffer result = new StringBuffer();
2762         try {
2763             m.appendReplacement(result, "blech");
2764             failCount++;
2765         } catch (IllegalStateException e) {
2766         }
2767         m.find();
2768         m.appendReplacement(result, "blech");
2769         if (!result.toString().equals("zzzblech"))
2770             failCount++;
2771 
2772         m.appendTail(result);
2773         if (!result.toString().equals("zzzblechzzz"))
2774             failCount++;
2775 
2776         // SB substitution with groups
2777         blah = "zzzabcdzzz";
2778         p = Pattern.compile("(ab)(cd)*");
2779         m = p.matcher(blah);
2780         result = new StringBuffer();
2781         try {
2782             m.appendReplacement(result, "$1");
2783             failCount++;
2784         } catch (IllegalStateException e) {
2785         }
2786         m.find();
2787         m.appendReplacement(result, "$1");
2788         if (!result.toString().equals("zzzab"))
2789             failCount++;
2790 
2791         m.appendTail(result);
2792         if (!result.toString().equals("zzzabzzz"))
2793             failCount++;
2794 
2795         // SB substitution with 3 groups
2796         blah = "zzzabcdcdefzzz";
2797         p = Pattern.compile("(ab)(cd)*(ef)");
2798         m = p.matcher(blah);
2799         result = new StringBuffer();
2800         try {
2801             m.appendReplacement(result, "$1w$2w$3");
2802             failCount++;
2803         } catch (IllegalStateException e) {
2804         }
2805         m.find();
2806         m.appendReplacement(result, "$1w$2w$3");
2807         if (!result.toString().equals("zzzabwcdwef"))
2808             failCount++;
2809 
2810         m.appendTail(result);
2811         if (!result.toString().equals("zzzabwcdwefzzz"))
2812             failCount++;
2813 
2814         // SB substitution with groups and three matches
2815         // skipping middle match
2816         blah = "zzzabcdzzzabcddzzzabcdzzz";
2817         p = Pattern.compile("(ab)(cd*)");
2818         m = p.matcher(blah);
2819         result = new StringBuffer();
2820         try {
2821             m.appendReplacement(result, "$1");
2822             failCount++;
2823         } catch (IllegalStateException e) {
2824         }
2825         m.find();
2826         m.appendReplacement(result, "$1");
2827         if (!result.toString().equals("zzzab"))
2828             failCount++;
2829 
2830         m.find();
2831         m.find();
2832         m.appendReplacement(result, "$2");
2833         if (!result.toString().equals("zzzabzzzabcddzzzcd"))
2834             failCount++;
2835 
2836         m.appendTail(result);
2837         if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
2838             failCount++;
2839 
2840         // Check to make sure escaped $ is ignored
2841         blah = "zzzabcdcdefzzz";
2842         p = Pattern.compile("(ab)(cd)*(ef)");
2843         m = p.matcher(blah);
2844         result = new StringBuffer();
2845         m.find();
2846         m.appendReplacement(result, "$1w\\$2w$3");
2847         if (!result.toString().equals("zzzabw$2wef"))
2848             failCount++;
2849 
2850         m.appendTail(result);
2851         if (!result.toString().equals("zzzabw$2wefzzz"))
2852             failCount++;
2853 
2854         // Check to make sure a reference to nonexistent group causes error
2855         blah = "zzzabcdcdefzzz";
2856         p = Pattern.compile("(ab)(cd)*(ef)");
2857         m = p.matcher(blah);
2858         result = new StringBuffer();
2859         m.find();
2860         try {
2861             m.appendReplacement(result, "$1w$5w$3");
2862             failCount++;
2863         } catch (IndexOutOfBoundsException ioobe) {
2864             // Correct result
2865         }
2866 
2867         // Check double digit group references
2868         blah = "zzz123456789101112zzz";
2869         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
2870         m = p.matcher(blah);
2871         result = new StringBuffer();
2872         m.find();
2873         m.appendReplacement(result, "$1w$11w$3");
2874         if (!result.toString().equals("zzz1w11w3"))
2875             failCount++;
2876 
2877         // Check to make sure it backs off $15 to $1 if only three groups
2878         blah = "zzzabcdcdefzzz";
2879         p = Pattern.compile("(ab)(cd)*(ef)");
2880         m = p.matcher(blah);
2881         result = new StringBuffer();
2882         m.find();
2883         m.appendReplacement(result, "$1w$15w$3");
2884         if (!result.toString().equals("zzzabwab5wef"))
2885             failCount++;
2886 
2887 
2888         // Supplementary character test
2889         // SB substitution with literal
2890         blah = toSupplementaries("zzzblahzzz");
2891         p = Pattern.compile(toSupplementaries("blah"));
2892         m = p.matcher(blah);
2893         result = new StringBuffer();
2894         try {
2895             m.appendReplacement(result, toSupplementaries("blech"));
2896             failCount++;
2897         } catch (IllegalStateException e) {
2898         }
2899         m.find();
2900         m.appendReplacement(result, toSupplementaries("blech"));
2901         if (!result.toString().equals(toSupplementaries("zzzblech")))
2902             failCount++;
2903 
2904         m.appendTail(result);
2905         if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
2906             failCount++;
2907 
2908         // SB substitution with groups
2909         blah = toSupplementaries("zzzabcdzzz");
2910         p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
2911         m = p.matcher(blah);
2912         result = new StringBuffer();
2913         try {
2914             m.appendReplacement(result, "$1");
2915             failCount++;
2916         } catch (IllegalStateException e) {
2917         }
2918         m.find();
2919         m.appendReplacement(result, "$1");
2920         if (!result.toString().equals(toSupplementaries("zzzab")))
2921             failCount++;
2922 
2923         m.appendTail(result);
2924         if (!result.toString().equals(toSupplementaries("zzzabzzz")))
2925             failCount++;
2926 
2927         // SB substitution with 3 groups
2928         blah = toSupplementaries("zzzabcdcdefzzz");
2929         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2930         m = p.matcher(blah);
2931         result = new StringBuffer();
2932         try {
2933             m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2934             failCount++;
2935         } catch (IllegalStateException e) {
2936         }
2937         m.find();
2938         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
2939         if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
2940             failCount++;
2941 
2942         m.appendTail(result);
2943         if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
2944             failCount++;
2945 
2946         // SB substitution with groups and three matches
2947         // skipping middle match
2948         blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
2949         p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
2950         m = p.matcher(blah);
2951         result = new StringBuffer();
2952         try {
2953             m.appendReplacement(result, "$1");
2954             failCount++;
2955         } catch (IllegalStateException e) {
2956         }
2957         m.find();
2958         m.appendReplacement(result, "$1");
2959         if (!result.toString().equals(toSupplementaries("zzzab")))
2960             failCount++;
2961 
2962         m.find();
2963         m.find();
2964         m.appendReplacement(result, "$2");
2965         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
2966             failCount++;
2967 
2968         m.appendTail(result);
2969         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
2970             failCount++;
2971 
2972         // Check to make sure escaped $ is ignored
2973         blah = toSupplementaries("zzzabcdcdefzzz");
2974         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2975         m = p.matcher(blah);
2976         result = new StringBuffer();
2977         m.find();
2978         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
2979         if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
2980             failCount++;
2981 
2982         m.appendTail(result);
2983         if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
2984             failCount++;
2985 
2986         // Check to make sure a reference to nonexistent group causes error
2987         blah = toSupplementaries("zzzabcdcdefzzz");
2988         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
2989         m = p.matcher(blah);
2990         result = new StringBuffer();
2991         m.find();
2992         try {
2993             m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
2994             failCount++;
2995         } catch (IndexOutOfBoundsException ioobe) {
2996             // Correct result
2997         }
2998 
2999         // Check double digit group references
3000         blah = toSupplementaries("zzz123456789101112zzz");
3001         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3002         m = p.matcher(blah);
3003         result = new StringBuffer();
3004         m.find();
3005         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
3006         if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
3007             failCount++;
3008 
3009         // Check to make sure it backs off $15 to $1 if only three groups
3010         blah = toSupplementaries("zzzabcdcdefzzz");
3011         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3012         m = p.matcher(blah);
3013         result = new StringBuffer();
3014         m.find();
3015         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3016         if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
3017             failCount++;
3018 
3019         // Check nothing has been appended into the output buffer if
3020         // the replacement string triggers IllegalArgumentException.
3021         p = Pattern.compile("(abc)");
3022         m = p.matcher("abcd");
3023         result = new StringBuffer();
3024         m.find();
3025         try {
3026             m.appendReplacement(result, ("xyz$g"));
3027             failCount++;
3028         } catch (IllegalArgumentException iae) {
3029             if (result.length() != 0)
3030                 failCount++;
3031         }
3032 
3033         report("SB Substitution");
3034     }
3035 
3036     /**
3037      * Tests the usage of Matcher.appendReplacement() with literal
3038      * and group substitutions.
3039      */
3040     private static void stringbuilderSubstitute() throws Exception {
3041         // SB substitution with literal
3042         String blah = "zzzblahzzz";
3043         Pattern p = Pattern.compile("blah");
3044         Matcher m = p.matcher(blah);
3045         StringBuilder result = new StringBuilder();
3046         try {
3047             m.appendReplacement(result, "blech");
3048             failCount++;
3049         } catch (IllegalStateException e) {
3050         }
3051         m.find();
3052         m.appendReplacement(result, "blech");
3053         if (!result.toString().equals("zzzblech"))
3054             failCount++;
3055 
3056         m.appendTail(result);
3057         if (!result.toString().equals("zzzblechzzz"))
3058             failCount++;
3059 
3060         // SB substitution with groups
3061         blah = "zzzabcdzzz";
3062         p = Pattern.compile("(ab)(cd)*");
3063         m = p.matcher(blah);
3064         result = new StringBuilder();
3065         try {
3066             m.appendReplacement(result, "$1");
3067             failCount++;
3068         } catch (IllegalStateException e) {
3069         }
3070         m.find();
3071         m.appendReplacement(result, "$1");
3072         if (!result.toString().equals("zzzab"))
3073             failCount++;
3074 
3075         m.appendTail(result);
3076         if (!result.toString().equals("zzzabzzz"))
3077             failCount++;
3078 
3079         // SB substitution with 3 groups
3080         blah = "zzzabcdcdefzzz";
3081         p = Pattern.compile("(ab)(cd)*(ef)");
3082         m = p.matcher(blah);
3083         result = new StringBuilder();
3084         try {
3085             m.appendReplacement(result, "$1w$2w$3");
3086             failCount++;
3087         } catch (IllegalStateException e) {
3088         }
3089         m.find();
3090         m.appendReplacement(result, "$1w$2w$3");
3091         if (!result.toString().equals("zzzabwcdwef"))
3092             failCount++;
3093 
3094         m.appendTail(result);
3095         if (!result.toString().equals("zzzabwcdwefzzz"))
3096             failCount++;
3097 
3098         // SB substitution with groups and three matches
3099         // skipping middle match
3100         blah = "zzzabcdzzzabcddzzzabcdzzz";
3101         p = Pattern.compile("(ab)(cd*)");
3102         m = p.matcher(blah);
3103         result = new StringBuilder();
3104         try {
3105             m.appendReplacement(result, "$1");
3106             failCount++;
3107         } catch (IllegalStateException e) {
3108         }
3109         m.find();
3110         m.appendReplacement(result, "$1");
3111         if (!result.toString().equals("zzzab"))
3112             failCount++;
3113 
3114         m.find();
3115         m.find();
3116         m.appendReplacement(result, "$2");
3117         if (!result.toString().equals("zzzabzzzabcddzzzcd"))
3118             failCount++;
3119 
3120         m.appendTail(result);
3121         if (!result.toString().equals("zzzabzzzabcddzzzcdzzz"))
3122             failCount++;
3123 
3124         // Check to make sure escaped $ is ignored
3125         blah = "zzzabcdcdefzzz";
3126         p = Pattern.compile("(ab)(cd)*(ef)");
3127         m = p.matcher(blah);
3128         result = new StringBuilder();
3129         m.find();
3130         m.appendReplacement(result, "$1w\\$2w$3");
3131         if (!result.toString().equals("zzzabw$2wef"))
3132             failCount++;
3133 
3134         m.appendTail(result);
3135         if (!result.toString().equals("zzzabw$2wefzzz"))
3136             failCount++;
3137 
3138         // Check to make sure a reference to nonexistent group causes error
3139         blah = "zzzabcdcdefzzz";
3140         p = Pattern.compile("(ab)(cd)*(ef)");
3141         m = p.matcher(blah);
3142         result = new StringBuilder();
3143         m.find();
3144         try {
3145             m.appendReplacement(result, "$1w$5w$3");
3146             failCount++;
3147         } catch (IndexOutOfBoundsException ioobe) {
3148             // Correct result
3149         }
3150 
3151         // Check double digit group references
3152         blah = "zzz123456789101112zzz";
3153         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3154         m = p.matcher(blah);
3155         result = new StringBuilder();
3156         m.find();
3157         m.appendReplacement(result, "$1w$11w$3");
3158         if (!result.toString().equals("zzz1w11w3"))
3159             failCount++;
3160 
3161         // Check to make sure it backs off $15 to $1 if only three groups
3162         blah = "zzzabcdcdefzzz";
3163         p = Pattern.compile("(ab)(cd)*(ef)");
3164         m = p.matcher(blah);
3165         result = new StringBuilder();
3166         m.find();
3167         m.appendReplacement(result, "$1w$15w$3");
3168         if (!result.toString().equals("zzzabwab5wef"))
3169             failCount++;
3170 
3171 
3172         // Supplementary character test
3173         // SB substitution with literal
3174         blah = toSupplementaries("zzzblahzzz");
3175         p = Pattern.compile(toSupplementaries("blah"));
3176         m = p.matcher(blah);
3177         result = new StringBuilder();
3178         try {
3179             m.appendReplacement(result, toSupplementaries("blech"));
3180             failCount++;
3181         } catch (IllegalStateException e) {
3182         }
3183         m.find();
3184         m.appendReplacement(result, toSupplementaries("blech"));
3185         if (!result.toString().equals(toSupplementaries("zzzblech")))
3186             failCount++;
3187         m.appendTail(result);
3188         if (!result.toString().equals(toSupplementaries("zzzblechzzz")))
3189             failCount++;
3190 
3191         // SB substitution with groups
3192         blah = toSupplementaries("zzzabcdzzz");
3193         p = Pattern.compile(toSupplementaries("(ab)(cd)*"));
3194         m = p.matcher(blah);
3195         result = new StringBuilder();
3196         try {
3197             m.appendReplacement(result, "$1");
3198             failCount++;
3199         } catch (IllegalStateException e) {
3200         }
3201         m.find();
3202         m.appendReplacement(result, "$1");
3203         if (!result.toString().equals(toSupplementaries("zzzab")))
3204             failCount++;
3205 
3206         m.appendTail(result);
3207         if (!result.toString().equals(toSupplementaries("zzzabzzz")))
3208             failCount++;
3209 
3210         // SB substitution with 3 groups
3211         blah = toSupplementaries("zzzabcdcdefzzz");
3212         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3213         m = p.matcher(blah);
3214         result = new StringBuilder();
3215         try {
3216             m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3217             failCount++;
3218         } catch (IllegalStateException e) {
3219         }
3220         m.find();
3221         m.appendReplacement(result, toSupplementaries("$1w$2w$3"));
3222         if (!result.toString().equals(toSupplementaries("zzzabwcdwef")))
3223             failCount++;
3224 
3225         m.appendTail(result);
3226         if (!result.toString().equals(toSupplementaries("zzzabwcdwefzzz")))
3227             failCount++;
3228 
3229         // SB substitution with groups and three matches
3230         // skipping middle match
3231         blah = toSupplementaries("zzzabcdzzzabcddzzzabcdzzz");
3232         p = Pattern.compile(toSupplementaries("(ab)(cd*)"));
3233         m = p.matcher(blah);
3234         result = new StringBuilder();
3235         try {
3236             m.appendReplacement(result, "$1");
3237             failCount++;
3238         } catch (IllegalStateException e) {
3239         }
3240         m.find();
3241         m.appendReplacement(result, "$1");
3242         if (!result.toString().equals(toSupplementaries("zzzab")))
3243             failCount++;
3244 
3245         m.find();
3246         m.find();
3247         m.appendReplacement(result, "$2");
3248         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcd")))
3249             failCount++;
3250 
3251         m.appendTail(result);
3252         if (!result.toString().equals(toSupplementaries("zzzabzzzabcddzzzcdzzz")))
3253             failCount++;
3254 
3255         // Check to make sure escaped $ is ignored
3256         blah = toSupplementaries("zzzabcdcdefzzz");
3257         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3258         m = p.matcher(blah);
3259         result = new StringBuilder();
3260         m.find();
3261         m.appendReplacement(result, toSupplementaries("$1w\\$2w$3"));
3262         if (!result.toString().equals(toSupplementaries("zzzabw$2wef")))
3263             failCount++;
3264 
3265         m.appendTail(result);
3266         if (!result.toString().equals(toSupplementaries("zzzabw$2wefzzz")))
3267             failCount++;
3268 
3269         // Check to make sure a reference to nonexistent group causes error
3270         blah = toSupplementaries("zzzabcdcdefzzz");
3271         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3272         m = p.matcher(blah);
3273         result = new StringBuilder();
3274         m.find();
3275         try {
3276             m.appendReplacement(result, toSupplementaries("$1w$5w$3"));
3277             failCount++;
3278         } catch (IndexOutOfBoundsException ioobe) {
3279             // Correct result
3280         }
3281         // Check double digit group references
3282         blah = toSupplementaries("zzz123456789101112zzz");
3283         p = Pattern.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)");
3284         m = p.matcher(blah);
3285         result = new StringBuilder();
3286         m.find();
3287         m.appendReplacement(result, toSupplementaries("$1w$11w$3"));
3288         if (!result.toString().equals(toSupplementaries("zzz1w11w3")))
3289             failCount++;
3290 
3291         // Check to make sure it backs off $15 to $1 if only three groups
3292         blah = toSupplementaries("zzzabcdcdefzzz");
3293         p = Pattern.compile(toSupplementaries("(ab)(cd)*(ef)"));
3294         m = p.matcher(blah);
3295         result = new StringBuilder();
3296         m.find();
3297         m.appendReplacement(result, toSupplementaries("$1w$15w$3"));
3298         if (!result.toString().equals(toSupplementaries("zzzabwab5wef")))
3299             failCount++;
3300         // Check nothing has been appended into the output buffer if
3301         // the replacement string triggers IllegalArgumentException.
3302         p = Pattern.compile("(abc)");
3303         m = p.matcher("abcd");
3304         result = new StringBuilder();
3305         m.find();
3306         try {
3307             m.appendReplacement(result, ("xyz$g"));
3308             failCount++;
3309         } catch (IllegalArgumentException iae) {
3310             if (result.length() != 0)
3311                 failCount++;
3312         }
3313         report("SB Substitution 2");
3314     }
3315 
3316     /*
3317      * 5 groups of characters are created to make a substitution string.
3318      * A base string will be created including random lead chars, the
3319      * substitution string, and random trailing chars.
3320      * A pattern containing the 5 groups is searched for and replaced with:
3321      * random group + random string + random group.
3322      * The results are checked for correctness.
3323      */
3324     private static void substitutionBasher() {
3325         for (int runs = 0; runs<1000; runs++) {
3326             // Create a base string to work in
3327             int leadingChars = generator.nextInt(10);
3328             StringBuffer baseBuffer = new StringBuffer(100);
3329             String leadingString = getRandomAlphaString(leadingChars);
3330             baseBuffer.append(leadingString);
3331 
3332             // Create 5 groups of random number of random chars
3333             // Create the string to substitute
3334             // Create the pattern string to search for
3335             StringBuffer bufferToSub = new StringBuffer(25);
3336             StringBuffer bufferToPat = new StringBuffer(50);
3337             String[] groups = new String[5];
3338             for(int i=0; i<5; i++) {
3339                 int aGroupSize = generator.nextInt(5)+1;
3340                 groups[i] = getRandomAlphaString(aGroupSize);
3341                 bufferToSub.append(groups[i]);
3342                 bufferToPat.append('(');
3343                 bufferToPat.append(groups[i]);
3344                 bufferToPat.append(')');
3345             }
3346             String stringToSub = bufferToSub.toString();
3347             String pattern = bufferToPat.toString();
3348 
3349             // Place sub string into working string at random index
3350             baseBuffer.append(stringToSub);
3351 
3352             // Append random chars to end
3353             int trailingChars = generator.nextInt(10);
3354             String trailingString = getRandomAlphaString(trailingChars);
3355             baseBuffer.append(trailingString);
3356             String baseString = baseBuffer.toString();
3357 
3358             // Create test pattern and matcher
3359             Pattern p = Pattern.compile(pattern);
3360             Matcher m = p.matcher(baseString);
3361 
3362             // Reject candidate if pattern happens to start early
3363             m.find();
3364             if (m.start() < leadingChars)
3365                 continue;
3366 
3367             // Reject candidate if more than one match
3368             if (m.find())
3369                 continue;
3370 
3371             // Construct a replacement string with :
3372             // random group + random string + random group
3373             StringBuffer bufferToRep = new StringBuffer();
3374             int groupIndex1 = generator.nextInt(5);
3375             bufferToRep.append("$" + (groupIndex1 + 1));
3376             String randomMidString = getRandomAlphaString(5);
3377             bufferToRep.append(randomMidString);
3378             int groupIndex2 = generator.nextInt(5);
3379             bufferToRep.append("$" + (groupIndex2 + 1));
3380             String replacement = bufferToRep.toString();
3381 
3382             // Do the replacement
3383             String result = m.replaceAll(replacement);
3384 
3385             // Construct expected result
3386             StringBuffer bufferToRes = new StringBuffer();
3387             bufferToRes.append(leadingString);
3388             bufferToRes.append(groups[groupIndex1]);
3389             bufferToRes.append(randomMidString);
3390             bufferToRes.append(groups[groupIndex2]);
3391             bufferToRes.append(trailingString);
3392             String expectedResult = bufferToRes.toString();
3393 
3394             // Check results
3395             if (!result.equals(expectedResult))
3396                 failCount++;
3397         }
3398 
3399         report("Substitution Basher");
3400     }
3401 
3402     /*
3403      * 5 groups of characters are created to make a substitution string.
3404      * A base string will be created including random lead chars, the
3405      * substitution string, and random trailing chars.
3406      * A pattern containing the 5 groups is searched for and replaced with:
3407      * random group + random string + random group.
3408      * The results are checked for correctness.
3409      */
3410     private static void substitutionBasher2() {
3411         for (int runs = 0; runs<1000; runs++) {
3412             // Create a base string to work in
3413             int leadingChars = generator.nextInt(10);
3414             StringBuilder baseBuffer = new StringBuilder(100);
3415             String leadingString = getRandomAlphaString(leadingChars);
3416             baseBuffer.append(leadingString);
3417 
3418             // Create 5 groups of random number of random chars
3419             // Create the string to substitute
3420             // Create the pattern string to search for
3421             StringBuilder bufferToSub = new StringBuilder(25);
3422             StringBuilder bufferToPat = new StringBuilder(50);
3423             String[] groups = new String[5];
3424             for(int i=0; i<5; i++) {
3425                 int aGroupSize = generator.nextInt(5)+1;
3426                 groups[i] = getRandomAlphaString(aGroupSize);
3427                 bufferToSub.append(groups[i]);
3428                 bufferToPat.append('(');
3429                 bufferToPat.append(groups[i]);
3430                 bufferToPat.append(')');
3431             }
3432             String stringToSub = bufferToSub.toString();
3433             String pattern = bufferToPat.toString();
3434 
3435             // Place sub string into working string at random index
3436             baseBuffer.append(stringToSub);
3437 
3438             // Append random chars to end
3439             int trailingChars = generator.nextInt(10);
3440             String trailingString = getRandomAlphaString(trailingChars);
3441             baseBuffer.append(trailingString);
3442             String baseString = baseBuffer.toString();
3443 
3444             // Create test pattern and matcher
3445             Pattern p = Pattern.compile(pattern);
3446             Matcher m = p.matcher(baseString);
3447 
3448             // Reject candidate if pattern happens to start early
3449             m.find();
3450             if (m.start() < leadingChars)
3451                 continue;
3452 
3453             // Reject candidate if more than one match
3454             if (m.find())
3455                 continue;
3456 
3457             // Construct a replacement string with :
3458             // random group + random string + random group
3459             StringBuilder bufferToRep = new StringBuilder();
3460             int groupIndex1 = generator.nextInt(5);
3461             bufferToRep.append("$" + (groupIndex1 + 1));
3462             String randomMidString = getRandomAlphaString(5);
3463             bufferToRep.append(randomMidString);
3464             int groupIndex2 = generator.nextInt(5);
3465             bufferToRep.append("$" + (groupIndex2 + 1));
3466             String replacement = bufferToRep.toString();
3467 
3468             // Do the replacement
3469             String result = m.replaceAll(replacement);
3470 
3471             // Construct expected result
3472             StringBuilder bufferToRes = new StringBuilder();
3473             bufferToRes.append(leadingString);
3474             bufferToRes.append(groups[groupIndex1]);
3475             bufferToRes.append(randomMidString);
3476             bufferToRes.append(groups[groupIndex2]);
3477             bufferToRes.append(trailingString);
3478             String expectedResult = bufferToRes.toString();
3479 
3480             // Check results
3481             if (!result.equals(expectedResult)) {
3482                 failCount++;
3483             }
3484         }
3485 
3486         report("Substitution Basher 2");
3487     }
3488 
3489     /**
3490      * Checks the handling of some escape sequences that the Pattern
3491      * class should process instead of the java compiler. These are
3492      * not in the file because the escapes should be be processed
3493      * by the Pattern class when the regex is compiled.
3494      */
3495     private static void escapes() throws Exception {
3496         Pattern p = Pattern.compile("\\043");
3497         Matcher m = p.matcher("#");
3498         if (!m.find())
3499             failCount++;
3500 
3501         p = Pattern.compile("\\x23");
3502         m = p.matcher("#");
3503         if (!m.find())
3504             failCount++;
3505 
3506         p = Pattern.compile("\\u0023");
3507         m = p.matcher("#");
3508         if (!m.find())
3509             failCount++;
3510 
3511         report("Escape sequences");
3512     }
3513 
3514     /**
3515      * Checks the handling of blank input situations. These
3516      * tests are incompatible with my test file format.
3517      */
3518     private static void blankInput() throws Exception {
3519         Pattern p = Pattern.compile("abc", Pattern.CASE_INSENSITIVE);
3520         Matcher m = p.matcher("");
3521         if (m.find())
3522             failCount++;
3523 
3524         p = Pattern.compile("a*", Pattern.CASE_INSENSITIVE);
3525         m = p.matcher("");
3526         if (!m.find())
3527             failCount++;
3528 
3529         p = Pattern.compile("abc");
3530         m = p.matcher("");
3531         if (m.find())
3532             failCount++;
3533 
3534         p = Pattern.compile("a*");
3535         m = p.matcher("");
3536         if (!m.find())
3537             failCount++;
3538 
3539         report("Blank input");
3540     }
3541 
3542     /**
3543      * Tests the Boyer-Moore pattern matching of a character sequence
3544      * on randomly generated patterns.
3545      */
3546     private static void bm() throws Exception {
3547         doBnM('a');
3548         report("Boyer Moore (ASCII)");
3549 
3550         doBnM(Character.MIN_SUPPLEMENTARY_CODE_POINT - 10);
3551         report("Boyer Moore (Supplementary)");
3552     }
3553 
3554     private static void doBnM(int baseCharacter) throws Exception {
3555         int achar=0;
3556 
3557         for (int i=0; i<100; i++) {
3558             // Create a short pattern to search for
3559             int patternLength = generator.nextInt(7) + 4;
3560             StringBuffer patternBuffer = new StringBuffer(patternLength);
3561             String pattern;
3562             retry: for (;;) {
3563                 for (int x=0; x<patternLength; x++) {
3564                     int ch = baseCharacter + generator.nextInt(26);
3565                     if (Character.isSupplementaryCodePoint(ch)) {
3566                         patternBuffer.append(Character.toChars(ch));
3567                     } else {
3568                         patternBuffer.append((char)ch);
3569                     }
3570                 }
3571                 pattern = patternBuffer.toString();
3572 
3573                 // Avoid patterns that start and end with the same substring
3574                 // See JDK-6854417
3575                 for (int x=1; x < pattern.length(); x++) {
3576                     if (pattern.startsWith(pattern.substring(x)))
3577                         continue retry;
3578                 }
3579                 break;
3580             }
3581             Pattern p = Pattern.compile(pattern);
3582 
3583             // Create a buffer with random ASCII chars that does
3584             // not match the sample
3585             String toSearch = null;
3586             StringBuffer s = null;
3587             Matcher m = p.matcher("");
3588             do {
3589                 s = new StringBuffer(100);
3590                 for (int x=0; x<100; x++) {
3591                     int ch = baseCharacter + generator.nextInt(26);
3592                     if (Character.isSupplementaryCodePoint(ch)) {
3593                         s.append(Character.toChars(ch));
3594                     } else {
3595                         s.append((char)ch);
3596                     }
3597                 }
3598                 toSearch = s.toString();
3599                 m.reset(toSearch);
3600             } while (m.find());
3601 
3602             // Insert the pattern at a random spot
3603             int insertIndex = generator.nextInt(99);
3604             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3605                 insertIndex++;
3606             s = s.insert(insertIndex, pattern);
3607             toSearch = s.toString();
3608 
3609             // Make sure that the pattern is found
3610             m.reset(toSearch);
3611             if (!m.find())
3612                 failCount++;
3613 
3614             // Make sure that the match text is the pattern
3615             if (!m.group().equals(pattern))
3616                 failCount++;
3617 
3618             // Make sure match occured at insertion point
3619             if (m.start() != insertIndex)
3620                 failCount++;
3621         }
3622     }
3623 
3624     /**
3625      * Tests the matching of slices on randomly generated patterns.
3626      * The Boyer-Moore optimization is not done on these patterns
3627      * because it uses unicode case folding.
3628      */
3629     private static void slice() throws Exception {
3630         doSlice(Character.MAX_VALUE);
3631         report("Slice");
3632 
3633         doSlice(Character.MAX_CODE_POINT);
3634         report("Slice (Supplementary)");
3635     }
3636 
3637     private static void doSlice(int maxCharacter) throws Exception {
3638         Random generator = new Random();
3639         int achar=0;
3640 
3641         for (int i=0; i<100; i++) {
3642             // Create a short pattern to search for
3643             int patternLength = generator.nextInt(7) + 4;
3644             StringBuffer patternBuffer = new StringBuffer(patternLength);
3645             for (int x=0; x<patternLength; x++) {
3646                 int randomChar = 0;
3647                 while (!Character.isLetterOrDigit(randomChar))
3648                     randomChar = generator.nextInt(maxCharacter);
3649                 if (Character.isSupplementaryCodePoint(randomChar)) {
3650                     patternBuffer.append(Character.toChars(randomChar));
3651                 } else {
3652                     patternBuffer.append((char) randomChar);
3653                 }
3654             }
3655             String pattern =  patternBuffer.toString();
3656             Pattern p = Pattern.compile(pattern, Pattern.UNICODE_CASE);
3657 
3658             // Create a buffer with random chars that does not match the sample
3659             String toSearch = null;
3660             StringBuffer s = null;
3661             Matcher m = p.matcher("");
3662             do {
3663                 s = new StringBuffer(100);
3664                 for (int x=0; x<100; x++) {
3665                     int randomChar = 0;
3666                     while (!Character.isLetterOrDigit(randomChar))
3667                         randomChar = generator.nextInt(maxCharacter);
3668                     if (Character.isSupplementaryCodePoint(randomChar)) {
3669                         s.append(Character.toChars(randomChar));
3670                     } else {
3671                         s.append((char) randomChar);
3672                     }
3673                 }
3674                 toSearch = s.toString();
3675                 m.reset(toSearch);
3676             } while (m.find());
3677 
3678             // Insert the pattern at a random spot
3679             int insertIndex = generator.nextInt(99);
3680             if (Character.isLowSurrogate(s.charAt(insertIndex)))
3681                 insertIndex++;
3682             s = s.insert(insertIndex, pattern);
3683             toSearch = s.toString();
3684 
3685             // Make sure that the pattern is found
3686             m.reset(toSearch);
3687             if (!m.find())
3688                 failCount++;
3689 
3690             // Make sure that the match text is the pattern
3691             if (!m.group().equals(pattern))
3692                 failCount++;
3693 
3694             // Make sure match occured at insertion point
3695             if (m.start() != insertIndex)
3696                 failCount++;
3697         }
3698     }
3699 
3700     private static void explainFailure(String pattern, String data,
3701                                        String expected, String actual) {
3702         System.err.println("----------------------------------------");
3703         System.err.println("Pattern = "+pattern);
3704         System.err.println("Data = "+data);
3705         System.err.println("Expected = " + expected);
3706         System.err.println("Actual   = " + actual);
3707     }
3708 
3709     private static void explainFailure(String pattern, String data,
3710                                        Throwable t) {
3711         System.err.println("----------------------------------------");
3712         System.err.println("Pattern = "+pattern);
3713         System.err.println("Data = "+data);
3714         t.printStackTrace(System.err);
3715     }
3716 
3717     // Testing examples from a file
3718 
3719     /**
3720      * Goes through the file "TestCases.txt" and creates many patterns
3721      * described in the file, matching the patterns against input lines in
3722      * the file, and comparing the results against the correct results
3723      * also found in the file. The file format is described in comments
3724      * at the head of the file.
3725      */
3726     private static void processFile(String fileName) throws Exception {
3727         File testCases = new File(System.getProperty("test.src", "."),
3728                                   fileName);
3729         FileInputStream in = new FileInputStream(testCases);
3730         BufferedReader r = new BufferedReader(new InputStreamReader(in));
3731 
3732         // Process next test case.
3733         String aLine;
3734         while((aLine = r.readLine()) != null) {
3735             // Read a line for pattern
3736             String patternString = grabLine(r);
3737             Pattern p = null;
3738             try {
3739                 p = compileTestPattern(patternString);
3740             } catch (PatternSyntaxException e) {
3741                 String dataString = grabLine(r);
3742                 String expectedResult = grabLine(r);
3743                 if (expectedResult.startsWith("error"))
3744                     continue;
3745                 explainFailure(patternString, dataString, e);
3746                 failCount++;
3747                 continue;
3748             }
3749 
3750             // Read a line for input string
3751             String dataString = grabLine(r);
3752             Matcher m = p.matcher(dataString);
3753             StringBuffer result = new StringBuffer();
3754 
3755             // Check for IllegalStateExceptions before a match
3756             failCount += preMatchInvariants(m);
3757 
3758             boolean found = m.find();
3759 
3760             if (found)
3761                 failCount += postTrueMatchInvariants(m);
3762             else
3763                 failCount += postFalseMatchInvariants(m);
3764 
3765             if (found) {
3766                 result.append("true ");
3767                 result.append(m.group(0) + " ");
3768             } else {
3769                 result.append("false ");
3770             }
3771 
3772             result.append(m.groupCount());
3773 
3774             if (found) {
3775                 for (int i=1; i<m.groupCount()+1; i++)
3776                     if (m.group(i) != null)
3777                         result.append(" " +m.group(i));
3778             }
3779 
3780             // Read a line for the expected result
3781             String expectedResult = grabLine(r);
3782 
3783             if (!result.toString().equals(expectedResult)) {
3784                 explainFailure(patternString, dataString, expectedResult, result.toString());
3785                 failCount++;
3786             }
3787         }
3788 
3789         report(fileName);
3790     }
3791 
3792     private static int preMatchInvariants(Matcher m) {
3793         int failCount = 0;
3794         try {
3795             m.start();
3796             failCount++;
3797         } catch (IllegalStateException ise) {}
3798         try {
3799             m.end();
3800             failCount++;
3801         } catch (IllegalStateException ise) {}
3802         try {
3803             m.group();
3804             failCount++;
3805         } catch (IllegalStateException ise) {}
3806         return failCount;
3807     }
3808 
3809     private static int postFalseMatchInvariants(Matcher m) {
3810         int failCount = 0;
3811         try {
3812             m.group();
3813             failCount++;
3814         } catch (IllegalStateException ise) {}
3815         try {
3816             m.start();
3817             failCount++;
3818         } catch (IllegalStateException ise) {}
3819         try {
3820             m.end();
3821             failCount++;
3822         } catch (IllegalStateException ise) {}
3823         return failCount;
3824     }
3825 
3826     private static int postTrueMatchInvariants(Matcher m) {
3827         int failCount = 0;
3828         //assert(m.start() = m.start(0);
3829         if (m.start() != m.start(0))
3830             failCount++;
3831         //assert(m.end() = m.end(0);
3832         if (m.start() != m.start(0))
3833             failCount++;
3834         //assert(m.group() = m.group(0);
3835         if (!m.group().equals(m.group(0)))
3836             failCount++;
3837         try {
3838             m.group(50);
3839             failCount++;
3840         } catch (IndexOutOfBoundsException ise) {}
3841 
3842         return failCount;
3843     }
3844 
3845     private static Pattern compileTestPattern(String patternString) {
3846         if (!patternString.startsWith("'")) {
3847             return Pattern.compile(patternString);
3848         }
3849 
3850         int break1 = patternString.lastIndexOf("'");
3851         String flagString = patternString.substring(
3852                                           break1+1, patternString.length());
3853         patternString = patternString.substring(1, break1);
3854 
3855         if (flagString.equals("i"))
3856             return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3857 
3858         if (flagString.equals("m"))
3859             return Pattern.compile(patternString, Pattern.MULTILINE);
3860 
3861         return Pattern.compile(patternString);
3862     }
3863 
3864     /**
3865      * Reads a line from the input file. Keeps reading lines until a non
3866      * empty non comment line is read. If the line contains a \n then
3867      * these two characters are replaced by a newline char. If a \\uxxxx
3868      * sequence is read then the sequence is replaced by the unicode char.
3869      */
3870     private static String grabLine(BufferedReader r) throws Exception {
3871         int index = 0;
3872         String line = r.readLine();
3873         while (line.startsWith("//") || line.length() < 1)
3874             line = r.readLine();
3875         while ((index = line.indexOf("\\n")) != -1) {
3876             StringBuffer temp = new StringBuffer(line);
3877             temp.replace(index, index+2, "\n");
3878             line = temp.toString();
3879         }
3880         while ((index = line.indexOf("\\u")) != -1) {
3881             StringBuffer temp = new StringBuffer(line);
3882             String value = temp.substring(index+2, index+6);
3883             char aChar = (char)Integer.parseInt(value, 16);
3884             String unicodeChar = "" + aChar;
3885             temp.replace(index, index+6, unicodeChar);
3886             line = temp.toString();
3887         }
3888 
3889         return line;
3890     }
3891 
3892     private static void check(Pattern p, String s, String g, String expected) {
3893         Matcher m = p.matcher(s);
3894         m.find();
3895         if (!m.group(g).equals(expected) ||
3896             s.charAt(m.start(g)) != expected.charAt(0) ||
3897             s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1))
3898             failCount++;
3899     }
3900 
3901     private static void checkReplaceFirst(String p, String s, String r, String expected)
3902     {
3903         if (!expected.equals(Pattern.compile(p)
3904                                     .matcher(s)
3905                                     .replaceFirst(r)))
3906             failCount++;
3907     }
3908 
3909     private static void checkReplaceAll(String p, String s, String r, String expected)
3910     {
3911         if (!expected.equals(Pattern.compile(p)
3912                                     .matcher(s)
3913                                     .replaceAll(r)))
3914             failCount++;
3915     }
3916 
3917     private static void checkExpectedFail(String p) {
3918         try {
3919             Pattern.compile(p);
3920         } catch (PatternSyntaxException pse) {
3921             //pse.printStackTrace();
3922             return;
3923         }
3924         failCount++;
3925     }
3926 
3927     private static void checkExpectedIAE(Matcher m, String g) {
3928         m.find();
3929         try {
3930             m.group(g);
3931         } catch (IllegalArgumentException x) {
3932             //iae.printStackTrace();
3933             try {
3934                 m.start(g);
3935             } catch (IllegalArgumentException xx) {
3936                 try {
3937                     m.start(g);
3938                 } catch (IllegalArgumentException xxx) {
3939                     return;
3940                 }
3941             }
3942         }
3943         failCount++;
3944     }
3945 
3946     private static void checkExpectedNPE(Matcher m) {
3947         m.find();
3948         try {
3949             m.group(null);
3950         } catch (NullPointerException x) {
3951             try {
3952                 m.start(null);
3953             } catch (NullPointerException xx) {
3954                 try {
3955                     m.end(null);
3956                 } catch (NullPointerException xxx) {
3957                     return;
3958                 }
3959             }
3960         }
3961         failCount++;
3962     }
3963 
3964     private static void namedGroupCaptureTest() throws Exception {
3965         check(Pattern.compile("x+(?<gname>y+)z+"),
3966               "xxxyyyzzz",
3967               "gname",
3968               "yyy");
3969 
3970         check(Pattern.compile("x+(?<gname8>y+)z+"),
3971               "xxxyyyzzz",
3972               "gname8",
3973               "yyy");
3974 
3975         //backref
3976         Pattern pattern = Pattern.compile("(a*)bc\\1");
3977         check(pattern, "zzzaabcazzz", true);  // found "abca"
3978 
3979         check(Pattern.compile("(?<gname>a*)bc\\k<gname>"),
3980               "zzzaabcaazzz", true);
3981 
3982         check(Pattern.compile("(?<gname>abc)(def)\\k<gname>"),
3983               "abcdefabc", true);
3984 
3985         check(Pattern.compile("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(?<gname>k)\\k<gname>"),
3986               "abcdefghijkk", true);
3987 
3988         // Supplementary character tests
3989         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3990               toSupplementaries("zzzaabcazzz"), true);
3991 
3992         check(Pattern.compile("(?<gname>" + toSupplementaries("a*)bc") + "\\k<gname>"),
3993               toSupplementaries("zzzaabcaazzz"), true);
3994 
3995         check(Pattern.compile("(?<gname>" + toSupplementaries("abc)(def)") + "\\k<gname>"),
3996               toSupplementaries("abcdefabc"), true);
3997 
3998         check(Pattern.compile(toSupplementaries("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)") +
3999                               "(?<gname>" +
4000                               toSupplementaries("k)") + "\\k<gname>"),
4001               toSupplementaries("abcdefghijkk"), true);
4002 
4003         check(Pattern.compile("x+(?<gname>y+)z+\\k<gname>"),
4004               "xxxyyyzzzyyy",
4005               "gname",
4006               "yyy");
4007 
4008         //replaceFirst/All
4009         checkReplaceFirst("(?<gn>ab)(c*)",
4010                           "abccczzzabcczzzabccc",
4011                           "${gn}",
4012                           "abzzzabcczzzabccc");
4013 
4014         checkReplaceAll("(?<gn>ab)(c*)",
4015                         "abccczzzabcczzzabccc",
4016                         "${gn}",
4017                         "abzzzabzzzab");
4018 
4019 
4020         checkReplaceFirst("(?<gn>ab)(c*)",
4021                           "zzzabccczzzabcczzzabccczzz",
4022                           "${gn}",
4023                           "zzzabzzzabcczzzabccczzz");
4024 
4025         checkReplaceAll("(?<gn>ab)(c*)",
4026                         "zzzabccczzzabcczzzabccczzz",
4027                         "${gn}",
4028                         "zzzabzzzabzzzabzzz");
4029 
4030         checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
4031                           "zzzabccczzzabcczzzabccczzz",
4032                           "${gn2}",
4033                           "zzzccczzzabcczzzabccczzz");
4034 
4035         checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
4036                         "zzzabccczzzabcczzzabccczzz",
4037                         "${gn2}",
4038                         "zzzccczzzcczzzccczzz");
4039 
4040         //toSupplementaries("(ab)(c*)"));
4041         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4042                            ")(?<gn2>" + toSupplementaries("c") + "*)",
4043                           toSupplementaries("abccczzzabcczzzabccc"),
4044                           "${gn1}",
4045                           toSupplementaries("abzzzabcczzzabccc"));
4046 
4047 
4048         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4049                         ")(?<gn2>" + toSupplementaries("c") + "*)",
4050                         toSupplementaries("abccczzzabcczzzabccc"),
4051                         "${gn1}",
4052                         toSupplementaries("abzzzabzzzab"));
4053 
4054         checkReplaceFirst("(?<gn1>" + toSupplementaries("ab") +
4055                            ")(?<gn2>" + toSupplementaries("c") + "*)",
4056                           toSupplementaries("abccczzzabcczzzabccc"),
4057                           "${gn2}",
4058                           toSupplementaries("ccczzzabcczzzabccc"));
4059 
4060 
4061         checkReplaceAll("(?<gn1>" + toSupplementaries("ab") +
4062                         ")(?<gn2>" + toSupplementaries("c") + "*)",
4063                         toSupplementaries("abccczzzabcczzzabccc"),
4064                         "${gn2}",
4065                         toSupplementaries("ccczzzcczzzccc"));
4066 
4067         checkReplaceFirst("(?<dog>Dog)AndCat",
4068                           "zzzDogAndCatzzzDogAndCatzzz",
4069                           "${dog}",
4070                           "zzzDogzzzDogAndCatzzz");
4071 
4072 
4073         checkReplaceAll("(?<dog>Dog)AndCat",
4074                           "zzzDogAndCatzzzDogAndCatzzz",
4075                           "${dog}",
4076                           "zzzDogzzzDogzzz");
4077 
4078         // backref in Matcher & String
4079         if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
4080             !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
4081             failCount++;
4082 
4083         // negative
4084         checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
4085         checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
4086         checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
4087         checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
4088         checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
4089         checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
4090                          "gnameX");
4091         checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
4092         report("NamedGroupCapture");
4093     }
4094 
    // This is for bug 6919132
4096     private static void nonBmpClassComplementTest() throws Exception {
4097         Pattern p = Pattern.compile("\\P{Lu}");
4098         Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));

4099         if (m.find() && m.start() == 1)
4100             failCount++;
4101 
4102         // from a unicode category
4103         p = Pattern.compile("\\P{Lu}");
4104         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4105         if (m.find())
4106             failCount++;
4107         if (!m.hitEnd())
4108             failCount++;
4109 
4110         // block
4111         p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
4112         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4113         if (m.find() && m.start() == 1)
4114             failCount++;
4115 





4116         report("NonBmpClassComplement");
4117     }
4118 
4119     private static void unicodePropertiesTest() throws Exception {
4120         // different forms
4121         if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
4122             !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
4123             !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
4124             !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
4125             !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
4126             !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
4127             !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
4128             !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
4129             !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
4130             !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
4131             failCount++;
4132 
4133         Matcher common  = Pattern.compile("\\p{script=Common}").matcher("");
4134         Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
4135         Matcher lastSM  = common;
4136         Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
4137 
4138         Matcher latin  = Pattern.compile("\\p{block=basic_latin}").matcher("");
4139         Matcher greek  = Pattern.compile("\\p{InGreek}").matcher("");
4140         Matcher lastBM = latin;
4141         Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
4142 
4143         for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
4144             if (cp >= 0x30000 && (cp & 0x70) == 0){
4145                 continue;  // only pick couple code points, they are the same
4146             }
4147 
4148             // Unicode Script
4149             Character.UnicodeScript script = Character.UnicodeScript.of(cp);
4150             Matcher m;
4151             String str = new String(Character.toChars(cp));
4152             if (script == lastScript) {
4153                  m = lastSM;
4154                  m.reset(str);
4155             } else {
4156                  m  = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
4157             }
4158             if (!m.matches()) {
4159                 failCount++;
4160             }
4161             Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
4162             other.reset(str);
4163             if (other.matches()) {
4164                 failCount++;
4165             }
4166             lastSM = m;
4167             lastScript = script;
4168 
4169             // Unicode Block
4170             Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
4171             if (block == null) {
4172                 //System.out.printf("Not a Block: cp=%x%n", cp);
4173                 continue;
4174             }
4175             if (block == lastBlock) {
4176                  m = lastBM;
4177                  m.reset(str);
4178             } else {
4179                  m  = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
4180             }
4181             if (!m.matches()) {
4182                 failCount++;
4183             }
4184             other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
4185             other.reset(str);
4186             if (other.matches()) {
4187                 failCount++;
4188             }
4189             lastBM = m;
4190             lastBlock = block;
4191         }
4192         report("unicodeProperties");
4193     }
4194 
4195     private static void unicodeHexNotationTest() throws Exception {
4196 
4197         // negative
4198         checkExpectedFail("\\x{-23}");
4199         checkExpectedFail("\\x{110000}");
4200         checkExpectedFail("\\x{}");
4201         checkExpectedFail("\\x{AB[ef]");
4202 
4203         // codepoint
4204         check("^\\x{1033c}$",              "\uD800\uDF3C", true);
4205         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
4206         check("^\\x{D800}\\x{DF3c}+$",     "\uD800\uDF3C", false);
4207         check("^\\xF0\\x90\\x8C\\xBC$",    "\uD800\uDF3C", false);
4208 
4209         // in class
4210         check("^[\\x{D800}\\x{DF3c}]+$",   "\uD800\uDF3C", false);
4211         check("^[\\xF0\\x90\\x8C\\xBC]+$", "\uD800\uDF3C", false);
4212         check("^[\\x{D800}\\x{DF3C}]+$",   "\uD800\uDF3C", false);
4213         check("^[\\x{DF3C}\\x{D800}]+$",   "\uD800\uDF3C", false);
4214         check("^[\\x{D800}\\x{DF3C}]+$",   "\uDF3C\uD800", true);
4215         check("^[\\x{DF3C}\\x{D800}]+$",   "\uDF3C\uD800", true);
4216 
4217         for (int cp = 0; cp <= 0x10FFFF; cp++) {
4218              String s = "A" + new String(Character.toChars(cp)) + "B";
4219              String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
4220                                              : String.format("\\u%04x\\u%04x",
4221                                                (int) Character.toChars(cp)[0],
4222                                                (int) Character.toChars(cp)[1]);
4223              String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
4224              if (!Pattern.matches("A" + hexUTF16 + "B", s))
4225                  failCount++;
4226              if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
4227                  failCount++;
4228              if (!Pattern.matches("A" + hexCodePoint + "B", s))
4229                  failCount++;
4230              if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
4231                  failCount++;
4232          }
4233          report("unicodeHexNotation");
4234     }
4235 
4236     private static void unicodeClassesTest() throws Exception {
4237 
4238         Matcher lower  = Pattern.compile("\\p{Lower}").matcher("");
4239         Matcher upper  = Pattern.compile("\\p{Upper}").matcher("");
4240         Matcher ASCII  = Pattern.compile("\\p{ASCII}").matcher("");
4241         Matcher alpha  = Pattern.compile("\\p{Alpha}").matcher("");
4242         Matcher digit  = Pattern.compile("\\p{Digit}").matcher("");
4243         Matcher alnum  = Pattern.compile("\\p{Alnum}").matcher("");
4244         Matcher punct  = Pattern.compile("\\p{Punct}").matcher("");
4245         Matcher graph  = Pattern.compile("\\p{Graph}").matcher("");
4246         Matcher print  = Pattern.compile("\\p{Print}").matcher("");
4247         Matcher blank  = Pattern.compile("\\p{Blank}").matcher("");
4248         Matcher cntrl  = Pattern.compile("\\p{Cntrl}").matcher("");
4249         Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
4250         Matcher space  = Pattern.compile("\\p{Space}").matcher("");
4251         Matcher bound  = Pattern.compile("\\b").matcher("");
4252         Matcher word   = Pattern.compile("\\w++").matcher("");
4253         // UNICODE_CHARACTER_CLASS
4254         Matcher lowerU  = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4255         Matcher upperU  = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4256         Matcher ASCIIU  = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4257         Matcher alphaU  = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4258         Matcher digitU  = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4259         Matcher alnumU  = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4260         Matcher punctU  = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4261         Matcher graphU  = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4262         Matcher printU  = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4263         Matcher blankU  = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4264         Matcher cntrlU  = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4265         Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4266         Matcher spaceU  = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4267         Matcher boundU  = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4268         Matcher wordU   = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4269         // embedded flag (?U)
4270         Matcher lowerEU  = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4271         Matcher graphEU  = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4272         Matcher wordEU   = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4273 
4274         Matcher bwb    = Pattern.compile("\\b\\w\\b").matcher("");
4275         Matcher bwbU   = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4276         Matcher bwbEU  = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
4277         // properties
4278         Matcher lowerP  = Pattern.compile("\\p{IsLowerCase}").matcher("");
4279         Matcher upperP  = Pattern.compile("\\p{IsUpperCase}").matcher("");
4280         Matcher titleP  = Pattern.compile("\\p{IsTitleCase}").matcher("");
4281         Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
4282         Matcher alphaP  = Pattern.compile("\\p{IsAlphabetic}").matcher("");
4283         Matcher ideogP  = Pattern.compile("\\p{IsIdeographic}").matcher("");
4284         Matcher cntrlP  = Pattern.compile("\\p{IsControl}").matcher("");
4285         Matcher spaceP  = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
4286         Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
4287         Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
4288         Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
4289 
4290         // javaMethod
4291         Matcher lowerJ  = Pattern.compile("\\p{javaLowerCase}").matcher("");
4292         Matcher upperJ  = Pattern.compile("\\p{javaUpperCase}").matcher("");
4293         Matcher alphaJ  = Pattern.compile("\\p{javaAlphabetic}").matcher("");
4294         Matcher ideogJ  = Pattern.compile("\\p{javaIdeographic}").matcher("");
4295 
4296         for (int cp = 1; cp < 0x30000; cp++) {
4297             String str = new String(Character.toChars(cp));
4298             int type = Character.getType(cp);
4299             if (// lower
4300                 POSIX_ASCII.isLower(cp)   != lower.reset(str).matches()  ||
4301                 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
4302                 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
4303                 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
4304                 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
4305                 // upper
4306                 POSIX_ASCII.isUpper(cp)   != upper.reset(str).matches()  ||
4307                 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
4308                 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
4309                 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
4310                 // alpha
4311                 POSIX_ASCII.isAlpha(cp)   != alpha.reset(str).matches()  ||
4312                 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
4313                 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
4314                 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
4315                 // digit
4316                 POSIX_ASCII.isDigit(cp)   != digit.reset(str).matches()  ||
4317                 Character.isDigit(cp)     != digitU.reset(str).matches() ||
4318                 // alnum
4319                 POSIX_ASCII.isAlnum(cp)   != alnum.reset(str).matches()  ||
4320                 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
4321                 // punct
4322                 POSIX_ASCII.isPunct(cp)   != punct.reset(str).matches()  ||
4323                 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
4324                 // graph
4325                 POSIX_ASCII.isGraph(cp)   != graph.reset(str).matches()  ||
4326                 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
4327                 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
4328                 // blank
4329                 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
4330                                           != blank.reset(str).matches()  ||
4331                 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
4332                 // print
4333                 POSIX_ASCII.isPrint(cp)   != print.reset(str).matches()  ||
4334                 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
4335                 // cntrl
4336                 POSIX_ASCII.isCntrl(cp)   != cntrl.reset(str).matches()  ||
4337                 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
4338                 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
4339                 // hexdigit
4340                 POSIX_ASCII.isHexDigit(cp)   != xdigit.reset(str).matches()  ||
4341                 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
4342                 // space
4343                 POSIX_ASCII.isSpace(cp)   != space.reset(str).matches()  ||
4344                 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
4345                 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
4346                 // word
4347                 POSIX_ASCII.isWord(cp)   != word.reset(str).matches()  ||
4348                 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
4349                 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
4350                 // bwordb
4351                 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
4352                 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
4353                 // properties
4354                 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
4355                 Character.isLetter(cp)    != letterP.reset(str).matches()||
4356                 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
4357                 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
4358                 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
4359                 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
4360                 POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches())
4361                 failCount++;
4362         }
4363 
4364         // bounds/word align
4365         twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
4366         if (!bwbU.reset("\u0180sherman\u0400").matches())
4367             failCount++;
4368         twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
4369         if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
4370             failCount++;
4371         twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
4372         if (!bwbU.reset("\u0724\u0739\u0724").matches())
4373             failCount++;
4374         if (!bwbEU.reset("\u0724\u0739\u0724").matches())
4375             failCount++;
4376         report("unicodePredefinedClasses");
4377     }
4378 
4379     private static void unicodeCharacterNameTest() throws Exception {
4380 
4381         for (int cp = 0; cp < Character.MAX_CODE_POINT; cp++) {
4382             if (!Character.isValidCodePoint(cp) ||
4383                 Character.getType(cp) == Character.UNASSIGNED)
4384                 continue;
4385             String str = new String(Character.toChars(cp));
4386             // single
4387             String p = "\\N{" + Character.getName(cp) + "}";
4388             if (!Pattern.compile(p).matcher(str).matches()) {
4389                 failCount++;
4390             }
4391             // class[c]
4392             p = "[\\N{" + Character.getName(cp) + "}]";
4393             if (!Pattern.compile(p).matcher(str).matches()) {
4394                 failCount++;
4395             }
4396         }
4397 
4398         // range
4399         for (int i = 0; i < 10; i++) {
4400             int start = generator.nextInt(20);
4401             int end = start + generator.nextInt(200);
4402             String p = "[\\N{" + Character.getName(start) + "}-\\N{" + Character.getName(end) + "}]";
4403             String str;
4404             for (int cp = start; cp < end; cp++) {
4405                 str = new String(Character.toChars(cp));
4406                 if (!Pattern.compile(p).matcher(str).matches()) {
4407                     failCount++;
4408                 }
4409             }
4410             str = new String(Character.toChars(end + 10));
4411             if (Pattern.compile(p).matcher(str).matches()) {
4412                 failCount++;
4413             }
4414         }
4415 
4416         // slice
4417         for (int i = 0; i < 10; i++) {
4418             int n = generator.nextInt(256);
4419             int[] buf = new int[n];
4420             StringBuffer sb = new StringBuffer(1024);
4421             for (int j = 0; j < n; j++) {
4422                 int cp = generator.nextInt(1000);
4423                 if (!Character.isValidCodePoint(cp) ||
4424                     Character.getType(cp) == Character.UNASSIGNED)
4425                     cp = 0x4e00;    // just use 4e00
4426                 sb.append("\\N{" + Character.getName(cp) + "}");
4427                 buf[j] = cp;
4428             }
4429             String p = sb.toString();
4430             String str = new String(buf, 0, buf.length);
4431             if (!Pattern.compile(p).matcher(str).matches()) {
4432                 failCount++;
4433             }
4434         }
4435         report("unicodeCharacterName");
4436     }
4437 
4438     private static void horizontalAndVerticalWSTest() throws Exception {
4439         String hws = new String (new char[] {
4440                                      0x09, 0x20, 0xa0, 0x1680, 0x180e,
4441                                      0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
4442                                      0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
4443                                      0x202f, 0x205f, 0x3000 });
4444         String vws = new String (new char[] {
4445                                      0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029 });
4446         if (!Pattern.compile("\\h+").matcher(hws).matches() ||
4447             !Pattern.compile("[\\h]+").matcher(hws).matches())
4448             failCount++;
4449         if (Pattern.compile("\\H").matcher(hws).find() ||
4450             Pattern.compile("[\\H]").matcher(hws).find())
4451             failCount++;
4452         if (!Pattern.compile("\\v+").matcher(vws).matches() ||
4453             !Pattern.compile("[\\v]+").matcher(vws).matches())
4454             failCount++;
4455         if (Pattern.compile("\\V").matcher(vws).find() ||
4456             Pattern.compile("[\\V]").matcher(vws).find())
4457             failCount++;
4458         String prefix = "abcd";
4459         String suffix = "efgh";
4460         String ng = "A";
4461         for (int i = 0; i < hws.length(); i++) {
4462             String c = String.valueOf(hws.charAt(i));
4463             Matcher m = Pattern.compile("\\h").matcher(prefix + c + suffix);
4464             if (!m.find() || !c.equals(m.group()))
4465                 failCount++;
4466             m = Pattern.compile("[\\h]").matcher(prefix + c + suffix);
4467             if (!m.find() || !c.equals(m.group()))
4468                 failCount++;
4469 
4470             m = Pattern.compile("\\H").matcher(hws.substring(0, i) + ng + hws.substring(i));
4471             if (!m.find() || !ng.equals(m.group()))
4472                 failCount++;
4473             m = Pattern.compile("[\\H]").matcher(hws.substring(0, i) + ng + hws.substring(i));
4474             if (!m.find() || !ng.equals(m.group()))
4475                 failCount++;
4476         }
4477         for (int i = 0; i < vws.length(); i++) {
4478             String c = String.valueOf(vws.charAt(i));
4479             Matcher m = Pattern.compile("\\v").matcher(prefix + c + suffix);
4480             if (!m.find() || !c.equals(m.group()))
4481                 failCount++;
4482             m = Pattern.compile("[\\v]").matcher(prefix + c + suffix);
4483             if (!m.find() || !c.equals(m.group()))
4484                 failCount++;
4485 
4486             m = Pattern.compile("\\V").matcher(vws.substring(0, i) + ng + vws.substring(i));
4487             if (!m.find() || !ng.equals(m.group()))
4488                 failCount++;
4489             m = Pattern.compile("[\\V]").matcher(vws.substring(0, i) + ng + vws.substring(i));
4490             if (!m.find() || !ng.equals(m.group()))
4491                 failCount++;
4492         }
4493         // \v in range is interpreted as 0x0B. This is the undocumented behavior
4494         if (!Pattern.compile("[\\v-\\v]").matcher(String.valueOf((char)0x0B)).matches())
4495             failCount++;
4496         report("horizontalAndVerticalWSTest");
4497     }
4498 
4499     private static void linebreakTest() throws Exception {
4500         String linebreaks = new String (new char[] {
4501             0x0A, 0x0B, 0x0C, 0x0D, 0x85, 0x2028, 0x2029 });
4502         String crnl = "\r\n";
4503         if (!Pattern.compile("\\R+").matcher(linebreaks).matches() ||
4504             !Pattern.compile("\\R").matcher(crnl).matches() ||
4505             Pattern.compile("\\R\\R").matcher(crnl).matches())
4506             failCount++;
4507         report("linebreakTest");
4508     }
4509 
4510     // #7189363
4511     private static void branchTest() throws Exception {
4512         if (!Pattern.compile("(a)?bc|d").matcher("d").find() ||     // greedy
4513             !Pattern.compile("(a)+bc|d").matcher("d").find() ||
4514             !Pattern.compile("(a)*bc|d").matcher("d").find() ||
4515             !Pattern.compile("(a)??bc|d").matcher("d").find() ||    // reluctant
4516             !Pattern.compile("(a)+?bc|d").matcher("d").find() ||
4517             !Pattern.compile("(a)*?bc|d").matcher("d").find() ||
4518             !Pattern.compile("(a)?+bc|d").matcher("d").find() ||    // possessive
4519             !Pattern.compile("(a)++bc|d").matcher("d").find() ||
4520             !Pattern.compile("(a)*+bc|d").matcher("d").find() ||
4521             !Pattern.compile("(a)?bc|d").matcher("d").matches() ||  // greedy
4522             !Pattern.compile("(a)+bc|d").matcher("d").matches() ||
4523             !Pattern.compile("(a)*bc|d").matcher("d").matches() ||
4524             !Pattern.compile("(a)??bc|d").matcher("d").matches() || // reluctant
4525             !Pattern.compile("(a)+?bc|d").matcher("d").matches() ||
4526             !Pattern.compile("(a)*?bc|d").matcher("d").matches() ||
4527             !Pattern.compile("(a)?+bc|d").matcher("d").matches() || // possessive
4528             !Pattern.compile("(a)++bc|d").matcher("d").matches() ||
4529             !Pattern.compile("(a)*+bc|d").matcher("d").matches() ||
4530             !Pattern.compile("(a)?bc|de").matcher("de").find() ||   // others
4531             !Pattern.compile("(a)??bc|de").matcher("de").find() ||
4532             !Pattern.compile("(a)?bc|de").matcher("de").matches() ||
4533             !Pattern.compile("(a)??bc|de").matcher("de").matches())
4534             failCount++;
4535         report("branchTest");
4536     }
4537 
4538     // This test is for 8007395
4539     private static void groupCurlyNotFoundSuppTest() throws Exception {
4540         String input = "test this as \ud83d\ude0d";
4541         for (String pStr : new String[] { "test(.)+(@[a-zA-Z.]+)",
4542                                           "test(.)*(@[a-zA-Z.]+)",
4543                                           "test([^B])+(@[a-zA-Z.]+)",
4544                                           "test([^B])*(@[a-zA-Z.]+)",
4545                                           "test(\\P{IsControl})+(@[a-zA-Z.]+)",
4546                                           "test(\\P{IsControl})*(@[a-zA-Z.]+)",
4547                                         }) {
4548             Matcher m = Pattern.compile(pStr, Pattern.CASE_INSENSITIVE)
4549                                .matcher(input);
4550             try {
4551                 if (m.find()) {
4552                     failCount++;
4553                 }
4554             } catch (Exception x) {
4555                 failCount++;
4556             }
4557         }
4558         report("GroupCurly NotFoundSupp");
4559     }
4560 
4561     // This test is for 8023647
4562     private static void groupCurlyBackoffTest() throws Exception {
4563         if (!"abc1c".matches("(\\w)+1\\1") ||
4564             "abc11".matches("(\\w)+1\\1")) {
4565             failCount++;
4566         }
4567         report("GroupCurly backoff");
4568     }
4569 
4570     // This test is for 8012646
4571     private static void patternAsPredicate() throws Exception {
4572         Predicate<String> p = Pattern.compile("[a-z]+").asPredicate();
4573 
4574         if (p.test("")) {
4575             failCount++;
4576         }
4577         if (!p.test("word")) {
4578             failCount++;
4579         }
4580         if (p.test("1234")) {
4581             failCount++;
4582         }
4583         report("Pattern.asPredicate");
4584     }
4585 
4586     // This test is for 8035975
4587     private static void invalidFlags() throws Exception {
4588         for (int flag = 1; flag != 0; flag <<= 1) {
4589             switch (flag) {
4590             case Pattern.CASE_INSENSITIVE:
4591             case Pattern.MULTILINE:
4592             case Pattern.DOTALL:
4593             case Pattern.UNICODE_CASE:
4594             case Pattern.CANON_EQ:
4595             case Pattern.UNIX_LINES:
4596             case Pattern.LITERAL:
4597             case Pattern.UNICODE_CHARACTER_CLASS:
4598             case Pattern.COMMENTS:
4599                 // valid flag, continue
4600                 break;
4601             default:
4602                 try {
4603                     Pattern.compile(".", flag);
4604                     failCount++;
4605                 } catch (IllegalArgumentException expected) {
4606                 }
4607             }
4608         }
4609         report("Invalid compile flags");
4610     }
4611 
4612     private static void grapheme() throws Exception {
4613         Files.lines(Paths.get(System.getProperty("test.src", "."),
4614                               "GraphemeBreakTest.txt"))
4615             .filter( ln -> ln.length() != 0 && !ln.startsWith("#") )
4616             .forEach( ln -> {
4617                     ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", "");
4618                     // System.out.println(str);
4619                     String[] strs = ln.split("\u00f7|\u00d7");
4620                     StringBuilder src = new StringBuilder();
4621                     ArrayList<String> graphemes = new ArrayList<>();
4622                     StringBuilder buf = new StringBuilder();
4623                     int offBk = 0;
4624                     for (String str : strs) {
4625                         if (str.length() == 0)  // first empty str
4626                             continue;
4627                         int cp = Integer.parseInt(str, 16);
4628                         src.appendCodePoint(cp);
4629                         buf.appendCodePoint(cp);
4630                         offBk += (str.length() + 1);
4631                         if (ln.charAt(offBk) == '\u00f7') {    // DIV
4632                             graphemes.add(buf.toString());
4633                             buf = new StringBuilder();
4634                         }
4635                     }
4636                     Pattern p = Pattern.compile("\\X");
4637                     Matcher m = p.matcher(src.toString());
4638                     Scanner s = new Scanner(src.toString()).useDelimiter("\\b{g}");
4639                     for (String g : graphemes) {
4640                         // System.out.printf("     grapheme:=[%s]%n", g);
4641                         // (1) test \\X directly
4642                         if (!m.find() || !m.group().equals(g)) {
4643                             System.out.println("Failed \\X [" + ln + "] : " + g);
4644                             failCount++;
4645                         }
4646                         // (2) test \\b{g} + \\X  via Scanner
4647                         boolean hasNext = s.hasNext(p);
4648                         // if (!s.hasNext() || !s.next().equals(next)) {
4649                         if (!s.hasNext(p) || !s.next(p).equals(g)) {
4650                             System.out.println("Failed b{g} [" + ln + "] : " + g);
4651                             failCount++;
4652                         }
4653                     }
4654                 });
4655         // some sanity checks
4656         if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() ||
4657             !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() ||
4658             !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches())
4659             failCount++;
4660         // make sure "\b{n}" still works
4661         if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches())
4662             failCount++;
4663         report("Unicode extended grapheme cluster");
4664     }
























































































4665 }
--- EOF ---