test/java/util/regex/RegExTest.java

Print this page




  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /**
  25  * @test
  26  * @summary tests RegExp framework (use -Dseed=X to set PRNG seed)
  27  * @author Mike McCloskey
  28  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
  29  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
  30  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  31  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  32  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  33  * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
  34  * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
  35  * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819



  36  * @library /lib/testlibrary
  37  * @build jdk.testlibrary.*
  38  * @run main RegExTest
  39  * @key randomness
  40  */
  41 
  42 import java.util.function.Function;
  43 import java.util.regex.*;
  44 import java.util.Random;
  45 import java.util.Scanner;
  46 import java.io.*;
  47 import java.nio.file.*;
  48 import java.util.*;
  49 import java.nio.CharBuffer;
  50 import java.util.function.Predicate;
  51 import jdk.testlibrary.RandomFactory;
  52 
  53 /**
  54  * This is a test class created to check the operation of
  55  * the Pattern and Matcher classes.


 145         wordSearchTest();
 146         hitEndTest();
 147         toMatchResultTest();
 148         toMatchResultTest2();
 149         surrogatesInClassTest();
 150         removeQEQuotingTest();
 151         namedGroupCaptureTest();
 152         nonBmpClassComplementTest();
 153         unicodePropertiesTest();
 154         unicodeHexNotationTest();
 155         unicodeClassesTest();
 156         unicodeCharacterNameTest();
 157         horizontalAndVerticalWSTest();
 158         linebreakTest();
 159         branchTest();
 160         groupCurlyNotFoundSuppTest();
 161         groupCurlyBackoffTest();
 162         patternAsPredicate();
 163         invalidFlags();
 164         grapheme();

 165 
 166         if (failure) {
 167             throw new
 168                 RuntimeException("RegExTest failed, 1st failure: " +
 169                                  firstFailure);
 170         } else {
 171             System.err.println("OKAY: All tests passed.");
 172         }
 173     }
 174 
 175     // Utility functions
 176 
 177     private static String getRandomAlphaString(int length) {
 178         StringBuffer buf = new StringBuffer(length);
 179         for (int i=0; i<length; i++) {
 180             char randChar = (char)(97 + generator.nextInt(26));
 181             buf.append(randChar);
 182         }
 183         return buf.toString();
 184     }


2642             failCount++;
2643 
2644         // Marks that cannot legally change order and be equivalent
2645         p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2646         check(p, "testa\u0308\u0300", true);
2647         check(p, "testa\u0300\u0308", false);
2648 
2649         // Marks that can legally change order and be equivalent
2650         p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2651         check(p, "testa\u0308\u0323", true);
2652         check(p, "testa\u0323\u0308", true);
2653 
2654         // Test all equivalences of the sequence a\u0308\u0323\u0300
2655         p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2656         check(p, "testa\u0308\u0323\u0300", true);
2657         check(p, "testa\u0323\u0308\u0300", true);
2658         check(p, "testa\u0308\u0300\u0323", true);
2659         check(p, "test\u00e4\u0323\u0300", true);
2660         check(p, "test\u00e4\u0300\u0323", true);
2661 
2662         /*
2663          * The following canonical equivalence tests don't work. Bug id: 4916384.
2664          *
2665         // Decomposed hangul (jamos)
2666         p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
2667         m = p.matcher("\u1100\u1161");
2668         if (!m.matches())
2669             failCount++;
2670 
2671         m.reset("\uac00");
2672         if (!m.matches())
2673             failCount++;
















































2674 
2675         // Composed hangul
2676         p = Pattern.compile("\uac00", Pattern.CANON_EQ);
2677         m = p.matcher("\u1100\u1161");
2678         if (!m.matches())
2679             failCount++;
2680 
2681         m.reset("\uac00");
2682         if (!m.matches())
2683             failCount++;
2684 
2685         // Decomposed supplementary outside char classes
2686         p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
2687         m = p.matcher("test\ud834\uddc0");
2688         if (!m.matches())
2689             failCount++;
2690 
2691         m.reset("test\ud834\uddbc\ud834\udd6f");
2692         if (!m.matches())
2693             failCount++;

2694 


2695         // Composed supplementary outside char classes
2696         p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
2697         m.reset("test\ud834\uddbc\ud834\udd6f");
2698         if (!m.matches())
2699             failCount++;
2700 
2701         m = p.matcher("test\ud834\uddc0");
2702         if (!m.matches())
2703             failCount++;
2704 
2705         */


2706 

















2707         report("Canonical Equivalence");
2708     }
2709 
2710     /**
2711      * A basic sanity test of Matcher.replaceAll().
2712      */
2713     private static void globalSubstitute() throws Exception {
2714         // Global substitution with a literal
2715         Pattern p = Pattern.compile("(ab)(c*)");
2716         Matcher m = p.matcher("abccczzzabcczzzabccc");
2717         if (!m.replaceAll("test").equals("testzzztestzzztest"))
2718             failCount++;
2719 
2720         m.reset("zzzabccczzzabcczzzabccczzz");
2721         if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2722             failCount++;
2723 
2724         // Global substitution with groups
2725         m.reset("zzzabccczzzabcczzzabccczzz");
2726         String result = m.replaceAll("$1");


3829         if (m.start() != m.start(0))
3830             failCount++;
3831         //assert(m.end() = m.end(0);
3832         if (m.start() != m.start(0))
3833             failCount++;
3834         //assert(m.group() = m.group(0);
3835         if (!m.group().equals(m.group(0)))
3836             failCount++;
3837         try {
3838             m.group(50);
3839             failCount++;
3840         } catch (IndexOutOfBoundsException ise) {}
3841 
3842         return failCount;
3843     }
3844 
3845     private static Pattern compileTestPattern(String patternString) {
3846         if (!patternString.startsWith("'")) {
3847             return Pattern.compile(patternString);
3848         }
3849 
3850         int break1 = patternString.lastIndexOf("'");
3851         String flagString = patternString.substring(
3852                                           break1+1, patternString.length());
3853         patternString = patternString.substring(1, break1);
3854 
3855         if (flagString.equals("i"))
3856             return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3857 
3858         if (flagString.equals("m"))
3859             return Pattern.compile(patternString, Pattern.MULTILINE);
3860 
3861         return Pattern.compile(patternString);
3862     }
3863 
3864     /**
3865      * Reads a line from the input file. Keeps reading lines until a non
3866      * empty non comment line is read. If the line contains a \n then
3867      * these two characters are replaced by a newline char. If a \\uxxxx
3868      * sequence is read then the sequence is replaced by the unicode char.
3869      */


4075                           "${dog}",
4076                           "zzzDogzzzDogzzz");
4077 
4078         // backref in Matcher & String
4079         if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
4080             !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
4081             failCount++;
4082 
4083         // negative
4084         checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
4085         checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
4086         checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
4087         checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
4088         checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
4089         checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
4090                          "gnameX");
4091         checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
4092         report("NamedGroupCapture");
4093     }
4094 
4095     // This is for bug 6969132
4096     private static void nonBmpClassComplementTest() throws Exception {
4097         Pattern p = Pattern.compile("\\P{Lu}");
4098         Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));

4099         if (m.find() && m.start() == 1)
4100             failCount++;
4101 
4102         // from a unicode category
4103         p = Pattern.compile("\\P{Lu}");
4104         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4105         if (m.find())
4106             failCount++;
4107         if (!m.hitEnd())
4108             failCount++;
4109 
4110         // block
4111         p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
4112         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4113         if (m.find() && m.start() == 1)
4114             failCount++;
4115 





4116         report("NonBmpClassComplement");
4117     }
4118 
4119     private static void unicodePropertiesTest() throws Exception {
4120         // different forms
4121         if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
4122             !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
4123             !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
4124             !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
4125             !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
4126             !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
4127             !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
4128             !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
4129             !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
4130             !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
4131             failCount++;
4132 
4133         Matcher common  = Pattern.compile("\\p{script=Common}").matcher("");
4134         Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
4135         Matcher lastSM  = common;


4645                         }
4646                         // (2) test \\b{g} + \\X  via Scanner
4647                         boolean hasNext = s.hasNext(p);
4648                         // if (!s.hasNext() || !s.next().equals(next)) {
4649                         if (!s.hasNext(p) || !s.next(p).equals(g)) {
4650                             System.out.println("Failed b{g} [" + ln + "] : " + g);
4651                             failCount++;
4652                         }
4653                     }
4654                 });
4655         // some sanity checks
4656         if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() ||
4657             !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() ||
4658             !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches())
4659             failCount++;
4660         // make sure "\b{n}" still works
4661         if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches())
4662             failCount++;
4663         report("Unicode extended grapheme cluster");
4664     }
























































































4665 }


  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 /**
  25  * @test
  26  * @summary tests RegExp framework (use -Dseed=X to set PRNG seed)
  27  * @author Mike McCloskey
  28  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
  29  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
  30  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  31  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  32  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  33  * 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
  34  * 7067045 7014640 7189363 8007395 8013252 8013254 8012646 8023647 6559590
  35  * 8027645 8035076 8039124 8035975 8074678 6854417 8143854 8147531 7071819
  36  * 8151481 4867170 7080302 6728861 6995635 6736245 4916384
  37  * 6328855 6192895 6345469 6988218 6693451 7006761 8140212
  38  *
  39  * @library /lib/testlibrary
  40  * @build jdk.testlibrary.*
  41  * @run main RegExTest
  42  * @key randomness
  43  */
  44 
  45 import java.util.function.Function;
  46 import java.util.regex.*;
  47 import java.util.Random;
  48 import java.util.Scanner;
  49 import java.io.*;
  50 import java.nio.file.*;
  51 import java.util.*;
  52 import java.nio.CharBuffer;
  53 import java.util.function.Predicate;
  54 import jdk.testlibrary.RandomFactory;
  55 
  56 /**
  57  * This is a test class created to check the operation of
  58  * the Pattern and Matcher classes.


 148         wordSearchTest();
 149         hitEndTest();
 150         toMatchResultTest();
 151         toMatchResultTest2();
 152         surrogatesInClassTest();
 153         removeQEQuotingTest();
 154         namedGroupCaptureTest();
 155         nonBmpClassComplementTest();
 156         unicodePropertiesTest();
 157         unicodeHexNotationTest();
 158         unicodeClassesTest();
 159         unicodeCharacterNameTest();
 160         horizontalAndVerticalWSTest();
 161         linebreakTest();
 162         branchTest();
 163         groupCurlyNotFoundSuppTest();
 164         groupCurlyBackoffTest();
 165         patternAsPredicate();
 166         invalidFlags();
 167         grapheme();
 168         expoBacktracking();
 169 
 170         if (failure) {
 171             throw new
 172                 RuntimeException("RegExTest failed, 1st failure: " +
 173                                  firstFailure);
 174         } else {
 175             System.err.println("OKAY: All tests passed.");
 176         }
 177     }
 178 
 179     // Utility functions
 180 
 181     private static String getRandomAlphaString(int length) {
 182         StringBuffer buf = new StringBuffer(length);
 183         for (int i=0; i<length; i++) {
 184             char randChar = (char)(97 + generator.nextInt(26));
 185             buf.append(randChar);
 186         }
 187         return buf.toString();
 188     }


2646             failCount++;
2647 
2648         // Marks that cannot legally change order and be equivalent
2649         p = Pattern.compile("testa\u0308\u0300", Pattern.CANON_EQ);
2650         check(p, "testa\u0308\u0300", true);
2651         check(p, "testa\u0300\u0308", false);
2652 
2653         // Marks that can legally change order and be equivalent
2654         p = Pattern.compile("testa\u0308\u0323", Pattern.CANON_EQ);
2655         check(p, "testa\u0308\u0323", true);
2656         check(p, "testa\u0323\u0308", true);
2657 
2658         // Test all equivalences of the sequence a\u0308\u0323\u0300
2659         p = Pattern.compile("testa\u0308\u0323\u0300", Pattern.CANON_EQ);
2660         check(p, "testa\u0308\u0323\u0300", true);
2661         check(p, "testa\u0323\u0308\u0300", true);
2662         check(p, "testa\u0308\u0300\u0323", true);
2663         check(p, "test\u00e4\u0323\u0300", true);
2664         check(p, "test\u00e4\u0300\u0323", true);
2665 
2666         Object[][] data = new Object[][] {







2667 
2668         // JDK-4867170
2669         { "[\u1f80-\u1f82]", "ab\u1f80cd",             "f", true },
2670         { "[\u1f80-\u1f82]", "ab\u1f81cd",             "f", true },
2671         { "[\u1f80-\u1f82]", "ab\u1f82cd",             "f", true },
2672         { "[\u1f80-\u1f82]", "ab\u03b1\u0314\u0345cd", "f", true },
2673         { "[\u1f80-\u1f82]", "ab\u03b1\u0345\u0314cd", "f", true },
2674         { "[\u1f80-\u1f82]", "ab\u1f01\u0345cd",       "f", true },
2675         { "[\u1f80-\u1f82]", "ab\u1f00\u0345cd",       "f", true },
2676 
2677         { "\\p{IsGreek}",    "ab\u1f80cd",             "f", true },
2678         { "\\p{IsGreek}",    "ab\u1f81cd",             "f", true },
2679         { "\\p{IsGreek}",    "ab\u1f82cd",             "f", true },
2680         { "\\p{IsGreek}",    "ab\u03b1\u0314\u0345cd", "f", true },
2681         { "\\p{IsGreek}",    "ab\u1f01\u0345cd",       "f", true },
2682 
2683         // backtracking: force a match of "\u1f80" instead of "\u1f82"
2684         { "ab\\p{IsGreek}\u0300cd", "ab\u03b1\u0313\u0345\u0300cd", "m", true },
2685 
2686         { "[\\p{IsGreek}]",  "\u03b1\u0314\u0345",     "m", true },
2687         { "\\p{IsGreek}",    "\u03b1\u0314\u0345",     "m", true },
2688  
2689         { "[^\u1f80-\u1f82]","\u1f81",                 "m", false },
2690         { "[^\u1f80-\u1f82]","\u03b1\u0314\u0345",     "m", false },
2691         { "[^\u1f01\u0345]", "\u1f81",                 "f", false },
2692  
2693         { "[^\u1f81]+",      "\u1f80\u1f82",           "f", true },
2694         { "[\u1f80]",        "ab\u1f80cd",             "f", true },
2695         { "\u1f80",          "ab\u1f80cd",             "f", true },
2696         { "\u1f00\u0345\u0300",  "\u1f82", "m", true },
2697         { "\u1f80",          "-\u1f00\u0345\u0300-",   "f", true },
2698         { "\u1f82",          "\u1f00\u0345\u0300",     "m", true },
2699         { "\u1f82",          "\u1f80\u0300",           "m", true },
2700  
2701         // JDK-7080302       # compile failed
2702         { "a(\u0041\u0301\u0328)", "a\u0041\u0301\u0328", "m", true},
2703 
2704         // JDK-6728861, same cause as above one
2705         { "\u00e9\u00e9n", "e\u0301e\u0301n", "m", true},
2706 
2707         // JDK-6995635
2708         { "(\u00e9)", "e\u0301", "m", true },
2709 
2710         // JDK-6736245
2711         // interesting special case: nfc(u2add+u0338) -> (u2add+u0338), NOT u2adc
2712         { "\u2ADC", "\u2ADC", "m", true},          // NFC
2713         { "\u2ADC", "\u2ADD\u0338", "m", true},    // NFD 
2714 
2715         //  4916384.
2716         // Decomposed hangul (jamos) works inside clazz
2717         { "[\u1100\u1161]", "\u1100\u1161", "m", true},
2718         { "[\u1100\u1161]", "\uac00", "m", true},
2719 
2720         { "[\uac00]", "\u1100\u1161", "m", true},
2721         { "[\uac00]", "\uac00", "m", true},



2722 
2723         // Decomposed hangul (jamos)
2724         { "\u1100\u1161", "\u1100\u1161", "m", true},
2725         { "\u1100\u1161", "\uac00", "m", true},
2726 
2727         // Composed hangul
2728         { "\uac00",  "\u1100\u1161", "m", true },
2729         { "\uac00",  "\uac00", "m", true },


2730 
2731         /* Need a NFDSlice to nfd the source to solve this issue
2732            u+1d1c0 -> nfd: <u+1d1ba><u+1d165><u+1d16f>  -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2733            u+1d1bc -> nfd: <u+1d1ba><u+1d165>           -> nfc: <u+1d1ba><u+1d165>
2734            <u+1d1bc><u+1d16f> -> nfd: <u+1d1ba><u+1d165><u+1d16f> -> nfc: <u+1d1ba><u+1d165><u+1d16f>
2735 
2736         // Decomposed supplementary outside char classes
2737         // { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0", "m", true },
2738         // Composed supplementary outside char classes
2739         // { "test\ud834\uddc0", "test\ud834\uddbc\ud834\udd6f", "m", true },








2740         */
2741         { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddbc\ud834\udd6f", "m", true },
2742         { "test\ud834\uddc0",             "test\ud834\uddbc\ud834\udd6f", "m", true },
2743 
2744         { "test\ud834\uddc0",             "test\ud834\uddc0",             "m", true },
2745         { "test\ud834\uddbc\ud834\udd6f", "test\ud834\uddc0",             "m", true },
2746         };
2747 
2748         int failCount = 0;
2749         for (Object[] d : data) {
2750             String pn = (String)d[0];
2751             String tt = (String)d[1];
2752             boolean isFind = "f".equals(((String)d[2]));
2753             boolean expected = (boolean)d[3];
2754             boolean ret = isFind ? Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).find()
2755                                  : Pattern.compile(pn, Pattern.CANON_EQ).matcher(tt).matches();
2756             if (ret != expected) {
2757                 failCount++;
2758                 continue;
2759             }
2760         }
2761         report("Canonical Equivalence");
2762     }
2763 
2764     /**
2765      * A basic sanity test of Matcher.replaceAll().
2766      */
2767     private static void globalSubstitute() throws Exception {
2768         // Global substitution with a literal
2769         Pattern p = Pattern.compile("(ab)(c*)");
2770         Matcher m = p.matcher("abccczzzabcczzzabccc");
2771         if (!m.replaceAll("test").equals("testzzztestzzztest"))
2772             failCount++;
2773 
2774         m.reset("zzzabccczzzabcczzzabccczzz");
2775         if (!m.replaceAll("test").equals("zzztestzzztestzzztestzzz"))
2776             failCount++;
2777 
2778         // Global substitution with groups
2779         m.reset("zzzabccczzzabcczzzabccczzz");
2780         String result = m.replaceAll("$1");


3883         if (m.start() != m.start(0))
3884             failCount++;
3885         //assert(m.end() = m.end(0);
3886         if (m.start() != m.start(0))
3887             failCount++;
3888         //assert(m.group() = m.group(0);
3889         if (!m.group().equals(m.group(0)))
3890             failCount++;
3891         try {
3892             m.group(50);
3893             failCount++;
3894         } catch (IndexOutOfBoundsException ise) {}
3895 
3896         return failCount;
3897     }
3898 
3899     private static Pattern compileTestPattern(String patternString) {
3900         if (!patternString.startsWith("'")) {
3901             return Pattern.compile(patternString);
3902         }

3903         int break1 = patternString.lastIndexOf("'");
3904         String flagString = patternString.substring(
3905                                           break1+1, patternString.length());
3906         patternString = patternString.substring(1, break1);
3907 
3908         if (flagString.equals("i"))
3909             return Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
3910 
3911         if (flagString.equals("m"))
3912             return Pattern.compile(patternString, Pattern.MULTILINE);
3913 
3914         return Pattern.compile(patternString);
3915     }
3916 
3917     /**
3918      * Reads a line from the input file. Keeps reading lines until a non
3919      * empty non comment line is read. If the line contains a \n then
3920      * these two characters are replaced by a newline char. If a \\uxxxx
3921      * sequence is read then the sequence is replaced by the unicode char.
3922      */


4128                           "${dog}",
4129                           "zzzDogzzzDogzzz");
4130 
4131         // backref in Matcher & String
4132         if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
4133             !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
4134             failCount++;
4135 
4136         // negative
4137         checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
4138         checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
4139         checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
4140         checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
4141         checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
4142         checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
4143                          "gnameX");
4144         checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
4145         report("NamedGroupCapture");
4146     }
4147 
4148     // This is for bug 6919132
4149     private static void nonBmpClassComplementTest() throws Exception {
4150         Pattern p = Pattern.compile("\\P{Lu}");
4151         Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4152 
4153         if (m.find() && m.start() == 1)
4154             failCount++;
4155 
4156         // from a unicode category
4157         p = Pattern.compile("\\P{Lu}");
4158         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4159         if (m.find())
4160             failCount++;
4161         if (!m.hitEnd())
4162             failCount++;
4163 
4164         // block
4165         p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
4166         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
4167         if (m.find() && m.start() == 1)
4168             failCount++;
4169 
4170         p = Pattern.compile("\\P{sc=GRANTHA}");
4171         m = p.matcher(new String(new int[] {0x11350}, 0, 1));
4172         if (m.find() && m.start() == 1)
4173             failCount++;
4174 
4175         report("NonBmpClassComplement");
4176     }
4177 
4178     private static void unicodePropertiesTest() throws Exception {
4179         // different forms
4180         if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
4181             !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
4182             !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
4183             !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
4184             !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
4185             !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
4186             !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
4187             !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
4188             !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
4189             !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
4190             failCount++;
4191 
4192         Matcher common  = Pattern.compile("\\p{script=Common}").matcher("");
4193         Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
4194         Matcher lastSM  = common;


4704                         }
4705                         // (2) test \\b{g} + \\X  via Scanner
4706                         boolean hasNext = s.hasNext(p);
4707                         // if (!s.hasNext() || !s.next().equals(next)) {
4708                         if (!s.hasNext(p) || !s.next(p).equals(g)) {
4709                             System.out.println("Failed b{g} [" + ln + "] : " + g);
4710                             failCount++;
4711                         }
4712                     }
4713                 });
4714         // some sanity checks
4715         if (!Pattern.compile("\\X{10}").matcher("abcdefghij").matches() ||
4716             !Pattern.compile("\\b{g}(?:\\X\\b{g}){5}\\b{g}").matcher("abcde").matches() ||
4717             !Pattern.compile("(?:\\X\\b{g}){2}").matcher("\ud800\udc00\ud801\udc02").matches())
4718             failCount++;
4719         // make sure "\b{n}" still works
4720         if (!Pattern.compile("\\b{1}hello\\b{1} \\b{1}world\\b{1}").matcher("hello world").matches())
4721             failCount++;
4722         report("Unicode extended grapheme cluster");
4723     }
4724 
4725     // hangup/timeout if go into exponential backtracking
4726     private static void expoBacktracking() throws Exception {
4727 
4728         Object[][] patternMatchers = {
4729             // 6328855
4730             { "(.*\n*)*", 
4731               "this little fine string lets\r\njava.lang.String.matches\r\ncrash\r\n(We don't know why but adding \r* to the regex makes it work again)",
4732               false },
4733             // 6192895
4734             { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4735               "Hello World this is a test this is a test this is a test A",
4736               true },
4737             { " *([a-zA-Z0-9/\\-\\?:\\(\\)\\.,'\\+\\{\\}]+ *)+",
4738               "Hello World this is a test this is a test this is a test \u4e00 ",
4739               false },
4740             { " *([a-z0-9]+ *)+",
4741               "hello world this is a test this is a test this is a test A",
4742               false },
4743             // 4771934 [FIXED] #5013651?
4744             { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4745               "abc@efg.abc,efg@abc.abc,abc@xyz.mno;abc@sdfsd.com",
4746               true },
4747             // 4866249 [FIXED] 
4748             { "<\\s*" + "(meta|META)" + "(\\s|[^>])+" + "(CHARSET|charset)=" + "(\\s|[^>])+>",
4749               "<META http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-5\">",
4750               true }, 
4751             { "^(\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,4})+[,;]?)+$",
4752               "abc@efg.abc,efg@abc.abc,abc@xyz.mno;sdfsd.com",
4753               false },
4754             // 6345469
4755             { "((<[^>]+>)?(((\\s)?)*(\\&nbsp;)?)*((\\s)?)*)+",
4756               "&nbsp;&nbsp; < br/> &nbsp; < / p> <p> <html> <adfasfdasdf>&nbsp; </p>",
4757               true }, // --> matched
4758             { "((<[^>]+>)?(((\\s)?)*(\\&nbsp;)?)*((\\s)?)*)+",
4759               "&nbsp;&nbsp; < br/> &nbsp; < / p> <p> <html> <adfasfdasdf>&nbsp; p </p>",
4760               false },
4761             // 5026912
4762             { "^\\s*" + "(\\w|\\d|[\\xC0-\\xFF]|/)+" + "\\s+|$",
4763               "156580451111112225588087755221111111566969655555555",
4764               false},
4765             // 6988218
4766             { "^([+-]?((0[xX](\\p{XDigit}+))|(((\\p{Digit}+)(\\.)?((\\p{Digit}+)?)([eE][+-]?(\\p{Digit}+))?)|(\\.((\\p{Digit}+))([eE][+-]?(\\p{Digit}+))?)))|[n|N]?'([^']*(?:'')*[^']*)*')",
4767               "'%)) order by ANGEBOT.ID",
4768               false},    // find
4769             // 6693451
4770             { "^(\\s*foo\\s*)*$", 
4771               "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo",
4772               true },
4773             { "^(\\s*foo\\s*)*$", 
4774               "foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo fo",
4775               false
4776             },
4777             // 7006761
4778             { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_", true},
4779             { "(([0-9A-Z]+)([_]?+)*)*", "FOOOOO_BAAAR_FOOOOOOOOO_BA_ ", false},
4780             // 8140212
4781             { "(?<before>.*)\\{(?<reflection>\\w+):(?<innerMethod>\\w+(\\.?\\w+(\\(((?<args>(('[^']*')|((/|\\w)+))(,(('[^']*')|((/|\\w)+)))*))?\\))?)*)\\}(?<after>.*)",
4782               "{CeGlobal:getSodCutoff.getGui.getAmqp.getSimpleModeEnabled()",
4783               false
4784             },
4785             { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", true},
4786             { "^(a+)+$", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!", false},
4787 
4788             { "(x+)*y",  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true },
4789             { "(x+)*y",  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4790 
4791             { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy", true},
4792             { "(x+x+)+y", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz", false},
4793 
4794             { "(([0-9A-Z]+)([_]?+)*)*", "--------------------------------------", false},
4795 
4796             /* not fixed
4797             //8132141   --->    second level exponential backtracking
4798             { "(h|h|ih(((i|a|c|c|a|i|i|j|b|a|i|b|a|a|j))+h)ahbfhba|c|i)*",
4799               "hchcchicihcchciiicichhcichcihcchiihichiciiiihhcchicchhcihchcihiihciichhccciccichcichiihcchcihhicchcciicchcccihiiihhihihihichicihhcciccchihhhcchichchciihiicihciihcccciciccicciiiiiiiiicihhhiiiihchccchchhhhiiihchihcccchhhiiiiiiiicicichicihcciciihichhhhchihciiihhiccccccciciihhichiccchhicchicihihccichicciihcichccihhiciccccccccichhhhihihhcchchihihiihhihihihicichihiiiihhhhihhhchhichiicihhiiiiihchccccchichci" },
4800             */
4801         };
4802 
4803         for (Object[] pm : patternMatchers) {
4804             String p = (String)pm[0];
4805             String s = (String)pm[1];
4806             boolean r = (Boolean)pm[2];
4807             if (r != Pattern.compile(p).matcher(s).matches()) {
4808                 failCount++;
4809             }
4810         }
4811     }
4812 }