test/java/util/regex/RegExTest.java

Print this page




  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /**
  27  * @test
  28  * @summary tests RegExp framework
  29  * @author Mike McCloskey
  30  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
  31  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
  32  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  33  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  34  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  35  * 6350801 6676425 6878475 6919132 6931676 6948903 7014645
  36  */
  37 
  38 import java.util.regex.*;
  39 import java.util.Random;
  40 import java.io.*;
  41 import java.util.*;
  42 import java.nio.CharBuffer;
  43 
  44 /**
  45  * This is a test class created to check the operation of
  46  * the Pattern and Matcher classes.
  47  */
  48 public class RegExTest {
  49 
  50     private static Random generator = new Random();
  51     private static boolean failure = false;
  52     private static int failCount = 0;
  53 
  54     /**
  55      * Main to interpret arguments and run several tests.


 120         nonCaptureRepetitionTest();
 121         notCapturedGroupCurlyMatchTest();
 122         escapedSegmentTest();
 123         literalPatternTest();
 124         literalReplacementTest();
 125         regionTest();
 126         toStringTest();
 127         negatedCharClassTest();
 128         findFromTest();
 129         boundsTest();
 130         unicodeWordBoundsTest();
 131         caretAtEndTest();
 132         wordSearchTest();
 133         hitEndTest();
 134         toMatchResultTest();
 135         surrogatesInClassTest();
 136         namedGroupCaptureTest();
 137         nonBmpClassComplementTest();
 138         unicodePropertiesTest();
 139         unicodeHexNotationTest();

 140         if (failure)
 141             throw new RuntimeException("Failure in the RE handling.");
 142         else
 143             System.err.println("OKAY: All tests passed.");
 144     }
 145 
 146     // Utility functions
 147 
 148     private static String getRandomAlphaString(int length) {
 149         StringBuffer buf = new StringBuffer(length);
 150         for (int i=0; i<length; i++) {
 151             char randChar = (char)(97 + generator.nextInt(26));
 152             buf.append(randChar);
 153         }
 154         return buf.toString();
 155     }
 156 
 157     private static void check(Matcher m, String expected) {
 158         m.find();
 159         if (!m.group().equals(expected))


3640         check("^[\\x{DF3C}\\x{D800}]+$",   "\uDF3C\uD800", true);
3641 
3642         for (int cp = 0; cp <= 0x10FFFF; cp++) {
3643              String s = "A" + new String(Character.toChars(cp)) + "B";
3644              String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
3645                                              : String.format("\\u%04x\\u%04x",
3646                                                (int) Character.toChars(cp)[0],
3647                                                (int) Character.toChars(cp)[1]);
3648              String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
3649              if (!Pattern.matches("A" + hexUTF16 + "B", s))
3650                  failCount++;
3651              if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
3652                  failCount++;
3653              if (!Pattern.matches("A" + hexCodePoint + "B", s))
3654                  failCount++;
3655              if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
3656                  failCount++;
3657          }
3658          report("unicodeHexNotation");
3659      }













































































































































3660 }


  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 /**
  27  * @test
  28  * @summary tests RegExp framework
  29  * @author Mike McCloskey
  30  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
  31  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
  32  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  33  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  34  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  35  * 6350801 6676425 6878475 6919132 6931676 6948903 7014645 7039066
  36  */
  37 
  38 import java.util.regex.*;
  39 import java.util.Random;
  40 import java.io.*;
  41 import java.util.*;
  42 import java.nio.CharBuffer;
  43 
  44 /**
  45  * This is a test class created to check the operation of
  46  * the Pattern and Matcher classes.
  47  */
  48 public class RegExTest {
  49 
  50     private static Random generator = new Random();
  51     private static boolean failure = false;
  52     private static int failCount = 0;
  53 
  54     /**
  55      * Main to interpret arguments and run several tests.


 120         nonCaptureRepetitionTest();
 121         notCapturedGroupCurlyMatchTest();
 122         escapedSegmentTest();
 123         literalPatternTest();
 124         literalReplacementTest();
 125         regionTest();
 126         toStringTest();
 127         negatedCharClassTest();
 128         findFromTest();
 129         boundsTest();
 130         unicodeWordBoundsTest();
 131         caretAtEndTest();
 132         wordSearchTest();
 133         hitEndTest();
 134         toMatchResultTest();
 135         surrogatesInClassTest();
 136         namedGroupCaptureTest();
 137         nonBmpClassComplementTest();
 138         unicodePropertiesTest();
 139         unicodeHexNotationTest();
 140         unicodeClassesTest();
 141         if (failure)
 142             throw new RuntimeException("Failure in the RE handling.");
 143         else
 144             System.err.println("OKAY: All tests passed.");
 145     }
 146 
 147     // Utility functions
 148 
 149     private static String getRandomAlphaString(int length) {
 150         StringBuffer buf = new StringBuffer(length);
 151         for (int i=0; i<length; i++) {
 152             char randChar = (char)(97 + generator.nextInt(26));
 153             buf.append(randChar);
 154         }
 155         return buf.toString();
 156     }
 157 
 158     private static void check(Matcher m, String expected) {
 159         m.find();
 160         if (!m.group().equals(expected))


3641         check("^[\\x{DF3C}\\x{D800}]+$",   "\uDF3C\uD800", true);
3642 
3643         for (int cp = 0; cp <= 0x10FFFF; cp++) {
3644              String s = "A" + new String(Character.toChars(cp)) + "B";
3645              String hexUTF16 = (cp <= 0xFFFF)? String.format("\\u%04x", cp)
3646                                              : String.format("\\u%04x\\u%04x",
3647                                                (int) Character.toChars(cp)[0],
3648                                                (int) Character.toChars(cp)[1]);
3649              String hexCodePoint = "\\x{" + Integer.toHexString(cp) + "}";
3650              if (!Pattern.matches("A" + hexUTF16 + "B", s))
3651                  failCount++;
3652              if (!Pattern.matches("A[" + hexUTF16 + "]B", s))
3653                  failCount++;
3654              if (!Pattern.matches("A" + hexCodePoint + "B", s))
3655                  failCount++;
3656              if (!Pattern.matches("A[" + hexCodePoint + "]B", s))
3657                  failCount++;
3658          }
3659          report("unicodeHexNotation");
3660     }
3661 
3662     private static void unicodeClassesTest() throws Exception {
3663 
3664         Matcher lower  = Pattern.compile("\\p{Lower}").matcher("");
3665         Matcher upper  = Pattern.compile("\\p{Upper}").matcher("");
3666         Matcher ASCII  = Pattern.compile("\\p{ASCII}").matcher("");
3667         Matcher alpha  = Pattern.compile("\\p{Alpha}").matcher("");
3668         Matcher digit  = Pattern.compile("\\p{Digit}").matcher("");
3669         Matcher alnum  = Pattern.compile("\\p{Alnum}").matcher("");
3670         Matcher punct  = Pattern.compile("\\p{Punct}").matcher("");
3671         Matcher graph  = Pattern.compile("\\p{Graph}").matcher("");
3672         Matcher print  = Pattern.compile("\\p{Print}").matcher("");
3673         Matcher blank  = Pattern.compile("\\p{Blank}").matcher("");
3674         Matcher cntrl  = Pattern.compile("\\p{Cntrl}").matcher("");
3675         Matcher xdigit = Pattern.compile("\\p{XDigit}").matcher("");
3676         Matcher space  = Pattern.compile("\\p{Space}").matcher("");
3677         Matcher bound  = Pattern.compile("\\b").matcher("");
3678         Matcher word   = Pattern.compile("\\w++").matcher("");
3679         // UNICODE_CHARACTER_CLASS
3680         Matcher lowerU  = Pattern.compile("\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3681         Matcher upperU  = Pattern.compile("\\p{Upper}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3682         Matcher ASCIIU  = Pattern.compile("\\p{ASCII}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3683         Matcher alphaU  = Pattern.compile("\\p{Alpha}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3684         Matcher digitU  = Pattern.compile("\\p{Digit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3685         Matcher alnumU  = Pattern.compile("\\p{Alnum}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3686         Matcher punctU  = Pattern.compile("\\p{Punct}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3687         Matcher graphU  = Pattern.compile("\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3688         Matcher printU  = Pattern.compile("\\p{Print}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3689         Matcher blankU  = Pattern.compile("\\p{Blank}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3690         Matcher cntrlU  = Pattern.compile("\\p{Cntrl}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3691         Matcher xdigitU = Pattern.compile("\\p{XDigit}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3692         Matcher spaceU  = Pattern.compile("\\p{Space}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3693         Matcher boundU  = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3694         Matcher wordU   = Pattern.compile("\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3695         // embedded flag (?U)
3696         Matcher lowerEU  = Pattern.compile("(?U)\\p{Lower}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3697         Matcher graphEU  = Pattern.compile("(?U)\\p{Graph}", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3698         Matcher wordEU   = Pattern.compile("(?U)\\w", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3699 
3700         Matcher bwb    = Pattern.compile("\\b\\w\\b").matcher("");
3701         Matcher bwbU   = Pattern.compile("\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3702         Matcher bwbEU  = Pattern.compile("(?U)\\b\\w++\\b", Pattern.UNICODE_CHARACTER_CLASS).matcher("");
3703         // properties
3704         Matcher lowerP  = Pattern.compile("\\p{IsLowerCase}").matcher("");
3705         Matcher upperP  = Pattern.compile("\\p{IsUpperCase}").matcher("");
3706         Matcher titleP  = Pattern.compile("\\p{IsTitleCase}").matcher("");
3707         Matcher letterP = Pattern.compile("\\p{IsLetter}").matcher("");
3708         Matcher alphaP  = Pattern.compile("\\p{IsAlphabetic}").matcher("");
3709         Matcher ideogP  = Pattern.compile("\\p{IsIdeographic}").matcher("");
3710         Matcher cntrlP  = Pattern.compile("\\p{IsControl}").matcher("");
3711         Matcher spaceP  = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
3712         Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
3713         Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
3714 
3715         // javaMethod
3716         Matcher lowerJ  = Pattern.compile("\\p{javaLowerCase}").matcher("");
3717         Matcher upperJ  = Pattern.compile("\\p{javaUpperCase}").matcher("");
3718         Matcher alphaJ  = Pattern.compile("\\p{javaAlphabetic}").matcher("");
3719         Matcher ideogJ  = Pattern.compile("\\p{javaIdeographic}").matcher("");
3720 
3721         for (int cp = 1; cp < 0x30000; cp++) {
3722             String str = new String(Character.toChars(cp));
3723             int type = Character.getType(cp);
3724             if (// lower
3725                 POSIX_ASCII.isLower(cp)   != lower.reset(str).matches()  ||
3726                 Character.isLowerCase(cp) != lowerU.reset(str).matches() ||
3727                 Character.isLowerCase(cp) != lowerP.reset(str).matches() ||
3728                 Character.isLowerCase(cp) != lowerEU.reset(str).matches()||
3729                 Character.isLowerCase(cp) != lowerJ.reset(str).matches()||
3730                 // upper
3731                 POSIX_ASCII.isUpper(cp)   != upper.reset(str).matches()  ||
3732                 POSIX_Unicode.isUpper(cp) != upperU.reset(str).matches() ||
3733                 Character.isUpperCase(cp) != upperP.reset(str).matches() ||
3734                 Character.isUpperCase(cp) != upperJ.reset(str).matches() ||
3735                 // alpha
3736                 POSIX_ASCII.isAlpha(cp)   != alpha.reset(str).matches()  ||
3737                 POSIX_Unicode.isAlpha(cp) != alphaU.reset(str).matches() ||
3738                 Character.isAlphabetic(cp)!= alphaP.reset(str).matches() ||
3739                 Character.isAlphabetic(cp)!= alphaJ.reset(str).matches() ||
3740                 // digit
3741                 POSIX_ASCII.isDigit(cp)   != digit.reset(str).matches()  ||
3742                 Character.isDigit(cp)     != digitU.reset(str).matches() ||
3743                 // alnum
3744                 POSIX_ASCII.isAlnum(cp)   != alnum.reset(str).matches()  ||
3745                 POSIX_Unicode.isAlnum(cp) != alnumU.reset(str).matches() ||
3746                 // punct
3747                 POSIX_ASCII.isPunct(cp)   != punct.reset(str).matches()  ||
3748                 POSIX_Unicode.isPunct(cp) != punctU.reset(str).matches() ||
3749                 // graph
3750                 POSIX_ASCII.isGraph(cp)   != graph.reset(str).matches()  ||
3751                 POSIX_Unicode.isGraph(cp) != graphU.reset(str).matches() ||
3752                 POSIX_Unicode.isGraph(cp) != graphEU.reset(str).matches()||
3753                 // blank
3754                 POSIX_ASCII.isType(cp, POSIX_ASCII.BLANK)
3755                                           != blank.reset(str).matches()  ||
3756                 POSIX_Unicode.isBlank(cp) != blankU.reset(str).matches() ||
3757                 // print
3758                 POSIX_ASCII.isPrint(cp)   != print.reset(str).matches()  ||
3759                 POSIX_Unicode.isPrint(cp) != printU.reset(str).matches() ||
3760                 // cntrl
3761                 POSIX_ASCII.isCntrl(cp)   != cntrl.reset(str).matches()  ||
3762                 POSIX_Unicode.isCntrl(cp) != cntrlU.reset(str).matches() ||
3763                 (Character.CONTROL == type) != cntrlP.reset(str).matches() ||
3764                 // hexdigit
3765                 POSIX_ASCII.isHexDigit(cp)   != xdigit.reset(str).matches()  ||
3766                 POSIX_Unicode.isHexDigit(cp) != xdigitU.reset(str).matches() ||
3767                 // space
3768                 POSIX_ASCII.isSpace(cp)   != space.reset(str).matches()  ||
3769                 POSIX_Unicode.isSpace(cp) != spaceU.reset(str).matches() ||
3770                 POSIX_Unicode.isSpace(cp) != spaceP.reset(str).matches() ||
3771                 // word
3772                 POSIX_ASCII.isWord(cp)   != word.reset(str).matches()  ||
3773                 POSIX_Unicode.isWord(cp) != wordU.reset(str).matches() ||
3774                 POSIX_Unicode.isWord(cp) != wordEU.reset(str).matches()||
3775                 // bwordb
3776                 POSIX_ASCII.isWord(cp) != bwb.reset(str).matches() ||
3777                 POSIX_Unicode.isWord(cp) != bwbU.reset(str).matches() ||
3778                 // properties
3779                 Character.isTitleCase(cp) != titleP.reset(str).matches() ||
3780                 Character.isLetter(cp)    != letterP.reset(str).matches()||
3781                 Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
3782                 Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
3783                 (Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
3784                 POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches())
3785                 failCount++;
3786         }
3787 
3788         // bounds/word align
3789         twoFindIndexes(" \u0180sherman\u0400 ", bound, 1, 10);
3790         if (!bwbU.reset("\u0180sherman\u0400").matches())
3791             failCount++;
3792         twoFindIndexes(" \u0180sh\u0345erman\u0400 ", bound, 1, 11);
3793         if (!bwbU.reset("\u0180sh\u0345erman\u0400").matches())
3794             failCount++;
3795         twoFindIndexes(" \u0724\u0739\u0724 ", bound, 1, 4);
3796         if (!bwbU.reset("\u0724\u0739\u0724").matches())
3797             failCount++;
3798         if (!bwbEU.reset("\u0724\u0739\u0724").matches())
3799             failCount++;
3800         report("unicodePredefinedClasses");
3801     }
3802 }