test/java/util/regex/RegExTest.java

Print this page




  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  22  * CA 95054 USA or visit www.sun.com if you need additional information or
  23  * have any questions.
  24  */
  25 
  26 /**
  27  * @test
  28  * @summary tests RegExp framework
  29  * @author Mike McCloskey
  30  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
  31  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
  32  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  33  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  34  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  35  * 6350801 6676425 6878475 6919132 6931676
  36  */
  37 
  38 import java.util.regex.*;
  39 import java.util.Random;
  40 import java.io.*;
  41 import java.util.*;
  42 import java.nio.CharBuffer;
  43 
  44 /**
  45  * This is a test class created to check the operation of
  46  * the Pattern and Matcher classes.
  47  */
  48 public class RegExTest {
  49 
  50     private static Random generator = new Random();
  51     private static boolean failure = false;
  52     private static int failCount = 0;
  53 
  54     /**
  55      * Main to interpret arguments and run several tests.


 118         // This RFE rejected in Tiger numOccurrencesTest();
 119         javaCharClassTest();
 120         nonCaptureRepetitionTest();
 121         notCapturedGroupCurlyMatchTest();
 122         escapedSegmentTest();
 123         literalPatternTest();
 124         literalReplacementTest();
 125         regionTest();
 126         toStringTest();
 127         negatedCharClassTest();
 128         findFromTest();
 129         boundsTest();
 130         unicodeWordBoundsTest();
 131         caretAtEndTest();
 132         wordSearchTest();
 133         hitEndTest();
 134         toMatchResultTest();
 135         surrogatesInClassTest();
 136         namedGroupCaptureTest();
 137         nonBmpClassComplementTest();
 138 
 139         if (failure)
 140             throw new RuntimeException("Failure in the RE handling.");
 141         else
 142             System.err.println("OKAY: All tests passed.");
 143     }
 144 
 145     // Utility functions
 146 
 147     private static String getRandomAlphaString(int length) {
 148         StringBuffer buf = new StringBuffer(length);
 149         for (int i=0; i<length; i++) {
 150             char randChar = (char)(97 + generator.nextInt(26));
 151             buf.append(randChar);
 152         }
 153         return buf.toString();
 154     }
 155 
 156     private static void check(Matcher m, String expected) {
 157         m.find();
 158         if (!m.group().equals(expected))


3498                           "zzzDogzzzDogzzz");
3499 
3500         // backref in Matcher & String
3501         if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
3502             !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
3503             failCount++;
3504 
3505         // negative
3506         checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
3507         checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
3508         checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
3509         checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
3510         checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
3511         checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3512                           "gnameX");
3513         checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3514                           null);
3515         report("NamedGroupCapture");
3516     }
3517 
3518     // This is for bug 6919132
3519     private static void nonBmpClassComplementTest() throws Exception {
3520         Pattern p = Pattern.compile("\\P{Lu}");
3521         Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3522         if (m.find() && m.start() == 1)
3523             failCount++;
3524 
3525         // from a unicode category
3526         p = Pattern.compile("\\P{Lu}");
3527         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3528         if (m.find())
3529             failCount++;
3530         if (!m.hitEnd())
3531             failCount++;
3532 
3533         // block
3534         p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
3535         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3536         if (m.find() && m.start() == 1)
3537             failCount++;
3538 
3539         report("NonBmpClassComplement");
3540     }
3541 











































































3542 }


  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
  22  * CA 95054 USA or visit www.sun.com if you need additional information or
  23  * have any questions.
  24  */
  25 
  26 /**
  27  * @test
  28  * @summary tests RegExp framework
  29  * @author Mike McCloskey
  30  * @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
  31  * 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
  32  * 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
  33  * 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
  34  * 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
  35  * 6350801 6676425 6878475 6919132 6931676 6948903
  36  */
  37 
  38 import java.util.regex.*;
  39 import java.util.Random;
  40 import java.io.*;
  41 import java.util.*;
  42 import java.nio.CharBuffer;
  43 
  44 /**
  45  * This is a test class created to check the operation of
  46  * the Pattern and Matcher classes.
  47  */
  48 public class RegExTest {
  49 
  50     private static Random generator = new Random();
  51     private static boolean failure = false;
  52     private static int failCount = 0;
  53 
  54     /**
  55      * Main to interpret arguments and run several tests.


 118         // This RFE rejected in Tiger numOccurrencesTest();
 119         javaCharClassTest();
 120         nonCaptureRepetitionTest();
 121         notCapturedGroupCurlyMatchTest();
 122         escapedSegmentTest();
 123         literalPatternTest();
 124         literalReplacementTest();
 125         regionTest();
 126         toStringTest();
 127         negatedCharClassTest();
 128         findFromTest();
 129         boundsTest();
 130         unicodeWordBoundsTest();
 131         caretAtEndTest();
 132         wordSearchTest();
 133         hitEndTest();
 134         toMatchResultTest();
 135         surrogatesInClassTest();
 136         namedGroupCaptureTest();
 137         nonBmpClassComplementTest();
 138         unicodePropertiesTest();
 139         if (failure)
 140             throw new RuntimeException("Failure in the RE handling.");
 141         else
 142             System.err.println("OKAY: All tests passed.");
 143     }
 144 
 145     // Utility functions
 146 
 147     private static String getRandomAlphaString(int length) {
 148         StringBuffer buf = new StringBuffer(length);
 149         for (int i=0; i<length; i++) {
 150             char randChar = (char)(97 + generator.nextInt(26));
 151             buf.append(randChar);
 152         }
 153         return buf.toString();
 154     }
 155 
 156     private static void check(Matcher m, String expected) {
 157         m.find();
 158         if (!m.group().equals(expected))


3498                           "zzzDogzzzDogzzz");
3499 
3500         // backref in Matcher & String
3501         if (!"abcdefghij".replaceFirst("cd(?<gn>ef)gh", "${gn}").equals("abefij") ||
3502             !"abbbcbdbefgh".replaceAll("(?<gn>[a-e])b", "${gn}").equals("abcdefgh"))
3503             failCount++;
3504 
3505         // negative
3506         checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
3507         checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
3508         checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
3509         checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
3510         checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
3511         checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3512                           "gnameX");
3513         checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
3514                           null);
3515         report("NamedGroupCapture");
3516     }
3517 
3518     // This is for bug 6919132
3519     private static void nonBmpClassComplementTest() throws Exception {
3520         Pattern p = Pattern.compile("\\P{Lu}");
3521         Matcher m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3522         if (m.find() && m.start() == 1)
3523             failCount++;
3524 
3525         // from a unicode category
3526         p = Pattern.compile("\\P{Lu}");
3527         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3528         if (m.find())
3529             failCount++;
3530         if (!m.hitEnd())
3531             failCount++;
3532 
3533         // block
3534         p = Pattern.compile("\\P{InMathematicalAlphanumericSymbols}");
3535         m = p.matcher(new String(new int[] {0x1d400}, 0, 1));
3536         if (m.find() && m.start() == 1)
3537             failCount++;
3538 
3539         report("NonBmpClassComplement");
3540     }
3541 
3542     private static void unicodePropertiesTest() throws Exception {
3543         // different forms
3544         if (!Pattern.compile("\\p{IsLu}").matcher("A").matches() ||
3545             !Pattern.compile("\\p{Lu}").matcher("A").matches() ||
3546             !Pattern.compile("\\p{gc=Lu}").matcher("A").matches() ||
3547             !Pattern.compile("\\p{general_category=Lu}").matcher("A").matches() ||
3548             !Pattern.compile("\\p{IsLatin}").matcher("B").matches() ||
3549             !Pattern.compile("\\p{sc=Latin}").matcher("B").matches() ||
3550             !Pattern.compile("\\p{script=Latin}").matcher("B").matches() ||
3551             !Pattern.compile("\\p{InBasicLatin}").matcher("c").matches() ||
3552             !Pattern.compile("\\p{blk=BasicLatin}").matcher("c").matches() ||
3553             !Pattern.compile("\\p{block=BasicLatin}").matcher("c").matches())
3554             failCount++;
3555 
3556         Matcher common  = Pattern.compile("\\p{script=Common}").matcher("");
3557         Matcher unknown = Pattern.compile("\\p{IsUnknown}").matcher("");
3558         Matcher lastSM  = common;
3559         Character.UnicodeScript lastScript = Character.UnicodeScript.of(0);
3560 
3561         Matcher latin  = Pattern.compile("\\p{block=basic_latin}").matcher("");
3562         Matcher greek  = Pattern.compile("\\p{InGreek}").matcher("");
3563         Matcher lastBM = latin;
3564         Character.UnicodeBlock lastBlock = Character.UnicodeBlock.of(0);
3565 
3566         for (int cp = 1; cp < Character.MAX_CODE_POINT; cp++) {
3567             if (cp >= 0x30000 && (cp & 0x70) == 0){
3568                 continue;  // only pick couple code points, they are the same
3569             }
3570 
3571             // Unicode Script
3572             Character.UnicodeScript script = Character.UnicodeScript.of(cp);
3573             Matcher m;
3574             String str = new String(Character.toChars(cp));
3575             if (script == lastScript) {
3576                  m = lastSM;
3577                  m.reset(str);
3578             } else {
3579                  m  = Pattern.compile("\\p{Is" + script.name() + "}").matcher(str);
3580             }
3581             if (!m.matches()) {
3582                 failCount++;
3583             }
3584             Matcher other = (script == Character.UnicodeScript.COMMON)? unknown : common;
3585             other.reset(str);
3586             if (other.matches()) {
3587                 failCount++;
3588             }
3589             lastSM = m;
3590             lastScript = script;
3591 
3592             // Unicode Block
3593             Character.UnicodeBlock block = Character.UnicodeBlock.of(cp);
3594             if (block == null) {
3595                 //System.out.printf("Not a Block: cp=%x%n", cp);
3596                 continue;
3597             }
3598             if (block == lastBlock) {
3599                  m = lastBM;
3600                  m.reset(str);
3601             } else {
3602                  m  = Pattern.compile("\\p{block=" + block.toString() + "}").matcher(str);
3603             }
3604             if (!m.matches()) {
3605                 failCount++;
3606             }
3607             other = (block == Character.UnicodeBlock.BASIC_LATIN)? greek : latin;
3608             other.reset(str);
3609             if (other.matches()) {
3610                 failCount++;
3611             }
3612             lastBM = m;
3613             lastBlock = block;
3614         }
3615         report("unicodeProperties");
3616     }
3617 }