1 // 2 // Copyright (c) 1999, 2009, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 // This file contains test cases for regular expressions. 25 // A test case consists of three lines: 26 // The first line is a pattern used in the test 27 // The second line is the input to search for the pattern in 28 // The third line is a concatenation of the match, the number of groups, 29 // and the contents of the first four subexpressions. 30 // Empty lines and lines beginning with comment slashes are ignored. 
31 // 32 // Test unsetting of backed off groups 33 ^(a)?a 34 a 35 true a 1 36 37 ^(aa(bb)?)+$ 38 aabbaa 39 true aabbaa 2 aa bb 40 41 ((a|b)?b)+ 42 b 43 true b 2 b 44 45 (aaa)?aaa 46 aaa 47 true aaa 1 48 49 ^(a(b)?)+$ 50 aba 51 true aba 2 a b 52 53 ^(a(b(c)?)?)?abc 54 abc 55 true abc 3 56 57 ^(a(b(c))).* 58 abc 59 true abc 3 abc bc c 60 61 // use of x modifier 62 abc(?x)blah 63 abcblah 64 true abcblah 0 65 66 abc(?x) blah 67 abcblah 68 true abcblah 0 69 70 abc(?x) blah blech 71 abcblahblech 72 true abcblahblech 0 73 74 abc(?x) blah # ignore comment 75 abcblah 76 true abcblah 0 77 78 // Simple alternation 79 a|b 80 a 81 true a 0 82 83 a|b 84 z 85 false 0 86 87 a|b 88 b 89 true b 0 90 91 a|b|cd 92 cd 93 true cd 0 94 95 a|ad 96 ad 97 true a 0 98 99 z(a|ac)b 100 zacb 101 true zacb 1 ac 102 103 // Simple char class 104 [abc]+ 105 ababab 106 true ababab 0 107 108 [abc]+ 109 defg 110 false 0 111 112 [abc]+[def]+[ghi]+ 113 zzzaaddggzzz 114 true aaddgg 0 115 116 // Range char class 117 [a-g]+ 118 zzzggg 119 true ggg 0 120 121 [a-g]+ 122 mmm 123 false 0 124 125 [a-]+ 126 za-9z 127 true a- 0 128 129 [a-\\u4444]+ 130 za-9z 131 true za 0 132 133 // Negated char class 134 [^abc]+ 135 ababab 136 false 0 137 138 [^abc]+ 139 aaabbbcccdefg 140 true defg 0 141 142 // Negation with nested char class and intersection 143 [^[c]] 144 c 145 false 0 146 147 [^[a-z]] 148 e 149 false 0 150 151 [^[a-z][A-Z]] 152 E 153 false 0 154 155 [^a-d[0-9][m-p]] 156 e 157 true e 0 158 159 [^a-d[0-9][m-p]] 160 8 161 false 0 162 163 [^[a-c]&&[d-f]] 164 z 165 true z 0 166 167 [^a-c&&d-f] 168 a 169 true a 0 170 171 [^a-m&&m-z] 172 m 173 false 0 174 175 [^a-m&&m-z&&a-c] 176 m 177 true m 0 178 179 [^a-cd-f&&[d-f]] 180 c 181 true c 0 182 183 [^[a-c][d-f]&&abc] 184 a 185 false 0 186 187 [^[a-c][d-f]&&abc] 188 d 189 true d 0 190 191 [^[a-c][d-f]&&abc[def]] 192 a 193 false 0 194 195 [^[a-c][d-f]&&abc[def]] 196 e 197 false 0 198 199 [^[a-c]&&[b-d]&&[c-e]] 200 a 201 true a 0 202 203 [^[a-c]&&[b-d]&&[c-e]] 204 c 205 
false 0 206 207 // Making sure a ^ not in first position matches literal ^ 208 [abc^b] 209 b 210 true b 0 211 212 [abc^b] 213 ^ 214 true ^ 0 215 216 // Class union and intersection 217 [abc[def]] 218 b 219 true b 0 220 221 [abc[def]] 222 e 223 true e 0 224 225 [a-d[0-9][m-p]] 226 a 227 true a 0 228 229 [a-d[0-9][m-p]] 230 o 231 true o 0 232 233 [a-d[0-9][m-p]] 234 4 235 true 4 0 236 237 [a-d[0-9][m-p]] 238 e 239 false 0 240 241 [a-d[0-9][m-p]] 242 u 243 false 0 244 245 [[a-d][0-9][m-p]] 246 b 247 true b 0 248 249 [[a-d][0-9][m-p]] 250 z 251 false 0 252 253 [a-c[d-f[g-i]]] 254 a 255 true a 0 256 257 [a-c[d-f[g-i]]] 258 e 259 true e 0 260 261 [a-c[d-f[g-i]]] 262 h 263 true h 0 264 265 [a-c[d-f[g-i]]] 266 m 267 false 0 268 269 [a-c[d-f[g-i]]m] 270 m 271 true m 0 272 273 [abc[def]ghi] 274 a 275 true a 0 276 277 [abc[def]ghi] 278 d 279 true d 0 280 281 [abc[def]ghi] 282 h 283 true h 0 284 285 [abc[def]ghi] 286 w 287 false 0 288 289 [a-c&&[d-f]] 290 a 291 false 0 292 293 [a-c&&[d-f]] 294 e 295 false 0 296 297 [a-c&&[d-f]] 298 z 299 false 0 300 301 [[a-c]&&[d-f]] 302 a 303 false 0 304 305 [[a-c]&&[d-f]] 306 e 307 false 0 308 309 [[a-c]&&[d-f]] 310 z 311 false 0 312 313 [a-c&&d-f] 314 a 315 false 0 316 317 [a-m&&m-z] 318 m 319 true m 0 320 321 [a-m&&m-z&&a-c] 322 m 323 false 0 324 325 [a-m&&m-z&&a-z] 326 m 327 true m 0 328 329 [[a-m]&&[m-z]] 330 a 331 false 0 332 333 [[a-m]&&[m-z]] 334 m 335 true m 0 336 337 [[a-m]&&[m-z]] 338 z 339 false 0 340 341 [[a-m]&&[^a-c]] 342 a 343 false 0 344 345 [[a-m]&&[^a-c]] 346 d 347 true d 0 348 349 [a-m&&[^a-c]] 350 a 351 false 0 352 353 [a-m&&[^a-c]] 354 d 355 true d 0 356 357 [a-cd-f&&[d-f]] 358 a 359 false 0 360 361 [a-cd-f&&[d-f]] 362 e 363 true e 0 364 365 [[a-c]&&d-fa-c] 366 a 367 true a 0 368 369 [[a-c]&&[d-f][a-c]] 370 a 371 true a 0 372 373 [[a-c][d-f]&&abc] 374 a 375 true a 0 376 377 [[a-c][d-f]&&abc[def]] 378 e 379 true e 0 380 381 [[a-c]&&[b-d]&&[c-e]] 382 a 383 false 0 384 385 [[a-c]&&[b-d]&&[c-e]] 386 c 387 true c 0 388 389 
[[a-c]&&[b-d][c-e]&&[u-z]] 390 c 391 false 0 392 393 [abc[^bcd]] 394 a 395 true a 0 396 397 [abc[^bcd]] 398 d 399 false 0 400 401 [a-c&&a-d&&a-eghi] 402 b 403 true b 0 404 405 [a-c&&a-d&&a-eghi] 406 g 407 false 0 408 409 [[a[b]]&&[b[a]]] 410 a 411 true a 0 412 413 [[a]&&[b][c][a]&&[^d]] 414 a 415 true a 0 416 417 [[a]&&[b][c][a]&&[^d]] 418 d 419 false 0 420 421 [[[a-d]&&[c-f]]] 422 a 423 false 0 424 425 [[[a-d]&&[c-f]]] 426 c 427 true c 0 428 429 [[[a-d]&&[c-f]]&&[c]] 430 c 431 true c 0 432 433 [[[a-d]&&[c-f]]&&[c]&&c] 434 c 435 true c 0 436 437 [[[a-d]&&[c-f]]&&[c]&&c&&c] 438 c 439 true c 0 440 441 [[[a-d]&&[c-f]]&&[c]&&c&&[cde]] 442 c 443 true c 0 444 445 [z[abc&&bcd]] 446 c 447 true c 0 448 449 [z[abc&&bcd]&&[u-z]] 450 z 451 true z 0 452 453 [x[abc&&bcd[z]]&&[u-z]] 454 z 455 false 0 456 457 [x[[wz]abc&&bcd[z]]&&[u-z]] 458 z 459 true z 0 460 461 [[abc]&&[def]abc] 462 a 463 true a 0 464 465 [[abc]&&[def]xyz[abc]] 466 a 467 true a 0 468 469 \pL 470 a 471 true a 0 472 473 \pL 474 7 475 false 0 476 477 \p{L} 478 a 479 true a 0 480 481 \p{LC} 482 a 483 true a 0 484 485 \p{LC} 486 A 487 true A 0 488 489 \p{IsL} 490 a 491 true a 0 492 493 \p{IsLC} 494 a 495 true a 0 496 497 \p{IsLC} 498 A 499 true A 0 500 501 \p{IsLC} 502 9 503 false 0 504 505 \P{IsLC} 506 9 507 true 9 0 508 509 // Guillemet left is initial quote punctuation 510 \p{Pi} 511 \u00ab 512 true \u00ab 0 513 514 \P{Pi} 515 \u00ac 516 true \u00ac 0 517 518 // Guillemet right is final quote punctuation 519 \p{IsPf} 520 \u00bb 521 true \u00bb 0 522 523 \p{P} 524 \u00bb 525 true \u00bb 0 526 527 \p{P}+ 528 \u00bb 529 true \u00bb 0 530 531 \P{IsPf} 532 \u00bc 533 true \u00bc 0 534 535 \P{IsP} 536 \u00bc 537 true \u00bc 0 538 539 \p{L1} 540 \u00bc 541 true \u00bc 0 542 543 \p{L1}+ 544 \u00bc 545 true \u00bc 0 546 547 \p{L1} 548 \u02bc 549 false 0 550 551 \p{ASCII} 552 a 553 true a 0 554 555 \p{IsASCII} 556 a 557 true a 0 558 559 \p{IsASCII} 560 \u0370 561 false 0 562 563 \pLbc 564 abc 565 true abc 0 566 567 
a[r\p{InGreek}]c 568 a\u0370c 569 true a\u0370c 0 570 571 a\p{InGreek} 572 a\u0370 573 true a\u0370 0 574 575 a\P{InGreek} 576 a\u0370 577 false 0 578 579 a\P{InGreek} 580 ab 581 true ab 0 582 583 a{^InGreek} 584 - 585 error 586 587 a\p{^InGreek} 588 - 589 error 590 591 a\P{^InGreek} 592 - 593 error 594 595 a\p{InGreek} 596 a\u0370 597 true a\u0370 0 598 599 a[\p{InGreek}]c 600 a\u0370c 601 true a\u0370c 0 602 603 a[\P{InGreek}]c 604 a\u0370c 605 false 0 606 607 a[\P{InGreek}]c 608 abc 609 true abc 0 610 611 a[{^InGreek}]c 612 anc 613 true anc 0 614 615 a[{^InGreek}]c 616 azc 617 false 0 618 619 a[\p{^InGreek}]c 620 - 621 error 622 623 a[\P{^InGreek}]c 624 - 625 error 626 627 a[\p{InGreek}] 628 a\u0370 629 true a\u0370 0 630 631 a[r\p{InGreek}]c 632 arc 633 true arc 0 634 635 a[\p{InGreek}r]c 636 arc 637 true arc 0 638 639 a[r\p{InGreek}]c 640 arc 641 true arc 0 642 643 a[^\p{InGreek}]c 644 a\u0370c 645 false 0 646 647 a[^\P{InGreek}]c 648 a\u0370c 649 true a\u0370c 0 650 651 a[\p{InGreek}&&[^\u0370]]c 652 a\u0370c 653 false 0 654 655 // Test the dot metacharacter 656 a.c.+ 657 a#c%& 658 true a#c%& 0 659 660 ab. 661 ab\n 662 false 0 663 664 (?s)ab. 
665 ab\n 666 true ab\n 0 667 668 a[\p{L}&&[\P{InGreek}]]c 669 a\u6000c 670 true a\u6000c 0 671 672 a[\p{L}&&[\P{InGreek}]]c 673 arc 674 true arc 0 675 676 a[\p{L}&&[\P{InGreek}]]c 677 a\u0370c 678 false 0 679 680 a\p{InGreek}c 681 a\u0370c 682 true a\u0370c 0 683 684 a\p{Sc} 685 a$ 686 true a$ 0 687 688 // Test the word char escape sequence 689 ab\wc 690 abcc 691 true abcc 0 692 693 \W\w\W 694 #r# 695 true #r# 0 696 697 \W\w\W 698 rrrr#ggg 699 false 0 700 701 abc[\w] 702 abcd 703 true abcd 0 704 705 abc[\sdef]* 706 abc def 707 true abc def 0 708 709 abc[\sy-z]* 710 abc y z 711 true abc y z 0 712 713 abc[a-d\sm-p]* 714 abcaa mn p 715 true abcaa mn p 0 716 717 // Test the whitespace escape sequence 718 ab\sc 719 ab c 720 true ab c 0 721 722 \s\s\s 723 blah err 724 false 0 725 726 \S\S\s 727 blah err 728 true ah 0 729 730 // Test the digit escape sequence 731 ab\dc 732 ab9c 733 true ab9c 0 734 735 \d\d\d 736 blah45 737 false 0 738 739 // Test the caret metacharacter 740 ^abc 741 abcdef 742 true abc 0 743 744 ^abc 745 bcdabc 746 false 0 747 748 // Greedy ? metacharacter 749 a?b 750 aaaab 751 true ab 0 752 753 a?b 754 b 755 true b 0 756 757 a?b 758 aaaccc 759 false 0 760 761 .?b 762 aaaab 763 true ab 0 764 765 // Reluctant ? metacharacter 766 a??b 767 aaaab 768 true ab 0 769 770 a??b 771 b 772 true b 0 773 774 a??b 775 aaaccc 776 false 0 777 778 .??b 779 aaaab 780 true ab 0 781 782 // Possessive ? 
metacharacter 783 a?+b 784 aaaab 785 true ab 0 786 787 a?+b 788 b 789 true b 0 790 791 a?+b 792 aaaccc 793 false 0 794 795 .?+b 796 aaaab 797 true ab 0 798 799 // Greedy + metacharacter 800 a+b 801 aaaab 802 true aaaab 0 803 804 a+b 805 b 806 false 0 807 808 a+b 809 aaaccc 810 false 0 811 812 .+b 813 aaaab 814 true aaaab 0 815 816 // Reluctant + metacharacter 817 a+?b 818 aaaab 819 true aaaab 0 820 821 a+?b 822 b 823 false 0 824 825 a+?b 826 aaaccc 827 false 0 828 829 .+?b 830 aaaab 831 true aaaab 0 832 833 // Possessive + metacharacter 834 a++b 835 aaaab 836 true aaaab 0 837 838 a++b 839 b 840 false 0 841 842 a++b 843 aaaccc 844 false 0 845 846 .++b 847 aaaab 848 false 0 849 850 // Greedy Repetition 851 a{2,3} 852 a 853 false 0 854 855 a{2,3} 856 aa 857 true aa 0 858 859 a{2,3} 860 aaa 861 true aaa 0 862 863 a{2,3} 864 aaaa 865 true aaa 0 866 867 a{3,} 868 zzzaaaazzz 869 true aaaa 0 870 871 a{3,} 872 zzzaazzz 873 false 0 874 875 // Reluctant Repetition 876 a{2,3}? 877 a 878 false 0 879 880 a{2,3}? 881 aa 882 true aa 0 883 884 a{2,3}? 885 aaa 886 true aa 0 887 888 a{2,3}? 
889 aaaa 890 true aa 0 891 892 // Zero width Positive lookahead 893 abc(?=d) 894 zzzabcd 895 true abc 0 896 897 abc(?=d) 898 zzzabced 899 false 0 900 901 // Zero width Negative lookahead 902 abc(?!d) 903 zzabcd 904 false 0 905 906 abc(?!d) 907 zzabced 908 true abc 0 909 910 // Zero width Positive lookbehind 911 \w(?<=a) 912 ###abc### 913 true a 0 914 915 \w(?<=a) 916 ###ert### 917 false 0 918 919 // Zero width Negative lookbehind 920 (?<!a)\w 921 ###abc### 922 true a 0 923 924 (?<!a)c 925 bc 926 true c 0 927 928 (?<!a)c 929 ac 930 false 0 931 932 // Nondeterministic group 933 (a+b)+ 934 ababab 935 true ababab 1 ab 936 937 (a|b)+ 938 ccccd 939 false 1 940 941 // Deterministic group 942 (ab)+ 943 ababab 944 true ababab 1 ab 945 946 (ab)+ 947 accccd 948 false 1 949 950 (ab)* 951 ababab 952 true ababab 1 ab 953 954 (ab)(cd*) 955 zzzabczzz 956 true abc 2 ab c 957 958 abc(d)*abc 959 abcdddddabc 960 true abcdddddabc 1 d 961 962 // Escaped metacharacter 963 \* 964 * 965 true * 0 966 967 \\ 968 \ 969 true \ 0 970 971 \\ 972 \\\\ 973 true \ 0 974 975 // Back references 976 (a*)bc\1 977 zzzaabcaazzz 978 true aabcaa 1 aa 979 980 (a*)bc\1 981 zzzaabcazzz 982 true abca 1 a 983 984 (gt*)(dde)*(yu)\1\3(vv) 985 zzzgttddeddeyugttyuvvzzz 986 true gttddeddeyugttyuvv 4 gtt dde yu vv 987 988 // Greedy * metacharacter 989 a*b 990 aaaab 991 true aaaab 0 992 993 a*b 994 b 995 true b 0 996 997 a*b 998 aaaccc 999 false 0 1000 1001 .*b 1002 aaaab 1003 true aaaab 0 1004 1005 // Reluctant * metacharacter 1006 a*?b 1007 aaaab 1008 true aaaab 0 1009 1010 a*?b 1011 b 1012 true b 0 1013 1014 a*?b 1015 aaaccc 1016 false 0 1017 1018 .*?b 1019 aaaab 1020 true aaaab 0 1021 1022 // Possessive * metacharacter 1023 a*+b 1024 aaaab 1025 true aaaab 0 1026 1027 a*+b 1028 b 1029 true b 0 1030 1031 a*+b 1032 aaaccc 1033 false 0 1034 1035 .*+b 1036 aaaab 1037 false 0 1038 1039 // Case insensitivity 1040 (?i)foobar 1041 fOobAr 1042 true fOobAr 0 1043 1044 f(?i)oobar 1045 fOobAr 1046 true fOobAr 0 1047 1048 
foo(?i)bar 1049 fOobAr 1050 false 0 1051 1052 (?i)foo[bar]+ 1053 foObAr 1054 true foObAr 0 1055 1056 (?i)foo[a-r]+ 1057 foObAr 1058 true foObAr 0 1059 1060 // Disable metacharacters- test both length <=3 and >3 1061 // So that the BM optimization is part of test 1062 \Q***\Eabc 1063 ***abc 1064 true ***abc 0 1065 1066 bl\Q***\Eabc 1067 bl***abc 1068 true bl***abc 0 1069 1070 \Q***abc 1071 ***abc 1072 true ***abc 0 1073 1074 blah\Q***\Eabc 1075 blah***abc 1076 true blah***abc 0 1077 1078 \Q***abc 1079 ***abc 1080 true ***abc 0 1081 1082 \Q*ab 1083 *ab 1084 true *ab 0 1085 1086 blah\Q***abc 1087 blah***abc 1088 true blah***abc 0 1089 1090 bla\Q***abc 1091 bla***abc 1092 true bla***abc 0 1093 1094 // Escapes in char classes 1095 [ab\Qdef\E] 1096 d 1097 true d 0 1098 1099 [ab\Q[\E] 1100 [ 1101 true [ 0 1102 1103 [\Q]\E] 1104 ] 1105 true ] 0 1106 1107 [\Q\\E] 1108 \ 1109 true \ 0 1110 1111 [\Q(\E] 1112 ( 1113 true ( 0 1114 1115 [\n-#] 1116 ! 1117 true ! 0 1118 1119 [\n-#] 1120 - 1121 false 0 1122 1123 [\w-#] 1124 ! 1125 false 0 1126 1127 [\w-#] 1128 a 1129 true a 0 1130 1131 [\w-#] 1132 - 1133 true - 0 1134 1135 [\w-#] 1136 # 1137 true # 0 1138 1139 [\043]+ 1140 blahblah#blech 1141 true # 0 1142 1143 [\042-\044]+ 1144 blahblah#blech 1145 true # 0 1146 1147 [\u1234-\u1236] 1148 blahblah\u1235blech 1149 true \u1235 0 1150 1151 [^\043]* 1152 blahblah#blech 1153 true blahblah 0 1154 1155 (|f)?+ 1156 foo 1157 true 1