1 // 2 // Copyright (c) 1999, 2009, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 // This file contains test cases for regular expressions. 25 // A test case consists of three lines: 26 // The first line is a pattern used in the test 27 // The second line is the input to search for the pattern in 28 // The third line is a concatenation of the match, the number of groups, 29 // and the contents of the first four subexpressions. 30 // Empty lines and lines beginning with comment slashes are ignored. 
31 // 32 // Test unsetting of backed off groups 33 ^(a)?a 34 a 35 true a 1 36 37 ^(aa(bb)?)+$ 38 aabbaa 39 true aabbaa 2 aa bb 40 41 ((a|b)?b)+ 42 b 43 true b 2 b 44 45 (aaa)?aaa 46 aaa 47 true aaa 1 48 49 ^(a(b)?)+$ 50 aba 51 true aba 2 a b 52 53 ^(a(b(c)?)?)?abc 54 abc 55 true abc 3 56 57 ^(a(b(c))).* 58 abc 59 true abc 3 abc bc c 60 61 // use of x modifier 62 abc(?x)blah 63 abcblah 64 true abcblah 0 65 66 abc(?x) blah 67 abcblah 68 true abcblah 0 69 70 abc(?x) blah blech 71 abcblahblech 72 true abcblahblech 0 73 74 abc(?x) blah # ignore comment 75 abcblah 76 true abcblah 0 77 78 // Simple alternation 79 a|b 80 a 81 true a 0 82 83 a|b 84 z 85 false 0 86 87 a|b 88 b 89 true b 0 90 91 a|b|cd 92 cd 93 true cd 0 94 95 a|ad 96 ad 97 true a 0 98 99 z(a|ac)b 100 zacb 101 true zacb 1 ac 102 103 // Simple char class 104 [abc]+ 105 ababab 106 true ababab 0 107 108 [abc]+ 109 defg 110 false 0 111 112 [abc]+[def]+[ghi]+ 113 zzzaaddggzzz 114 true aaddgg 0 115 116 // Range char class 117 [a-g]+ 118 zzzggg 119 true ggg 0 120 121 [a-g]+ 122 mmm 123 false 0 124 125 [a-]+ 126 za-9z 127 true a- 0 128 129 [a-\\u4444]+ 130 za-9z 131 true za 0 132 133 // Negated char class 134 [^abc]+ 135 ababab 136 false 0 137 138 [^abc]+ 139 aaabbbcccdefg 140 true defg 0 141 142 // Making sure a ^ not in first position matches literal ^ 143 [abc^b] 144 b 145 true b 0 146 147 [abc^b] 148 ^ 149 true ^ 0 150 151 // Class union and intersection 152 [abc[def]] 153 b 154 true b 0 155 156 [abc[def]] 157 e 158 true e 0 159 160 [a-d[0-9][m-p]] 161 a 162 true a 0 163 164 [a-d[0-9][m-p]] 165 o 166 true o 0 167 168 [a-d[0-9][m-p]] 169 4 170 true 4 0 171 172 [a-d[0-9][m-p]] 173 e 174 false 0 175 176 [a-d[0-9][m-p]] 177 u 178 false 0 179 180 [[a-d][0-9][m-p]] 181 b 182 true b 0 183 184 [[a-d][0-9][m-p]] 185 z 186 false 0 187 188 [a-c[d-f[g-i]]] 189 a 190 true a 0 191 192 [a-c[d-f[g-i]]] 193 e 194 true e 0 195 196 [a-c[d-f[g-i]]] 197 h 198 true h 0 199 200 [a-c[d-f[g-i]]] 201 m 202 false 0 203 204 [a-c[d-f[g-i]]m] 
205 m 206 true m 0 207 208 [abc[def]ghi] 209 a 210 true a 0 211 212 [abc[def]ghi] 213 d 214 true d 0 215 216 [abc[def]ghi] 217 h 218 true h 0 219 220 [abc[def]ghi] 221 w 222 false 0 223 224 [a-c&&[d-f]] 225 a 226 false 0 227 228 [a-c&&[d-f]] 229 e 230 false 0 231 232 [a-c&&[d-f]] 233 z 234 false 0 235 236 [[a-c]&&[d-f]] 237 a 238 false 0 239 240 [[a-c]&&[d-f]] 241 e 242 false 0 243 244 [[a-c]&&[d-f]] 245 z 246 false 0 247 248 [a-c&&d-f] 249 a 250 false 0 251 252 [a-m&&m-z] 253 m 254 true m 0 255 256 [a-m&&m-z&&a-c] 257 m 258 false 0 259 260 [a-m&&m-z&&a-z] 261 m 262 true m 0 263 264 [[a-m]&&[m-z]] 265 a 266 false 0 267 268 [[a-m]&&[m-z]] 269 m 270 true m 0 271 272 [[a-m]&&[m-z]] 273 z 274 false 0 275 276 [[a-m]&&[^a-c]] 277 a 278 false 0 279 280 [[a-m]&&[^a-c]] 281 d 282 true d 0 283 284 [a-m&&[^a-c]] 285 a 286 false 0 287 288 [a-m&&[^a-c]] 289 d 290 true d 0 291 292 [a-cd-f&&[d-f]] 293 a 294 false 0 295 296 [a-cd-f&&[d-f]] 297 e 298 true e 0 299 300 [[a-c]&&d-fa-c] 301 a 302 true a 0 303 304 [[a-c]&&[d-f][a-c]] 305 a 306 true a 0 307 308 [[a-c][d-f]&&abc] 309 a 310 true a 0 311 312 [[a-c][d-f]&&abc[def]] 313 e 314 true e 0 315 316 [[a-c]&&[b-d]&&[c-e]] 317 a 318 false 0 319 320 [[a-c]&&[b-d]&&[c-e]] 321 c 322 true c 0 323 324 [[a-c]&&[b-d][c-e]&&[u-z]] 325 c 326 false 0 327 328 [abc[^bcd]] 329 a 330 true a 0 331 332 [abc[^bcd]] 333 d 334 false 0 335 336 [a-c&&a-d&&a-eghi] 337 b 338 true b 0 339 340 [a-c&&a-d&&a-eghi] 341 g 342 false 0 343 344 [[a[b]]&&[b[a]]] 345 a 346 true a 0 347 348 [[a]&&[b][c][a]&&[^d]] 349 a 350 true a 0 351 352 [[a]&&[b][c][a]&&[^d]] 353 d 354 false 0 355 356 [[[a-d]&&[c-f]]] 357 a 358 false 0 359 360 [[[a-d]&&[c-f]]] 361 c 362 true c 0 363 364 [[[a-d]&&[c-f]]&&[c]] 365 c 366 true c 0 367 368 [[[a-d]&&[c-f]]&&[c]&&c] 369 c 370 true c 0 371 372 [[[a-d]&&[c-f]]&&[c]&&c&&c] 373 c 374 true c 0 375 376 [[[a-d]&&[c-f]]&&[c]&&c&&[cde]] 377 c 378 true c 0 379 380 [z[abc&&bcd]] 381 c 382 true c 0 383 384 [z[abc&&bcd]&&[u-z]] 385 z 386 true z 0 387 
388 [x[abc&&bcd[z]]&&[u-z]] 389 z 390 false 0 391 392 [x[[wz]abc&&bcd[z]]&&[u-z]] 393 z 394 true z 0 395 396 [[abc]&&[def]abc] 397 a 398 true a 0 399 400 [[abc]&&[def]xyz[abc]] 401 a 402 true a 0 403 404 \pL 405 a 406 true a 0 407 408 \pL 409 7 410 false 0 411 412 \p{L} 413 a 414 true a 0 415 416 \p{LC} 417 a 418 true a 0 419 420 \p{LC} 421 A 422 true A 0 423 424 \p{IsL} 425 a 426 true a 0 427 428 \p{IsLC} 429 a 430 true a 0 431 432 \p{IsLC} 433 A 434 true A 0 435 436 \p{IsLC} 437 9 438 false 0 439 440 \P{IsLC} 441 9 442 true 9 0 443 444 // Guillemet left is initial quote punctuation 445 \p{Pi} 446 \u00ab 447 true \u00ab 0 448 449 \P{Pi} 450 \u00ac 451 true \u00ac 0 452 453 // Guillemet right is final quote punctuation 454 \p{IsPf} 455 \u00bb 456 true \u00bb 0 457 458 \p{P} 459 \u00bb 460 true \u00bb 0 461 462 \p{P}+ 463 \u00bb 464 true \u00bb 0 465 466 \P{IsPf} 467 \u00bc 468 true \u00bc 0 469 470 \P{IsP} 471 \u00bc 472 true \u00bc 0 473 474 \p{L1} 475 \u00bc 476 true \u00bc 0 477 478 \p{L1}+ 479 \u00bc 480 true \u00bc 0 481 482 \p{L1} 483 \u02bc 484 false 0 485 486 \p{ASCII} 487 a 488 true a 0 489 490 \p{IsASCII} 491 a 492 true a 0 493 494 \p{IsASCII} 495 \u0370 496 false 0 497 498 \pLbc 499 abc 500 true abc 0 501 502 a[r\p{InGreek}]c 503 a\u0370c 504 true a\u0370c 0 505 506 a\p{InGreek} 507 a\u0370 508 true a\u0370 0 509 510 a\P{InGreek} 511 a\u0370 512 false 0 513 514 a\P{InGreek} 515 ab 516 true ab 0 517 518 a{^InGreek} 519 - 520 error 521 522 a\p{^InGreek} 523 - 524 error 525 526 a\P{^InGreek} 527 - 528 error 529 530 a\p{InGreek} 531 a\u0370 532 true a\u0370 0 533 534 a[\p{InGreek}]c 535 a\u0370c 536 true a\u0370c 0 537 538 a[\P{InGreek}]c 539 a\u0370c 540 false 0 541 542 a[\P{InGreek}]c 543 abc 544 true abc 0 545 546 a[{^InGreek}]c 547 anc 548 true anc 0 549 550 a[{^InGreek}]c 551 azc 552 false 0 553 554 a[\p{^InGreek}]c 555 - 556 error 557 558 a[\P{^InGreek}]c 559 - 560 error 561 562 a[\p{InGreek}] 563 a\u0370 564 true a\u0370 0 565 566 a[r\p{InGreek}]c 567 
arc 568 true arc 0 569 570 a[\p{InGreek}r]c 571 arc 572 true arc 0 573 574 a[r\p{InGreek}]c 575 arc 576 true arc 0 577 578 a[^\p{InGreek}]c 579 a\u0370c 580 false 0 581 582 a[^\P{InGreek}]c 583 a\u0370c 584 true a\u0370c 0 585 586 a[\p{InGreek}&&[^\u0370]]c 587 a\u0370c 588 false 0 589 590 // Test the dot metacharacter 591 a.c.+ 592 a#c%& 593 true a#c%& 0 594 595 ab. 596 ab\n 597 false 0 598 599 (?s)ab. 600 ab\n 601 true ab\n 0 602 603 a[\p{L}&&[\P{InGreek}]]c 604 a\u6000c 605 true a\u6000c 0 606 607 a[\p{L}&&[\P{InGreek}]]c 608 arc 609 true arc 0 610 611 a[\p{L}&&[\P{InGreek}]]c 612 a\u0370c 613 false 0 614 615 a\p{InGreek}c 616 a\u0370c 617 true a\u0370c 0 618 619 a\p{Sc} 620 a$ 621 true a$ 0 622 623 // Test the word char escape sequence 624 ab\wc 625 abcc 626 true abcc 0 627 628 \W\w\W 629 #r# 630 true #r# 0 631 632 \W\w\W 633 rrrr#ggg 634 false 0 635 636 abc[\w] 637 abcd 638 true abcd 0 639 640 abc[\sdef]* 641 abc def 642 true abc def 0 643 644 abc[\sy-z]* 645 abc y z 646 true abc y z 0 647 648 abc[a-d\sm-p]* 649 abcaa mn p 650 true abcaa mn p 0 651 652 // Test the whitespace escape sequence 653 ab\sc 654 ab c 655 true ab c 0 656 657 \s\s\s 658 blah err 659 false 0 660 661 \S\S\s 662 blah err 663 true ah 0 664 665 // Test the digit escape sequence 666 ab\dc 667 ab9c 668 true ab9c 0 669 670 \d\d\d 671 blah45 672 false 0 673 674 // Test the caret metacharacter 675 ^abc 676 abcdef 677 true abc 0 678 679 ^abc 680 bcdabc 681 false 0 682 683 // Greedy ? metacharacter 684 a?b 685 aaaab 686 true ab 0 687 688 a?b 689 b 690 true b 0 691 692 a?b 693 aaaccc 694 false 0 695 696 .?b 697 aaaab 698 true ab 0 699 700 // Reluctant ? metacharacter 701 a??b 702 aaaab 703 true ab 0 704 705 a??b 706 b 707 true b 0 708 709 a??b 710 aaaccc 711 false 0 712 713 .??b 714 aaaab 715 true ab 0 716 717 // Possessive ? 
metacharacter 718 a?+b 719 aaaab 720 true ab 0 721 722 a?+b 723 b 724 true b 0 725 726 a?+b 727 aaaccc 728 false 0 729 730 .?+b 731 aaaab 732 true ab 0 733 734 // Greedy + metacharacter 735 a+b 736 aaaab 737 true aaaab 0 738 739 a+b 740 b 741 false 0 742 743 a+b 744 aaaccc 745 false 0 746 747 .+b 748 aaaab 749 true aaaab 0 750 751 // Reluctant + metacharacter 752 a+?b 753 aaaab 754 true aaaab 0 755 756 a+?b 757 b 758 false 0 759 760 a+?b 761 aaaccc 762 false 0 763 764 .+?b 765 aaaab 766 true aaaab 0 767 768 // Possessive + metacharacter 769 a++b 770 aaaab 771 true aaaab 0 772 773 a++b 774 b 775 false 0 776 777 a++b 778 aaaccc 779 false 0 780 781 .++b 782 aaaab 783 false 0 784 785 // Greedy Repetition 786 a{2,3} 787 a 788 false 0 789 790 a{2,3} 791 aa 792 true aa 0 793 794 a{2,3} 795 aaa 796 true aaa 0 797 798 a{2,3} 799 aaaa 800 true aaa 0 801 802 a{3,} 803 zzzaaaazzz 804 true aaaa 0 805 806 a{3,} 807 zzzaazzz 808 false 0 809 810 // Reluctant Repetition 811 a{2,3}? 812 a 813 false 0 814 815 a{2,3}? 816 aa 817 true aa 0 818 819 a{2,3}? 820 aaa 821 true aa 0 822 823 a{2,3}? 
824 aaaa 825 true aa 0 826 827 // Zero width Positive lookahead 828 abc(?=d) 829 zzzabcd 830 true abc 0 831 832 abc(?=d) 833 zzzabced 834 false 0 835 836 // Zero width Negative lookahead 837 abc(?!d) 838 zzabcd 839 false 0 840 841 abc(?!d) 842 zzabced 843 true abc 0 844 845 // Zero width Positive lookbehind 846 \w(?<=a) 847 ###abc### 848 true a 0 849 850 \w(?<=a) 851 ###ert### 852 false 0 853 854 // Zero width Negative lookbehind 855 (?<!a)\w 856 ###abc### 857 true a 0 858 859 (?<!a)c 860 bc 861 true c 0 862 863 (?<!a)c 864 ac 865 false 0 866 867 // Nondeterministic group 868 (a+b)+ 869 ababab 870 true ababab 1 ab 871 872 (a|b)+ 873 ccccd 874 false 1 875 876 // Deterministic group 877 (ab)+ 878 ababab 879 true ababab 1 ab 880 881 (ab)+ 882 accccd 883 false 1 884 885 (ab)* 886 ababab 887 true ababab 1 ab 888 889 (ab)(cd*) 890 zzzabczzz 891 true abc 2 ab c 892 893 abc(d)*abc 894 abcdddddabc 895 true abcdddddabc 1 d 896 897 // Escaped metacharacter 898 \* 899 * 900 true * 0 901 902 \\ 903 \ 904 true \ 0 905 906 \\ 907 \\\\ 908 true \ 0 909 910 // Back references 911 (a*)bc\1 912 zzzaabcaazzz 913 true aabcaa 1 aa 914 915 (a*)bc\1 916 zzzaabcazzz 917 true abca 1 a 918 919 (gt*)(dde)*(yu)\1\3(vv) 920 zzzgttddeddeyugttyuvvzzz 921 true gttddeddeyugttyuvv 4 gtt dde yu vv 922 923 // Greedy * metacharacter 924 a*b 925 aaaab 926 true aaaab 0 927 928 a*b 929 b 930 true b 0 931 932 a*b 933 aaaccc 934 false 0 935 936 .*b 937 aaaab 938 true aaaab 0 939 940 // Reluctant * metacharacter 941 a*?b 942 aaaab 943 true aaaab 0 944 945 a*?b 946 b 947 true b 0 948 949 a*?b 950 aaaccc 951 false 0 952 953 .*?b 954 aaaab 955 true aaaab 0 956 957 // Possessive * metacharacter 958 a*+b 959 aaaab 960 true aaaab 0 961 962 a*+b 963 b 964 true b 0 965 966 a*+b 967 aaaccc 968 false 0 969 970 .*+b 971 aaaab 972 false 0 973 974 // Case insensitivity 975 (?i)foobar 976 fOobAr 977 true fOobAr 0 978 979 f(?i)oobar 980 fOobAr 981 true fOobAr 0 982 983 foo(?i)bar 984 fOobAr 985 false 0 986 987 
(?i)foo[bar]+ 988 foObAr 989 true foObAr 0 990 991 (?i)foo[a-r]+ 992 foObAr 993 true foObAr 0 994 995 // Disable metacharacters- test both length <=3 and >3 996 // So that the BM optimization is part of test 997 \Q***\Eabc 998 ***abc 999 true ***abc 0 1000 1001 bl\Q***\Eabc 1002 bl***abc 1003 true bl***abc 0 1004 1005 \Q***abc 1006 ***abc 1007 true ***abc 0 1008 1009 blah\Q***\Eabc 1010 blah***abc 1011 true blah***abc 0 1012 1013 \Q***abc 1014 ***abc 1015 true ***abc 0 1016 1017 \Q*ab 1018 *ab 1019 true *ab 0 1020 1021 blah\Q***abc 1022 blah***abc 1023 true blah***abc 0 1024 1025 bla\Q***abc 1026 bla***abc 1027 true bla***abc 0 1028 1029 // Escapes in char classes 1030 [ab\Qdef\E] 1031 d 1032 true d 0 1033 1034 [ab\Q[\E] 1035 [ 1036 true [ 0 1037 1038 [\Q]\E] 1039 ] 1040 true ] 0 1041 1042 [\Q\\E] 1043 \ 1044 true \ 0 1045 1046 [\Q(\E] 1047 ( 1048 true ( 0 1049 1050 [\n-#] 1051 ! 1052 true ! 0 1053 1054 [\n-#] 1055 - 1056 false 0 1057 1058 [\w-#] 1059 ! 1060 false 0 1061 1062 [\w-#] 1063 a 1064 true a 0 1065 1066 [\w-#] 1067 - 1068 true - 0 1069 1070 [\w-#] 1071 # 1072 true # 0 1073 1074 [\043]+ 1075 blahblah#blech 1076 true # 0 1077 1078 [\042-\044]+ 1079 blahblah#blech 1080 true # 0 1081 1082 [\u1234-\u1236] 1083 blahblah\u1235blech 1084 true \u1235 0 1085 1086 [^\043]* 1087 blahblah#blech 1088 true blahblah 0 1089 1090 (|f)?+ 1091 foo 1092 true 1