Old TestCases.txt
1 // This file contains test cases for regular expressions.
2 // A test case consists of three lines:
3 // The first line is a pattern used in the test
4 // The second line is the input to search for the pattern in
5 // The third line is a concatenation of the match, the number of groups,
6 // and the contents of the first four subexpressions.
7 // Empty lines and lines beginning with comment slashes are ignored.
8
9 // Test unsetting of backed off groups
10 ^(a)?a
11 a
12 true a 1
13
14 ^(aa(bb)?)+$
15 aabbaa
16 true aabbaa 2 aa bb
17
18 ((a|b)?b)+
19 b
20 true b 2 b
21
22 (aaa)?aaa
23 aaa
24 true aaa 1
25
26 ^(a(b)?)+$
27 aba
28 true aba 2 a b
29
30 ^(a(b(c)?)?)?abc
31 abc
32 true abc 3
33
34 ^(a(b(c))).*
35 abc
36 true abc 3 abc bc c
37
38 // use of x modifier
39 abc(?x)blah
40 abcblah
41 true abcblah 0
42
43 abc(?x) blah
44 abcblah
45 true abcblah 0
46
47 abc(?x) blah blech
48 abcblahblech
49 true abcblahblech 0
50
51 abc(?x) blah # ignore comment
52 abcblah
53 true abcblah 0
54
55 // Simple alternation
56 a|b
57 a
58 true a 0
59
60 a|b
61 z
62 false 0
63
64 a|b
65 b
66 true b 0
67
68 a|b|cd
69 cd
70 true cd 0
71
72 a|ad
73 ad
74 true a 0
75
76 z(a|ac)b
77 zacb
78 true zacb 1 ac
79
80 // Simple char class
81 [abc]+
82 ababab
83 true ababab 0
84
85 [abc]+
86 defg
87 false 0
88
89 [abc]+[def]+[ghi]+
90 zzzaaddggzzz
91 true aaddgg 0
92
93 // Range char class
94 [a-g]+
95 zzzggg
96 true ggg 0
97
98 [a-g]+
99 mmm
100 false 0
101
102 [a-]+
103 za-9z
104 true a- 0
105
106 [a-\\u4444]+
107 za-9z
108 true za 0
109
110 // Negated char class
111 [^abc]+
112 ababab
113 false 0
114
115 [^abc]+
116 aaabbbcccdefg
117 true defg 0
118
119 // Making sure a ^ not in first position matches literal ^
120 [abc^b]
121 b
122 true b 0
123
124 [abc^b]
125 ^
126 true ^ 0
127
128 // Class union and intersection
129 [abc[def]]
130 b
131 true b 0
132
133 [abc[def]]
134 e
135 true e 0
136
137 [a-d[0-9][m-p]]
138 a
139 true a 0
140
141 [a-d[0-9][m-p]]
142 o
143 true o 0
144
145 [a-d[0-9][m-p]]
146 4
147 true 4 0
148
149 [a-d[0-9][m-p]]
150 e
151 false 0
152
153 [a-d[0-9][m-p]]
154 u
155 false 0
156
157 [[a-d][0-9][m-p]]
158 b
159 true b 0
160
161 [[a-d][0-9][m-p]]
162 z
163 false 0
164
165 [a-c[d-f[g-i]]]
166 a
167 true a 0
168
169 [a-c[d-f[g-i]]]
170 e
171 true e 0
172
173 [a-c[d-f[g-i]]]
174 h
175 true h 0
176
177 [a-c[d-f[g-i]]]
178 m
179 false 0
180
181 [a-c[d-f[g-i]]m]
182 m
183 true m 0
184
185 [abc[def]ghi]
186 a
187 true a 0
188
189 [abc[def]ghi]
190 d
191 true d 0
192
193 [abc[def]ghi]
194 h
195 true h 0
196
197 [abc[def]ghi]
198 w
199 false 0
200
201 [a-c&&[d-f]]
202 a
203 false 0
204
205 [a-c&&[d-f]]
206 e
207 false 0
208
209 [a-c&&[d-f]]
210 z
211 false 0
212
213 [[a-c]&&[d-f]]
214 a
215 false 0
216
217 [[a-c]&&[d-f]]
218 e
219 false 0
220
221 [[a-c]&&[d-f]]
222 z
223 false 0
224
225 [a-c&&d-f]
226 a
227 false 0
228
229 [a-m&&m-z]
230 m
231 true m 0
232
233 [a-m&&m-z&&a-c]
234 m
235 false 0
236
237 [a-m&&m-z&&a-z]
238 m
239 true m 0
240
241 [[a-m]&&[m-z]]
242 a
243 false 0
244
245 [[a-m]&&[m-z]]
246 m
247 true m 0
248
249 [[a-m]&&[m-z]]
250 z
251 false 0
252
253 [[a-m]&&[^a-c]]
254 a
255 false 0
256
257 [[a-m]&&[^a-c]]
258 d
259 true d 0
260
261 [a-m&&[^a-c]]
262 a
263 false 0
264
265 [a-m&&[^a-c]]
266 d
267 true d 0
268
269 [a-cd-f&&[d-f]]
270 a
271 false 0
272
273 [a-cd-f&&[d-f]]
274 e
275 true e 0
276
277 [[a-c]&&d-fa-c]
278 a
279 true a 0
280
281 [[a-c]&&[d-f][a-c]]
282 a
283 true a 0
284
285 [[a-c][d-f]&&abc]
286 a
287 true a 0
288
289 [[a-c][d-f]&&abc[def]]
290 e
291 true e 0
292
293 [[a-c]&&[b-d]&&[c-e]]
294 a
295 false 0
296
297 [[a-c]&&[b-d]&&[c-e]]
298 c
299 true c 0
300
301 [[a-c]&&[b-d][c-e]&&[u-z]]
302 c
303 false 0
304
305 [abc[^bcd]]
306 a
307 true a 0
308
309 [abc[^bcd]]
310 d
311 false 0
312
313 [a-c&&a-d&&a-eghi]
314 b
315 true b 0
316
317 [a-c&&a-d&&a-eghi]
318 g
319 false 0
320
321 [[a[b]]&&[b[a]]]
322 a
323 true a 0
324
325 [[a]&&[b][c][a]&&[^d]]
326 a
327 true a 0
328
329 [[a]&&[b][c][a]&&[^d]]
330 d
331 false 0
332
333 [[[a-d]&&[c-f]]]
334 a
335 false 0
336
337 [[[a-d]&&[c-f]]]
338 c
339 true c 0
340
341 [[[a-d]&&[c-f]]&&[c]]
342 c
343 true c 0
344
345 [[[a-d]&&[c-f]]&&[c]&&c]
346 c
347 true c 0
348
349 [[[a-d]&&[c-f]]&&[c]&&c&&c]
350 c
351 true c 0
352
353 [[[a-d]&&[c-f]]&&[c]&&c&&[cde]]
354 c
355 true c 0
356
357 [z[abc&&bcd]]
358 c
359 true c 0
360
361 [z[abc&&bcd]&&[u-z]]
362 z
363 true z 0
364
365 [x[abc&&bcd[z]]&&[u-z]]
366 z
367 false 0
368
369 [x[[wz]abc&&bcd[z]]&&[u-z]]
370 z
371 true z 0
372
373 [[abc]&&[def]abc]
374 a
375 true a 0
376
377 [[abc]&&[def]xyz[abc]]
378 a
379 true a 0
380
381 \pL
382 a
383 true a 0
384
385 \pL
386 7
387 false 0
388
389 \p{L}
390 a
391 true a 0
392
393 \p{LC}
394 a
395 true a 0
396
397 \p{LC}
398 A
399 true A 0
400
401 \p{IsL}
402 a
403 true a 0
404
405 \p{IsLC}
406 a
407 true a 0
408
409 \p{IsLC}
410 A
411 true A 0
412
413 \p{IsLC}
414 9
415 false 0
416
417 \P{IsLC}
418 9
419 true 9 0
420
421 // Guillemet left is initial quote punctuation
422 \p{Pi}
423 \u00ab
424 true \u00ab 0
425
426 \P{Pi}
427 \u00ac
428 true \u00ac 0
429
430 // Guillemet right is final quote punctuation
431 \p{IsPf}
432 \u00bb
433 true \u00bb 0
434
435 \p{P}
436 \u00bb
437 true \u00bb 0
438
439 \p{P}+
440 \u00bb
441 true \u00bb 0
442
443 \P{IsPf}
444 \u00bc
445 true \u00bc 0
446
447 \P{IsP}
448 \u00bc
449 true \u00bc 0
450
451 \p{L1}
452 \u00bc
453 true \u00bc 0
454
455 \p{L1}+
456 \u00bc
457 true \u00bc 0
458
459 \p{L1}
460 \u02bc
461 false 0
462
463 \p{ASCII}
464 a
465 true a 0
466
467 \p{IsASCII}
468 a
469 true a 0
470
471 \p{IsASCII}
472 \u0370
473 false 0
474
475 \pLbc
476 abc
477 true abc 0
478
479 a[r\p{InGreek}]c
480 a\u0370c
481 true a\u0370c 0
482
483 a\p{InGreek}
484 a\u0370
485 true a\u0370 0
486
487 a\P{InGreek}
488 a\u0370
489 false 0
490
491 a\P{InGreek}
492 ab
493 true ab 0
494
495 a{^InGreek}
496 -
497 error
498
499 a\p{^InGreek}
500 -
501 error
502
503 a\P{^InGreek}
504 -
505 error
506
507 a\p{InGreek}
508 a\u0370
509 true a\u0370 0
510
511 a[\p{InGreek}]c
512 a\u0370c
513 true a\u0370c 0
514
515 a[\P{InGreek}]c
516 a\u0370c
517 false 0
518
519 a[\P{InGreek}]c
520 abc
521 true abc 0
522
523 a[{^InGreek}]c
524 anc
525 true anc 0
526
527 a[{^InGreek}]c
528 azc
529 false 0
530
531 a[\p{^InGreek}]c
532 -
533 error
534
535 a[\P{^InGreek}]c
536 -
537 error
538
539 a[\p{InGreek}]
540 a\u0370
541 true a\u0370 0
542
543 a[r\p{InGreek}]c
544 arc
545 true arc 0
546
547 a[\p{InGreek}r]c
548 arc
549 true arc 0
550
551 a[r\p{InGreek}]c
552 arc
553 true arc 0
554
555 a[^\p{InGreek}]c
556 a\u0370c
557 false 0
558
559 a[^\P{InGreek}]c
560 a\u0370c
561 true a\u0370c 0
562
563 a[\p{InGreek}&&[^\u0370]]c
564 a\u0370c
565 false 0
566
567 // Test the dot metacharacter
568 a.c.+
569 a#c%&
570 true a#c%& 0
571
572 ab.
573 ab\n
574 false 0
575
576 (?s)ab.
577 ab\n
578 true ab\n 0
579
580 a[\p{L}&&[\P{InGreek}]]c
581 a\u6000c
582 true a\u6000c 0
583
584 a[\p{L}&&[\P{InGreek}]]c
585 arc
586 true arc 0
587
588 a[\p{L}&&[\P{InGreek}]]c
589 a\u0370c
590 false 0
591
592 a\p{InGreek}c
593 a\u0370c
594 true a\u0370c 0
595
596 a\p{Sc}
597 a$
598 true a$ 0
599
600 // Test the word char escape sequence
601 ab\wc
602 abcc
603 true abcc 0
604
605 \W\w\W
606 #r#
607 true #r# 0
608
609 \W\w\W
610 rrrr#ggg
611 false 0
612
613 abc[\w]
614 abcd
615 true abcd 0
616
617 abc[\sdef]*
618 abc def
619 true abc def 0
620
621 abc[\sy-z]*
622 abc y z
623 true abc y z 0
624
625 abc[a-d\sm-p]*
626 abcaa mn p
627 true abcaa mn p 0
628
629 // Test the whitespace escape sequence
630 ab\sc
631 ab c
632 true ab c 0
633
634 \s\s\s
635 blah err
636 false 0
637
638 \S\S\s
639 blah err
640 true ah 0
641
642 // Test the digit escape sequence
643 ab\dc
644 ab9c
645 true ab9c 0
646
647 \d\d\d
648 blah45
649 false 0
650
651 // Test the caret metacharacter
652 ^abc
653 abcdef
654 true abc 0
655
656 ^abc
657 bcdabc
658 false 0
659
660 // Greedy ? metacharacter
661 a?b
662 aaaab
663 true ab 0
664
665 a?b
666 b
667 true b 0
668
669 a?b
670 aaaccc
671 false 0
672
673 .?b
674 aaaab
675 true ab 0
676
677 // Reluctant ? metacharacter
678 a??b
679 aaaab
680 true ab 0
681
682 a??b
683 b
684 true b 0
685
686 a??b
687 aaaccc
688 false 0
689
690 .??b
691 aaaab
692 true ab 0
693
694 // Possessive ? metacharacter
695 a?+b
696 aaaab
697 true ab 0
698
699 a?+b
700 b
701 true b 0
702
703 a?+b
704 aaaccc
705 false 0
706
707 .?+b
708 aaaab
709 true ab 0
710
711 // Greedy + metacharacter
712 a+b
713 aaaab
714 true aaaab 0
715
716 a+b
717 b
718 false 0
719
720 a+b
721 aaaccc
722 false 0
723
724 .+b
725 aaaab
726 true aaaab 0
727
728 // Reluctant + metacharacter
729 a+?b
730 aaaab
731 true aaaab 0
732
733 a+?b
734 b
735 false 0
736
737 a+?b
738 aaaccc
739 false 0
740
741 .+?b
742 aaaab
743 true aaaab 0
744
745 // Possessive + metacharacter
746 a++b
747 aaaab
748 true aaaab 0
749
750 a++b
751 b
752 false 0
753
754 a++b
755 aaaccc
756 false 0
757
758 .++b
759 aaaab
760 false 0
761
762 // Greedy Repetition
763 a{2,3}
764 a
765 false 0
766
767 a{2,3}
768 aa
769 true aa 0
770
771 a{2,3}
772 aaa
773 true aaa 0
774
775 a{2,3}
776 aaaa
777 true aaa 0
778
779 a{3,}
780 zzzaaaazzz
781 true aaaa 0
782
783 a{3,}
784 zzzaazzz
785 false 0
786
787 // Reluctant Repetition
788 a{2,3}?
789 a
790 false 0
791
792 a{2,3}?
793 aa
794 true aa 0
795
796 a{2,3}?
797 aaa
798 true aa 0
799
800 a{2,3}?
801 aaaa
802 true aa 0
803
804 // Zero width Positive lookahead
805 abc(?=d)
806 zzzabcd
807 true abc 0
808
809 abc(?=d)
810 zzzabced
811 false 0
812
813 // Zero width Negative lookahead
814 abc(?!d)
815 zzabcd
816 false 0
817
818 abc(?!d)
819 zzabced
820 true abc 0
821
822 // Zero width Positive lookbehind
823 \w(?<=a)
824 ###abc###
825 true a 0
826
827 \w(?<=a)
828 ###ert###
829 false 0
830
831 // Zero width Negative lookbehind
832 (?<!a)\w
833 ###abc###
834 true a 0
835
836 (?<!a)c
837 bc
838 true c 0
839
840 (?<!a)c
841 ac
842 false 0
843
844 // Nondeterministic group
845 (a+b)+
846 ababab
847 true ababab 1 ab
848
849 (a|b)+
850 ccccd
851 false 1
852
853 // Deterministic group
854 (ab)+
855 ababab
856 true ababab 1 ab
857
858 (ab)+
859 accccd
860 false 1
861
862 (ab)*
863 ababab
864 true ababab 1 ab
865
866 (ab)(cd*)
867 zzzabczzz
868 true abc 2 ab c
869
870 abc(d)*abc
871 abcdddddabc
872 true abcdddddabc 1 d
873
874 // Escaped metacharacter
875 \*
876 *
877 true * 0
878
879 \\
880 \
881 true \ 0
882
883 \\
884 \\\\
885 true \ 0
886
887 // Back references
888 (a*)bc\1
889 zzzaabcaazzz
890 true aabcaa 1 aa
891
892 (a*)bc\1
893 zzzaabcazzz
894 true abca 1 a
895
896 (gt*)(dde)*(yu)\1\3(vv)
897 zzzgttddeddeyugttyuvvzzz
898 true gttddeddeyugttyuvv 4 gtt dde yu vv
899
900 // Greedy * metacharacter
901 a*b
902 aaaab
903 true aaaab 0
904
905 a*b
906 b
907 true b 0
908
909 a*b
910 aaaccc
911 false 0
912
913 .*b
914 aaaab
915 true aaaab 0
916
917 // Reluctant * metacharacter
918 a*?b
919 aaaab
920 true aaaab 0
921
922 a*?b
923 b
924 true b 0
925
926 a*?b
927 aaaccc
928 false 0
929
930 .*?b
931 aaaab
932 true aaaab 0
933
934 // Possessive * metacharacter
935 a*+b
936 aaaab
937 true aaaab 0
938
939 a*+b
940 b
941 true b 0
942
943 a*+b
944 aaaccc
945 false 0
946
947 .*+b
948 aaaab
949 false 0
950
951 // Case insensitivity
952 (?i)foobar
953 fOobAr
954 true fOobAr 0
955
956 f(?i)oobar
957 fOobAr
958 true fOobAr 0
959
960 foo(?i)bar
961 fOobAr
962 false 0
963
964 (?i)foo[bar]+
965 foObAr
966 true foObAr 0
967
968 (?i)foo[a-r]+
969 foObAr
970 true foObAr 0
971
972 // Disable metacharacters - test both length <= 3 and > 3
973 // so that the BM (Boyer-Moore) optimization is part of the test
974 \Q***\Eabc
975 ***abc
976 true ***abc 0
977
978 bl\Q***\Eabc
979 bl***abc
980 true bl***abc 0
981
982 \Q***abc
983 ***abc
984 true ***abc 0
985
986 blah\Q***\Eabc
987 blah***abc
988 true blah***abc 0
989
990 \Q***abc
991 ***abc
992 true ***abc 0
993
994 \Q*ab
995 *ab
996 true *ab 0
997
998 blah\Q***abc
999 blah***abc
1000 true blah***abc 0
1001
1002 bla\Q***abc
1003 bla***abc
1004 true bla***abc 0
1005
1006 // Escapes in char classes
1007 [ab\Qdef\E]
1008 d
1009 true d 0
1010
1011 [ab\Q[\E]
1012 [
1013 true [ 0
1014
1015 [\Q]\E]
1016 ]
1017 true ] 0
1018
1019 [\Q\\E]
1020 \
1021 true \ 0
1022
1023 [\Q(\E]
1024 (
1025 true ( 0
1026
1027 [\n-#]
1028 !
1029 true ! 0
1030
1031 [\n-#]
1032 -
1033 false 0
1034
1035 [\w-#]
1036 !
1037 false 0
1038
1039 [\w-#]
1040 a
1041 true a 0
1042
1043 [\w-#]
1044 -
1045 true - 0
1046
1047 [\w-#]
1048 #
1049 true # 0
1050
1051 [\043]+
1052 blahblah#blech
1053 true # 0
1054
1055 [\042-\044]+
1056 blahblah#blech
1057 true # 0
1058
1059 [\u1234-\u1236]
1060 blahblah\u1235blech
1061 true \u1235 0
1062
1063 [^\043]*
1064 blahblah#blech
1065 true blahblah 0