Old TestCases.txt
  1 // This file contains test cases for regular expressions.
  2 // A test case consists of three lines:
  3 // The first line is a pattern used in the test
  4 // The second line is the input to search for the pattern in
  5 // The third line is a concatenation of the result (true/false), the match,
  6 //     the number of groups, and the contents of the first four subexpressions.
  7 // Empty lines and lines beginning with comment slashes are ignored.
  8 
  9 // Test unsetting of backed off groups
 10 ^(a)?a
 11 a
 12 true a 1
 13 
 14 ^(aa(bb)?)+$
 15 aabbaa
 16 true aabbaa 2 aa bb
 17 
 18 ((a|b)?b)+
 19 b
 20 true b 2 b
 21 
 22 (aaa)?aaa
 23 aaa
 24 true aaa 1
 25 
 26 ^(a(b)?)+$
 27 aba
 28 true aba 2 a b
 29 
 30 ^(a(b(c)?)?)?abc
 31 abc
 32 true abc 3
 33 
 34 ^(a(b(c))).*
 35 abc
 36 true abc 3 abc bc c
 37 
 38 // use of x modifier
 39 abc(?x)blah
 40 abcblah
 41 true abcblah 0
 42 
 43 abc(?x)  blah
 44 abcblah
 45 true abcblah 0
 46 
 47 abc(?x)  blah  blech
 48 abcblahblech
 49 true abcblahblech 0
 50 
 51 abc(?x)  blah # ignore comment
 52 abcblah
 53 true abcblah 0
 54 
 55 // Simple alternation
 56 a|b
 57 a
 58 true a 0
 59 
 60 a|b
 61 z
 62 false 0
 63 
 64 a|b
 65 b
 66 true b 0
 67 
 68 a|b|cd
 69 cd
 70 true cd 0
 71 
 72 a|ad
 73 ad
 74 true a 0
 75 
 76 z(a|ac)b
 77 zacb
 78 true zacb 1 ac
 79 
 80 // Simple char class
 81 [abc]+
 82 ababab
 83 true ababab 0
 84 
 85 [abc]+
 86 defg
 87 false 0
 88 
 89 [abc]+[def]+[ghi]+
 90 zzzaaddggzzz
 91 true aaddgg 0
 92 
 93 // Range char class
 94 [a-g]+
 95 zzzggg
 96 true ggg 0
 97 
 98 [a-g]+
 99 mmm
100 false 0
101 
102 [a-]+
103 za-9z
104 true a- 0
105 
106 [a-\\u4444]+
107 za-9z
108 true za 0
109 
110 // Negated char class
111 [^abc]+
112 ababab
113 false 0
114 
115 [^abc]+
116 aaabbbcccdefg
117 true defg 0
118 
119 // Making sure a ^ not in first position matches literal ^
120 [abc^b]
121 b
122 true b 0
123 
124 [abc^b]
125 ^
126 true ^ 0
127 
128 // Class union and intersection
129 [abc[def]]
130 b
131 true b 0
132 
133 [abc[def]]
134 e
135 true e 0
136 
137 [a-d[0-9][m-p]]
138 a
139 true a 0
140 
141 [a-d[0-9][m-p]]
142 o
143 true o 0
144 
145 [a-d[0-9][m-p]]
146 4
147 true 4 0
148 
149 [a-d[0-9][m-p]]
150 e
151 false 0
152 
153 [a-d[0-9][m-p]]
154 u
155 false 0
156 
157 [[a-d][0-9][m-p]]
158 b
159 true b 0
160 
161 [[a-d][0-9][m-p]]
162 z
163 false 0
164 
165 [a-c[d-f[g-i]]]
166 a
167 true a 0
168 
169 [a-c[d-f[g-i]]]
170 e
171 true e 0
172 
173 [a-c[d-f[g-i]]]
174 h
175 true h 0
176 
177 [a-c[d-f[g-i]]]
178 m
179 false 0
180 
181 [a-c[d-f[g-i]]m]
182 m
183 true m 0
184 
185 [abc[def]ghi]
186 a
187 true a 0
188 
189 [abc[def]ghi]
190 d
191 true d 0
192 
193 [abc[def]ghi]
194 h
195 true h 0
196 
197 [abc[def]ghi]
198 w
199 false 0
200 
201 [a-c&&[d-f]]
202 a
203 false 0
204 
205 [a-c&&[d-f]]
206 e
207 false 0
208 
209 [a-c&&[d-f]]
210 z
211 false 0
212 
213 [[a-c]&&[d-f]]
214 a
215 false 0
216 
217 [[a-c]&&[d-f]]
218 e
219 false 0
220 
221 [[a-c]&&[d-f]]
222 z
223 false 0
224 
225 [a-c&&d-f]
226 a
227 false 0
228 
229 [a-m&&m-z]
230 m
231 true m 0
232 
233 [a-m&&m-z&&a-c]
234 m
235 false 0
236 
237 [a-m&&m-z&&a-z]
238 m
239 true m 0
240 
241 [[a-m]&&[m-z]]
242 a
243 false 0
244 
245 [[a-m]&&[m-z]]
246 m
247 true m 0
248 
249 [[a-m]&&[m-z]]
250 z
251 false 0
252 
253 [[a-m]&&[^a-c]]
254 a
255 false 0
256 
257 [[a-m]&&[^a-c]]
258 d
259 true d 0
260 
261 [a-m&&[^a-c]]
262 a
263 false 0
264 
265 [a-m&&[^a-c]]
266 d
267 true d 0
268 
269 [a-cd-f&&[d-f]]
270 a
271 false 0
272 
273 [a-cd-f&&[d-f]]
274 e
275 true e 0
276 
277 [[a-c]&&d-fa-c]
278 a
279 true a 0
280 
281 [[a-c]&&[d-f][a-c]]
282 a
283 true a 0
284 
285 [[a-c][d-f]&&abc]
286 a
287 true a 0
288 
289 [[a-c][d-f]&&abc[def]]
290 e
291 true e 0
292 
293 [[a-c]&&[b-d]&&[c-e]]
294 a
295 false 0
296 
297 [[a-c]&&[b-d]&&[c-e]]
298 c
299 true c 0
300 
301 [[a-c]&&[b-d][c-e]&&[u-z]]
302 c
303 false 0
304 
305 [abc[^bcd]]
306 a
307 true a 0
308 
309 [abc[^bcd]]
310 d
311 false 0
312 
313 [a-c&&a-d&&a-eghi]
314 b
315 true b 0
316 
317 [a-c&&a-d&&a-eghi]
318 g
319 false 0
320 
321 [[a[b]]&&[b[a]]]
322 a
323 true a 0
324 
325 [[a]&&[b][c][a]&&[^d]]
326 a
327 true a 0
328 
329 [[a]&&[b][c][a]&&[^d]]
330 d
331 false 0
332 
333 [[[a-d]&&[c-f]]]
334 a
335 false 0
336 
337 [[[a-d]&&[c-f]]]
338 c
339 true c 0
340 
341 [[[a-d]&&[c-f]]&&[c]]
342 c
343 true c 0
344 
345 [[[a-d]&&[c-f]]&&[c]&&c]
346 c
347 true c 0
348 
349 [[[a-d]&&[c-f]]&&[c]&&c&&c]
350 c
351 true c 0
352 
353 [[[a-d]&&[c-f]]&&[c]&&c&&[cde]]
354 c
355 true c 0
356 
357 [z[abc&&bcd]]
358 c
359 true c 0
360 
361 [z[abc&&bcd]&&[u-z]]
362 z
363 true z 0
364 
365 [x[abc&&bcd[z]]&&[u-z]]
366 z
367 false 0
368 
369 [x[[wz]abc&&bcd[z]]&&[u-z]]
370 z
371 true z 0
372 
373 [[abc]&&[def]abc]
374 a
375 true a 0
376 
377 [[abc]&&[def]xyz[abc]]
378 a
379 true a 0
380 
381 \pL
382 a
383 true a 0
384 
385 \pL
386 7
387 false 0
388 
389 \p{L}
390 a
391 true a 0
392 
393 \p{LC}
394 a
395 true a 0
396 
397 \p{LC}
398 A
399 true A 0
400 
401 \p{IsL}
402 a
403 true a 0
404 
405 \p{IsLC}
406 a
407 true a 0
408 
409 \p{IsLC}
410 A
411 true A 0
412 
413 \p{IsLC}
414 9
415 false 0
416 
417 \P{IsLC}
418 9
419 true 9 0
420 
421 // Guillemet left is initial quote punctuation
422 \p{Pi}
423 \u00ab
424 true \u00ab 0
425 
426 \P{Pi}
427 \u00ac
428 true \u00ac 0
429 
430 // Guillemet right is final quote punctuation
431 \p{IsPf}
432 \u00bb
433 true \u00bb 0
434 
435 \p{P}
436 \u00bb
437 true \u00bb 0
438 
439 \p{P}+
440 \u00bb
441 true \u00bb 0
442 
443 \P{IsPf}
444 \u00bc
445 true \u00bc 0
446 
447 \P{IsP}
448 \u00bc
449 true \u00bc 0
450 
451 \p{L1}
452 \u00bc
453 true \u00bc 0
454 
455 \p{L1}+
456 \u00bc
457 true \u00bc 0
458 
459 \p{L1}
460 \u02bc
461 false 0
462 
463 \p{ASCII}
464 a
465 true a 0
466 
467 \p{IsASCII}
468 a
469 true a 0
470 
471 \p{IsASCII}
472 \u0370
473 false 0
474 
475 \pLbc
476 abc
477 true abc 0
478 
479 a[r\p{InGreek}]c
480 a\u0370c
481 true a\u0370c 0
482 
483 a\p{InGreek}
484 a\u0370
485 true a\u0370 0
486 
487 a\P{InGreek}
488 a\u0370
489 false 0
490 
491 a\P{InGreek}
492 ab
493 true ab 0
494 
495 a{^InGreek}
496 -
497 error
498 
499 a\p{^InGreek}
500 -
501 error
502 
503 a\P{^InGreek}
504 -
505 error
506 
507 a\p{InGreek}
508 a\u0370
509 true a\u0370 0
510 
511 a[\p{InGreek}]c
512 a\u0370c
513 true a\u0370c 0
514 
515 a[\P{InGreek}]c
516 a\u0370c
517 false 0
518 
519 a[\P{InGreek}]c
520 abc
521 true abc 0
522 
523 a[{^InGreek}]c
524 anc
525 true anc 0
526 
527 a[{^InGreek}]c
528 azc
529 false 0
530 
531 a[\p{^InGreek}]c
532 -
533 error
534 
535 a[\P{^InGreek}]c
536 -
537 error
538 
539 a[\p{InGreek}]
540 a\u0370
541 true a\u0370 0
542 
543 a[r\p{InGreek}]c
544 arc
545 true arc 0
546 
547 a[\p{InGreek}r]c
548 arc
549 true arc 0
550 
551 a[r\p{InGreek}]c
552 arc
553 true arc 0
554 
555 a[^\p{InGreek}]c
556 a\u0370c
557 false 0
558 
559 a[^\P{InGreek}]c
560 a\u0370c
561 true a\u0370c 0
562 
563 a[\p{InGreek}&&[^\u0370]]c
564 a\u0370c
565 false 0
566 
567 // Test the dot metacharacter
568 a.c.+
569 a#c%&
570 true a#c%& 0
571 
572 ab.
573 ab\n
574 false 0
575 
576 (?s)ab.
577 ab\n
578 true ab\n 0
579 
580 a[\p{L}&&[\P{InGreek}]]c
581 a\u6000c
582 true a\u6000c 0
583 
584 a[\p{L}&&[\P{InGreek}]]c
585 arc
586 true arc 0
587 
588 a[\p{L}&&[\P{InGreek}]]c
589 a\u0370c
590 false 0
591 
592 a\p{InGreek}c
593 a\u0370c
594 true a\u0370c 0
595 
596 a\p{Sc}
597 a$
598 true a$ 0
599 
600 // Test the word char escape sequence
601 ab\wc
602 abcc
603 true abcc 0
604 
605 \W\w\W
606 #r#
607 true #r# 0
608 
609 \W\w\W
610 rrrr#ggg
611 false 0
612 
613 abc[\w]
614 abcd
615 true abcd 0
616 
617 abc[\sdef]*
618 abc  def
619 true abc  def 0
620 
621 abc[\sy-z]*
622 abc y z
623 true abc y z 0
624 
625 abc[a-d\sm-p]*
626 abcaa mn  p
627 true abcaa mn  p 0
628 
629 // Test the whitespace escape sequence
630 ab\sc
631 ab c
632 true ab c 0
633 
634 \s\s\s
635 blah  err
636 false 0
637 
638 \S\S\s
639 blah  err
640 true ah  0
641 
642 // Test the digit escape sequence
643 ab\dc
644 ab9c
645 true ab9c 0
646 
647 \d\d\d
648 blah45
649 false 0
650 
651 // Test the caret metacharacter
652 ^abc
653 abcdef
654 true abc 0
655 
656 ^abc
657 bcdabc
658 false 0
659 
660 // Greedy ? metacharacter
661 a?b
662 aaaab
663 true ab 0
664 
665 a?b
666 b
667 true b 0
668 
669 a?b
670 aaaccc
671 false 0
672 
673 .?b
674 aaaab
675 true ab 0
676 
677 // Reluctant ? metacharacter
678 a??b
679 aaaab
680 true ab 0
681 
682 a??b
683 b
684 true b 0
685 
686 a??b
687 aaaccc
688 false 0
689 
690 .??b
691 aaaab
692 true ab 0
693 
694 // Possessive ? metacharacter
695 a?+b
696 aaaab
697 true ab 0
698 
699 a?+b
700 b
701 true b 0
702 
703 a?+b
704 aaaccc
705 false 0
706 
707 .?+b
708 aaaab
709 true ab 0
710 
711 // Greedy + metacharacter
712 a+b
713 aaaab
714 true aaaab 0
715 
716 a+b
717 b
718 false 0
719 
720 a+b
721 aaaccc
722 false 0
723 
724 .+b
725 aaaab
726 true aaaab 0
727 
728 // Reluctant + metacharacter
729 a+?b
730 aaaab
731 true aaaab 0
732 
733 a+?b
734 b
735 false 0
736 
737 a+?b
738 aaaccc
739 false 0
740 
741 .+?b
742 aaaab
743 true aaaab 0
744 
745 // Possessive + metacharacter
746 a++b
747 aaaab
748 true aaaab 0
749 
750 a++b
751 b
752 false 0
753 
754 a++b
755 aaaccc
756 false 0
757 
758 .++b
759 aaaab
760 false 0
761 
762 // Greedy Repetition
763 a{2,3}
764 a
765 false 0
766 
767 a{2,3}
768 aa
769 true aa 0
770 
771 a{2,3}
772 aaa
773 true aaa 0
774 
775 a{2,3}
776 aaaa
777 true aaa 0
778 
779 a{3,}
780 zzzaaaazzz
781 true aaaa 0
782 
783 a{3,}
784 zzzaazzz
785 false 0
786 
787 // Reluctant Repetition
788 a{2,3}?
789 a
790 false 0
791 
792 a{2,3}?
793 aa
794 true aa 0
795 
796 a{2,3}?
797 aaa
798 true aa 0
799 
800 a{2,3}?
801 aaaa
802 true aa 0
803 
804 // Zero width Positive lookahead
805 abc(?=d)
806 zzzabcd
807 true abc 0
808 
809 abc(?=d)
810 zzzabced
811 false 0
812 
813 // Zero width Negative lookahead
814 abc(?!d)
815 zzabcd
816 false 0
817 
818 abc(?!d)
819 zzabced
820 true abc 0
821 
822 // Zero width Positive lookbehind
823 \w(?<=a)
824 ###abc###
825 true a 0
826 
827 \w(?<=a)
828 ###ert###
829 false 0
830 
831 // Zero width Negative lookbehind
832 (?<!a)\w
833 ###abc###
834 true a 0
835 
836 (?<!a)c
837 bc
838 true c 0
839 
840 (?<!a)c
841 ac
842 false 0
843 
844 // Nondeterministic group
845 (a+b)+
846 ababab
847 true ababab 1 ab
848 
849 (a|b)+
850 ccccd
851 false 1
852 
853 // Deterministic group
854 (ab)+
855 ababab
856 true ababab 1 ab
857 
858 (ab)+
859 accccd
860 false 1
861 
862 (ab)*
863 ababab
864 true ababab 1 ab
865 
866 (ab)(cd*)
867 zzzabczzz
868 true abc 2 ab c
869 
870 abc(d)*abc
871 abcdddddabc
872 true abcdddddabc 1 d
873 
874 // Escaped metacharacter
875 \*
876 *
877 true * 0
878 
879 \\
880 \
881 true \ 0
882 
883 \\
884 \\\\
885 true \ 0
886 
887 // Back references
888 (a*)bc\1
889 zzzaabcaazzz
890 true aabcaa 1 aa
891 
892 (a*)bc\1
893 zzzaabcazzz
894 true abca 1 a
895 
896 (gt*)(dde)*(yu)\1\3(vv)
897 zzzgttddeddeyugttyuvvzzz
898 true gttddeddeyugttyuvv 4 gtt dde yu vv
899 
900 // Greedy * metacharacter
901 a*b
902 aaaab
903 true aaaab 0
904 
905 a*b
906 b
907 true b 0
908 
909 a*b
910 aaaccc
911 false 0
912 
913 .*b
914 aaaab
915 true aaaab 0
916 
917 // Reluctant * metacharacter
918 a*?b
919 aaaab
920 true aaaab 0
921 
922 a*?b
923 b
924 true b 0
925 
926 a*?b
927 aaaccc
928 false 0
929 
930 .*?b
931 aaaab
932 true aaaab 0
933 
934 // Possessive * metacharacter
935 a*+b
936 aaaab
937 true aaaab 0
938 
939 a*+b
940 b
941 true b 0
942 
943 a*+b
944 aaaccc
945 false 0
946 
947 .*+b
948 aaaab
949 false 0
950 
951 // Case insensitivity
952 (?i)foobar
953 fOobAr
954 true fOobAr 0
955 
956 f(?i)oobar
957 fOobAr
958 true fOobAr 0
959 
960 foo(?i)bar
961 fOobAr
962 false 0
963 
964 (?i)foo[bar]+
965 foObAr
966 true foObAr 0
967 
968 (?i)foo[a-r]+
969 foObAr
970 true foObAr 0
971 
 972 // Disable metacharacters - test both length <= 3 and length > 3
 973 // so that the BM optimization is part of the test
974 \Q***\Eabc
975 ***abc
976 true ***abc 0
977 
978 bl\Q***\Eabc
979 bl***abc
980 true bl***abc 0
981 
982 \Q***abc
983 ***abc
984 true ***abc 0
985 
986 blah\Q***\Eabc
987 blah***abc
988 true blah***abc 0
989 
990 \Q***abc
991 ***abc
992 true ***abc 0
993 
994 \Q*ab
995 *ab
996 true *ab 0
997 
998 blah\Q***abc
999 blah***abc
1000 true blah***abc 0
1001 
1002 bla\Q***abc
1003 bla***abc
1004 true bla***abc 0
1005 
1006 // Escapes in char classes
1007 [ab\Qdef\E]
1008 d
1009 true d 0
1010 
1011 [ab\Q[\E]
1012 [
1013 true [ 0
1014 
1015 [\Q]\E]
1016 ]
1017 true ] 0
1018 
1019 [\Q\\E]
1020 \
1021 true \ 0
1022 
1023 [\Q(\E]
1024 (
1025 true ( 0
1026 
1027 [\n-#]
1028 !
1029 true ! 0
1030 
1031 [\n-#]
1032 -
1033 false 0
1034 
1035 [\w-#]
1036 !
1037 false 0
1038 
1039 [\w-#]
1040 a
1041 true a 0
1042 
1043 [\w-#]
1044 -
1045 true - 0
1046 
1047 [\w-#]
1048 #
1049 true # 0
1050 
1051 [\043]+
1052 blahblah#blech
1053 true # 0
1054 
1055 [\042-\044]+
1056 blahblah#blech
1057 true # 0
1058 
1059 [\u1234-\u1236]
1060 blahblah\u1235blech
1061 true \u1235 0
1062 
1063 [^\043]*
1064 blahblah#blech
1065 true blahblah 0