978 */
979 transient Node matchRoot;
980
981 /**
982 * Temporary storage used by parsing pattern slice.
983 */
984 transient int[] buffer;
985
986 /**
987 * Map the "name" of the "named capturing group" to its group id
988 * node.
989 */
990 transient volatile Map<String, Integer> namedGroups;
991
992 /**
993 * Temporary storage used while parsing group references.
994 */
995 transient GroupHead[] groupNodes;
996
997 /**
998 * Temporary null terminated code point array used by pattern compiling.
999 */
1000 private transient int[] temp;
1001
1002 /**
1003 * The number of capturing groups in this Pattern. Used by matchers to
1004 * allocate storage needed to perform a match.
1005 */
1006 transient int capturingGroupCount;
1007
1008 /**
1009 * The local variable count used by parsing tree. Used by matchers to
1010 * allocate storage needed to perform a match.
1011 */
1012 transient int localCount;
1013
1014 /**
1015 * Index into the pattern string that keeps track of how much has been
1016 * parsed.
1017 */
1321 } while ((slashEIndex = s.indexOf("\\E", current)) != -1);
1322
1323 return sb.append(s, current, s.length())
1324 .append("\\E")
1325 .toString();
1326 }
1327
1328 /**
1329 * Recompile the Pattern instance from a stream. The original pattern
1330 * string is read in and the object tree is recompiled from it.
1331 */
1332 private void readObject(java.io.ObjectInputStream s)
1333 throws java.io.IOException, ClassNotFoundException {
1334
1335 // Read in all fields
1336 s.defaultReadObject();
1337
1338 // Initialize counts
1339 capturingGroupCount = 1;
1340 localCount = 0;
1341
1342 // if length > 0, the Pattern is lazily compiled
1343 if (pattern.length() == 0) {
1344 root = new Start(lastAccept);
1345 matchRoot = lastAccept;
1346 compiled = true;
1347 }
1348 }
1349
1350 /**
1351 * This private constructor is used to create all Patterns. The pattern
1352 * string and match flags are all that is needed to completely describe
1353 * a Pattern. An empty pattern string results in an object tree with
1354 * only a Start node and a LastNode node.
1355 */
1356 private Pattern(String p, int f) {
1357 if ((f & ~ALL_FLAGS) != 0) {
1358 throw new IllegalArgumentException("Unknown flag 0x"
1359 + Integer.toHexString(f));
1360 }
1361 pattern = p;
1362 flags = f;
1363
1364 // to use UNICODE_CASE if UNICODE_CHARACTER_CLASS present
1365 if ((flags & UNICODE_CHARACTER_CLASS) != 0)
1366 flags |= UNICODE_CASE;
1367
1368 // Reset group index count
1369 capturingGroupCount = 1;
1370 localCount = 0;
1371
1372 if (pattern.length() > 0) {
1373 compile();
1374 } else {
1375 root = new Start(lastAccept);
1376 matchRoot = lastAccept;
1377 }
1378 }
1379
1380 /**
1381 * The pattern is converted to normalized form ({@linkplain
1382 * java.text.Normalizer.Form.NFD NFD}, canonical decomposition)
1383 * and then a pure group is constructed to match canonical
1384 * equivalences of the characters.
1385 */
1386 private void normalize() {
1387 int lastCodePoint = -1;
1388
1389 // Convert pattern into normalized form
1390 normalizedPattern = Normalizer.normalize(pattern, Normalizer.Form.NFD);
1690 hasSupplementary = false;
1691 int c, count = 0;
1692 // Convert all chars into code points
1693 for (int x = 0; x < patternLength; x += Character.charCount(c)) {
1694 c = normalizedPattern.codePointAt(x);
1695 if (isSupplementary(c)) {
1696 hasSupplementary = true;
1697 }
1698 temp[count++] = c;
1699 }
1700
1701 patternLength = count; // patternLength now in code points
1702
1703 if (! has(LITERAL))
1704 RemoveQEQuoting();
1705
1706 // Allocate all temporary objects here.
1707 buffer = new int[32];
1708 groupNodes = new GroupHead[10];
1709 namedGroups = null;
1710
1711 if (has(LITERAL)) {
1712 // Literal pattern handling
1713 matchRoot = newSlice(temp, patternLength, hasSupplementary);
1714 matchRoot.next = lastAccept;
1715 } else {
1716 // Start recursive descent parsing
1717 matchRoot = expr(lastAccept);
1718 // Check extra pattern characters
1719 if (patternLength != cursor) {
1720 if (peek() == ')') {
1721 throw error("Unmatched closing ')'");
1722 } else {
1723 throw error("Unexpected internal error");
1724 }
1725 }
1726 }
1727
1728 // Peephole optimization
1729 if (matchRoot instanceof Slice) {
1730 root = BnM.optimize(matchRoot);
1731 if (root == matchRoot) {
1732 root = hasSupplementary ? new StartS(matchRoot) : new Start(matchRoot);
1733 }
1734 } else if (matchRoot instanceof Begin || matchRoot instanceof First) {
1735 root = matchRoot;
1736 } else {
1737 root = hasSupplementary ? new StartS(matchRoot) : new Start(matchRoot);
1738 }
1739
1740 // Release temporary storage
1741 temp = null;
1742 buffer = null;
1743 groupNodes = null;
1744 patternLength = 0;
1745 compiled = true;
1746 }
1747
1748 Map<String, Integer> namedGroups() {
1749 Map<String, Integer> groups = namedGroups;
1750 if (groups == null) {
1751 namedGroups = groups = new HashMap<>(2);
1752 }
1753 return groups;
1754 }
1755
1756 /**
1757 * Used to print out a subtree of the Pattern to help with debugging.
1758 */
1759 private static void printObjectTree(Node node) {
1760 while(node != null) {
1761 if (node instanceof Prolog) {
1762 System.out.println(node);
1763 printObjectTree(((Prolog)node).loop);
1764 System.out.println("**** end contents prolog loop");
1765 } else if (node instanceof Loop) {
2055 next();
2056 }
2057 }
2058
2059 @SuppressWarnings("fallthrough")
2060 /**
2061 * Parsing of sequences between alternations.
2062 */
2063 private Node sequence(Node end) {
2064 Node head = null;
2065 Node tail = null;
2066 Node node = null;
2067 LOOP:
2068 for (;;) {
2069 int ch = peek();
2070 switch (ch) {
2071 case '(':
2072 // Because group handles its own closure,
2073 // we need to treat it differently
2074 node = group0();
2075 // Check for comment or flag group
2076 if (node == null)
2077 continue;
2078 if (head == null)
2079 head = node;
2080 else
2081 tail.next = node;
2082 // Double return: Tail was returned in root
2083 tail = root;
2084 continue;
2085 case '[':
2086 node = clazz(true);
2087 break;
2088 case '\\':
2089 ch = nextEscaped();
2090 if (ch == 'p' || ch == 'P') {
2091 boolean oneLetter = true;
2092 boolean comp = (ch == 'P');
2093 ch = next(); // Consume { if present
2094 if (ch != '{') {
2138 case ']': // Now interpreting dangling ] and } as literals
2139 case '}':
2140 node = atom();
2141 break;
2142 case '?':
2143 case '*':
2144 case '+':
2145 next();
2146 throw error("Dangling meta character '" + ((char)ch) + "'");
2147 case 0:
2148 if (cursor >= patternLength) {
2149 break LOOP;
2150 }
2151 // Fall through
2152 default:
2153 node = atom();
2154 break;
2155 }
2156
2157 node = closure(node);
2158
2159 if (head == null) {
2160 head = tail = node;
2161 } else {
2162 tail.next = node;
2163 tail = node;
2164 }
2165 }
2166 if (head == null) {
2167 return end;
2168 }
2169 tail.next = end;
2170 root = tail; //double return
2171 return head;
2172 }
2173
2174 @SuppressWarnings("fallthrough")
2175 /**
2176 * Parse and add a new Single or Slice.
2177 */
2178 private Node atom() {
2842 ASCII.isDigit(ch)) {
2843 sb.append(Character.toChars(ch));
2844 }
2845 if (sb.length() == 0)
2846 throw error("named capturing group has 0 length name");
2847 if (ch != '>')
2848 throw error("named capturing group is missing trailing '>'");
2849 return sb.toString();
2850 }
2851
2852 /**
2853 * Parses a group and returns the head node of a set of nodes that process
2854 * the group. Sometimes a double return system is used where the tail is
2855 * returned in root.
2856 */
2857 private Node group0() {
2858 boolean capturingGroup = false;
2859 Node head = null;
2860 Node tail = null;
2861 int save = flags;
2862 root = null;
2863 int ch = next();
2864 if (ch == '?') {
2865 ch = skip();
2866 switch (ch) {
2867 case ':': // (?:xxx) pure group
2868 head = createGroup(true);
2869 tail = root;
2870 head.next = expr(tail);
2871 break;
2872 case '=': // (?=xxx) and (?!xxx) lookahead
2873 case '!':
2874 head = createGroup(true);
2875 tail = root;
2876 head.next = expr(tail);
2877 if (ch == '=') {
2878 head = tail = new Pos(head);
2879 } else {
2880 head = tail = new Neg(head);
2881 }
2951 capturingGroup = true;
2952 head = createGroup(false);
2953 tail = root;
2954 head.next = expr(tail);
2955 }
2956
2957 accept(')', "Unclosed group");
2958 flags = save;
2959
2960 // Check for quantifiers
2961 Node node = closure(head);
2962 if (node == head) { // No closure
2963 root = tail;
2964 return node; // Dual return
2965 }
2966 if (head == tail) { // Zero length assertion
2967 root = node;
2968 return node; // Dual return
2969 }
2970
2971 if (node instanceof Ques) {
2972 Ques ques = (Ques) node;
2973 if (ques.type == POSSESSIVE) {
2974 root = node;
2975 return node;
2976 }
2977 tail.next = new BranchConn();
2978 tail = tail.next;
2979 if (ques.type == GREEDY) {
2980 head = new Branch(head, null, tail);
2981 } else { // Reluctant quantifier
2982 head = new Branch(null, head, tail);
2983 }
2984 root = tail;
2985 return head;
2986 } else if (node instanceof Curly) {
2987 Curly curly = (Curly) node;
2988 if (curly.type == POSSESSIVE) {
2989 root = node;
2990 return node;
2991 }
2992 // Discover if the group is deterministic
2993 TreeInfo info = new TreeInfo();
2994 if (head.study(info)) { // Deterministic
2995 GroupTail temp = (GroupTail) tail;
2996 head = root = new GroupCurly(head.next, curly.cmin,
2997 curly.cmax, curly.type,
2998 ((GroupTail)tail).localIndex,
2999 ((GroupTail)tail).groupIndex,
3000 capturingGroup);
3001 return head;
3002 } else { // Non-deterministic
3003 int temp = ((GroupHead) head).localIndex;
3004 Loop loop;
3005 if (curly.type == GREEDY)
3006 loop = new Loop(this.localCount, temp);
3007 else // Reluctant Curly
3008 loop = new LazyLoop(this.localCount, temp);
3009 Prolog prolog = new Prolog(loop);
3010 this.localCount += 1;
3011 loop.cmin = curly.cmin;
3012 loop.cmax = curly.cmax;
3013 loop.body = head;
3014 tail.next = loop;
3015 root = loop;
3016 return prolog; // Dual return
3017 }
3018 }
3019 throw error("Internal logic error");
3020 }
3021
3022 /**
3023 * Create group head and tail nodes using double return. If the group is
3024 * created with anonymous true then it is a pure group and should not
3025 * affect group counting.
3026 */
3027 private Node createGroup(boolean anonymous) {
3028 int localIndex = localCount++;
4859 }
4860 boolean match(Matcher matcher, int i, CharSequence seq) {
4861 return loop.matchInit(matcher, i, seq);
4862 }
4863 boolean study(TreeInfo info) {
4864 return loop.study(info);
4865 }
4866 }
4867
4868 /**
4869 * Handles the repetition count for a greedy Curly. The matchInit
4870 * is called from the Prolog to save the index of where the group
4871 * beginning is stored. A zero length group check occurs in the
4872 * normal match but is skipped in the matchInit.
4873 */
4874 static class Loop extends Node {
4875 Node body;
4876 int countIndex; // local count index in matcher locals
4877 int beginIndex; // group beginning index
4878 int cmin, cmax;
4879 Loop(int countIndex, int beginIndex) {
4880 this.countIndex = countIndex;
4881 this.beginIndex = beginIndex;
4882 }
4883 boolean match(Matcher matcher, int i, CharSequence seq) {
4884 // Avoid infinite loop in zero-length case.
4885 if (i > matcher.locals[beginIndex]) {
4886 int count = matcher.locals[countIndex];
4887
4888 // This block is for before we reach the minimum
4889 // iterations required for the loop to match
4890 if (count < cmin) {
4891 matcher.locals[countIndex] = count + 1;
4892 boolean b = body.match(matcher, i, seq);
4893 // If match failed we must backtrack, so
4894 // the loop count should NOT be incremented
4895 if (!b)
4896 matcher.locals[countIndex] = count;
4897 // Return success or failure since we are under
4898 // minimum
4899 return b;
4900 }
4901 // This block is for after we have the minimum
4902 // iterations required for the loop to match
4903 if (count < cmax) {
4904 matcher.locals[countIndex] = count + 1;
4905 boolean b = body.match(matcher, i, seq);
4906 // If match failed we must backtrack, so
4907 // the loop count should NOT be incremented
4908 if (!b)
4909 matcher.locals[countIndex] = count;
4910 else
4911 return true;
4912 }
4913 }
4914 return next.match(matcher, i, seq);
4915 }
4916 boolean matchInit(Matcher matcher, int i, CharSequence seq) {
4917 int save = matcher.locals[countIndex];
4918 boolean ret = false;
4919 if (0 < cmin) {
4920 matcher.locals[countIndex] = 1;
4921 ret = body.match(matcher, i, seq);
4922 } else if (0 < cmax) {
4923 matcher.locals[countIndex] = 1;
4924 ret = body.match(matcher, i, seq);
4925 if (ret == false)
4926 ret = next.match(matcher, i, seq);
4927 } else {
4928 ret = next.match(matcher, i, seq);
4929 }
4930 matcher.locals[countIndex] = save;
4931 return ret;
4932 }
4933 boolean study(TreeInfo info) {
4934 info.maxValid = false;
4935 info.deterministic = false;
4936 return false;
4937 }
4938 }
4939
4940 /**
4941 * Handles the repetition count for a reluctant Curly. The matchInit
4942 * is called from the Prolog to save the index of where the group
4943 * beginning is stored. A zero length group check occurs in the
4944 * normal match but is skipped in the matchInit.
4945 */
4946 static final class LazyLoop extends Loop {
4947 LazyLoop(int countIndex, int beginIndex) {
4948 super(countIndex, beginIndex);
4949 }
4950 boolean match(Matcher matcher, int i, CharSequence seq) {
4951 // Check for zero length group
4952 if (i > matcher.locals[beginIndex]) {
|
978 */
979 transient Node matchRoot;
980
981 /**
982 * Temporary storage used by parsing pattern slice.
983 */
984 transient int[] buffer;
985
986 /**
987 * Map the "name" of the "named capturing group" to its group id
988 * node.
989 */
990 transient volatile Map<String, Integer> namedGroups;
991
992 /**
993 * Temporary storage used while parsing group references.
994 */
995 transient GroupHead[] groupNodes;
996
997 /**
998 * Temporary storage used to store the top level closure nodes.
999 */
1000 transient ArrayList<Node> topClosureNodes;
1001
1002 /**
1003 * The number of top greedy repetition groups in this Pattern. Used by
1004 * matchers to allocate storage needed for a HashSetInt to keep the
1005 * beginning pos {@code i} of all past failed match.
1006 */
1007 transient int localTGRGroupCount;
1008
1009 /**
1010 * Temporary null terminated code point array used by pattern compiling.
1011 */
1012 private transient int[] temp;
1013
1014 /**
1015 * The number of capturing groups in this Pattern. Used by matchers to
1016 * allocate storage needed to perform a match.
1017 */
1018 transient int capturingGroupCount;
1019
1020 /**
1021 * The local variable count used by parsing tree. Used by matchers to
1022 * allocate storage needed to perform a match.
1023 */
1024 transient int localCount;
1025
1026 /**
1027 * Index into the pattern string that keeps track of how much has been
1028 * parsed.
1029 */
1333 } while ((slashEIndex = s.indexOf("\\E", current)) != -1);
1334
1335 return sb.append(s, current, s.length())
1336 .append("\\E")
1337 .toString();
1338 }
1339
1340 /**
1341 * Recompile the Pattern instance from a stream. The original pattern
1342 * string is read in and the object tree is recompiled from it.
1343 */
1344 private void readObject(java.io.ObjectInputStream s)
1345 throws java.io.IOException, ClassNotFoundException {
1346
1347 // Read in all fields
1348 s.defaultReadObject();
1349
1350 // Initialize counts
1351 capturingGroupCount = 1;
1352 localCount = 0;
1353 localTGRGroupCount = 0;
1354
1355 // if length > 0, the Pattern is lazily compiled
1356 if (pattern.length() == 0) {
1357 root = new Start(lastAccept);
1358 matchRoot = lastAccept;
1359 compiled = true;
1360 }
1361 }
1362
1363 /**
1364 * This private constructor is used to create all Patterns. The pattern
1365 * string and match flags are all that is needed to completely describe
1366 * a Pattern. An empty pattern string results in an object tree with
1367 * only a Start node and a LastNode node.
1368 */
1369 private Pattern(String p, int f) {
1370 if ((f & ~ALL_FLAGS) != 0) {
1371 throw new IllegalArgumentException("Unknown flag 0x"
1372 + Integer.toHexString(f));
1373 }
1374 pattern = p;
1375 flags = f;
1376
1377 // to use UNICODE_CASE if UNICODE_CHARACTER_CLASS present
1378 if ((flags & UNICODE_CHARACTER_CLASS) != 0)
1379 flags |= UNICODE_CASE;
1380
1381 // Reset group index count
1382 capturingGroupCount = 1;
1383 localCount = 0;
1384 localTGRGroupCount = 0;
1385
1386 if (pattern.length() > 0) {
1387 compile();
1388 } else {
1389 root = new Start(lastAccept);
1390 matchRoot = lastAccept;
1391 }
1392 }
1393
1394 /**
1395 * The pattern is converted to normalized form ({@linkplain
1396 * java.text.Normalizer.Form.NFD NFD}, canonical decomposition)
1397 * and then a pure group is constructed to match canonical
1398 * equivalences of the characters.
1399 */
1400 private void normalize() {
1401 int lastCodePoint = -1;
1402
1403 // Convert pattern into normalized form
1404 normalizedPattern = Normalizer.normalize(pattern, Normalizer.Form.NFD);
1704 hasSupplementary = false;
1705 int c, count = 0;
1706 // Convert all chars into code points
1707 for (int x = 0; x < patternLength; x += Character.charCount(c)) {
1708 c = normalizedPattern.codePointAt(x);
1709 if (isSupplementary(c)) {
1710 hasSupplementary = true;
1711 }
1712 temp[count++] = c;
1713 }
1714
1715 patternLength = count; // patternLength now in code points
1716
1717 if (! has(LITERAL))
1718 RemoveQEQuoting();
1719
1720 // Allocate all temporary objects here.
1721 buffer = new int[32];
1722 groupNodes = new GroupHead[10];
1723 namedGroups = null;
1724 topClosureNodes = new ArrayList<>(10);
1725
1726 if (has(LITERAL)) {
1727 // Literal pattern handling
1728 matchRoot = newSlice(temp, patternLength, hasSupplementary);
1729 matchRoot.next = lastAccept;
1730 } else {
1731 // Start recursive descent parsing
1732 matchRoot = expr(lastAccept);
1733 // Check extra pattern characters
1734 if (patternLength != cursor) {
1735 if (peek() == ')') {
1736 throw error("Unmatched closing ')'");
1737 } else {
1738 throw error("Unexpected internal error");
1739 }
1740 }
1741 }
1742
1743 // Peephole optimization
1744 if (matchRoot instanceof Slice) {
1745 root = BnM.optimize(matchRoot);
1746 if (root == matchRoot) {
1747 root = hasSupplementary ? new StartS(matchRoot) : new Start(matchRoot);
1748 }
1749 } else if (matchRoot instanceof Begin || matchRoot instanceof First) {
1750 root = matchRoot;
1751 } else {
1752 root = hasSupplementary ? new StartS(matchRoot) : new Start(matchRoot);
1753 }
1754
1755 for (Node node : topClosureNodes) {
1756 if (node instanceof Loop) {
1757 ((Loop)node).posIndex = localTGRGroupCount++;
1758 }
1759 }
1760
1761 // Release temporary storage
1762 temp = null;
1763 buffer = null;
1764 groupNodes = null;
1765 patternLength = 0;
1766 compiled = true;
1767 topClosureNodes = null;
1768 }
1769
1770 Map<String, Integer> namedGroups() {
1771 Map<String, Integer> groups = namedGroups;
1772 if (groups == null) {
1773 namedGroups = groups = new HashMap<>(2);
1774 }
1775 return groups;
1776 }
1777
1778 /**
1779 * Used to print out a subtree of the Pattern to help with debugging.
1780 */
1781 private static void printObjectTree(Node node) {
1782 while(node != null) {
1783 if (node instanceof Prolog) {
1784 System.out.println(node);
1785 printObjectTree(((Prolog)node).loop);
1786 System.out.println("**** end contents prolog loop");
1787 } else if (node instanceof Loop) {
2077 next();
2078 }
2079 }
2080
2081 @SuppressWarnings("fallthrough")
2082 /**
2083 * Parsing of sequences between alternations.
2084 */
2085 private Node sequence(Node end) {
2086 Node head = null;
2087 Node tail = null;
2088 Node node = null;
2089 LOOP:
2090 for (;;) {
2091 int ch = peek();
2092 switch (ch) {
2093 case '(':
2094 // Because group handles its own closure,
2095 // we need to treat it differently
2096 node = group0();
2097
2098 // Check for comment or flag group
2099 if (node == null)
2100 continue;
2101 if (head == null)
2102 head = node;
2103 else
2104 tail.next = node;
2105 // Double return: Tail was returned in root
2106 tail = root;
2107 continue;
2108 case '[':
2109 node = clazz(true);
2110 break;
2111 case '\\':
2112 ch = nextEscaped();
2113 if (ch == 'p' || ch == 'P') {
2114 boolean oneLetter = true;
2115 boolean comp = (ch == 'P');
2116 ch = next(); // Consume { if present
2117 if (ch != '{') {
2161 case ']': // Now interpreting dangling ] and } as literals
2162 case '}':
2163 node = atom();
2164 break;
2165 case '?':
2166 case '*':
2167 case '+':
2168 next();
2169 throw error("Dangling meta character '" + ((char)ch) + "'");
2170 case 0:
2171 if (cursor >= patternLength) {
2172 break LOOP;
2173 }
2174 // Fall through
2175 default:
2176 node = atom();
2177 break;
2178 }
2179
2180 node = closure(node);
2181 // save the greddy curly
2182 if (node instanceof Curly && ((Curly)node).type == GREEDY) {
2183 topClosureNodes.add(node);
2184 }
2185 if (head == null) {
2186 head = tail = node;
2187 } else {
2188 tail.next = node;
2189 tail = node;
2190 }
2191 }
2192 if (head == null) {
2193 return end;
2194 }
2195 tail.next = end;
2196 root = tail; //double return
2197 return head;
2198 }
2199
2200 @SuppressWarnings("fallthrough")
2201 /**
2202 * Parse and add a new Single or Slice.
2203 */
2204 private Node atom() {
2868 ASCII.isDigit(ch)) {
2869 sb.append(Character.toChars(ch));
2870 }
2871 if (sb.length() == 0)
2872 throw error("named capturing group has 0 length name");
2873 if (ch != '>')
2874 throw error("named capturing group is missing trailing '>'");
2875 return sb.toString();
2876 }
2877
2878 /**
2879 * Parses a group and returns the head node of a set of nodes that process
2880 * the group. Sometimes a double return system is used where the tail is
2881 * returned in root.
2882 */
2883 private Node group0() {
2884 boolean capturingGroup = false;
2885 Node head = null;
2886 Node tail = null;
2887 int save = flags;
2888
2889 int saveTCNCount = topClosureNodes.size();
2890
2891 root = null;
2892 int ch = next();
2893 if (ch == '?') {
2894 ch = skip();
2895 switch (ch) {
2896 case ':': // (?:xxx) pure group
2897 head = createGroup(true);
2898 tail = root;
2899 head.next = expr(tail);
2900 break;
2901 case '=': // (?=xxx) and (?!xxx) lookahead
2902 case '!':
2903 head = createGroup(true);
2904 tail = root;
2905 head.next = expr(tail);
2906 if (ch == '=') {
2907 head = tail = new Pos(head);
2908 } else {
2909 head = tail = new Neg(head);
2910 }
2980 capturingGroup = true;
2981 head = createGroup(false);
2982 tail = root;
2983 head.next = expr(tail);
2984 }
2985
2986 accept(')', "Unclosed group");
2987 flags = save;
2988
2989 // Check for quantifiers
2990 Node node = closure(head);
2991 if (node == head) { // No closure
2992 root = tail;
2993 return node; // Dual return
2994 }
2995 if (head == tail) { // Zero length assertion
2996 root = node;
2997 return node; // Dual return
2998 }
2999
3000 // have group closure, clear all embedded closure nodes
3001 // from the top list.
3002 if (saveTCNCount < topClosureNodes.size())
3003 topClosureNodes.subList(saveTCNCount, topClosureNodes.size()).clear();
3004
3005 if (node instanceof Ques) {
3006 Ques ques = (Ques) node;
3007 if (ques.type == POSSESSIVE) {
3008 root = node;
3009 return node;
3010 }
3011 tail.next = new BranchConn();
3012 tail = tail.next;
3013 if (ques.type == GREEDY) {
3014 head = new Branch(head, null, tail);
3015 } else { // Reluctant quantifier
3016 head = new Branch(null, head, tail);
3017 }
3018 root = tail;
3019 return head;
3020 } else if (node instanceof Curly) {
3021 Curly curly = (Curly) node;
3022 if (curly.type == POSSESSIVE) {
3023 root = node;
3024 return node;
3025 }
3026 // Discover if the group is deterministic
3027 TreeInfo info = new TreeInfo();
3028 if (head.study(info)) { // Deterministic
3029 GroupTail temp = (GroupTail) tail;
3030 head = root = new GroupCurly(head.next, curly.cmin,
3031 curly.cmax, curly.type,
3032 ((GroupTail)tail).localIndex,
3033 ((GroupTail)tail).groupIndex,
3034 capturingGroup);
3035 return head;
3036 } else { // Non-deterministic
3037 int temp = ((GroupHead) head).localIndex;
3038 Loop loop;
3039 if (curly.type == GREEDY) {
3040 loop = new Loop(this.localCount, temp);
3041 // add the non-determinsitic/greeddy to the list
3042 topClosureNodes.add(loop);
3043 } else { // Reluctant Curly
3044 loop = new LazyLoop(this.localCount, temp);
3045 }
3046 Prolog prolog = new Prolog(loop);
3047 this.localCount += 1;
3048 loop.cmin = curly.cmin;
3049 loop.cmax = curly.cmax;
3050 loop.body = head;
3051 tail.next = loop;
3052 root = loop;
3053 return prolog; // Dual return
3054 }
3055 }
3056 throw error("Internal logic error");
3057 }
3058
3059 /**
3060 * Create group head and tail nodes using double return. If the group is
3061 * created with anonymous true then it is a pure group and should not
3062 * affect group counting.
3063 */
3064 private Node createGroup(boolean anonymous) {
3065 int localIndex = localCount++;
4896 }
4897 boolean match(Matcher matcher, int i, CharSequence seq) {
4898 return loop.matchInit(matcher, i, seq);
4899 }
4900 boolean study(TreeInfo info) {
4901 return loop.study(info);
4902 }
4903 }
4904
4905 /**
4906 * Handles the repetition count for a greedy Curly. The matchInit
4907 * is called from the Prolog to save the index of where the group
4908 * beginning is stored. A zero length group check occurs in the
4909 * normal match but is skipped in the matchInit.
4910 */
4911 static class Loop extends Node {
4912 Node body;
4913 int countIndex; // local count index in matcher locals
4914 int beginIndex; // group beginning index
4915 int cmin, cmax;
4916 int posIndex;
4917
4918 Loop(int countIndex, int beginIndex) {
4919 this.countIndex = countIndex;
4920 this.beginIndex = beginIndex;
4921 this.posIndex = -1;
4922 }
4923 boolean match(Matcher matcher, int i, CharSequence seq) {
4924 // Avoid infinite loop in zero-length case.
4925 if (i > matcher.locals[beginIndex]) {
4926 int count = matcher.locals[countIndex];
4927 // This block is for before we reach the minimum
4928 // iterations required for the loop to match
4929 if (count < cmin) {
4930 matcher.locals[countIndex] = count + 1;
4931 boolean b = body.match(matcher, i, seq);
4932 // If match failed we must backtrack, so
4933 // the loop count should NOT be incremented
4934 if (!b)
4935 matcher.locals[countIndex] = count;
4936 // Return success or failure since we are under
4937 // minimum
4938 return b;
4939 }
4940 // This block is for after we have the minimum
4941 // iterations required for the loop to match
4942 if (count < cmax) {
4943
4944
4945 // Let's check if we have already tried and failed
4946 // at this starting position "i" in the past.
4947 // If yes, then just return false wihtout trying
4948 // again, to stop the exponential backtracking.
4949 if (posIndex != -1 &&
4950 matcher.localsPos[posIndex].contains(i))
4951 return next.match(matcher, i, seq);
4952
4953 matcher.locals[countIndex] = count + 1;
4954 boolean b = body.match(matcher, i, seq);
4955 // If match failed we must backtrack, so
4956 // the loop count should NOT be incremented
4957 if (b)
4958 return true;
4959 matcher.locals[countIndex] = count;
4960 // save the failed position
4961 if (posIndex != -1)
4962 matcher.localsPos[posIndex].add(i);
4963
4964 }
4965 }
4966 return next.match(matcher, i, seq);
4967 }
4968
4969 boolean matchInit(Matcher matcher, int i, CharSequence seq) {
4970 int save = matcher.locals[countIndex];
4971 boolean ret = false;
4972
4973 if (posIndex != -1) {
4974 if (matcher.localsPos[posIndex] == null)
4975 matcher.localsPos[posIndex] = new HashSetInt();
4976 else
4977 matcher.localsPos[posIndex].clear();
4978 }
4979
4980 if (0 < cmin) {
4981 matcher.locals[countIndex] = 1;
4982 ret = body.match(matcher, i, seq);
4983 } else if (0 < cmax) {
4984 matcher.locals[countIndex] = 1;
4985 ret = body.match(matcher, i, seq);
4986 if (ret == false)
4987 ret = next.match(matcher, i, seq);
4988 } else {
4989 ret = next.match(matcher, i, seq);
4990 }
4991 matcher.locals[countIndex] = save;
4992 return ret;
4993 }
4994
4995 boolean study(TreeInfo info) {
4996 info.maxValid = false;
4997 info.deterministic = false;
4998 return false;
4999 }
5000 }
5001
5002 /**
5003 * Handles the repetition count for a reluctant Curly. The matchInit
5004 * is called from the Prolog to save the index of where the group
5005 * beginning is stored. A zero length group check occurs in the
5006 * normal match but is skipped in the matchInit.
5007 */
5008 static final class LazyLoop extends Loop {
5009 LazyLoop(int countIndex, int beginIndex) {
5010 super(countIndex, beginIndex);
5011 }
5012 boolean match(Matcher matcher, int i, CharSequence seq) {
5013 // Check for zero length group
5014 if (i > matcher.locals[beginIndex]) {
|