src/java.base/share/classes/java/util/regex/Pattern.java

Print this page




 978      */
 979     transient Node matchRoot;
 980 
 981     /**
 982      * Temporary storage used by parsing pattern slice.
 983      */
 984     transient int[] buffer;
 985 
 986     /**
 987      * Map the "name" of the "named capturing group" to its group id
 988      * node.
 989      */
 990     transient volatile Map<String, Integer> namedGroups;
 991 
 992     /**
 993      * Temporary storage used while parsing group references.
 994      */
 995     transient GroupHead[] groupNodes;
 996 
 997     /**












 998      * Temporary null terminated code point array used by pattern compiling.
 999      */
1000     private transient int[] temp;
1001 
1002     /**
1003      * The number of capturing groups in this Pattern. Used by matchers to
1004      * allocate storage needed to perform a match.
1005      */
1006     transient int capturingGroupCount;
1007 
1008     /**
1009      * The local variable count used by parsing tree. Used by matchers to
1010      * allocate storage needed to perform a match.
1011      */
1012     transient int localCount;
1013 
1014     /**
1015      * Index into the pattern string that keeps track of how much has been
1016      * parsed.
1017      */


1321         } while ((slashEIndex = s.indexOf("\\E", current)) != -1);
1322 
1323         return sb.append(s, current, s.length())
1324                 .append("\\E")
1325                 .toString();
1326     }
1327 
1328     /**
1329      * Recompile the Pattern instance from a stream.  The original pattern
1330      * string is read in and the object tree is recompiled from it.
1331      */
1332     private void readObject(java.io.ObjectInputStream s)
1333         throws java.io.IOException, ClassNotFoundException {
1334 
1335         // Read in all fields
1336         s.defaultReadObject();
1337 
1338         // Initialize counts
1339         capturingGroupCount = 1;
1340         localCount = 0;

1341 
1342         // if length > 0, the Pattern is lazily compiled
1343         if (pattern.length() == 0) {
1344             root = new Start(lastAccept);
1345             matchRoot = lastAccept;
1346             compiled = true;
1347         }
1348     }
1349 
1350     /**
1351      * This private constructor is used to create all Patterns. The pattern
1352      * string and match flags are all that is needed to completely describe
1353      * a Pattern. An empty pattern string results in an object tree with
1354      * only a Start node and a LastNode node.
1355      */
1356     private Pattern(String p, int f) {
1357         if ((f & ~ALL_FLAGS) != 0) {
1358             throw new IllegalArgumentException("Unknown flag 0x"
1359                                                + Integer.toHexString(f));
1360         }
1361         pattern = p;
1362         flags = f;
1363 
1364         // to use UNICODE_CASE if UNICODE_CHARACTER_CLASS present
1365         if ((flags & UNICODE_CHARACTER_CLASS) != 0)
1366             flags |= UNICODE_CASE;
1367 
1368         // Reset group index count
1369         capturingGroupCount = 1;
1370         localCount = 0;

1371 
1372         if (pattern.length() > 0) {
1373             compile();
1374         } else {
1375             root = new Start(lastAccept);
1376             matchRoot = lastAccept;
1377         }
1378     }
1379 
1380     /**
1381      * The pattern is converted to normalized form ({@linkplain
1382      * java.text.Normalizer.Form.NFD NFD}, canonical decomposition)
1383      * and then a pure group is constructed to match canonical
1384      * equivalences of the characters.
1385      */
1386     private void normalize() {
1387         int lastCodePoint = -1;
1388 
1389         // Convert pattern into normalized form
1390         normalizedPattern = Normalizer.normalize(pattern, Normalizer.Form.NFD);


1690         hasSupplementary = false;
1691         int c, count = 0;
1692         // Convert all chars into code points
1693         for (int x = 0; x < patternLength; x += Character.charCount(c)) {
1694             c = normalizedPattern.codePointAt(x);
1695             if (isSupplementary(c)) {
1696                 hasSupplementary = true;
1697             }
1698             temp[count++] = c;
1699         }
1700 
1701         patternLength = count;   // patternLength now in code points
1702 
1703         if (! has(LITERAL))
1704             RemoveQEQuoting();
1705 
1706         // Allocate all temporary objects here.
1707         buffer = new int[32];
1708         groupNodes = new GroupHead[10];
1709         namedGroups = null;

1710 
1711         if (has(LITERAL)) {
1712             // Literal pattern handling
1713             matchRoot = newSlice(temp, patternLength, hasSupplementary);
1714             matchRoot.next = lastAccept;
1715         } else {
1716             // Start recursive descent parsing
1717             matchRoot = expr(lastAccept);
1718             // Check extra pattern characters
1719             if (patternLength != cursor) {
1720                 if (peek() == ')') {
1721                     throw error("Unmatched closing ')'");
1722                 } else {
1723                     throw error("Unexpected internal error");
1724                 }
1725             }
1726         }
1727 
1728         // Peephole optimization
1729         if (matchRoot instanceof Slice) {
1730             root = BnM.optimize(matchRoot);
1731             if (root == matchRoot) {
1732                 root = hasSupplementary ? new StartS(matchRoot) : new Start(matchRoot);
1733             }
1734         } else if (matchRoot instanceof Begin || matchRoot instanceof First) {
1735             root = matchRoot;
1736         } else {
1737             root = hasSupplementary ? new StartS(matchRoot) : new Start(matchRoot);
1738         }
1739 






1740         // Release temporary storage
1741         temp = null;
1742         buffer = null;
1743         groupNodes = null;
1744         patternLength = 0;
1745         compiled = true;

1746     }
1747 
1748     Map<String, Integer> namedGroups() {
1749         Map<String, Integer> groups = namedGroups;
1750         if (groups == null) {
1751             namedGroups = groups = new HashMap<>(2);
1752         }
1753         return groups;
1754     }
1755 
1756     /**
1757      * Used to print out a subtree of the Pattern to help with debugging.
1758      */
1759     private static void printObjectTree(Node node) {
1760         while(node != null) {
1761             if (node instanceof Prolog) {
1762                 System.out.println(node);
1763                 printObjectTree(((Prolog)node).loop);
1764                 System.out.println("**** end contents prolog loop");
1765             } else if (node instanceof Loop) {


2055             next();
2056         }
2057     }
2058 
2059     @SuppressWarnings("fallthrough")
2060     /**
2061      * Parsing of sequences between alternations.
2062      */
2063     private Node sequence(Node end) {
2064         Node head = null;
2065         Node tail = null;
2066         Node node = null;
2067     LOOP:
2068         for (;;) {
2069             int ch = peek();
2070             switch (ch) {
2071             case '(':
2072                 // Because group handles its own closure,
2073                 // we need to treat it differently
2074                 node = group0();

2075                 // Check for comment or flag group
2076                 if (node == null)
2077                     continue;
2078                 if (head == null)
2079                     head = node;
2080                 else
2081                     tail.next = node;
2082                 // Double return: Tail was returned in root
2083                 tail = root;
2084                 continue;
2085             case '[':
2086                 node = clazz(true);
2087                 break;
2088             case '\\':
2089                 ch = nextEscaped();
2090                 if (ch == 'p' || ch == 'P') {
2091                     boolean oneLetter = true;
2092                     boolean comp = (ch == 'P');
2093                     ch = next(); // Consume { if present
2094                     if (ch != '{') {


2138             case ']': // Now interpreting dangling ] and } as literals
2139             case '}':
2140                 node = atom();
2141                 break;
2142             case '?':
2143             case '*':
2144             case '+':
2145                 next();
2146                 throw error("Dangling meta character '" + ((char)ch) + "'");
2147             case 0:
2148                 if (cursor >= patternLength) {
2149                     break LOOP;
2150                 }
2151                 // Fall through
2152             default:
2153                 node = atom();
2154                 break;
2155             }
2156 
2157             node = closure(node);
2158 



2159             if (head == null) {
2160                 head = tail = node;
2161             } else {
2162                 tail.next = node;
2163                 tail = node;
2164             }
2165         }
2166         if (head == null) {
2167             return end;
2168         }
2169         tail.next = end;
2170         root = tail;      //double return
2171         return head;
2172     }
2173 
2174     @SuppressWarnings("fallthrough")
2175     /**
2176      * Parse and add a new Single or Slice.
2177      */
2178     private Node atom() {


2842                ASCII.isDigit(ch)) {
2843             sb.append(Character.toChars(ch));
2844         }
2845         if (sb.length() == 0)
2846             throw error("named capturing group has 0 length name");
2847         if (ch != '>')
2848             throw error("named capturing group is missing trailing '>'");
2849         return sb.toString();
2850     }
2851 
2852     /**
2853      * Parses a group and returns the head node of a set of nodes that process
2854      * the group. Sometimes a double return system is used where the tail is
2855      * returned in root.
2856      */
2857     private Node group0() {
2858         boolean capturingGroup = false;
2859         Node head = null;
2860         Node tail = null;
2861         int save = flags;



2862         root = null;
2863         int ch = next();
2864         if (ch == '?') {
2865             ch = skip();
2866             switch (ch) {
2867             case ':':   //  (?:xxx) pure group
2868                 head = createGroup(true);
2869                 tail = root;
2870                 head.next = expr(tail);
2871                 break;
2872             case '=':   // (?=xxx) and (?!xxx) lookahead
2873             case '!':
2874                 head = createGroup(true);
2875                 tail = root;
2876                 head.next = expr(tail);
2877                 if (ch == '=') {
2878                     head = tail = new Pos(head);
2879                 } else {
2880                     head = tail = new Neg(head);
2881                 }


2951             capturingGroup = true;
2952             head = createGroup(false);
2953             tail = root;
2954             head.next = expr(tail);
2955         }
2956 
2957         accept(')', "Unclosed group");
2958         flags = save;
2959 
2960         // Check for quantifiers
2961         Node node = closure(head);
2962         if (node == head) { // No closure
2963             root = tail;
2964             return node;    // Dual return
2965         }
2966         if (head == tail) { // Zero length assertion
2967             root = node;
2968             return node;    // Dual return
2969         }
2970 





2971         if (node instanceof Ques) {
2972             Ques ques = (Ques) node;
2973             if (ques.type == POSSESSIVE) {
2974                 root = node;
2975                 return node;
2976             }
2977             tail.next = new BranchConn();
2978             tail = tail.next;
2979             if (ques.type == GREEDY) {
2980                 head = new Branch(head, null, tail);
2981             } else { // Reluctant quantifier
2982                 head = new Branch(null, head, tail);
2983             }
2984             root = tail;
2985             return head;
2986         } else if (node instanceof Curly) {
2987             Curly curly = (Curly) node;
2988             if (curly.type == POSSESSIVE) {
2989                 root = node;
2990                 return node;
2991             }
2992             // Discover if the group is deterministic
2993             TreeInfo info = new TreeInfo();
2994             if (head.study(info)) { // Deterministic
2995                 GroupTail temp = (GroupTail) tail;
2996                 head = root = new GroupCurly(head.next, curly.cmin,
2997                                    curly.cmax, curly.type,
2998                                    ((GroupTail)tail).localIndex,
2999                                    ((GroupTail)tail).groupIndex,
3000                                              capturingGroup);
3001                 return head;
3002             } else { // Non-deterministic
3003                 int temp = ((GroupHead) head).localIndex;
3004                 Loop loop;
3005                 if (curly.type == GREEDY)
3006                     loop = new Loop(this.localCount, temp);
3007                 else  // Reluctant Curly


3008                     loop = new LazyLoop(this.localCount, temp);

3009                 Prolog prolog = new Prolog(loop);
3010                 this.localCount += 1;
3011                 loop.cmin = curly.cmin;
3012                 loop.cmax = curly.cmax;
3013                 loop.body = head;
3014                 tail.next = loop;
3015                 root = loop;
3016                 return prolog; // Dual return
3017             }
3018         }
3019         throw error("Internal logic error");
3020     }
3021 
3022     /**
3023      * Create group head and tail nodes using double return. If the group is
3024      * created with anonymous true then it is a pure group and should not
3025      * affect group counting.
3026      */
3027     private Node createGroup(boolean anonymous) {
3028         int localIndex = localCount++;


4859         }
4860         boolean match(Matcher matcher, int i, CharSequence seq) {
4861             return loop.matchInit(matcher, i, seq);
4862         }
4863         boolean study(TreeInfo info) {
4864             return loop.study(info);
4865         }
4866     }
4867 
4868     /**
4869      * Handles the repetition count for a greedy Curly. The matchInit
4870      * is called from the Prolog to save the index of where the group
4871      * beginning is stored. A zero length group check occurs in the
4872      * normal match but is skipped in the matchInit.
4873      */
4874     static class Loop extends Node {
4875         Node body;
4876         int countIndex; // local count index in matcher locals
4877         int beginIndex; // group beginning index
4878         int cmin, cmax;


4879         Loop(int countIndex, int beginIndex) {
4880             this.countIndex = countIndex;
4881             this.beginIndex = beginIndex;

4882         }
4883         boolean match(Matcher matcher, int i, CharSequence seq) {
4884             // Avoid infinite loop in zero-length case.
4885             if (i > matcher.locals[beginIndex]) {
4886                 int count = matcher.locals[countIndex];
4887 
4888                 // This block is for before we reach the minimum
4889                 // iterations required for the loop to match
4890                 if (count < cmin) {
4891                     matcher.locals[countIndex] = count + 1;
4892                     boolean b = body.match(matcher, i, seq);
4893                     // If match failed we must backtrack, so
4894                     // the loop count should NOT be incremented
4895                     if (!b)
4896                         matcher.locals[countIndex] = count;
4897                     // Return success or failure since we are under
4898                     // minimum
4899                     return b;
4900                 }
4901                 // This block is for after we have the minimum
4902                 // iterations required for the loop to match
4903                 if (count < cmax) {










4904                     matcher.locals[countIndex] = count + 1;
4905                     boolean b = body.match(matcher, i, seq);
4906                     // If match failed we must backtrack, so
4907                     // the loop count should NOT be incremented
4908                     if (!b)
4909                         matcher.locals[countIndex] = count;
4910                     else
4911                         return true;





4912                 }
4913             }
4914             return next.match(matcher, i, seq);
4915         }

4916         boolean matchInit(Matcher matcher, int i, CharSequence seq) {
4917             int save = matcher.locals[countIndex];
4918             boolean ret = false;








4919             if (0 < cmin) {
4920                 matcher.locals[countIndex] = 1;
4921                 ret = body.match(matcher, i, seq);
4922             } else if (0 < cmax) {
4923                 matcher.locals[countIndex] = 1;
4924                 ret = body.match(matcher, i, seq);
4925                 if (ret == false)
4926                     ret = next.match(matcher, i, seq);
4927             } else {
4928                 ret = next.match(matcher, i, seq);
4929             }
4930             matcher.locals[countIndex] = save;
4931             return ret;
4932         }

4933         boolean study(TreeInfo info) {
4934             info.maxValid = false;
4935             info.deterministic = false;
4936             return false;
4937         }
4938     }
4939 
4940     /**
4941      * Handles the repetition count for a reluctant Curly. The matchInit
4942      * is called from the Prolog to save the index of where the group
4943      * beginning is stored. A zero length group check occurs in the
4944      * normal match but is skipped in the matchInit.
4945      */
4946     static final class LazyLoop extends Loop {
4947         LazyLoop(int countIndex, int beginIndex) {
4948             super(countIndex, beginIndex);
4949         }
4950         boolean match(Matcher matcher, int i, CharSequence seq) {
4951             // Check for zero length group
4952             if (i > matcher.locals[beginIndex]) {




 978      */
 979     transient Node matchRoot;
 980 
 981     /**
 982      * Temporary storage used by parsing pattern slice.
 983      */
 984     transient int[] buffer;
 985 
 986     /**
 987      * Map the "name" of the "named capturing group" to its group id
 988      * node.
 989      */
 990     transient volatile Map<String, Integer> namedGroups;
 991 
 992     /**
 993      * Temporary storage used while parsing group references.
 994      */
 995     transient GroupHead[] groupNodes;
 996 
 997     /**
 998      * Temporary storage used to store the top level closure nodes.
 999      */
1000     transient ArrayList<Node> topClosureNodes;
1001 
1002     /**
1003      * The number of top greedy repetition groups in this Pattern. Used by
1004      * matchers to allocate storage needed for a HashSetInt to keep the
1005      * beginning pos {@code i} of all past failed match.
1006      */
1007     transient int localTGRGroupCount;
1008 
1009     /**
1010      * Temporary null terminated code point array used by pattern compiling.
1011      */
1012     private transient int[] temp;
1013 
1014     /**
1015      * The number of capturing groups in this Pattern. Used by matchers to
1016      * allocate storage needed to perform a match.
1017      */
1018     transient int capturingGroupCount;
1019 
1020     /**
1021      * The local variable count used by parsing tree. Used by matchers to
1022      * allocate storage needed to perform a match.
1023      */
1024     transient int localCount;
1025 
1026     /**
1027      * Index into the pattern string that keeps track of how much has been
1028      * parsed.
1029      */


1333         } while ((slashEIndex = s.indexOf("\\E", current)) != -1);
1334 
1335         return sb.append(s, current, s.length())
1336                 .append("\\E")
1337                 .toString();
1338     }
1339 
1340     /**
1341      * Recompile the Pattern instance from a stream.  The original pattern
1342      * string is read in and the object tree is recompiled from it.
1343      */
1344     private void readObject(java.io.ObjectInputStream s)
1345         throws java.io.IOException, ClassNotFoundException {
1346 
1347         // Read in all fields
1348         s.defaultReadObject();
1349 
1350         // Initialize counts
1351         capturingGroupCount = 1;
1352         localCount = 0;
1353         localTGRGroupCount = 0;
1354 
1355         // if length > 0, the Pattern is lazily compiled
1356         if (pattern.length() == 0) {
1357             root = new Start(lastAccept);
1358             matchRoot = lastAccept;
1359             compiled = true;
1360         }
1361     }
1362 
1363     /**
1364      * This private constructor is used to create all Patterns. The pattern
1365      * string and match flags are all that is needed to completely describe
1366      * a Pattern. An empty pattern string results in an object tree with
1367      * only a Start node and a LastNode node.
1368      */
1369     private Pattern(String p, int f) {
1370         if ((f & ~ALL_FLAGS) != 0) {
1371             throw new IllegalArgumentException("Unknown flag 0x"
1372                                                + Integer.toHexString(f));
1373         }
1374         pattern = p;
1375         flags = f;
1376 
1377         // to use UNICODE_CASE if UNICODE_CHARACTER_CLASS present
1378         if ((flags & UNICODE_CHARACTER_CLASS) != 0)
1379             flags |= UNICODE_CASE;
1380 
1381         // Reset group index count
1382         capturingGroupCount = 1;
1383         localCount = 0;
1384         localTGRGroupCount = 0;
1385 
1386         if (pattern.length() > 0) {
1387             compile();
1388         } else {
1389             root = new Start(lastAccept);
1390             matchRoot = lastAccept;
1391         }
1392     }
1393 
1394     /**
1395      * The pattern is converted to normalized form ({@linkplain
1396      * java.text.Normalizer.Form.NFD NFD}, canonical decomposition)
1397      * and then a pure group is constructed to match canonical
1398      * equivalences of the characters.
1399      */
1400     private void normalize() {
1401         int lastCodePoint = -1;
1402 
1403         // Convert pattern into normalized form
1404         normalizedPattern = Normalizer.normalize(pattern, Normalizer.Form.NFD);


1704         hasSupplementary = false;
1705         int c, count = 0;
1706         // Convert all chars into code points
1707         for (int x = 0; x < patternLength; x += Character.charCount(c)) {
1708             c = normalizedPattern.codePointAt(x);
1709             if (isSupplementary(c)) {
1710                 hasSupplementary = true;
1711             }
1712             temp[count++] = c;
1713         }
1714 
1715         patternLength = count;   // patternLength now in code points
1716 
1717         if (! has(LITERAL))
1718             RemoveQEQuoting();
1719 
1720         // Allocate all temporary objects here.
1721         buffer = new int[32];
1722         groupNodes = new GroupHead[10];
1723         namedGroups = null;
1724         topClosureNodes = new ArrayList<>(10);
1725 
1726         if (has(LITERAL)) {
1727             // Literal pattern handling
1728             matchRoot = newSlice(temp, patternLength, hasSupplementary);
1729             matchRoot.next = lastAccept;
1730         } else {
1731             // Start recursive descent parsing
1732             matchRoot = expr(lastAccept);
1733             // Check extra pattern characters
1734             if (patternLength != cursor) {
1735                 if (peek() == ')') {
1736                     throw error("Unmatched closing ')'");
1737                 } else {
1738                     throw error("Unexpected internal error");
1739                 }
1740             }
1741         }
1742 
1743         // Peephole optimization
1744         if (matchRoot instanceof Slice) {
1745             root = BnM.optimize(matchRoot);
1746             if (root == matchRoot) {
1747                 root = hasSupplementary ? new StartS(matchRoot) : new Start(matchRoot);
1748             }
1749         } else if (matchRoot instanceof Begin || matchRoot instanceof First) {
1750             root = matchRoot;
1751         } else {
1752             root = hasSupplementary ? new StartS(matchRoot) : new Start(matchRoot);
1753         }
1754 
1755         for (Node node : topClosureNodes) {
1756             if (node instanceof Loop) {
1757                 ((Loop)node).posIndex = localTGRGroupCount++;
1758             }
1759         }
1760 
1761         // Release temporary storage
1762         temp = null;
1763         buffer = null;
1764         groupNodes = null;
1765         patternLength = 0;
1766         compiled = true;
1767         topClosureNodes = null;
1768     }
1769 
1770     Map<String, Integer> namedGroups() {
1771         Map<String, Integer> groups = namedGroups;
1772         if (groups == null) {
1773             namedGroups = groups = new HashMap<>(2);
1774         }
1775         return groups;
1776     }
1777 
1778     /**
1779      * Used to print out a subtree of the Pattern to help with debugging.
1780      */
1781     private static void printObjectTree(Node node) {
1782         while(node != null) {
1783             if (node instanceof Prolog) {
1784                 System.out.println(node);
1785                 printObjectTree(((Prolog)node).loop);
1786                 System.out.println("**** end contents prolog loop");
1787             } else if (node instanceof Loop) {


2077             next();
2078         }
2079     }
2080 
2081     @SuppressWarnings("fallthrough")
2082     /**
2083      * Parsing of sequences between alternations.
2084      */
2085     private Node sequence(Node end) {
2086         Node head = null;
2087         Node tail = null;
2088         Node node = null;
2089     LOOP:
2090         for (;;) {
2091             int ch = peek();
2092             switch (ch) {
2093             case '(':
2094                 // Because group handles its own closure,
2095                 // we need to treat it differently
2096                 node = group0();
2097 
2098                 // Check for comment or flag group
2099                 if (node == null)
2100                     continue;
2101                 if (head == null)
2102                     head = node;
2103                 else
2104                     tail.next = node;
2105                 // Double return: Tail was returned in root
2106                 tail = root;
2107                 continue;
2108             case '[':
2109                 node = clazz(true);
2110                 break;
2111             case '\\':
2112                 ch = nextEscaped();
2113                 if (ch == 'p' || ch == 'P') {
2114                     boolean oneLetter = true;
2115                     boolean comp = (ch == 'P');
2116                     ch = next(); // Consume { if present
2117                     if (ch != '{') {


2161             case ']': // Now interpreting dangling ] and } as literals
2162             case '}':
2163                 node = atom();
2164                 break;
2165             case '?':
2166             case '*':
2167             case '+':
2168                 next();
2169                 throw error("Dangling meta character '" + ((char)ch) + "'");
2170             case 0:
2171                 if (cursor >= patternLength) {
2172                     break LOOP;
2173                 }
2174                 // Fall through
2175             default:
2176                 node = atom();
2177                 break;
2178             }
2179 
2180             node = closure(node);
2181             // save the greddy curly
2182             if (node instanceof Curly && ((Curly)node).type == GREEDY) {
2183                 topClosureNodes.add(node);
2184             }
2185             if (head == null) {
2186                 head = tail = node;
2187             } else {
2188                 tail.next = node;
2189                 tail = node;
2190             }
2191         }
2192         if (head == null) {
2193             return end;
2194         }
2195         tail.next = end;
2196         root = tail;      //double return
2197         return head;
2198     }
2199 
2200     @SuppressWarnings("fallthrough")
2201     /**
2202      * Parse and add a new Single or Slice.
2203      */
2204     private Node atom() {


2868                ASCII.isDigit(ch)) {
2869             sb.append(Character.toChars(ch));
2870         }
2871         if (sb.length() == 0)
2872             throw error("named capturing group has 0 length name");
2873         if (ch != '>')
2874             throw error("named capturing group is missing trailing '>'");
2875         return sb.toString();
2876     }
2877 
2878     /**
2879      * Parses a group and returns the head node of a set of nodes that process
2880      * the group. Sometimes a double return system is used where the tail is
2881      * returned in root.
2882      */
2883     private Node group0() {
2884         boolean capturingGroup = false;
2885         Node head = null;
2886         Node tail = null;
2887         int save = flags;
2888 
2889 int saveTCNCount = topClosureNodes.size();
2890 
2891         root = null;
2892         int ch = next();
2893         if (ch == '?') {
2894             ch = skip();
2895             switch (ch) {
2896             case ':':   //  (?:xxx) pure group
2897                 head = createGroup(true);
2898                 tail = root;
2899                 head.next = expr(tail);
2900                 break;
2901             case '=':   // (?=xxx) and (?!xxx) lookahead
2902             case '!':
2903                 head = createGroup(true);
2904                 tail = root;
2905                 head.next = expr(tail);
2906                 if (ch == '=') {
2907                     head = tail = new Pos(head);
2908                 } else {
2909                     head = tail = new Neg(head);
2910                 }


2980             capturingGroup = true;
2981             head = createGroup(false);
2982             tail = root;
2983             head.next = expr(tail);
2984         }
2985 
2986         accept(')', "Unclosed group");
2987         flags = save;
2988 
2989         // Check for quantifiers
2990         Node node = closure(head);
2991         if (node == head) { // No closure
2992             root = tail;
2993             return node;    // Dual return
2994         }
2995         if (head == tail) { // Zero length assertion
2996             root = node;
2997             return node;    // Dual return
2998         }
2999 
3000         // have group closure, clear all embedded closure nodes
3001         // from the top list.
3002         if (saveTCNCount < topClosureNodes.size())
3003             topClosureNodes.subList(saveTCNCount, topClosureNodes.size()).clear();
3004 
3005         if (node instanceof Ques) {
3006             Ques ques = (Ques) node;
3007             if (ques.type == POSSESSIVE) {
3008                 root = node;
3009                 return node;
3010             }
3011             tail.next = new BranchConn();
3012             tail = tail.next;
3013             if (ques.type == GREEDY) {
3014                 head = new Branch(head, null, tail);
3015             } else { // Reluctant quantifier
3016                 head = new Branch(null, head, tail);
3017             }
3018             root = tail;
3019             return head;
3020         } else if (node instanceof Curly) {
3021             Curly curly = (Curly) node;
3022             if (curly.type == POSSESSIVE) {
3023                 root = node;
3024                 return node;
3025             }
3026             // Discover if the group is deterministic
3027             TreeInfo info = new TreeInfo();
3028             if (head.study(info)) { // Deterministic
3029                 GroupTail temp = (GroupTail) tail;
3030                 head = root = new GroupCurly(head.next, curly.cmin,
3031                                    curly.cmax, curly.type,
3032                                    ((GroupTail)tail).localIndex,
3033                                    ((GroupTail)tail).groupIndex,
3034                                              capturingGroup);
3035                 return head;
3036             } else { // Non-deterministic
3037                 int temp = ((GroupHead) head).localIndex;
3038                 Loop loop;
3039                 if (curly.type == GREEDY) {
3040                     loop = new Loop(this.localCount, temp);
3041                     // add the non-determinsitic/greeddy to the list
3042                     topClosureNodes.add(loop);
3043                 } else  {  // Reluctant Curly
3044                     loop = new LazyLoop(this.localCount, temp);
3045                 }
3046                 Prolog prolog = new Prolog(loop);
3047                 this.localCount += 1;
3048                 loop.cmin = curly.cmin;
3049                 loop.cmax = curly.cmax;
3050                 loop.body = head;
3051                 tail.next = loop;
3052                 root = loop;
3053                 return prolog; // Dual return
3054             }
3055         }
3056         throw error("Internal logic error");
3057     }
3058 
3059     /**
3060      * Create group head and tail nodes using double return. If the group is
3061      * created with anonymous true then it is a pure group and should not
3062      * affect group counting.
3063      */
3064     private Node createGroup(boolean anonymous) {
3065         int localIndex = localCount++;


4896         }
4897         boolean match(Matcher matcher, int i, CharSequence seq) {
4898             return loop.matchInit(matcher, i, seq);
4899         }
4900         boolean study(TreeInfo info) {
4901             return loop.study(info);
4902         }
4903     }
4904 
4905     /**
4906      * Handles the repetition count for a greedy Curly. The matchInit
4907      * is called from the Prolog to save the index of where the group
4908      * beginning is stored. A zero length group check occurs in the
4909      * normal match but is skipped in the matchInit.
4910      */
4911     static class Loop extends Node {
4912         Node body;
4913         int countIndex; // local count index in matcher locals
4914         int beginIndex; // group beginning index
4915         int cmin, cmax;
4916         int posIndex;
4917 
4918         Loop(int countIndex, int beginIndex) {
4919             this.countIndex = countIndex;
4920             this.beginIndex = beginIndex;
4921             this.posIndex = -1;
4922         }
4923         boolean match(Matcher matcher, int i, CharSequence seq) {
4924             // Avoid infinite loop in zero-length case.
4925             if (i > matcher.locals[beginIndex]) {
4926                 int count = matcher.locals[countIndex];

4927                 // This block is for before we reach the minimum
4928                 // iterations required for the loop to match
4929                 if (count < cmin) {
4930                     matcher.locals[countIndex] = count + 1;
4931                     boolean b = body.match(matcher, i, seq);
4932                     // If match failed we must backtrack, so
4933                     // the loop count should NOT be incremented
4934                     if (!b)
4935                         matcher.locals[countIndex] = count;
4936                     // Return success or failure since we are under
4937                     // minimum
4938                     return b;
4939                 }
4940                 // This block is for after we have the minimum
4941                 // iterations required for the loop to match
4942                 if (count < cmax) {
4943 
4944 
4945                     // Let's check if we have already tried and failed
4946                     // at this starting position "i" in the past.
4947                     // If yes, then just return false wihtout trying
4948                     // again, to stop the exponential backtracking.
4949                     if (posIndex != -1 &&
4950                         matcher.localsPos[posIndex].contains(i))
4951                     return next.match(matcher, i, seq);
4952 
4953                     matcher.locals[countIndex] = count + 1;
4954                     boolean b = body.match(matcher, i, seq);
4955                     // If match failed we must backtrack, so
4956                     // the loop count should NOT be incremented
4957                     if (b)


4958                         return true;
4959                     matcher.locals[countIndex] = count;
4960                     // save the failed position
4961                     if (posIndex != -1)
4962                         matcher.localsPos[posIndex].add(i);
4963 
4964                 }
4965             }
4966             return next.match(matcher, i, seq);
4967         }
4968 
4969         boolean matchInit(Matcher matcher, int i, CharSequence seq) {
4970             int save = matcher.locals[countIndex];
4971             boolean ret = false;
4972 
4973             if (posIndex != -1) {
4974                 if (matcher.localsPos[posIndex] == null)
4975                     matcher.localsPos[posIndex] = new HashSetInt();
4976                 else
4977                     matcher.localsPos[posIndex].clear();
4978             }
4979 
4980             if (0 < cmin) {
4981                 matcher.locals[countIndex] = 1;
4982                 ret = body.match(matcher, i, seq);
4983             } else if (0 < cmax) {
4984                 matcher.locals[countIndex] = 1;
4985                 ret = body.match(matcher, i, seq);
4986                 if (ret == false)
4987                     ret = next.match(matcher, i, seq);
4988             } else {
4989                 ret = next.match(matcher, i, seq);
4990             }
4991             matcher.locals[countIndex] = save;
4992             return ret;
4993         }
4994 
4995         boolean study(TreeInfo info) {
4996             info.maxValid = false;
4997             info.deterministic = false;
4998             return false;
4999         }
5000     }
5001 
5002     /**
5003      * Handles the repetition count for a reluctant Curly. The matchInit
5004      * is called from the Prolog to save the index of where the group
5005      * beginning is stored. A zero length group check occurs in the
5006      * normal match but is skipped in the matchInit.
5007      */
5008     static final class LazyLoop extends Loop {
5009         LazyLoop(int countIndex, int beginIndex) {
5010             super(countIndex, beginIndex);
5011         }
5012         boolean match(Matcher matcher, int i, CharSequence seq) {
5013             // Check for zero length group
5014             if (i > matcher.locals[beginIndex]) {