src/java.base/share/classes/java/util/regex/Pattern.java

Print this page

        

@@ -993,10 +993,22 @@
      * Temporary storage used while parsing group references.
      */
     transient GroupHead[] groupNodes;
 
     /**
+     * Temporary storage used to store the top level closure nodes.
+     */
+    transient ArrayList<Node> topClosureNodes;
+
+    /**
+     * The number of top greedy repetition groups in this Pattern. Used by
+     * matchers to allocate storage needed for a HashSetInt to keep the
+     * beginning pos {@code i} of all past failed match.
+     */
+    transient int localTGRGroupCount;
+
+    /**
      * Temporary null terminated code point array used by pattern compiling.
      */
     private transient int[] temp;
 
     /**

@@ -1336,10 +1348,11 @@
         s.defaultReadObject();
 
         // Initialize counts
         capturingGroupCount = 1;
         localCount = 0;
+        localTGRGroupCount = 0;
 
         // if length > 0, the Pattern is lazily compiled
         if (pattern.length() == 0) {
             root = new Start(lastAccept);
             matchRoot = lastAccept;

@@ -1366,10 +1379,11 @@
             flags |= UNICODE_CASE;
 
         // Reset group index count
         capturingGroupCount = 1;
         localCount = 0;
+        localTGRGroupCount = 0;
 
         if (pattern.length() > 0) {
             compile();
         } else {
             root = new Start(lastAccept);

@@ -1705,10 +1719,11 @@
 
         // Allocate all temporary objects here.
         buffer = new int[32];
         groupNodes = new GroupHead[10];
         namedGroups = null;
+        topClosureNodes = new ArrayList<>(10);
 
         if (has(LITERAL)) {
             // Literal pattern handling
             matchRoot = newSlice(temp, patternLength, hasSupplementary);
             matchRoot.next = lastAccept;

@@ -1735,16 +1750,23 @@
             root = matchRoot;
         } else {
             root = hasSupplementary ? new StartS(matchRoot) : new Start(matchRoot);
         }
 
+        for (Node node : topClosureNodes) {
+            if (node instanceof Loop) {
+                ((Loop)node).posIndex = localTGRGroupCount++;
+            }
+        }
+
         // Release temporary storage
         temp = null;
         buffer = null;
         groupNodes = null;
         patternLength = 0;
         compiled = true;
+        topClosureNodes = null;
     }
 
     Map<String, Integer> namedGroups() {
         Map<String, Integer> groups = namedGroups;
         if (groups == null) {

@@ -2070,10 +2092,11 @@
             switch (ch) {
             case '(':
                 // Because group handles its own closure,
                 // we need to treat it differently
                 node = group0();
+
                 // Check for comment or flag group
                 if (node == null)
                     continue;
                 if (head == null)
                     head = node;

@@ -2153,11 +2176,14 @@
                 node = atom();
                 break;
             }
 
             node = closure(node);
-
+            // save the greddy curly
+            if (node instanceof Curly && ((Curly)node).type == GREEDY) {
+                topClosureNodes.add(node);
+            }
             if (head == null) {
                 head = tail = node;
             } else {
                 tail.next = node;
                 tail = node;

@@ -2857,10 +2883,13 @@
     private Node group0() {
         boolean capturingGroup = false;
         Node head = null;
         Node tail = null;
         int save = flags;
+
+int saveTCNCount = topClosureNodes.size();
+
         root = null;
         int ch = next();
         if (ch == '?') {
             ch = skip();
             switch (ch) {

@@ -2966,10 +2995,15 @@
         if (head == tail) { // Zero length assertion
             root = node;
             return node;    // Dual return
         }
 
+        // have group closure, clear all embedded closure nodes
+        // from the top list.
+        if (saveTCNCount < topClosureNodes.size())
+            topClosureNodes.subList(saveTCNCount, topClosureNodes.size()).clear();
+
         if (node instanceof Ques) {
             Ques ques = (Ques) node;
             if (ques.type == POSSESSIVE) {
                 root = node;
                 return node;

@@ -3000,14 +3034,17 @@
                                              capturingGroup);
                 return head;
             } else { // Non-deterministic
                 int temp = ((GroupHead) head).localIndex;
                 Loop loop;
-                if (curly.type == GREEDY)
+                if (curly.type == GREEDY) {
                     loop = new Loop(this.localCount, temp);
-                else  // Reluctant Curly
+                    // add the non-determinsitic/greeddy to the list
+                    topClosureNodes.add(loop);
+                } else  {  // Reluctant Curly
                     loop = new LazyLoop(this.localCount, temp);
+                }
                 Prolog prolog = new Prolog(loop);
                 this.localCount += 1;
                 loop.cmin = curly.cmin;
                 loop.cmax = curly.cmax;
                 loop.body = head;

@@ -4874,19 +4911,21 @@
     static class Loop extends Node {
         Node body;
         int countIndex; // local count index in matcher locals
         int beginIndex; // group beginning index
         int cmin, cmax;
+        int posIndex;
+
         Loop(int countIndex, int beginIndex) {
             this.countIndex = countIndex;
             this.beginIndex = beginIndex;
+            this.posIndex = -1;
         }
         boolean match(Matcher matcher, int i, CharSequence seq) {
             // Avoid infinite loop in zero-length case.
             if (i > matcher.locals[beginIndex]) {
                 int count = matcher.locals[countIndex];
-
                 // This block is for before we reach the minimum
                 // iterations required for the loop to match
                 if (count < cmin) {
                     matcher.locals[countIndex] = count + 1;
                     boolean b = body.match(matcher, i, seq);

@@ -4899,25 +4938,47 @@
                     return b;
                 }
                 // This block is for after we have the minimum
                 // iterations required for the loop to match
                 if (count < cmax) {
+
+
+                    // Let's check if we have already tried and failed
+                    // at this starting position "i" in the past.
+                    // If yes, then just return false wihtout trying
+                    // again, to stop the exponential backtracking.
+                    if (posIndex != -1 &&
+                        matcher.localsPos[posIndex].contains(i))
+                    return next.match(matcher, i, seq);
+
                     matcher.locals[countIndex] = count + 1;
                     boolean b = body.match(matcher, i, seq);
                     // If match failed we must backtrack, so
                     // the loop count should NOT be incremented
-                    if (!b)
-                        matcher.locals[countIndex] = count;
-                    else
+                    if (b)
                         return true;
+                    matcher.locals[countIndex] = count;
+                    // save the failed position
+                    if (posIndex != -1)
+                        matcher.localsPos[posIndex].add(i);
+
                 }
             }
             return next.match(matcher, i, seq);
         }
+
         boolean matchInit(Matcher matcher, int i, CharSequence seq) {
             int save = matcher.locals[countIndex];
             boolean ret = false;
+
+            if (posIndex != -1) {
+                if (matcher.localsPos[posIndex] == null)
+                    matcher.localsPos[posIndex] = new HashSetInt();
+                else
+                    matcher.localsPos[posIndex].clear();
+            }
+
             if (0 < cmin) {
                 matcher.locals[countIndex] = 1;
                 ret = body.match(matcher, i, seq);
             } else if (0 < cmax) {
                 matcher.locals[countIndex] = 1;

@@ -4928,10 +4989,11 @@
                 ret = next.match(matcher, i, seq);
             }
             matcher.locals[countIndex] = save;
             return ret;
         }
+
         boolean study(TreeInfo info) {
             info.maxValid = false;
             info.deterministic = false;
             return false;
         }