--- old/src/java.base/share/classes/java/util/regex/Pattern.java 2016-02-29 13:29:42.309038463 -0800 +++ new/src/java.base/share/classes/java/util/regex/Pattern.java 2016-02-29 13:29:41.985039893 -0800 @@ -995,6 +995,18 @@ transient GroupHead[] groupNodes; /** + * Temporary storage used to store the top level closure nodes. + */ + transient ArrayList topClosureNodes; + + /** + * The number of top greedy repetition groups in this Pattern. Used by + * matchers to allocate storage needed for a HashSetInt to keep the + * beginning pos {@code i} of all past failed match. + */ + transient int localTGRGroupCount; + + /** * Temporary null terminated code point array used by pattern compiling. */ private transient int[] temp; @@ -1338,6 +1350,7 @@ // Initialize counts capturingGroupCount = 1; localCount = 0; + localTGRGroupCount = 0; // if length > 0, the Pattern is lazily compiled if (pattern.length() == 0) { @@ -1368,6 +1381,7 @@ // Reset group index count capturingGroupCount = 1; localCount = 0; + localTGRGroupCount = 0; if (pattern.length() > 0) { compile(); @@ -1707,6 +1721,7 @@ buffer = new int[32]; groupNodes = new GroupHead[10]; namedGroups = null; + topClosureNodes = new ArrayList<>(10); if (has(LITERAL)) { // Literal pattern handling @@ -1737,12 +1752,19 @@ root = hasSupplementary ? new StartS(matchRoot) : new Start(matchRoot); } + for (Node node : topClosureNodes) { + if (node instanceof Loop) { + ((Loop)node).posIndex = localTGRGroupCount++; + } + } + // Release temporary storage temp = null; buffer = null; groupNodes = null; patternLength = 0; compiled = true; + topClosureNodes = null; } Map namedGroups() { @@ -2072,6 +2094,7 @@ // Because group handles its own closure, // we need to treat it differently node = group0(); + // Check for comment or flag group if (node == null) continue; @@ -2155,7 +2178,10 @@ } node = closure(node); - + // save the greddy curly + if (node instanceof Curly && ((Curly)node).type == GREEDY) { + topClosureNodes.add(node); + } if (head == null) { head = tail = node; } else { @@ -2859,6 +2885,9 @@ Node head = null; Node tail = null; int save = flags; + +int saveTCNCount = topClosureNodes.size(); + root = null; int ch = next(); if (ch == '?') { @@ -2968,6 +2997,11 @@ return node; // Dual return } + // have group closure, clear all embedded closure nodes + // from the top list. + if (saveTCNCount < topClosureNodes.size()) + topClosureNodes.subList(saveTCNCount, topClosureNodes.size()).clear(); + if (node instanceof Ques) { Ques ques = (Ques) node; if (ques.type == POSSESSIVE) { @@ -3002,10 +3036,13 @@ } else { // Non-deterministic int temp = ((GroupHead) head).localIndex; Loop loop; - if (curly.type == GREEDY) + if (curly.type == GREEDY) { loop = new Loop(this.localCount, temp); - else // Reluctant Curly + // add the non-determinsitic/greeddy to the list + topClosureNodes.add(loop); + } else { // Reluctant Curly loop = new LazyLoop(this.localCount, temp); + } Prolog prolog = new Prolog(loop); this.localCount += 1; loop.cmin = curly.cmin; @@ -4876,15 +4913,17 @@ int countIndex; // local count index in matcher locals int beginIndex; // group beginning index int cmin, cmax; + int posIndex; + Loop(int countIndex, int beginIndex) { this.countIndex = countIndex; this.beginIndex = beginIndex; + this.posIndex = -1; } boolean match(Matcher matcher, int i, CharSequence seq) { // Avoid infinite loop in zero-length case. if (i > matcher.locals[beginIndex]) { int count = matcher.locals[countIndex]; - // This block is for before we reach the minimum // iterations required for the loop to match if (count < cmin) { @@ -4901,21 +4940,43 @@ // This block is for after we have the minimum // iterations required for the loop to match if (count < cmax) { + + + // Let's check if we have already tried and failed + // at this starting position "i" in the past. + // If yes, then just return false wihtout trying + // again, to stop the exponential backtracking. + if (posIndex != -1 && + matcher.localsPos[posIndex].contains(i)) + return next.match(matcher, i, seq); + matcher.locals[countIndex] = count + 1; boolean b = body.match(matcher, i, seq); // If match failed we must backtrack, so // the loop count should NOT be incremented - if (!b) - matcher.locals[countIndex] = count; - else + if (b) return true; + matcher.locals[countIndex] = count; + // save the failed position + if (posIndex != -1) + matcher.localsPos[posIndex].add(i); + } } return next.match(matcher, i, seq); } + boolean matchInit(Matcher matcher, int i, CharSequence seq) { int save = matcher.locals[countIndex]; boolean ret = false; + + if (posIndex != -1) { + if (matcher.localsPos[posIndex] == null) + matcher.localsPos[posIndex] = new HashSetInt(); + else + matcher.localsPos[posIndex].clear(); + } + if (0 < cmin) { matcher.locals[countIndex] = 1; ret = body.match(matcher, i, seq); @@ -4930,6 +4991,7 @@ matcher.locals[countIndex] = save; return ret; } + boolean study(TreeInfo info) { info.maxValid = false; info.deterministic = false;