src/java.base/share/classes/java/util/regex/Pattern.java

Print this page

        

*** 993,1002 **** --- 993,1014 ---- * Temporary storage used while parsing group references. */ transient GroupHead[] groupNodes; /** + * Temporary storage used to store the top level closure nodes. + */ + transient ArrayList<Node> topClosureNodes; + + /** + * The number of top greedy repetition groups in this Pattern. Used by + * matchers to allocate storage needed for a HashSetInt to keep the + * beginning pos {@code i} of all past failed match. + */ + transient int localTGRGroupCount; + + /** * Temporary null terminated code point array used by pattern compiling. */ private transient int[] temp; /**
*** 1336,1345 **** --- 1348,1358 ---- s.defaultReadObject(); // Initialize counts capturingGroupCount = 1; localCount = 0; + localTGRGroupCount = 0; // if length > 0, the Pattern is lazily compiled if (pattern.length() == 0) { root = new Start(lastAccept); matchRoot = lastAccept;
*** 1366,1375 **** --- 1379,1389 ---- flags |= UNICODE_CASE; // Reset group index count capturingGroupCount = 1; localCount = 0; + localTGRGroupCount = 0; if (pattern.length() > 0) { compile(); } else { root = new Start(lastAccept);
*** 1705,1714 **** --- 1719,1729 ---- // Allocate all temporary objects here. buffer = new int[32]; groupNodes = new GroupHead[10]; namedGroups = null; + topClosureNodes = new ArrayList<>(10); if (has(LITERAL)) { // Literal pattern handling matchRoot = newSlice(temp, patternLength, hasSupplementary); matchRoot.next = lastAccept;
*** 1735,1750 **** --- 1750,1772 ---- root = matchRoot; } else { root = hasSupplementary ? new StartS(matchRoot) : new Start(matchRoot); } + for (Node node : topClosureNodes) { + if (node instanceof Loop) { + ((Loop)node).posIndex = localTGRGroupCount++; + } + } + // Release temporary storage temp = null; buffer = null; groupNodes = null; patternLength = 0; compiled = true; + topClosureNodes = null; } Map<String, Integer> namedGroups() { Map<String, Integer> groups = namedGroups; if (groups == null) {
*** 2070,2079 **** --- 2092,2102 ---- switch (ch) { case '(': // Because group handles its own closure, // we need to treat it differently node = group0(); + // Check for comment or flag group if (node == null) continue; if (head == null) head = node;
*** 2153,2163 **** node = atom(); break; } node = closure(node); ! if (head == null) { head = tail = node; } else { tail.next = node; tail = node; --- 2176,2189 ---- node = atom(); break; } node = closure(node); ! // save the greddy curly ! if (node instanceof Curly && ((Curly)node).type == GREEDY) { ! topClosureNodes.add(node); ! } if (head == null) { head = tail = node; } else { tail.next = node; tail = node;
*** 2857,2866 **** --- 2883,2895 ---- private Node group0() { boolean capturingGroup = false; Node head = null; Node tail = null; int save = flags; + + int saveTCNCount = topClosureNodes.size(); + root = null; int ch = next(); if (ch == '?') { ch = skip(); switch (ch) {
*** 2966,2975 **** --- 2995,3009 ---- if (head == tail) { // Zero length assertion root = node; return node; // Dual return } + // have group closure, clear all embedded closure nodes + // from the top list. + if (saveTCNCount < topClosureNodes.size()) + topClosureNodes.subList(saveTCNCount, topClosureNodes.size()).clear(); + if (node instanceof Ques) { Ques ques = (Ques) node; if (ques.type == POSSESSIVE) { root = node; return node;
*** 3000,3013 **** capturingGroup); return head; } else { // Non-deterministic int temp = ((GroupHead) head).localIndex; Loop loop; ! if (curly.type == GREEDY) loop = new Loop(this.localCount, temp); ! else // Reluctant Curly loop = new LazyLoop(this.localCount, temp); Prolog prolog = new Prolog(loop); this.localCount += 1; loop.cmin = curly.cmin; loop.cmax = curly.cmax; loop.body = head; --- 3034,3050 ---- capturingGroup); return head; } else { // Non-deterministic int temp = ((GroupHead) head).localIndex; Loop loop; ! if (curly.type == GREEDY) { loop = new Loop(this.localCount, temp); ! // add the non-determinsitic/greeddy to the list ! topClosureNodes.add(loop); ! } else { // Reluctant Curly loop = new LazyLoop(this.localCount, temp); + } Prolog prolog = new Prolog(loop); this.localCount += 1; loop.cmin = curly.cmin; loop.cmax = curly.cmax; loop.body = head;
*** 4874,4892 **** static class Loop extends Node { Node body; int countIndex; // local count index in matcher locals int beginIndex; // group beginning index int cmin, cmax; Loop(int countIndex, int beginIndex) { this.countIndex = countIndex; this.beginIndex = beginIndex; } boolean match(Matcher matcher, int i, CharSequence seq) { // Avoid infinite loop in zero-length case. if (i > matcher.locals[beginIndex]) { int count = matcher.locals[countIndex]; - // This block is for before we reach the minimum // iterations required for the loop to match if (count < cmin) { matcher.locals[countIndex] = count + 1; boolean b = body.match(matcher, i, seq); --- 4911,4931 ---- static class Loop extends Node { Node body; int countIndex; // local count index in matcher locals int beginIndex; // group beginning index int cmin, cmax; + int posIndex; + Loop(int countIndex, int beginIndex) { this.countIndex = countIndex; this.beginIndex = beginIndex; + this.posIndex = -1; } boolean match(Matcher matcher, int i, CharSequence seq) { // Avoid infinite loop in zero-length case. if (i > matcher.locals[beginIndex]) { int count = matcher.locals[countIndex]; // This block is for before we reach the minimum // iterations required for the loop to match if (count < cmin) { matcher.locals[countIndex] = count + 1; boolean b = body.match(matcher, i, seq);
*** 4899,4923 **** return b; } // This block is for after we have the minimum // iterations required for the loop to match if (count < cmax) { matcher.locals[countIndex] = count + 1; boolean b = body.match(matcher, i, seq); // If match failed we must backtrack, so // the loop count should NOT be incremented ! if (!b) ! matcher.locals[countIndex] = count; ! else return true; } } return next.match(matcher, i, seq); } boolean matchInit(Matcher matcher, int i, CharSequence seq) { int save = matcher.locals[countIndex]; boolean ret = false; if (0 < cmin) { matcher.locals[countIndex] = 1; ret = body.match(matcher, i, seq); } else if (0 < cmax) { matcher.locals[countIndex] = 1; --- 4938,4984 ---- return b; } // This block is for after we have the minimum // iterations required for the loop to match if (count < cmax) { + + + // Let's check if we have already tried and failed + // at this starting position "i" in the past. + // If yes, then just return false wihtout trying + // again, to stop the exponential backtracking. + if (posIndex != -1 && + matcher.localsPos[posIndex].contains(i)) + return next.match(matcher, i, seq); + matcher.locals[countIndex] = count + 1; boolean b = body.match(matcher, i, seq); // If match failed we must backtrack, so // the loop count should NOT be incremented ! if (b) return true; + matcher.locals[countIndex] = count; + // save the failed position + if (posIndex != -1) + matcher.localsPos[posIndex].add(i); + } } return next.match(matcher, i, seq); } + boolean matchInit(Matcher matcher, int i, CharSequence seq) { int save = matcher.locals[countIndex]; boolean ret = false; + + if (posIndex != -1) { + if (matcher.localsPos[posIndex] == null) + matcher.localsPos[posIndex] = new HashSetInt(); + else + matcher.localsPos[posIndex].clear(); + } + if (0 < cmin) { matcher.locals[countIndex] = 1; ret = body.match(matcher, i, seq); } else if (0 < cmax) { matcher.locals[countIndex] = 1;
*** 4928,4937 **** --- 4989,4999 ---- ret = next.match(matcher, i, seq); } matcher.locals[countIndex] = save; return ret; } + boolean study(TreeInfo info) { info.maxValid = false; info.deterministic = false; return false; }