src/java.base/share/classes/java/util/regex/Pattern.java
Print this page
*** 993,1002 ****
--- 993,1014 ----
* Temporary storage used while parsing group references.
*/
transient GroupHead[] groupNodes;
/**
+ * Temporary storage used to store the top level closure nodes.
+ */
+ transient ArrayList<Node> topClosureNodes;
+
+ /**
+ * The number of top greedy repetition groups in this Pattern. Used by
+ * matchers to allocate storage needed for a HashSetInt to keep the
+ * beginning pos {@code i} of all past failed match.
+ */
+ transient int localTGRGroupCount;
+
+ /**
* Temporary null terminated code point array used by pattern compiling.
*/
private transient int[] temp;
/**
*** 1336,1345 ****
--- 1348,1358 ----
s.defaultReadObject();
// Initialize counts
capturingGroupCount = 1;
localCount = 0;
+ localTGRGroupCount = 0;
// if length > 0, the Pattern is lazily compiled
if (pattern.length() == 0) {
root = new Start(lastAccept);
matchRoot = lastAccept;
*** 1366,1375 ****
--- 1379,1389 ----
flags |= UNICODE_CASE;
// Reset group index count
capturingGroupCount = 1;
localCount = 0;
+ localTGRGroupCount = 0;
if (pattern.length() > 0) {
compile();
} else {
root = new Start(lastAccept);
*** 1705,1714 ****
--- 1719,1729 ----
// Allocate all temporary objects here.
buffer = new int[32];
groupNodes = new GroupHead[10];
namedGroups = null;
+ topClosureNodes = new ArrayList<>(10);
if (has(LITERAL)) {
// Literal pattern handling
matchRoot = newSlice(temp, patternLength, hasSupplementary);
matchRoot.next = lastAccept;
*** 1735,1750 ****
--- 1750,1772 ----
root = matchRoot;
} else {
root = hasSupplementary ? new StartS(matchRoot) : new Start(matchRoot);
}
+ for (Node node : topClosureNodes) {
+ if (node instanceof Loop) {
+ ((Loop)node).posIndex = localTGRGroupCount++;
+ }
+ }
+
// Release temporary storage
temp = null;
buffer = null;
groupNodes = null;
patternLength = 0;
compiled = true;
+ topClosureNodes = null;
}
Map<String, Integer> namedGroups() {
Map<String, Integer> groups = namedGroups;
if (groups == null) {
*** 2070,2079 ****
--- 2092,2102 ----
switch (ch) {
case '(':
// Because group handles its own closure,
// we need to treat it differently
node = group0();
+
// Check for comment or flag group
if (node == null)
continue;
if (head == null)
head = node;
*** 2153,2163 ****
node = atom();
break;
}
node = closure(node);
!
if (head == null) {
head = tail = node;
} else {
tail.next = node;
tail = node;
--- 2176,2189 ----
node = atom();
break;
}
node = closure(node);
! // save the greddy curly
! if (node instanceof Curly && ((Curly)node).type == GREEDY) {
! topClosureNodes.add(node);
! }
if (head == null) {
head = tail = node;
} else {
tail.next = node;
tail = node;
*** 2857,2866 ****
--- 2883,2895 ----
private Node group0() {
boolean capturingGroup = false;
Node head = null;
Node tail = null;
int save = flags;
+
+ int saveTCNCount = topClosureNodes.size();
+
root = null;
int ch = next();
if (ch == '?') {
ch = skip();
switch (ch) {
*** 2966,2975 ****
--- 2995,3009 ----
if (head == tail) { // Zero length assertion
root = node;
return node; // Dual return
}
+ // have group closure, clear all embedded closure nodes
+ // from the top list.
+ if (saveTCNCount < topClosureNodes.size())
+ topClosureNodes.subList(saveTCNCount, topClosureNodes.size()).clear();
+
if (node instanceof Ques) {
Ques ques = (Ques) node;
if (ques.type == POSSESSIVE) {
root = node;
return node;
*** 3000,3013 ****
capturingGroup);
return head;
} else { // Non-deterministic
int temp = ((GroupHead) head).localIndex;
Loop loop;
! if (curly.type == GREEDY)
loop = new Loop(this.localCount, temp);
! else // Reluctant Curly
loop = new LazyLoop(this.localCount, temp);
Prolog prolog = new Prolog(loop);
this.localCount += 1;
loop.cmin = curly.cmin;
loop.cmax = curly.cmax;
loop.body = head;
--- 3034,3050 ----
capturingGroup);
return head;
} else { // Non-deterministic
int temp = ((GroupHead) head).localIndex;
Loop loop;
! if (curly.type == GREEDY) {
loop = new Loop(this.localCount, temp);
! // add the non-determinsitic/greeddy to the list
! topClosureNodes.add(loop);
! } else { // Reluctant Curly
loop = new LazyLoop(this.localCount, temp);
+ }
Prolog prolog = new Prolog(loop);
this.localCount += 1;
loop.cmin = curly.cmin;
loop.cmax = curly.cmax;
loop.body = head;
*** 4874,4892 ****
static class Loop extends Node {
Node body;
int countIndex; // local count index in matcher locals
int beginIndex; // group beginning index
int cmin, cmax;
Loop(int countIndex, int beginIndex) {
this.countIndex = countIndex;
this.beginIndex = beginIndex;
}
boolean match(Matcher matcher, int i, CharSequence seq) {
// Avoid infinite loop in zero-length case.
if (i > matcher.locals[beginIndex]) {
int count = matcher.locals[countIndex];
-
// This block is for before we reach the minimum
// iterations required for the loop to match
if (count < cmin) {
matcher.locals[countIndex] = count + 1;
boolean b = body.match(matcher, i, seq);
--- 4911,4931 ----
static class Loop extends Node {
Node body;
int countIndex; // local count index in matcher locals
int beginIndex; // group beginning index
int cmin, cmax;
+ int posIndex;
+
Loop(int countIndex, int beginIndex) {
this.countIndex = countIndex;
this.beginIndex = beginIndex;
+ this.posIndex = -1;
}
boolean match(Matcher matcher, int i, CharSequence seq) {
// Avoid infinite loop in zero-length case.
if (i > matcher.locals[beginIndex]) {
int count = matcher.locals[countIndex];
// This block is for before we reach the minimum
// iterations required for the loop to match
if (count < cmin) {
matcher.locals[countIndex] = count + 1;
boolean b = body.match(matcher, i, seq);
*** 4899,4923 ****
return b;
}
// This block is for after we have the minimum
// iterations required for the loop to match
if (count < cmax) {
matcher.locals[countIndex] = count + 1;
boolean b = body.match(matcher, i, seq);
// If match failed we must backtrack, so
// the loop count should NOT be incremented
! if (!b)
! matcher.locals[countIndex] = count;
! else
return true;
}
}
return next.match(matcher, i, seq);
}
boolean matchInit(Matcher matcher, int i, CharSequence seq) {
int save = matcher.locals[countIndex];
boolean ret = false;
if (0 < cmin) {
matcher.locals[countIndex] = 1;
ret = body.match(matcher, i, seq);
} else if (0 < cmax) {
matcher.locals[countIndex] = 1;
--- 4938,4984 ----
return b;
}
// This block is for after we have the minimum
// iterations required for the loop to match
if (count < cmax) {
+
+
+ // Let's check if we have already tried and failed
+ // at this starting position "i" in the past.
+ // If yes, then just return false wihtout trying
+ // again, to stop the exponential backtracking.
+ if (posIndex != -1 &&
+ matcher.localsPos[posIndex].contains(i))
+ return next.match(matcher, i, seq);
+
matcher.locals[countIndex] = count + 1;
boolean b = body.match(matcher, i, seq);
// If match failed we must backtrack, so
// the loop count should NOT be incremented
! if (b)
return true;
+ matcher.locals[countIndex] = count;
+ // save the failed position
+ if (posIndex != -1)
+ matcher.localsPos[posIndex].add(i);
+
}
}
return next.match(matcher, i, seq);
}
+
boolean matchInit(Matcher matcher, int i, CharSequence seq) {
int save = matcher.locals[countIndex];
boolean ret = false;
+
+ if (posIndex != -1) {
+ if (matcher.localsPos[posIndex] == null)
+ matcher.localsPos[posIndex] = new HashSetInt();
+ else
+ matcher.localsPos[posIndex].clear();
+ }
+
if (0 < cmin) {
matcher.locals[countIndex] = 1;
ret = body.match(matcher, i, seq);
} else if (0 < cmax) {
matcher.locals[countIndex] = 1;
*** 4928,4937 ****
--- 4989,4999 ----
ret = next.match(matcher, i, seq);
}
matcher.locals[countIndex] = save;
return ret;
}
+
boolean study(TreeInfo info) {
info.maxValid = false;
info.deterministic = false;
return false;
}