< prev index next >

src/java.base/share/classes/java/util/regex/Pattern.java

Print this page
rev 57941 : [mq]: 8235812-Unicode-linebreak-with-quantifier-does-not-match-valid-input

*** 2062,2072 **** */ private Node expr(Node end) { Node prev = null; Node firstTail = null; Branch branch = null; ! Node branchConn = null; for (;;) { Node node = sequence(end); Node nodeTail = root; //double return if (prev == null) { --- 2062,2072 ---- */ private Node expr(Node end) { Node prev = null; Node firstTail = null; Branch branch = null; ! BranchConn branchConn = null; for (;;) { Node node = sequence(end); Node nodeTail = root; //double return if (prev == null) {
*** 2210,2220 **** --- 2210,2237 ---- default: node = atom(); break; } + if (node instanceof LineEnding) { + LineEnding le = (LineEnding)node; + node = closureOfLineEnding(le); + + if (node != le) { + // LineEnding was replaced with an anonymous group + if (head == null) + head = node; + else + tail.next = node; + // Double return: Tail was returned in root + tail = root; + continue; + } + } else { node = closure(node); + } + /* save the top dot-greedy nodes (.*, .+) as well if (node instanceof GreedyCharProperty && ((GreedyCharProperty)node).cp instanceof Dot) { topClosureNodes.add(node); }
*** 3077,3098 **** // have group closure, clear all inner closure nodes from the // top list (no backtracking stopper optimization for inner if (saveTCNCount < topClosureNodes.size()) topClosureNodes.subList(saveTCNCount, topClosureNodes.size()).clear(); if (node instanceof Ques) { Ques ques = (Ques) node; if (ques.type == Qtype.POSSESSIVE) { root = node; return node; } ! tail.next = new BranchConn(); ! tail = tail.next; if (ques.type == Qtype.GREEDY) { ! head = new Branch(head, null, tail); } else { // Reluctant quantifier ! head = new Branch(null, head, tail); } root = tail; return head; } else if (node instanceof Curly) { Curly curly = (Curly) node; --- 3094,3128 ---- // have group closure, clear all inner closure nodes from the // top list (no backtracking stopper optimization for inner if (saveTCNCount < topClosureNodes.size()) topClosureNodes.subList(saveTCNCount, topClosureNodes.size()).clear(); + return groupWithClosure(node, head, tail, capturingGroup); + } + + /** + * Transforms a Group with quantifiers into some special constructs + * (such as Branch or Loop/GroupCurly), if necessary. + * + * This method is applied either to actual groups or to the Unicode + * linebreak (aka \\R) represented as an anonymous group. + */ + private Node groupWithClosure(Node node, Node head, Node tail, + boolean capturingGroup) + { if (node instanceof Ques) { Ques ques = (Ques) node; if (ques.type == Qtype.POSSESSIVE) { root = node; return node; } ! BranchConn branchConn = new BranchConn(); ! tail = tail.next = branchConn; if (ques.type == Qtype.GREEDY) { ! head = new Branch(head, null, branchConn); } else { // Reluctant quantifier ! head = new Branch(null, head, branchConn); } root = tail; return head; } else if (node instanceof Curly) { Curly curly = (Curly) node;
*** 3266,3275 **** --- 3296,3330 ---- } return new Curly(prev, cmin, MAX_REPS, qtype); } /** + * Processing repetition of a Unicode linebreak \\R. + */ + private Node closureOfLineEnding(LineEnding le) { + int ch = peek(); + if (ch != '?' && ch != '*' && ch != '+' && ch != '{') { + return le; + } + + // Replace the LineEnding with an anonymous group + // (?:\\u000D\\u000A|[\\u000A\\u000B\\u000C\\u000D\\u0085\\u2028\\u2029]) + Node grHead = createGroup(true); + Node grTail = root; + BranchConn branchConn = new BranchConn(); + branchConn.next = grTail; + Node slice = new Slice(new int[] {0x0D, 0x0A}); + slice.next = branchConn; + Node chClass = newCharProperty(x -> x == 0x0A || x == 0x0B || + x == 0x0C || x == 0x0D || x == 0x85 || x == 0x2028 || + x == 0x2029); + chClass.next = branchConn; + grHead.next = new Branch(slice, chClass, branchConn); + return groupWithClosure(closure(grHead), grHead, grTail, false); + } + + /** * Processes repetition. If the next character peeked is a quantifier * then new nodes must be appended to handle the repetition. * Prev could be a single or a group, so it could be a chain of nodes. */ private Node closure(Node prev) {
*** 4721,4742 **** * and where it does not occur. */ static final class Branch extends Node { Node[] atoms = new Node[2]; int size = 2; ! Node conn; ! Branch(Node first, Node second, Node branchConn) { conn = branchConn; atoms[0] = first; atoms[1] = second; } void add(Node node) { if (size >= atoms.length) { ! Node[] tmp = new Node[atoms.length*2]; ! System.arraycopy(atoms, 0, tmp, 0, atoms.length); ! atoms = tmp; } atoms[size++] = node; } boolean match(Matcher matcher, int i, CharSequence seq) { --- 4776,4798 ---- * and where it does not occur. */ static final class Branch extends Node { Node[] atoms = new Node[2]; int size = 2; ! BranchConn conn; ! Branch(Node first, Node second, BranchConn branchConn) { conn = branchConn; atoms[0] = first; atoms[1] = second; } void add(Node node) { if (size >= atoms.length) { ! int len = ArraysSupport.newLength(size, ! 1, /* minimum growth */ ! size /* preferred growth */); ! atoms = Arrays.copyOf(atoms, len); } atoms[size++] = node; } boolean match(Matcher matcher, int i, CharSequence seq) {
< prev index next >