jdk Cdiff src/java.base/share/classes/java/util/regex/Pattern.java

src/java.base/share/classes/java/util/regex/Pattern.java


*** 982,991 ****
--- 982,996 ----
       * Temporary storage used by parsing pattern slice.
       */
      transient int[] buffer;
  
      /**
+      * Temporary storage for the predicate passed back from escape parsing by "double return".
+      */
+     transient CharPredicate predicate;
+ 
+     /**
       * Map the "name" of the "named capturing group" to its group id
       * node.
       */
      transient volatile Map<String, Integer> namedGroups;
  
*** 1024,1034 ****
  
      /**
       * If the Start node might possibly match supplementary characters.
       * It is set to true during compiling if
       * (1) There is supplementary char in pattern, or
!      * (2) There is complement node of Category or Block
       */
      private transient boolean hasSupplementary;
  
      /**
       * Compiles the given regular expression into a pattern.
--- 1029,1039 ----
  
      /**
       * If the Start node might possibly match supplementary characters.
       * It is set to true during compiling if
       * (1) There is supplementary char in pattern, or
!      * (2) There is complement node of a "family" CharProperty
       */
      private transient boolean hasSupplementary;
  
      /**
       * Compiles the given regular expression into a pattern.
*** 1752,1799 ****
          }
          return groups;
      }
  
      /**
-      * Used to print out a subtree of the Pattern to help with debugging.
-      */
-     private static void printObjectTree(Node node) {
-         while(node != null) {
-             if (node instanceof Prolog) {
-                 System.out.println(node);
-                 printObjectTree(((Prolog)node).loop);
-                 System.out.println("**** end contents prolog loop");
-             } else if (node instanceof Loop) {
-                 System.out.println(node);
-                 printObjectTree(((Loop)node).body);
-                 System.out.println("**** end contents Loop body");
-             } else if (node instanceof Curly) {
-                 System.out.println(node);
-                 printObjectTree(((Curly)node).atom);
-                 System.out.println("**** end contents Curly body");
-             } else if (node instanceof GroupCurly) {
-                 System.out.println(node);
-                 printObjectTree(((GroupCurly)node).atom);
-                 System.out.println("**** end contents GroupCurly body");
-             } else if (node instanceof GroupTail) {
-                 System.out.println(node);
-                 System.out.println("Tail next is "+node.next);
-                 return;
-             } else {
-                 System.out.println(node);
-             }
-             node = node.next;
-             if (node != null)
-                 System.out.println("->next:");
-             if (node == Pattern.accept) {
-                 System.out.println("Accept Node");
-                 node = null;
-             }
-        }
-     }
- 
-     /**
       * Used to accumulate information about a subtree of the object graph
       * so that optimizations can be applied to the subtree.
       */
      static final class TreeInfo {
          int minLength;
--- 1757,1766 ----
*** 2081,2091 ****
                      tail.next = node;
                  // Double return: Tail was returned in root
                  tail = root;
                  continue;
              case '[':
!                 node = clazz(true);
                  break;
              case '\\':
                  ch = nextEscaped();
                  if (ch == 'p' || ch == 'P') {
                      boolean oneLetter = true;
--- 2048,2058 ----
                      tail.next = node;
                  // Double return: Tail was returned in root
                  tail = root;
                  continue;
              case '[':
!                 node = newCharProperty(clazz(true));
                  break;
              case '\\':
                  ch = nextEscaped();
                  if (ch == 'p' || ch == 'P') {
                      boolean oneLetter = true;
*** 2094,2104 ****
                      if (ch != '{') {
                          unread();
                      } else {
                          oneLetter = false;
                      }
!                     node = family(oneLetter, comp);
                  } else {
                      unread();
                      node = atom();
                  }
                  break;
--- 2061,2071 ----
                      if (ch != '{') {
                          unread();
                      } else {
                          oneLetter = false;
                      }
!                     node = newCharProperty(family(oneLetter, comp));
                  } else {
                      unread();
                      node = atom();
                  }
                  break;
*** 2121,2136 ****
                      node = new Dollar(has(MULTILINE));
                  break;
              case '.':
                  next();
                  if (has(DOTALL)) {
!                     node = new All();
                  } else {
!                     if (has(UNIX_LINES))
!                         node = new UnixDot();
!                     else {
!                         node = new Dot();
                      }
                  }
                  break;
              case '|':
              case ')':
--- 2088,2103 ----
                      node = new Dollar(has(MULTILINE));
                  break;
              case '.':
                  next();
                  if (has(DOTALL)) {
!                     node = new CharProperty(ALL);
                  } else {
!                     if (has(UNIX_LINES)) {
!                         node = new CharProperty(UNIXDOT);
!                     } else {
!                         node = new CharProperty(DOT);
                      }
                  }
                  break;
              case '|':
              case ')':
*** 2153,2163 ****
                  node = atom();
                  break;
              }
  
              node = closure(node);
- 
              if (head == null) {
                  head = tail = node;
              } else {
                  tail.next = node;
                  tail = node;
--- 2120,2129 ----
*** 2211,2221 ****
                          ch = next(); // Consume { if present
                          if (ch != '{')
                              unread();
                          else
                              oneLetter = false;
!                         return family(oneLetter, comp);
                      }
                  }
                  unread();
                  prev = cursor;
                  ch = escape(false, first == 0, false);
--- 2177,2187 ----
                          ch = next(); // Consume { if present
                          if (ch != '{')
                              unread();
                          else
                              oneLetter = false;
!                         return newCharProperty(family(oneLetter, comp));
                      }
                  }
                  unread();
                  prev = cursor;
                  ch = escape(false, first == 0, false);
*** 2249,2259 ****
                  continue;
              }
              break;
          }
          if (first == 1) {
!             return newSingle(buffer[0]);
          } else {
              return newSlice(buffer, first, hasSupplementary);
          }
      }
  
--- 2215,2225 ----
                  continue;
              }
              break;
          }
          if (first == 1) {
!             return newCharProperty(single(buffer[0]));
          } else {
              return newSlice(buffer, first, hasSupplementary);
          }
      }
  
*** 2344,2366 ****
              if (create) root = new Bound(Bound.NONE, has(UNICODE_CHARACTER_CLASS));
              return -1;
          case 'C':
              break;
          case 'D':
!             if (create) root = has(UNICODE_CHARACTER_CLASS)
!                                ? new Utype(UnicodeProp.DIGIT).complement()
!                                : new Ctype(ASCII.DIGIT).complement();
              return -1;
          case 'E':
          case 'F':
              break;
          case 'G':
              if (inclass) break;
              if (create) root = new LastMatch();
              return -1;
          case 'H':
!             if (create) root = new HorizWS().complement();
              return -1;
          case 'I':
          case 'J':
          case 'K':
          case 'L':
--- 2310,2340 ----
              if (create) root = new Bound(Bound.NONE, has(UNICODE_CHARACTER_CLASS));
              return -1;
          case 'C':
              break;
          case 'D':
!             if (create) {
!                 predicate = has(UNICODE_CHARACTER_CLASS) ?
!                             CharPredicates.DIGIT : CharPredicates.ASCII_DIGIT;
!                 predicate = predicate.negate();
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 'E':
          case 'F':
              break;
          case 'G':
              if (inclass) break;
              if (create) root = new LastMatch();
              return -1;
          case 'H':
!             if (create) {
!                 predicate = HorizWS.negate();
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 'I':
          case 'J':
          case 'K':
          case 'L':
*** 2375,2398 ****
          case 'R':
              if (inclass) break;
              if (create) root = new LineEnding();
              return -1;
          case 'S':
!             if (create) root = has(UNICODE_CHARACTER_CLASS)
!                                ? new Utype(UnicodeProp.WHITE_SPACE).complement()
!                                : new Ctype(ASCII.SPACE).complement();
              return -1;
          case 'T':
          case 'U':
              break;
          case 'V':
!             if (create) root = new VertWS().complement();
              return -1;
          case 'W':
!             if (create) root = has(UNICODE_CHARACTER_CLASS)
!                                ? new Utype(UnicodeProp.WORD).complement()
!                                : new Ctype(ASCII.WORD).complement();
              return -1;
          case 'X':
              if (inclass) break;
              if (create) {
                  root = new XGrapheme();
--- 2349,2384 ----
          case 'R':
              if (inclass) break;
              if (create) root = new LineEnding();
              return -1;
          case 'S':
!             if (create) {
!                 predicate = has(UNICODE_CHARACTER_CLASS) ?
!                             CharPredicates.WHITE_SPACE : CharPredicates.ASCII_SPACE;
!                 predicate = predicate.negate();
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 'T':
          case 'U':
              break;
          case 'V':
!             if (create) {
!                 predicate = VertWS.negate();
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 'W':
!             if (create) {
!                 predicate = has(UNICODE_CHARACTER_CLASS) ?
!                             CharPredicates.WORD : CharPredicates.ASCII_WORD;
!                 predicate = predicate.negate();
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 'X':
              if (inclass) break;
              if (create) {
                  root = new XGrapheme();
*** 2428,2449 ****
              }
              return -1;
          case 'c':
              return c();
          case 'd':
!             if (create) root = has(UNICODE_CHARACTER_CLASS)
!                                ? new Utype(UnicodeProp.DIGIT)
!                                : new Ctype(ASCII.DIGIT);
              return -1;
          case 'e':
              return '\033';
          case 'f':
              return '\f';
          case 'g':
              break;
          case 'h':
!             if (create) root = new HorizWS();
              return -1;
          case 'i':
          case 'j':
              break;
          case 'k':
--- 2414,2442 ----
              }
              return -1;
          case 'c':
              return c();
          case 'd':
!             if (create) {
!                 predicate = has(UNICODE_CHARACTER_CLASS) ?
!                             CharPredicates.DIGIT : CharPredicates.ASCII_DIGIT;
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 'e':
              return '\033';
          case 'f':
              return '\f';
          case 'g':
              break;
          case 'h':
!             if (create) {
!                 predicate = HorizWS;
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 'i':
          case 'j':
              break;
          case 'k':
*** 2471,2483 ****
          case 'q':
              break;
          case 'r':
              return '\r';
          case 's':
!             if (create) root = has(UNICODE_CHARACTER_CLASS)
!                                ? new Utype(UnicodeProp.WHITE_SPACE)
!                                : new Ctype(ASCII.SPACE);
              return -1;
          case 't':
              return '\t';
          case 'u':
              return u();
--- 2464,2479 ----
          case 'q':
              break;
          case 'r':
              return '\r';
          case 's':
!             if (create) {
!                 predicate = has(UNICODE_CHARACTER_CLASS) ?
!                             CharPredicates.WHITE_SPACE : CharPredicates.ASCII_SPACE;
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 't':
              return '\t';
          case 'u':
              return u();
*** 2490,2505 ****
              // the start or end value, such as [\v-...] or [...-\v], in
              // which a single definite value (0x0B) is expected. For
              // compatibility concern '\013'/0x0B is returned if isrange.
              if (isrange)
                  return '\013';
!             if (create) root = new VertWS();
              return -1;
          case 'w':
!             if (create) root = has(UNICODE_CHARACTER_CLASS)
!                                ? new Utype(UnicodeProp.WORD)
!                                : new Ctype(ASCII.WORD);
              return -1;
          case 'x':
              return x();
          case 'y':
              break;
--- 2486,2508 ----
              // the start or end value, such as [\v-...] or [...-\v], in
              // which a single definite value (0x0B) is expected. For
              // compatibility concern '\013'/0x0B is returned if isrange.
              if (isrange)
                  return '\013';
!             if (create) {
!                 predicate = VertWS;
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 'w':
!             if (create) {
!                 predicate = has(UNICODE_CHARACTER_CLASS) ?
!                             CharPredicates.WORD : CharPredicates.ASCII_WORD;
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 'x':
              return x();
          case 'y':
              break;
*** 2518,2629 ****
       *
       * Consumes a ] on the way out if consume is true. Usually consume
       * is true except for the case of [abc&&def] where def is a separate
       * right hand node with "understood" brackets.
       */
!     private CharProperty clazz(boolean consume) {
!         CharProperty prev = null;
!         CharProperty node = null;
          BitClass bits = new BitClass();
!         boolean include = true;
!         boolean firstInClass = true;
          int ch = next();
!         for (;;) {
!             switch (ch) {
!                 case '^':
                      // Negates if first char in a class, otherwise literal
!                     if (firstInClass) {
!                         if (temp[cursor-1] != '[')
!                             break;
                          ch = next();
!                         include = !include;
!                         continue;
!                     } else {
!                         // ^ not first in class, treat as literal
!                         break;
                      }
                  case '[':
!                     firstInClass = false;
!                     node = clazz(true);
                      if (prev == null)
!                         prev = node;
                      else
!                         prev = union(prev, node);
                      ch = peek();
                      continue;
                  case '&':
-                     firstInClass = false;
                      ch = next();
                      if (ch == '&') {
                          ch = next();
!                         CharProperty rightNode = null;
                          while (ch != ']' && ch != '&') {
                              if (ch == '[') {
!                                 if (rightNode == null)
!                                     rightNode = clazz(true);
                                  else
!                                     rightNode = union(rightNode, clazz(true));
                              } else { // abc&&def
                                  unread();
!                                 rightNode = clazz(false);
                              }
                              ch = peek();
                          }
!                         if (rightNode != null)
!                             node = rightNode;
                          if (prev == null) {
!                             if (rightNode == null)
                                  throw error("Bad class syntax");
                              else
!                                 prev = rightNode;
                          } else {
!                             prev = intersection(prev, node);
                          }
                      } else {
                          // treat as a literal &
                          unread();
                          break;
                      }
                      continue;
                  case 0:
-                     firstInClass = false;
                      if (cursor >= patternLength)
                          throw error("Unclosed character class");
                      break;
                  case ']':
!                     firstInClass = false;
!                     if (prev != null) {
                          if (consume)
                              next();
                          return prev;
                      }
                      break;
                  default:
-                     firstInClass = false;
                      break;
              }
!             node = range(bits);
!             if (include) {
!                 if (prev == null) {
!                     prev = node;
!                 } else {
!                     if (prev != node)
!                         prev = union(prev, node);
!                 }
!             } else {
!                 if (prev == null) {
!                     prev = node.complement();
                  } else {
!                     if (prev != node)
!                         prev = setDifference(prev, node);
!                 }
              }
              ch = peek();
          }
      }
  
!     private CharProperty bitsOrSingle(BitClass bits, int ch) {
          /* Bits can only handle codepoints in [u+0000-u+00ff] range.
             Use "single" node instead of bits when dealing with unicode
             case folding for codepoints listed below.
             (1)Uppercase out of range: u+00ff, u+00b5
                toUpperCase(u+00ff) -> u+0178
--- 2521,2631 ----
       *
       * Consumes a ] on the way out if consume is true. Usually consume
       * is true except for the case of [abc&&def] where def is a separate
       * right hand node with "understood" brackets.
       */
!     private CharPredicate clazz(boolean consume) {
!         CharPredicate prev = null;
!         CharPredicate curr = null;
          BitClass bits = new BitClass();
!         BmpCharPredicate bitsP = ch -> ch < 256 && bits.bits[ch];
! 
!         boolean isNeg = false;
!         boolean hasBits = false;
          int ch = next();
! 
          // Negates if first char in a class, otherwise literal
!         if (ch == '^' && temp[cursor-1] == '[') {
              ch = next();
!             isNeg = true;
          }
+         for (;;) {
+             switch (ch) {
                  case '[':
!                     curr = clazz(true);
                      if (prev == null)
!                         prev = curr;
                      else
!                         prev = prev.union(curr);
                      ch = peek();
                      continue;
                  case '&':
                      ch = next();
                      if (ch == '&') {
                          ch = next();
!                         CharPredicate right = null;
                          while (ch != ']' && ch != '&') {
                              if (ch == '[') {
!                                 if (right == null)
!                                     right = clazz(true);
                                  else
!                                     right = right.union(clazz(true));
                              } else { // abc&&def
                                  unread();
!                                 right = clazz(false);
                              }
                              ch = peek();
                          }
!                         if (hasBits) {
!                             // bits used, union has high precedence
                              if (prev == null) {
!                                 prev = curr = bitsP;
!                             } else {
!                                 prev = prev.union(bitsP);
!                             }
!                             hasBits = false;
!                         }
!                         if (right != null)
!                             curr = right;
!                         if (prev == null) {
!                             if (right == null)
                                  throw error("Bad class syntax");
                              else
!                                 prev = right;
                          } else {
!                             prev = prev.and(curr);
                          }
                      } else {
                          // treat as a literal &
                          unread();
                          break;
                      }
                      continue;
                  case 0:
                      if (cursor >= patternLength)
                          throw error("Unclosed character class");
                      break;
                  case ']':
!                     if (prev != null || hasBits) {
                          if (consume)
                              next();
+                         if (prev == null)
+                             prev = bitsP;
+                         else if (hasBits)
+                             prev = prev.union(bitsP);
+                         if (isNeg)
+                             return prev.negate();
                          return prev;
                      }
                      break;
                  default:
                      break;
              }
!             curr = range(bits);
!             if (curr == null) {    // the bits used
!                 hasBits = true;
              } else {
!                 if (prev == null)
!                     prev = curr;
!                 else if (prev != curr)
!                     prev = prev.union(curr);
              }
              ch = peek();
          }
      }
  
!     private CharPredicate bitsOrSingle(BitClass bits, int ch) {
          /* Bits can only handle codepoints in [u+0000-u+00ff] range.
             Use "single" node instead of bits when dealing with unicode
             case folding for codepoints listed below.
             (1)Uppercase out of range: u+00ff, u+00b5
                toUpperCase(u+00ff) -> u+0178
*** 2644,2663 ****
              !(has(CASE_INSENSITIVE) && has(UNICODE_CASE) &&
                (ch == 0xff || ch == 0xb5 ||
                 ch == 0x49 || ch == 0x69 ||  //I and i
                 ch == 0x53 || ch == 0x73 ||  //S and s
                 ch == 0x4b || ch == 0x6b ||  //K and k
!                ch == 0xc5 || ch == 0xe5)))  //A+ring
!             return bits.add(ch, flags());
!         return newSingle(ch);
      }
  
      /**
       * Parse a single character or a character range in a character class
       * and return its representative node.
       */
!     private CharProperty range(BitClass bits) {
          int ch = peek();
          if (ch == '\\') {
              ch = nextEscaped();
              if (ch == 'p' || ch == 'P') { // A property
                  boolean comp = (ch == 'P');
--- 2646,2692 ----
              !(has(CASE_INSENSITIVE) && has(UNICODE_CASE) &&
                (ch == 0xff || ch == 0xb5 ||
                 ch == 0x49 || ch == 0x69 ||    //I and i
                 ch == 0x53 || ch == 0x73 ||    //S and s
                 ch == 0x4b || ch == 0x6b ||    //K and k
!                ch == 0xc5 || ch == 0xe5))) {  //A+ring
!             bits.add(ch, flags());
!             return null;
!         }
!         return single(ch);
!     }
! 
!     /**
!      * Returns a suitably optimized predicate for a single character, honoring the case flags.
!      */
!     private CharPredicate single(final int ch) {
!         if (has(CASE_INSENSITIVE)) {
!             int lower, upper;
!             if (has(UNICODE_CASE)) {
!                 upper = Character.toUpperCase(ch);
!                 lower = Character.toLowerCase(upper);
!                 // Unicode case insensitive matches
!                 if (upper != lower)
!                     return SingleU(lower);
!             } else if (ASCII.isAscii(ch)) {
!                 lower = ASCII.toLower(ch);
!                 upper = ASCII.toUpper(ch);
!                 // Case insensitive matches a given BMP character
!                 if (lower != upper)
!                     return SingleI(lower, upper);
!             }
!         }
!         if (isSupplementary(ch))
!             return SingleS(ch);
!         return Single(ch);  // Match a given BMP character
      }
  
      /**
       * Parse a single character or a character range in a character class
       * and return its representative node.
       */
!     private CharPredicate range(BitClass bits) {
          int ch = peek();
          if (ch == '\\') {
              ch = nextEscaped();
              if (ch == 'p' || ch == 'P') { // A property
                  boolean comp = (ch == 'P');
*** 2672,2682 ****
              } else { // ordinary escape
                  boolean isrange = temp[cursor+1] == '-';
                  unread();
                  ch = escape(true, true, isrange);
                  if (ch == -1)
!                     return (CharProperty) root;
              }
          } else {
              next();
          }
          if (ch >= 0) {
--- 2701,2711 ----
              } else { // ordinary escape
                  boolean isrange = temp[cursor+1] == '-';
                  unread();
                  ch = escape(true, true, isrange);
                  if (ch == -1)
!                     return predicate;
              }
          } else {
              next();
          }
          if (ch >= 0) {
*** 2694,2723 ****
                          next();
                      }
                      if (m < ch) {
                          throw error("Illegal character range");
                      }
!                     if (has(CASE_INSENSITIVE))
!                         return caseInsensitiveRangeFor(ch, m);
!                     else
!                         return rangeFor(ch, m);
                  }
              }
              return bitsOrSingle(bits, ch);
          }
          throw error("Unexpected character '"+((char)ch)+"'");
      }
  
      /**
       * Parses a Unicode character family and returns its representative node.
       */
!     private CharProperty family(boolean singleLetter,
!                                 boolean maybeComplement)
      {
          next();
          String name;
!         CharProperty node = null;
  
          if (singleLetter) {
              int c = temp[cursor];
              if (!Character.isSupplementaryCodePoint(c)) {
                  name = String.valueOf((char)c);
--- 2723,2755 ----
                          next();
                      }
                      if (m < ch) {
                          throw error("Illegal character range");
                      }
!                     if (has(CASE_INSENSITIVE)) {
!                         if (has(UNICODE_CASE))
!                             return CIRangeU(ch, m);
!                         return CIRange(ch, m);
!                     } else {
!                         return Range(ch, m);
!                     }
                  }
              }
              return bitsOrSingle(bits, ch);
          }
          throw error("Unexpected character '"+((char)ch)+"'");
      }
  
      /**
       * Parses a Unicode character family and returns its representative node.
       */
!     private CharPredicate family(boolean singleLetter,
!                                      boolean isComplement)
      {
          next();
          String name;
!         CharPredicate p = null;
  
          if (singleLetter) {
              int c = temp[cursor];
              if (!Character.isSupplementaryCodePoint(c)) {
                  name = String.valueOf((char)c);
*** 2745,2836 ****
              String value = name.substring(i + 1);
              name = name.substring(0, i).toLowerCase(Locale.ENGLISH);
              switch (name) {
                  case "sc":
                  case "script":
!                     node = unicodeScriptPropertyFor(value);
                      break;
                  case "blk":
                  case "block":
!                     node = unicodeBlockPropertyFor(value);
                      break;
                  case "gc":
                  case "general_category":
!                     node = charPropertyNodeFor(value);
                      break;
                  default:
                      throw error("Unknown Unicode property {name=<" + name + ">, "
                                  + "value=<" + value + ">}");
!             }
          } else {
              if (name.startsWith("In")) {
!                 // \p{inBlockName}
!                 node = unicodeBlockPropertyFor(name.substring(2));
              } else if (name.startsWith("Is")) {
!                 // \p{isGeneralCategory} and \p{isScriptName}
                  name = name.substring(2);
!                 UnicodeProp uprop = UnicodeProp.forName(name);
!                 if (uprop != null)
!                     node = new Utype(uprop);
!                 if (node == null)
!                     node = CharPropertyNames.charPropertyFor(name);
!                 if (node == null)
!                     node = unicodeScriptPropertyFor(name);
              } else {
                  if (has(UNICODE_CHARACTER_CLASS)) {
!                     UnicodeProp uprop = UnicodeProp.forPOSIXName(name);
!                     if (uprop != null)
!                         node = new Utype(uprop);
                  }
!                 if (node == null)
!                     node = charPropertyNodeFor(name);
              }
          }
!         if (maybeComplement) {
!             if (node instanceof Category || node instanceof Block)
                  hasSupplementary = true;
!             node = node.complement();
          }
!         return node;
!     }
! 
! 
!     /**
!      * Returns a CharProperty matching all characters belong to
!      * a UnicodeScript.
!      */
!     private CharProperty unicodeScriptPropertyFor(String name) {
!         final Character.UnicodeScript script;
!         try {
!             script = Character.UnicodeScript.forName(name);
!         } catch (IllegalArgumentException iae) {
!             throw error("Unknown character script name {" + name + "}");
!         }
!         return new Script(script);
!     }
! 
!     /**
!      * Returns a CharProperty matching all characters in a UnicodeBlock.
!      */
!     private CharProperty unicodeBlockPropertyFor(String name) {
!         final Character.UnicodeBlock block;
!         try {
!             block = Character.UnicodeBlock.forName(name);
!         } catch (IllegalArgumentException iae) {
!             throw error("Unknown character block name {" + name + "}");
!         }
!         return new Block(block);
      }
  
!     /**
!      * Returns a CharProperty matching all characters in a named property.
!      */
!     private CharProperty charPropertyNodeFor(String name) {
!         CharProperty p = CharPropertyNames.charPropertyFor(name);
          if (p == null)
!             throw error("Unknown character property name {" + name + "}");
!         return p;
      }
  
      /**
       * Parses and returns the name of a "named capturing group", the trailing
       * ">" is consumed after parsing.
--- 2777,2842 ----
              String value = name.substring(i + 1);
              name = name.substring(0, i).toLowerCase(Locale.ENGLISH);
              switch (name) {
                  case "sc":
                  case "script":
!                     p = CharPredicates.forUnicodeScript(value);
                      break;
                  case "blk":
                  case "block":
!                     p = CharPredicates.forUnicodeBlock(value);
                      break;
                  case "gc":
                  case "general_category":
!                     p = CharPredicates.forProperty(value);
                      break;
                  default:
+                     break;
+             }
+             if (p == null)
                  throw error("Unknown Unicode property {name=<" + name + ">, "
                               + "value=<" + value + ">}");
! 
          } else {
              if (name.startsWith("In")) {
!                 // \p{InBlockName}
!                 p = CharPredicates.forUnicodeBlock(name.substring(2));
              } else if (name.startsWith("Is")) {
!                 // \p{IsGeneralCategory} and \p{IsScriptName}
                  name = name.substring(2);
!                 p = CharPredicates.forUnicodeProperty(name);
!                 if (p == null)
!                     p = CharPredicates.forProperty(name);
!                 if (p == null)
!                     p = CharPredicates.forUnicodeScript(name);
              } else {
                  if (has(UNICODE_CHARACTER_CLASS)) {
!                     p = CharPredicates.forPOSIXName(name);
                  }
!                 if (p == null)
!                     p = CharPredicates.forProperty(name);
              }
+             if (p == null)
+                 throw error("Unknown character property name {In/Is" + name + "}");
          }
!         if (isComplement) {
!             // it might be too expensive to detect if a complement of
!             // CharProperty can match "certain" supplementary. So just
!             // go with StartS.
              hasSupplementary = true;
!             p = p.negate();
          }
!         return p;
      }
  
!     private CharProperty newCharProperty(CharPredicate p) {  // wraps a predicate in a single-character matcher node
          if (p == null)
!             return null;  // a null predicate is passed through as a null node
!         if (p instanceof BmpCharPredicate)
!             return new BmpCharProperty((BmpCharPredicate)p);  // BMP-only node: matches one char at a time
!         else
!             return new CharProperty(p);  // general node: matches one code point at a time
      }
  
      /**
       * Parses and returns the name of a "named capturing group", the trailing
       * ">" is consumed after parsing.
*** 2882,2892 ****
                  break;
              case '>':   // (?>xxx)  independent group
                  head = createGroup(true);
                  tail = root;
                  head.next = expr(tail);
!                 head = tail = new Ques(head, INDEPENDENT);
                  break;
              case '<':   // (?<xxx)  look behind
                  ch = read();
                  if (ASCII.isLower(ch) || ASCII.isUpper(ch)) {
                      // named captured group
--- 2888,2898 ----
                  break;
              case '>':   // (?>xxx)  independent group
                  head = createGroup(true);
                  tail = root;
                  head.next = expr(tail);
!                 head = tail = new Ques(head, Qtype.INDEPENDENT);
                  break;
              case '<':   // (?<xxx)  look behind
                  ch = read();
                  if (ASCII.isLower(ch) || ASCII.isUpper(ch)) {
                      // named captured group
*** 2968,2993 ****
              return node;    // Dual return
          }
  
          if (node instanceof Ques) {
              Ques ques = (Ques) node;
!             if (ques.type == POSSESSIVE) {
                  root = node;
                  return node;
              }
              tail.next = new BranchConn();
              tail = tail.next;
!             if (ques.type == GREEDY) {
                  head = new Branch(head, null, tail);
              } else { // Reluctant quantifier
                  head = new Branch(null, head, tail);
              }
              root = tail;
              return head;
          } else if (node instanceof Curly) {
              Curly curly = (Curly) node;
!             if (curly.type == POSSESSIVE) {
                  root = node;
                  return node;
              }
              // Discover if the group is deterministic
              TreeInfo info = new TreeInfo();
--- 2974,2999 ----
              return node;    // Dual return
          }
  
          if (node instanceof Ques) {
              Ques ques = (Ques) node;
!             if (ques.type == Qtype.POSSESSIVE) {
                  root = node;
                  return node;
              }
              tail.next = new BranchConn();
              tail = tail.next;
!             if (ques.type == Qtype.GREEDY) {
                  head = new Branch(head, null, tail);
              } else { // Reluctant quantifier
                  head = new Branch(null, head, tail);
              }
              root = tail;
              return head;
          } else if (node instanceof Curly) {
              Curly curly = (Curly) node;
!             if (curly.type == Qtype.POSSESSIVE) {
                  root = node;
                  return node;
              }
              // Discover if the group is deterministic
              TreeInfo info = new TreeInfo();
*** 3000,3010 ****
                                               capturingGroup);
                  return head;
              } else { // Non-deterministic
                  int temp = ((GroupHead) head).localIndex;
                  Loop loop;
!                 if (curly.type == GREEDY)
                      loop = new Loop(this.localCount, temp);
                  else  // Reluctant Curly
                      loop = new LazyLoop(this.localCount, temp);
                  Prolog prolog = new Prolog(loop);
                  this.localCount += 1;
--- 3006,3016 ----
                                               capturingGroup);
                  return head;
              } else { // Non-deterministic
                  int temp = ((GroupHead) head).localIndex;
                  Loop loop;
!                 if (curly.type == Qtype.GREEDY)
                      loop = new Loop(this.localCount, temp);
                  else  // Reluctant Curly
                      loop = new LazyLoop(this.localCount, temp);
                  Prolog prolog = new Prolog(loop);
                  this.localCount += 1;
*** 3029,3038 ****
--- 3035,3048 ----
          int groupIndex = 0;
          if (!anonymous)
              groupIndex = capturingGroupCount++;
          GroupHead head = new GroupHead(localIndex);
          root = new GroupTail(localIndex, groupIndex);
+ 
+         // for debug/print only, head.match does NOT need the "tail" info
+         head.tail = (GroupTail)root;
+ 
          if (!anonymous && groupIndex < 10)
              groupNodes[groupIndex] = head;
          return head;
      }
  
*** 3117,3133 ****
          }
      }
  
      static final int MAX_REPS   = 0x7FFFFFFF;
  
!     static final int GREEDY     = 0;
! 
!     static final int LAZY       = 1;
! 
!     static final int POSSESSIVE = 2;
  
!     static final int INDEPENDENT = 3;
  
      /**
       * Processes repetition. If the next character peeked is a quantifier
       * then new nodes must be appended to handle the repetition.
       * Prev could be a single or a group, so it could be a chain of nodes.
--- 3127,3156 ----
          }
      }
  
      static final int MAX_REPS   = 0x7FFFFFFF;
  
!     static enum Qtype {  // matching mode kept in the "type" field of Ques, Curly and GroupCurly
!         GREEDY, LAZY, POSSESSIVE, INDEPENDENT  // INDEPENDENT is used by the (?>xxx) independent group
!     }
  
!     private Node curly(Node prev, int cmin) {  // node for repeating prev at least cmin times (0 for '*', 1 for '+') up to MAX_REPS
!         int ch = next();
!         if (ch == '?') {  // '?' here yields the lazy variant
!             next();
!             return new Curly(prev, cmin, MAX_REPS, Qtype.LAZY);
!         } else if (ch == '+') {  // '+' here yields the possessive variant
!             next();
!             return new Curly(prev, cmin, MAX_REPS, Qtype.POSSESSIVE);
!         }
!         if (prev instanceof BmpCharProperty) {  // greedy over a single-character node: use the dedicated loop nodes
!             return new BmpCharPropertyGreedy((BmpCharProperty)prev, cmin);
!         } else if (prev instanceof CharProperty) {
!             return new CharPropertyGreedy((CharProperty)prev, cmin);
!         }
!         return new Curly(prev, cmin, MAX_REPS, Qtype.GREEDY);  // greedy over any other kind of node
!     }
  
      /**
       * Processes repetition. If the next character peeked is a quantifier
       * then new nodes must be appended to handle the repetition.
       * Prev could be a single or a group, so it could be a chain of nodes.
*** 3138,3173 ****
          switch (ch) {
          case '?':
              ch = next();
              if (ch == '?') {
                  next();
!                 return new Ques(prev, LAZY);
              } else if (ch == '+') {
                  next();
!                 return new Ques(prev, POSSESSIVE);
              }
!             return new Ques(prev, GREEDY);
          case '*':
!             ch = next();
!             if (ch == '?') {
!                 next();
!                 return new Curly(prev, 0, MAX_REPS, LAZY);
!             } else if (ch == '+') {
!                 next();
!                 return new Curly(prev, 0, MAX_REPS, POSSESSIVE);
!             }
!             return new Curly(prev, 0, MAX_REPS, GREEDY);
          case '+':
!             ch = next();
!             if (ch == '?') {
!                 next();
!                 return new Curly(prev, 1, MAX_REPS, LAZY);
!             } else if (ch == '+') {
!                 next();
!                 return new Curly(prev, 1, MAX_REPS, POSSESSIVE);
!             }
!             return new Curly(prev, 1, MAX_REPS, GREEDY);
          case '{':
              ch = temp[cursor+1];
              if (ASCII.isDigit(ch)) {
                  skip();
                  int cmin = 0;
--- 3161,3180 ----
          switch (ch) {
          case '?':
              ch = next();
              if (ch == '?') {
                  next();
!                 return new Ques(prev, Qtype.LAZY);
              } else if (ch == '+') {
                  next();
!                 return new Ques(prev, Qtype.POSSESSIVE);
              }
!             return new Ques(prev, Qtype.GREEDY);
          case '*':
!             return curly(prev, 0);
          case '+':
!             return curly(prev, 1);
          case '{':
              ch = temp[cursor+1];
              if (ASCII.isDigit(ch)) {
                  skip();
                  int cmin = 0;
*** 3192,3207 ****
                      throw error("Illegal repetition range");
                  Curly curly;
                  ch = peek();
                  if (ch == '?') {
                      next();
!                     curly = new Curly(prev, cmin, cmax, LAZY);
                  } else if (ch == '+') {
                      next();
!                     curly = new Curly(prev, cmin, cmax, POSSESSIVE);
                  } else {
!                     curly = new Curly(prev, cmin, cmax, GREEDY);
                  }
                  return curly;
              } else {
                  throw error("Illegal repetition");
              }
--- 3199,3214 ----
                      throw error("Illegal repetition range");
                  Curly curly;
                  ch = peek();
                  if (ch == '?') {
                      next();
!                     curly = new Curly(prev, cmin, cmax, Qtype.LAZY);
                  } else if (ch == '+') {
                      next();
!                     curly = new Curly(prev, cmin, cmax, Qtype.POSSESSIVE);
                  } else {
!                     curly = new Curly(prev, cmin, cmax, Qtype.GREEDY);
                  }
                  return curly;
              } else {
                  throw error("Illegal repetition");
              }
*** 3374,3387 ****
      /**
       *  Creates a bit vector for matching Latin-1 values. A normal BitClass
       *  never matches values above Latin-1, and a complemented BitClass always
       *  matches values above Latin-1.
       */
!     private static final class BitClass extends BmpCharProperty {
          final boolean[] bits;
!         BitClass() { bits = new boolean[256]; }
!         private BitClass(boolean[] bits) { this.bits = bits; }
          BitClass add(int c, int flags) {
              assert c >= 0 && c <= 255;
              if ((flags & CASE_INSENSITIVE) != 0) {
                  if (ASCII.isAscii(c)) {
                      bits[ASCII.toUpper(c)] = true;
--- 3381,3399 ----
      /**
       *  Creates a bit vector for matching Latin-1 values. A normal BitClass
       *  never matches values above Latin-1, and a complemented BitClass always
       *  matches values above Latin-1.
       */
!     static final class BitClass extends BmpCharProperty {
          final boolean[] bits;
!         BitClass() {
!             this(new boolean[256]);
!         }
!         private BitClass(boolean[] bits) {
!             super( ch -> ch < 256 && bits[ch]);
!             this.bits = bits;
!         }
          BitClass add(int c, int flags) {
              assert c >= 0 && c <= 255;
              if ((flags & CASE_INSENSITIVE) != 0) {
                  if (ASCII.isAscii(c)) {
                      bits[ASCII.toUpper(c)] = true;
*** 3392,3427 ****
                  }
              }
              bits[c] = true;
              return this;
          }
-         boolean isSatisfiedBy(int ch) {
-             return ch < 256 && bits[ch];
-         }
-     }
- 
-     /**
-      *  Returns a suitably optimized, single character matcher.
-      */
-     private CharProperty newSingle(final int ch) {
-         if (has(CASE_INSENSITIVE)) {
-             int lower, upper;
-             if (has(UNICODE_CASE)) {
-                 upper = Character.toUpperCase(ch);
-                 lower = Character.toLowerCase(upper);
-                 if (upper != lower)
-                     return new SingleU(lower);
-             } else if (ASCII.isAscii(ch)) {
-                 lower = ASCII.toLower(ch);
-                 upper = ASCII.toUpper(ch);
-                 if (lower != upper)
-                     return new SingleI(lower, upper);
-             }
-         }
-         if (isSupplementary(ch))
-             return new SingleS(ch);    // Match a given Unicode character
-         return new Single(ch);         // Match a given BMP character
      }
  
      /**
       *  Utility method for creating a string slice matcher.
       */
--- 3404,3413 ----
*** 3825,3846 ****
  
      /**
       * Abstract node class to match one character satisfying some
       * boolean property.
       */
!     private abstract static class CharProperty extends Node {
!         abstract boolean isSatisfiedBy(int ch);
!         CharProperty complement() {
!             return new CharProperty() {
!                     boolean isSatisfiedBy(int ch) {
!                         return ! CharProperty.this.isSatisfiedBy(ch);}};
          }
          boolean match(Matcher matcher, int i, CharSequence seq) {
              if (i < matcher.to) {
                  int ch = Character.codePointAt(seq, i);
!                 return isSatisfiedBy(ch)
!                     && next.match(matcher, i+Character.charCount(ch), seq);
              } else {
                  matcher.hitEnd = true;
                  return false;
              }
          }
--- 3811,3831 ----
  
      /**
       * Abstract node class to match one character satisfying some
       * boolean property.
       */
!     static class CharProperty extends Node {
!         CharPredicate predicate;
! 
!         CharProperty (CharPredicate predicate) {
!             this.predicate = predicate;
          }
          boolean match(Matcher matcher, int i, CharSequence seq) {
              if (i < matcher.to) {
                  int ch = Character.codePointAt(seq, i);
!                 return predicate.is(ch) &&
!                        next.match(matcher, i + Character.charCount(ch), seq);
              } else {
                  matcher.hitEnd = true;
                  return false;
              }
          }
*** 3853,4007 ****
  
      /**
       * Optimized version of CharProperty that works only for
       * properties never satisfied by Supplementary characters.
       */
!     private abstract static class BmpCharProperty extends CharProperty {
          boolean match(Matcher matcher, int i, CharSequence seq) {
              if (i < matcher.to) {
!                 return isSatisfiedBy(seq.charAt(i))
!                     && next.match(matcher, i+1, seq);
              } else {
                  matcher.hitEnd = true;
                  return false;
              }
          }
      }
  
      /**
-      * Node class that matches a Supplementary Unicode character
-      */
-     static final class SingleS extends CharProperty {
-         final int c;
-         SingleS(int c) { this.c = c; }
-         boolean isSatisfiedBy(int ch) {
-             return ch == c;
-         }
-     }
- 
-     /**
-      * Optimization -- matches a given BMP character
-      */
-     static final class Single extends BmpCharProperty {
-         final int c;
-         Single(int c) { this.c = c; }
-         boolean isSatisfiedBy(int ch) {
-             return ch == c;
-         }
-     }
- 
-     /**
-      * Case insensitive matches a given BMP character
-      */
-     static final class SingleI extends BmpCharProperty {
-         final int lower;
-         final int upper;
-         SingleI(int lower, int upper) {
-             this.lower = lower;
-             this.upper = upper;
-         }
-         boolean isSatisfiedBy(int ch) {
-             return ch == lower || ch == upper;
-         }
-     }
- 
-     /**
-      * Unicode case insensitive matches a given Unicode character
-      */
-     static final class SingleU extends CharProperty {
-         final int lower;
-         SingleU(int lower) {
-             this.lower = lower;
-         }
-         boolean isSatisfiedBy(int ch) {
-             return lower == ch ||
-                 lower == Character.toLowerCase(Character.toUpperCase(ch));
-         }
-     }
- 
-     /**
-      * Node class that matches a Unicode block.
-      */
-     static final class Block extends CharProperty {
-         final Character.UnicodeBlock block;
-         Block(Character.UnicodeBlock block) {
-             this.block = block;
-         }
-         boolean isSatisfiedBy(int ch) {
-             return block == Character.UnicodeBlock.of(ch);
-         }
-     }
- 
-     /**
-      * Node class that matches a Unicode script
-      */
-     static final class Script extends CharProperty {
-         final Character.UnicodeScript script;
-         Script(Character.UnicodeScript script) {
-             this.script = script;
-         }
-         boolean isSatisfiedBy(int ch) {
-             return script == Character.UnicodeScript.of(ch);
-         }
-     }
- 
-     /**
-      * Node class that matches a Unicode category.
-      */
-     static final class Category extends CharProperty {
-         final int typeMask;
-         Category(int typeMask) { this.typeMask = typeMask; }
-         boolean isSatisfiedBy(int ch) {
-             return (typeMask & (1 << Character.getType(ch))) != 0;
-         }
-     }
- 
-     /**
-      * Node class that matches a Unicode "type"
-      */
-     static final class Utype extends CharProperty {
-         final UnicodeProp uprop;
-         Utype(UnicodeProp uprop) { this.uprop = uprop; }
-         boolean isSatisfiedBy(int ch) {
-             return uprop.is(ch);
-         }
-     }
- 
-     /**
-      * Node class that matches a POSIX type.
-      */
-     static final class Ctype extends BmpCharProperty {
-         final int ctype;
-         Ctype(int ctype) { this.ctype = ctype; }
-         boolean isSatisfiedBy(int ch) {
-             return ch < 128 && ASCII.isType(ch, ctype);
-         }
-     }
- 
-     /**
-      * Node class that matches a Perl vertical whitespace
-      */
-     static final class VertWS extends BmpCharProperty {
-         boolean isSatisfiedBy(int cp) {
-             return (cp >= 0x0A && cp <= 0x0D) ||
-                    cp == 0x85 || cp == 0x2028 || cp == 0x2029;
-         }
-     }
- 
-     /**
-      * Node class that matches a Perl horizontal whitespace
-      */
-     static final class HorizWS extends BmpCharProperty {
-         boolean isSatisfiedBy(int cp) {
-             return cp == 0x09 || cp == 0x20 || cp == 0xa0 ||
-                    cp == 0x1680 || cp == 0x180e ||
-                    cp >= 0x2000 && cp <= 0x200a ||
-                    cp == 0x202f || cp == 0x205f || cp == 0x3000;
-         }
-     }
- 
-     /**
       * Node class that matches an unicode extended grapheme cluster
       */
      static class XGrapheme extends Node {
          boolean match(Matcher matcher, int i, CharSequence seq) {
              if (i < matcher.to) {
--- 3838,3863 ----
  
      /**
       * Optimized version of CharProperty that works only for
       * properties never satisfied by Supplementary characters.
       */
!     private static class BmpCharProperty extends CharProperty {
!         BmpCharProperty (BmpCharPredicate predicate) {  // only a BmpCharPredicate is accepted here
!             super(predicate);
!         }
          boolean match(Matcher matcher, int i, CharSequence seq) {
              if (i < matcher.to) {
!                 return predicate.is(seq.charAt(i)) &&  // tests the single char at i, no code point decoding
!                        next.match(matcher, i + 1, seq);  // so the continuation always starts one char later
              } else {
                  matcher.hitEnd = true;
                  return false;
              }
          }
      }
  
      /**
       * Node class that matches an unicode extended grapheme cluster
       */
      static class XGrapheme extends Node {
          boolean match(Matcher matcher, int i, CharSequence seq) {
              if (i < matcher.to) {
*** 4215,4299 ****
          int toLower(int c) {
              return Character.toLowerCase(Character.toUpperCase(c));
          }
      }
  
-     private static boolean inRange(int lower, int ch, int upper) {
-         return lower <= ch && ch <= upper;
-     }
- 
-     /**
-      * Returns node for matching characters within an explicit value range.
-      */
-     private static CharProperty rangeFor(final int lower,
-                                          final int upper) {
-         return new CharProperty() {
-                 boolean isSatisfiedBy(int ch) {
-                     return inRange(lower, ch, upper);}};
-     }
- 
-     /**
-      * Returns node for matching characters within an explicit value
-      * range in a case insensitive manner.
-      */
-     private CharProperty caseInsensitiveRangeFor(final int lower,
-                                                  final int upper) {
-         if (has(UNICODE_CASE))
-             return new CharProperty() {
-                 boolean isSatisfiedBy(int ch) {
-                     if (inRange(lower, ch, upper))
-                         return true;
-                     int up = Character.toUpperCase(ch);
-                     return inRange(lower, up, upper) ||
-                            inRange(lower, Character.toLowerCase(up), upper);}};
-         return new CharProperty() {
-             boolean isSatisfiedBy(int ch) {
-                 return inRange(lower, ch, upper) ||
-                     ASCII.isAscii(ch) &&
-                         (inRange(lower, ASCII.toUpper(ch), upper) ||
-                          inRange(lower, ASCII.toLower(ch), upper));
-             }};
-     }
- 
-     /**
-      * Implements the Unicode category ALL and the dot metacharacter when
-      * in dotall mode.
-      */
-     static final class All extends CharProperty {
-         boolean isSatisfiedBy(int ch) {
-             return true;
-         }
-     }
- 
-     /**
-      * Node class for the dot metacharacter when dotall is not enabled.
-      */
-     static final class Dot extends CharProperty {
-         boolean isSatisfiedBy(int ch) {
-             return (ch != '\n' && ch != '\r'
-                     && (ch|1) != '\u2029'
-                     && ch != '\u0085');
-         }
-     }
- 
-     /**
-      * Node class for the dot metacharacter when dotall is not enabled
-      * but UNIX_LINES is enabled.
-      */
-     static final class UnixDot extends CharProperty {
-         boolean isSatisfiedBy(int ch) {
-             return ch != '\n';
-         }
-     }
- 
      /**
       * The 0 or 1 quantifier. This one class implements all three types.
       */
      static final class Ques extends Node {
          Node atom;
!         int type;
!         Ques(Node node, int type) {
              this.atom = node;
              this.type = type;
          }
          boolean match(Matcher matcher, int i, CharSequence seq) {
              switch (type) {
--- 4071,4087 ----
          int toLower(int c) {
              return Character.toLowerCase(Character.toUpperCase(c));
          }
      }
  
      /**
       * The 0 or 1 quantifier. This one class implements all three types.
       */
      static final class Ques extends Node {
          Node atom;
!         Qtype type;
!         Ques(Node node, Qtype type) {
              this.atom = node;
              this.type = type;
          }
          boolean match(Matcher matcher, int i, CharSequence seq) {
              switch (type) {
*** 4309,4319 ****
              default:
                  return atom.match(matcher, i, seq) && next.match(matcher, matcher.last, seq);
              }
          }
          boolean study(TreeInfo info) {
!             if (type != INDEPENDENT) {
                  int minL = info.minLength;
                  atom.study(info);
                  info.minLength = minL;
                  info.deterministic = false;
                  return next.study(info);
--- 4097,4107 ----
              default:
                  return atom.match(matcher, i, seq) && next.match(matcher, matcher.last, seq);
              }
          }
          boolean study(TreeInfo info) {
!             if (type != Qtype.INDEPENDENT) {
                  int minL = info.minLength;
                  atom.study(info);
                  info.minLength = minL;
                  info.deterministic = false;
                  return next.study(info);
*** 4323,4343 ****
              }
          }
      }
  
      /**
       * Handles the curly-brace style repetition with a specified minimum and
       * maximum occurrences. The * quantifier is handled as a special case.
       * This class handles the three types.
       */
      static final class Curly extends Node {
          Node atom;
!         int type;
          int cmin;
          int cmax;
  
!         Curly(Node node, int cmin, int cmax, int type) {
              this.atom = node;
              this.type = type;
              this.cmin = cmin;
              this.cmax = cmax;
          }
--- 4111,4204 ----
              }
          }
      }
  
      /**
+      * Handles greedy repetition of a single CharProperty whose minimum is
+      * either 0 or 1 and whose maximum is MAX_REPS, for the * and + quantifiers.
+      */
+     static class CharPropertyGreedy extends Node {
+         final CharPredicate predicate;  // taken from the CharProperty being repeated
+         final int cmin;  // minimum number of matches required
+ 
+         CharPropertyGreedy(CharProperty cp, int cmin) {
+             this.predicate = cp.predicate;
+             this.cmin = cmin;
+         }
+         boolean match(Matcher matcher, int i,  CharSequence seq) {
+             int n = 0;  // number of code points consumed so far
+             int to = matcher.to;
+             // greedily consume every code point that satisfies the predicate
+             while (i < to) {
+                 int ch = Character.codePointAt(seq, i);
+                 if (!predicate.is(ch))
+                    break;
+                 i += Character.charCount(ch);
+                 n++;
+             }
+             if (i >= to) {  // the scan reached matcher.to
+                 matcher.hitEnd = true;
+             }
+             while (n >= cmin) {  // try the continuation at the longest position first
+                 if (next.match(matcher, i, seq))
+                     return true;
+                 if (n == cmin)  // nothing more can be given back
+                     return false;
+                  // the continuation failed here: back off by one code point and retry
+                 int ch = Character.codePointBefore(seq, i);
+                 i -= Character.charCount(ch);
+                 n--;
+             }
+             return false; 
+         }
+ 
+         boolean study(TreeInfo info) {
+             info.minLength += cmin;  // at least cmin characters are consumed
+             if (info.maxValid) {
+                 info.maxLength += MAX_REPS;
+             }
+             info.deterministic = false;
+             return next.study(info);
+         }
+     }
+ 
+     static final class BmpCharPropertyGreedy extends CharPropertyGreedy {  // greedy loop that steps one char at a time
+ 
+         BmpCharPropertyGreedy(BmpCharProperty bcp, int cmin) {
+             super(bcp, cmin);
+         }
+ 
+         boolean match(Matcher matcher, int i,  CharSequence seq) {
+             int n = 0;  // number of chars consumed so far
+             int to = matcher.to;
+             while (i < to && predicate.is(seq.charAt(i))) {  // greedily consume every matching char
+                 i++; n++;
+             }
+             if (i >= to) {  // the scan reached matcher.to
+                 matcher.hitEnd = true;
+             }
+             while (n >= cmin) {  // try the continuation at the longest position first
+                 if (next.match(matcher, i, seq))
+                     return true;
+                 i--; n--;  // the continuation failed here: back off by one char and retry
+             }
+             return false; 
+         }
+     }
+ 
+     /**
       * Handles the curly-brace style repetition with a specified minimum and
       * maximum occurrences. The * quantifier is handled as a special case.
       * This class handles the three types.
       */
      static final class Curly extends Node {
          Node atom;
!         Qtype type;
          int cmin;
          int cmax;
  
!         Curly(Node node, int cmin, int cmax, Qtype type) {
              this.atom = node;
              this.type = type;
              this.cmin = cmin;
              this.cmax = cmax;
          }
*** 4348,4360 ****
                      i = matcher.last;
                      continue;
                  }
                  return false;
              }
!             if (type == GREEDY)
                  return match0(matcher, i, j, seq);
!             else if (type == LAZY)
                  return match1(matcher, i, j, seq);
              else
                  return match2(matcher, i, j, seq);
          }
          // Greedy match.
--- 4209,4221 ----
                      i = matcher.last;
                      continue;
                  }
                  return false;
              }
!             if (type == Qtype.GREEDY)
                  return match0(matcher, i, j, seq);
!             else if (type == Qtype.LAZY)
                  return match1(matcher, i, j, seq);
              else
                  return match2(matcher, i, j, seq);
          }
          // Greedy match.
*** 4472,4489 ****
       * If capture is true then this class saves group settings and ensures
       * that groups are unset when backing off of a group match.
       */
      static final class GroupCurly extends Node {
          Node atom;
!         int type;
          int cmin;
          int cmax;
          int localIndex;
          int groupIndex;
          boolean capture;
  
!         GroupCurly(Node node, int cmin, int cmax, int type, int local,
                     int group, boolean capture) {
              this.atom = node;
              this.type = type;
              this.cmin = cmin;
              this.cmax = cmax;
--- 4333,4350 ----
       * If capture is true then this class saves group settings and ensures
       * that groups are unset when backing off of a group match.
       */
      static final class GroupCurly extends Node {
          Node atom;
!         Qtype type;
          int cmin;
          int cmax;
          int localIndex;
          int groupIndex;
          boolean capture;
  
!         GroupCurly(Node node, int cmin, int cmax, Qtype type, int local,
                     int group, boolean capture) {
              this.atom = node;
              this.type = type;
              this.cmin = cmin;
              this.cmax = cmax;
*** 4519,4531 ****
                      ret = false;
                      break;
                  }
              }
              if (ret) {
!                 if (type == GREEDY) {
                      ret = match0(matcher, i, cmin, seq);
!                 } else if (type == LAZY) {
                      ret = match1(matcher, i, cmin, seq);
                  } else {
                      ret = match2(matcher, i, cmin, seq);
                  }
              }
--- 4380,4392 ----
                      ret = false;
                      break;
                  }
              }
              if (ret) {
!                 if (type == Qtype.GREEDY) {
                      ret = match0(matcher, i, cmin, seq);
!                 } else if (type == Qtype.LAZY) {
                      ret = match1(matcher, i, cmin, seq);
                  } else {
                      ret = match2(matcher, i, cmin, seq);
                  }
              }
*** 4767,4776 ****
--- 4628,4638 ----
       * indicate that we do not want to unset the group if the reference
       * doesn't match.
       */
      static final class GroupHead extends Node {
          int localIndex;
+         GroupTail tail;    // for debug/print only, match does not need to know
          GroupHead(int localCount) {
              localIndex = localCount;
          }
          boolean match(Matcher matcher, int i, CharSequence seq) {
              int save = matcher.locals[localIndex];
*** 5360,5399 ****
              return !conditionMatched && next.match(matcher, i, seq);
          }
      }
  
      /**
-      * Returns the set union of two CharProperty nodes.
-      */
-     private static CharProperty union(final CharProperty lhs,
-                                       final CharProperty rhs) {
-         return new CharProperty() {
-                 boolean isSatisfiedBy(int ch) {
-                     return lhs.isSatisfiedBy(ch) || rhs.isSatisfiedBy(ch);}};
-     }
- 
-     /**
-      * Returns the set intersection of two CharProperty nodes.
-      */
-     private static CharProperty intersection(final CharProperty lhs,
-                                              final CharProperty rhs) {
-         return new CharProperty() {
-                 boolean isSatisfiedBy(int ch) {
-                     return lhs.isSatisfiedBy(ch) && rhs.isSatisfiedBy(ch);}};
-     }
- 
-     /**
-      * Returns the set difference of two CharProperty nodes.
-      */
-     private static CharProperty setDifference(final CharProperty lhs,
-                                               final CharProperty rhs) {
-         return new CharProperty() {
-                 boolean isSatisfiedBy(int ch) {
-                     return ! rhs.isSatisfiedBy(ch) && lhs.isSatisfiedBy(ch);}};
-     }
- 
-     /**
       * Handles word boundaries. Includes a field to allow this one class to
       * deal with the different types of word boundaries we can match. The word
       * characters include underscores, letters, and digits. Non spacing marks
       * can are also part of a word if they have a base character, otherwise
       * they are ignored for purposes of finding word boundaries.
--- 5222,5231 ----
*** 5409,5419 ****
              type = n;
              this.useUWORD = useUWORD;
          }
  
          boolean isWord(int ch) {
!             return useUWORD ? UnicodeProp.WORD.is(ch)
                              : (ch == '_' || Character.isLetterOrDigit(ch));
          }
  
          int check(Matcher matcher, int i, CharSequence seq) {
              int ch;
--- 5241,5251 ----
              type = n;
              this.useUWORD = useUWORD;
          }
  
          boolean isWord(int ch) {
!             return useUWORD ? CharPredicates.WORD.is(ch)
                              : (ch == '_' || Character.isLetterOrDigit(ch));
          }
  
          int check(Matcher matcher, int i, CharSequence seq) {
              int ch;
*** 5655,5874 ****
              matcher.hitEnd = true;
              return false;
          }
      }
  
! ///////////////////////////////////////////////////////////////////////////////
! ///////////////////////////////////////////////////////////////////////////////
  
      /**
!      *  This must be the very first initializer.
       */
!     static Node accept = new Node();
  
!     static Node lastAccept = new LastNode();
  
!     private static class CharPropertyNames {
  
!         static CharProperty charPropertyFor(String name) {
!             CharPropertyFactory m = map.get(name);
!             return m == null ? null : m.make();
          }
  
!         private abstract static class CharPropertyFactory {
!             abstract CharProperty make();
          }
  
!         private static void defCategory(String name,
!                                         final int typeMask) {
!             map.put(name, new CharPropertyFactory() {
!                     CharProperty make() { return new Category(typeMask);}});
          }
  
!         private static void defRange(String name,
!                                      final int lower, final int upper) {
!             map.put(name, new CharPropertyFactory() {
!                     CharProperty make() { return rangeFor(lower, upper);}});
          }
  
!         private static void defCtype(String name,
!                                      final int ctype) {
!             map.put(name, new CharPropertyFactory() {
!                     CharProperty make() { return new Ctype(ctype);}});
          }
  
!         private abstract static class CloneableProperty
!             extends CharProperty implements Cloneable
!         {
!             public CloneableProperty clone() {
!                 try {
!                     return (CloneableProperty) super.clone();
!                 } catch (CloneNotSupportedException e) {
!                     throw new AssertionError(e);
!                 }
!             }
!         }
! 
!         private static void defClone(String name,
!                                      final CloneableProperty p) {
!             map.put(name, new CharPropertyFactory() {
!                     CharProperty make() { return p.clone();}});
!         }
! 
!         private static final HashMap<String, CharPropertyFactory> map
!             = new HashMap<>();
! 
!         static {
!             // Unicode character property aliases, defined in
!             // http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt
!             defCategory("Cn", 1<<Character.UNASSIGNED);
!             defCategory("Lu", 1<<Character.UPPERCASE_LETTER);
!             defCategory("Ll", 1<<Character.LOWERCASE_LETTER);
!             defCategory("Lt", 1<<Character.TITLECASE_LETTER);
!             defCategory("Lm", 1<<Character.MODIFIER_LETTER);
!             defCategory("Lo", 1<<Character.OTHER_LETTER);
!             defCategory("Mn", 1<<Character.NON_SPACING_MARK);
!             defCategory("Me", 1<<Character.ENCLOSING_MARK);
!             defCategory("Mc", 1<<Character.COMBINING_SPACING_MARK);
!             defCategory("Nd", 1<<Character.DECIMAL_DIGIT_NUMBER);
!             defCategory("Nl", 1<<Character.LETTER_NUMBER);
!             defCategory("No", 1<<Character.OTHER_NUMBER);
!             defCategory("Zs", 1<<Character.SPACE_SEPARATOR);
!             defCategory("Zl", 1<<Character.LINE_SEPARATOR);
!             defCategory("Zp", 1<<Character.PARAGRAPH_SEPARATOR);
!             defCategory("Cc", 1<<Character.CONTROL);
!             defCategory("Cf", 1<<Character.FORMAT);
!             defCategory("Co", 1<<Character.PRIVATE_USE);
!             defCategory("Cs", 1<<Character.SURROGATE);
!             defCategory("Pd", 1<<Character.DASH_PUNCTUATION);
!             defCategory("Ps", 1<<Character.START_PUNCTUATION);
!             defCategory("Pe", 1<<Character.END_PUNCTUATION);
!             defCategory("Pc", 1<<Character.CONNECTOR_PUNCTUATION);
!             defCategory("Po", 1<<Character.OTHER_PUNCTUATION);
!             defCategory("Sm", 1<<Character.MATH_SYMBOL);
!             defCategory("Sc", 1<<Character.CURRENCY_SYMBOL);
!             defCategory("Sk", 1<<Character.MODIFIER_SYMBOL);
!             defCategory("So", 1<<Character.OTHER_SYMBOL);
!             defCategory("Pi", 1<<Character.INITIAL_QUOTE_PUNCTUATION);
!             defCategory("Pf", 1<<Character.FINAL_QUOTE_PUNCTUATION);
!             defCategory("L", ((1<<Character.UPPERCASE_LETTER) |
!                               (1<<Character.LOWERCASE_LETTER) |
!                               (1<<Character.TITLECASE_LETTER) |
!                               (1<<Character.MODIFIER_LETTER)  |
!                               (1<<Character.OTHER_LETTER)));
!             defCategory("M", ((1<<Character.NON_SPACING_MARK) |
!                               (1<<Character.ENCLOSING_MARK)   |
!                               (1<<Character.COMBINING_SPACING_MARK)));
!             defCategory("N", ((1<<Character.DECIMAL_DIGIT_NUMBER) |
!                               (1<<Character.LETTER_NUMBER)        |
!                               (1<<Character.OTHER_NUMBER)));
!             defCategory("Z", ((1<<Character.SPACE_SEPARATOR) |
!                               (1<<Character.LINE_SEPARATOR)  |
!                               (1<<Character.PARAGRAPH_SEPARATOR)));
!             defCategory("C", ((1<<Character.CONTROL)     |
!                               (1<<Character.FORMAT)      |
!                               (1<<Character.PRIVATE_USE) |
!                               (1<<Character.SURROGATE))); // Other
!             defCategory("P", ((1<<Character.DASH_PUNCTUATION)      |
!                               (1<<Character.START_PUNCTUATION)     |
!                               (1<<Character.END_PUNCTUATION)       |
!                               (1<<Character.CONNECTOR_PUNCTUATION) |
!                               (1<<Character.OTHER_PUNCTUATION)     |
!                               (1<<Character.INITIAL_QUOTE_PUNCTUATION) |
!                               (1<<Character.FINAL_QUOTE_PUNCTUATION)));
!             defCategory("S", ((1<<Character.MATH_SYMBOL)     |
!                               (1<<Character.CURRENCY_SYMBOL) |
!                               (1<<Character.MODIFIER_SYMBOL) |
!                               (1<<Character.OTHER_SYMBOL)));
!             defCategory("LC", ((1<<Character.UPPERCASE_LETTER) |
!                                (1<<Character.LOWERCASE_LETTER) |
!                                (1<<Character.TITLECASE_LETTER)));
!             defCategory("LD", ((1<<Character.UPPERCASE_LETTER) |
!                                (1<<Character.LOWERCASE_LETTER) |
!                                (1<<Character.TITLECASE_LETTER) |
!                                (1<<Character.MODIFIER_LETTER)  |
!                                (1<<Character.OTHER_LETTER)     |
!                                (1<<Character.DECIMAL_DIGIT_NUMBER)));
!             defRange("L1", 0x00, 0xFF); // Latin-1
!             map.put("all", new CharPropertyFactory() {
!                     CharProperty make() { return new All(); }});
! 
!             // Posix regular expression character classes, defined in
!             // http://www.unix.org/onlinepubs/009695399/basedefs/xbd_chap09.html
!             defRange("ASCII", 0x00, 0x7F);   // ASCII
!             defCtype("Alnum", ASCII.ALNUM);  // Alphanumeric characters
!             defCtype("Alpha", ASCII.ALPHA);  // Alphabetic characters
!             defCtype("Blank", ASCII.BLANK);  // Space and tab characters
!             defCtype("Cntrl", ASCII.CNTRL);  // Control characters
!             defRange("Digit", '0', '9');     // Numeric characters
!             defCtype("Graph", ASCII.GRAPH);  // printable and visible
!             defRange("Lower", 'a', 'z');     // Lower-case alphabetic
!             defRange("Print", 0x20, 0x7E);   // Printable characters
!             defCtype("Punct", ASCII.PUNCT);  // Punctuation characters
!             defCtype("Space", ASCII.SPACE);  // Space characters
!             defRange("Upper", 'A', 'Z');     // Upper-case alphabetic
!             defCtype("XDigit",ASCII.XDIGIT); // hexadecimal digits
! 
!             // Java character properties, defined by methods in Character.java
!             defClone("javaLowerCase", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isLowerCase(ch);}});
!             defClone("javaUpperCase", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isUpperCase(ch);}});
!             defClone("javaAlphabetic", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isAlphabetic(ch);}});
!             defClone("javaIdeographic", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isIdeographic(ch);}});
!             defClone("javaTitleCase", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isTitleCase(ch);}});
!             defClone("javaDigit", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isDigit(ch);}});
!             defClone("javaDefined", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isDefined(ch);}});
!             defClone("javaLetter", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isLetter(ch);}});
!             defClone("javaLetterOrDigit", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isLetterOrDigit(ch);}});
!             defClone("javaJavaIdentifierStart", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isJavaIdentifierStart(ch);}});
!             defClone("javaJavaIdentifierPart", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isJavaIdentifierPart(ch);}});
!             defClone("javaUnicodeIdentifierStart", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isUnicodeIdentifierStart(ch);}});
!             defClone("javaUnicodeIdentifierPart", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isUnicodeIdentifierPart(ch);}});
!             defClone("javaIdentifierIgnorable", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isIdentifierIgnorable(ch);}});
!             defClone("javaSpaceChar", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isSpaceChar(ch);}});
!             defClone("javaWhitespace", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isWhitespace(ch);}});
!             defClone("javaISOControl", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isISOControl(ch);}});
!             defClone("javaMirrored", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isMirrored(ch);}});
          }
      }
  
      /**
       * Creates a predicate which can be used to match a string.
       *
       * @return  The predicate which can be used for matching on a string
       * @since   1.8
       */
--- 5487,5646 ----
              matcher.hitEnd = true;
              return false;
          }
      }
  
!     @FunctionalInterface
!     static interface CharPredicate {       // boolean test over an int character value
!         boolean is(int ch);                // the single abstract method
! 
!         default CharPredicate and(CharPredicate p) {     // this && p (short-circuit)
!             return ch -> is(ch) && p.is(ch);
!         }
!         default CharPredicate union(CharPredicate p) {   // this || p (short-circuit)
!             return ch -> is(ch) || p.is(ch);
!         }
!         default CharPredicate union(CharPredicate p1,    // this || p1 || p2
!                                     CharPredicate p2 ) {
!             return ch -> is(ch) || p1.is(ch) || p2.is(ch);
!         }
!         default CharPredicate negate() {                 // logical complement of this predicate
!             return ch -> !is(ch);
!         }
!     }
! 
!     static interface BmpCharPredicate extends CharPredicate {
! 
!         default CharPredicate and(CharPredicate p) {
!             if(p instanceof BmpCharPredicate)
!                 return (BmpCharPredicate)((ch) -> is(ch) && p.is(ch));
!             return ch -> is(ch) && p.is(ch);
!         }
!         default CharPredicate union(CharPredicate p) {
!             if (p instanceof BmpCharPredicate)
!                 return (BmpCharPredicate)((ch) -> is(ch) || p.is(ch));
!             return ch -> is(ch) || p.is(ch);
!         }
!         static CharPredicate union(CharPredicate... predicates) {
!             CharPredicate cp = ch -> {
!                 for (CharPredicate p : predicates) {
!                     if (!p.is(ch))
!                         return false;
!                 }
!                 return true;
!             };
!             for (CharPredicate p : predicates) {
!                 if (! (p instanceof BmpCharPredicate))
!                     return cp;
!             }
!             return (BmpCharPredicate)cp;
!         }
!     }
  
      /**
!      * Matches a Perl vertical whitespace character.
       */
!     static BmpCharPredicate VertWS = cp ->
!         (cp >= 0x0A && cp <= 0x0D) || cp == 0x85 || cp == 0x2028 || cp == 0x2029;  // LF..CR, NEL, LS, PS
  
!     /**
!      * Matches a Perl horizontal whitespace character.
!      */
!     static BmpCharPredicate HorizWS = cp ->
!         cp == 0x09 || cp == 0x20 || cp == 0xa0 || cp == 0x1680 ||
!         cp == 0x180e || cp >= 0x2000 && cp <= 0x200a ||  cp == 0x202f ||  // && binds tighter than ||: range U+2000..U+200A
!         cp == 0x205f || cp == 0x3000;
  
!     /**
!      * Accepts every character. Used for the Unicode category ALL and for
!      * the dot metacharacter when in dotall mode.
!      */
!     static CharPredicate ALL = ch -> true;
  
!     /**
!      * For the dot metacharacter when dotall is not enabled: rejects '\n', '\r', U+0085, U+2028 and U+2029.
!      */
!     static CharPredicate DOT = ch -> (ch != '\n' && ch != '\r'
!                                           && (ch|1) != '\u2029'   // (ch|1) folds U+2028 onto U+2029, so both are rejected
!                                           && ch != '\u0085');
!     /**
!      * For the dot metacharacter when dotall is not enabled but UNIX_LINES is enabled: rejects only '\n'.
!      */
!     static CharPredicate UNIXDOT = ch ->  ch != '\n';
! 
!     /**
!      * Matches exactly the character {@code c}; the variant for a supplementary Unicode character (result is not BMP-tagged).
!      */
!     static CharPredicate SingleS(int c) {
!         return ch -> ch == c;
      }
  
!     /**
!      * The BMP-optimized single-character predicate: matches exactly the character {@code c}.
!      */
!     static BmpCharPredicate Single(int c) {
!         return ch -> ch == c;
      }
  
!     /**
!      * Case-insensitively matches a given BMP character: accepts either {@code lower} or {@code upper}.
!      */
!     static BmpCharPredicate SingleI(int lower, int upper) {
!         return ch -> ch == lower || ch == upper;
      }
  
!     /**
!      * Unicode case-insensitively matches a given Unicode character, supplied in the form {@code lower}.
!      */
!     static CharPredicate SingleU(int lower) {
!         return ch -> lower == ch ||    // direct hit, or ...
!                      lower == Character.toLowerCase(Character.toUpperCase(ch));  // ... equal after mapping ch to upper, then lower case
      }
  
!     private static boolean inRange(int lower, int ch, int upper) {  // true iff lower <= ch <= upper (both bounds inclusive)
!         return lower <= ch && ch <= upper;
      }
  
!     /**
!      * Characters within an explicit, inclusive value range.
!      */
!     static CharPredicate Range(int lower, int upper) {
!         if (upper < Character.MIN_HIGH_SURROGATE ||           // range ends below the surrogates, or
!             lower > Character.MAX_HIGH_SURROGATE &&           // starts above the high surrogates and
!             upper < Character.MIN_SUPPLEMENTARY_CODE_POINT)   // ends before the supplementary code points
!             return (BmpCharPredicate)(ch -> inRange(lower, ch, upper));  // NOTE(review): a range starting in the low-surrogate block is BMP-tagged too -- confirm intended
!         return ch -> inRange(lower, ch, upper);               // otherwise an untagged CharPredicate
!     }
! 
!    /**
!     * Characters within an explicit value range, matched in an ASCII case-insensitive manner.
!     */
!     static CharPredicate CIRange(int lower, int upper) {
!         return ch -> inRange(lower, ch, upper) ||               // direct hit, or ...
!                      ASCII.isAscii(ch) &&                       // ... for ASCII input only:
!                      (inRange(lower, ASCII.toUpper(ch), upper) ||   // its upper-case form is in range
!                       inRange(lower, ASCII.toLower(ch), upper));    // or its lower-case form is in range
      }
+ 
+     static CharPredicate CIRangeU(int lower, int upper) {  // range test that also tries Character case mappings of ch
+         return ch -> {
+             if (inRange(lower, ch, upper))
+                 return true;                               // direct hit, no case mapping needed
+             int up = Character.toUpperCase(ch);
+             return inRange(lower, up, upper) ||            // upper-case form in range, or
+                    inRange(lower, Character.toLowerCase(up), upper);  // lower-case form of that upper-case form
+         };
      }
  
      /**
+      *  This must be the very first initializer.
+      */
+     static Node accept = new Node();
+ 
+     static Node lastAccept = new LastNode();
+ 
+     /**
       * Creates a predicate which can be used to match a string.
       *
       * @return  The predicate which can be used for matching on a string
       * @since   1.8
       */