jdk Cdiff src/java.base/share/classes/java/util/regex/Pattern.java

src/java.base/share/classes/java/util/regex/Pattern.java


*** 24,38 ****
--- 24,41 ----
   */
  
  package java.util.regex;
  
  import java.text.Normalizer;
+ import java.text.Normalizer.Form;
  import java.util.Locale;
  import java.util.Iterator;
  import java.util.Map;
  import java.util.ArrayList;
  import java.util.HashMap;
+ import java.util.LinkedHashSet;
+ import java.util.Set;
  import java.util.Arrays;
  import java.util.NoSuchElementException;
  import java.util.Spliterator;
  import java.util.Spliterators;
  import java.util.function.Predicate;
*** 982,991 ****
--- 985,999 ----
       * Temporary storage used by parsing pattern slice.
       */
      transient int[] buffer;
  
      /**
+      * A temporary storage used for predicate for double return.
+      */
+     transient CharPredicate predicate;
+ 
+     /**
       * Map the "name" of the "named capturing group" to its group id
       * node.
       */
      transient volatile Map<String, Integer> namedGroups;
  
*** 1024,1034 ****
  
      /**
       * If the Start node might possibly match supplementary characters.
       * It is set to true during compiling if
       * (1) There is supplementary char in pattern, or
!      * (2) There is complement node of Category or Block
       */
      private transient boolean hasSupplementary;
  
      /**
       * Compiles the given regular expression into a pattern.
--- 1032,1042 ----
  
      /**
       * If the Start node might possibly match supplementary characters.
       * It is set to true during compiling if
       * (1) There is supplementary char in pattern, or
!      * (2) There is complement node of a "family" CharProperty
       */
      private transient boolean hasSupplementary;
  
      /**
       * Compiles the given regular expression into a pattern.
*** 1376,1513 ****
              matchRoot = lastAccept;
          }
      }
  
      /**
!      * The pattern is converted to normalized form ({@linkplain
!      * java.text.Normalizer.Form.NFD NFD}, canonical decomposition)
!      * and then a pure group is constructed to match canonical
!      * equivalences of the characters.
       */
!     private void normalize() {
!         int lastCodePoint = -1;
! 
!         // Convert pattern into normalized form
!         normalizedPattern = Normalizer.normalize(pattern, Normalizer.Form.NFD);
!         patternLength = normalizedPattern.length();
! 
!         // Modify pattern to match canonical equivalences
!         StringBuilder newPattern = new StringBuilder(patternLength);
!         for(int i=0; i<patternLength; ) {
!             int c = normalizedPattern.codePointAt(i);
!             StringBuilder sequenceBuffer;
!             if ((Character.getType(c) == Character.NON_SPACING_MARK)
!                 && (lastCodePoint != -1)) {
!                 sequenceBuffer = new StringBuilder();
!                 sequenceBuffer.appendCodePoint(lastCodePoint);
!                 sequenceBuffer.appendCodePoint(c);
!                 while(Character.getType(c) == Character.NON_SPACING_MARK) {
!                     i += Character.charCount(c);
!                     if (i >= patternLength)
!                         break;
!                     c = normalizedPattern.codePointAt(i);
!                     sequenceBuffer.appendCodePoint(c);
                  }
!                 String ea = produceEquivalentAlternation(
!                                                sequenceBuffer.toString());
!                 newPattern.setLength(newPattern.length()-Character.charCount(lastCodePoint));
!                 newPattern.append("(?:").append(ea).append(")");
!             } else if (c == '[' && lastCodePoint != '\\') {
!                 i = normalizeCharClass(newPattern, i);
!             } else {
!                 newPattern.appendCodePoint(c);
              }
-             lastCodePoint = c;
-             i += Character.charCount(c);
          }
!         normalizedPattern = newPattern.toString();
      }
  
!     /**
!      * Complete the character class being parsed and add a set
!      * of alternations to it that will match the canonical equivalences
!      * of the characters within the class.
!      */
!     private int normalizeCharClass(StringBuilder newPattern, int i) {
!         StringBuilder charClass = new StringBuilder();
!         StringBuilder eq = null;
!         int lastCodePoint = -1;
!         String result;
! 
!         i++;
!         charClass.append("[");
!         while(true) {
!             int c = normalizedPattern.codePointAt(i);
!             StringBuilder sequenceBuffer;
! 
!             if (c == ']' && lastCodePoint != '\\') {
!                 charClass.append((char)c);
!                 break;
!             } else if (Character.getType(c) == Character.NON_SPACING_MARK) {
!                 sequenceBuffer = new StringBuilder();
!                 sequenceBuffer.appendCodePoint(lastCodePoint);
!                 while(Character.getType(c) == Character.NON_SPACING_MARK) {
!                     sequenceBuffer.appendCodePoint(c);
!                     i += Character.charCount(c);
!                     if (i >= normalizedPattern.length())
                          break;
!                     c = normalizedPattern.codePointAt(i);
                  }
!                 String ea = produceEquivalentAlternation(
!                                                   sequenceBuffer.toString());
! 
!                 charClass.setLength(charClass.length()-Character.charCount(lastCodePoint));
!                 if (eq == null)
!                     eq = new StringBuilder();
!                 eq.append('|');
!                 eq.append(ea);
!             } else {
!                 charClass.appendCodePoint(c);
!                 i++;
              }
-             if (i == normalizedPattern.length())
-                 throw error("Unclosed character class");
-             lastCodePoint = c;
          }
! 
!         if (eq != null) {
!             result = "(?:"+charClass.toString()+eq.toString()+")";
!         } else {
!             result = charClass.toString();
          }
  
!         newPattern.append(result);
!         return i;
      }
  
      /**
       * Given a specific sequence composed of a regular character and
       * combining marks that follow it, produce the alternation that will
       * match all canonical equivalences of that sequence.
       */
!     private String produceEquivalentAlternation(String source) {
!         int len = countChars(source, 0, 1);
!         if (source.length() == len)
!             // source has one character.
!             return source;
! 
!         String base = source.substring(0,len);
!         String combiningMarks = source.substring(len);
! 
          String[] perms = producePermutations(combiningMarks);
-         StringBuilder result = new StringBuilder(source);
- 
          // Add combined permutations
!         for(int x=0; x<perms.length; x++) {
              String next = base + perms[x];
!             if (x>0)
!                 result.append("|"+next);
              next = composeOneStep(next);
!             if (next != null)
!                 result.append("|"+produceEquivalentAlternation(next));
          }
-         return result.toString();
      }
  
      /**
       * Returns an array of strings that have all the possible
       * permutations of the characters in the input string.
--- 1384,1528 ----
              matchRoot = lastAccept;
          }
      }
  
      /**
!      * The pattern is converted to normalized form ({@link
!      * java.text.Normalizer.Form.NFC NFC}, canonical decomposition,
!      * followed by canonical composition for the character class
!      * part, and {@link java.text.Normalizer.Form.NFD NFD},
!      * canonical decomposition) for the rest), and then a pure
!      * group is constructed to match canonical equivalences of the
!      * characters.
       */
!     private static String normalize(String pattern) {
!         int plen = pattern.length();
!         StringBuilder pbuf = new StringBuilder(plen);
!         char last = 0;
!         int lastStart = 0;
!         char cc = 0;
!         for (int i = 0; i < plen;) {
!             char c = pattern.charAt(i);
!             if (cc == 0 &&    // top level
!                 c == '\\' && i + 1 < plen && pattern.charAt(i + 1) == '\\') {
!                 i += 2; last = 0;
!                 continue;
              }
!             if (c == '[' && last != '\\') {
!                 if (cc == 0) {
!                     if (lastStart < i)
!                         normalizeSlice(pattern, lastStart, i, pbuf);
!                     lastStart = i;
!                 }
!                 cc++;
!             } else if (c == ']' && last != '\\') {
!                 cc--;
!                 if (cc == 0) {
!                     normalizeClazz(pattern, lastStart, i + 1, pbuf);
!                     lastStart = i + 1;
                  }
              }
!             last = c;
!             i++;
!         }
!         assert (cc == 0);
!         if (lastStart < plen)
!             normalizeSlice(pattern, lastStart, plen, pbuf);
!         return pbuf.toString();
      }
  
!     private static void normalizeSlice(String src, int off, int limit,
!                                        StringBuilder dst)
!     {
!         int len = src.length();
!         int off0 = off;
!         while (off < limit && ASCII.isAscii(src.charAt(off))) {
!             off++;
!         }
!         if (off == limit) {
!             dst.append(src, off0, limit);
!             return;
!         }
!         off--;
!         if (off < off0)
!             off = off0;
!         else
!             dst.append(src, off0, off);
!         while (off < limit) {
!             int ch0 = src.codePointAt(off);
!             if (".$|()[]{}^?*+\\".indexOf(ch0) != -1) {
!                 dst.append((char)ch0);
!                 off++;
!                 continue;
!             }
!             int j = off + Character.charCount(ch0);
!             int ch1;
!             while (j < limit) {
!                 ch1 = src.codePointAt(j);
!                 if (Grapheme.isBoundary(ch0, ch1))
                      break;
!                 ch0 = ch1;
!                 j += Character.charCount(ch1);
              }
!             String seq = src.substring(off, j);
!             String nfd = Normalizer.normalize(seq, Normalizer.Form.NFD);
!             off = j;
!             if (nfd.length() > 1) {
!                 ch0 = nfd.codePointAt(0);
!                 ch1 = nfd.codePointAt(Character.charCount(ch0));
!                 if (Character.getType(ch1) == Character.NON_SPACING_MARK) {
!                     Set<String> altns = new LinkedHashSet<>();
!                     altns.add(seq);
!                     produceEquivalentAlternation(nfd, altns);
!                     dst.append("(?:");
!                     altns.forEach( s -> dst.append(s + "|"));
!                     dst.delete(dst.length() - 1, dst.length());
!                     dst.append(")");
!                     continue;
                  }
              }
!             String nfc = Normalizer.normalize(seq, Normalizer.Form.NFC);
!             if (!seq.equals(nfc) && !nfd.equals(nfc))
!                 dst.append("(?:" + seq + "|" + nfd  + "|" + nfc + ")");
!             else if (!seq.equals(nfd))
!                 dst.append("(?:" + seq + "|" + nfd + ")");
!             else
!                 dst.append(seq);
!         }
      }
  
!     private static void normalizeClazz(String src, int off, int limit,
!                                        StringBuilder dst)
!     {
!         dst.append(Normalizer.normalize(src.substring(off, limit), Form.NFC));
      }
  
      /**
       * Given a specific sequence composed of a regular character and
       * combining marks that follow it, produce the alternation that will
       * match all canonical equivalences of that sequence.
       */
!     private static void produceEquivalentAlternation(String src,
!                                                      Set<String> dst)
!     {
!         int len = countChars(src, 0, 1);
!         if (src.length() == len) {
!             dst.add(src);  // source has one character.
!             return;
!         }
!         String base = src.substring(0,len);
!         String combiningMarks = src.substring(len);
          String[] perms = producePermutations(combiningMarks);
          // Add combined permutations
!         for(int x = 0; x < perms.length; x++) {
              String next = base + perms[x];
!             dst.add(next);
              next = composeOneStep(next);
!             if (next != null) {
!                 produceEquivalentAlternation(next, dst);
!             }
          }
      }
  
      /**
       * Returns an array of strings that have all the possible
       * permutations of the characters in the input string.
*** 1515,1525 ****
       * of a set of combining marks. Note that some of the permutations
       * are invalid because of combining class collisions, and these
       * possibilities must be removed because they are not canonically
       * equivalent.
       */
!     private String[] producePermutations(String input) {
          if (input.length() == countChars(input, 0, 1))
              return new String[] {input};
  
          if (input.length() == countChars(input, 0, 2)) {
              int c0 = Character.codePointAt(input, 0);
--- 1530,1540 ----
       * of a set of combining marks. Note that some of the permutations
       * are invalid because of combining class collisions, and these
       * possibilities must be removed because they are not canonically
       * equivalent.
       */
!     private static String[] producePermutations(String input) {
          if (input.length() == countChars(input, 0, 1))
              return new String[] {input};
  
          if (input.length() == countChars(input, 0, 2)) {
              int c0 = Character.codePointAt(input, 0);
*** 1573,1598 ****
          String[] result = new String[index];
          System.arraycopy(temp, 0, result, 0, index);
          return result;
      }
  
!     private int getClass(int c) {
          return sun.text.Normalizer.getCombiningClass(c);
      }
  
      /**
       * Attempts to compose input by combining the first character
       * with the first combining mark following it. Returns a String
       * that is the composition of the leading character with its first
       * combining mark followed by the remaining combining marks. Returns
       * null if the first two characters cannot be further composed.
       */
!     private String composeOneStep(String input) {
          int len = countChars(input, 0, 2);
          String firstTwoCharacters = input.substring(0, len);
          String result = Normalizer.normalize(firstTwoCharacters, Normalizer.Form.NFC);
- 
          if (result.equals(firstTwoCharacters))
              return null;
          else {
              String remainder = input.substring(len);
              return result + remainder;
--- 1588,1612 ----
          String[] result = new String[index];
          System.arraycopy(temp, 0, result, 0, index);
          return result;
      }
  
!     private static int getClass(int c) {
          return sun.text.Normalizer.getCombiningClass(c);
      }
  
      /**
       * Attempts to compose input by combining the first character
       * with the first combining mark following it. Returns a String
       * that is the composition of the leading character with its first
       * combining mark followed by the remaining combining marks. Returns
       * null if the first two characters cannot be further composed.
       */
!     private static String composeOneStep(String input) {
          int len = countChars(input, 0, 2);
          String firstTwoCharacters = input.substring(0, len);
          String result = Normalizer.normalize(firstTwoCharacters, Normalizer.Form.NFC);
          if (result.equals(firstTwoCharacters))
              return null;
          else {
              String remainder = input.substring(len);
              return result + remainder;
*** 1675,1685 ****
       * of the expression which will create the object tree.
       */
      private void compile() {
          // Handle canonical equivalences
          if (has(CANON_EQ) && !has(LITERAL)) {
!             normalize();
          } else {
              normalizedPattern = pattern;
          }
          patternLength = normalizedPattern.length();
  
--- 1689,1699 ----
       * of the expression which will create the object tree.
       */
      private void compile() {
          // Handle canonical equivalences
          if (has(CANON_EQ) && !has(LITERAL)) {
!             normalizedPattern = normalize(pattern);
          } else {
              normalizedPattern = pattern;
          }
          patternLength = normalizedPattern.length();
  
*** 1752,1799 ****
          }
          return groups;
      }
  
      /**
-      * Used to print out a subtree of the Pattern to help with debugging.
-      */
-     private static void printObjectTree(Node node) {
-         while(node != null) {
-             if (node instanceof Prolog) {
-                 System.out.println(node);
-                 printObjectTree(((Prolog)node).loop);
-                 System.out.println("**** end contents prolog loop");
-             } else if (node instanceof Loop) {
-                 System.out.println(node);
-                 printObjectTree(((Loop)node).body);
-                 System.out.println("**** end contents Loop body");
-             } else if (node instanceof Curly) {
-                 System.out.println(node);
-                 printObjectTree(((Curly)node).atom);
-                 System.out.println("**** end contents Curly body");
-             } else if (node instanceof GroupCurly) {
-                 System.out.println(node);
-                 printObjectTree(((GroupCurly)node).atom);
-                 System.out.println("**** end contents GroupCurly body");
-             } else if (node instanceof GroupTail) {
-                 System.out.println(node);
-                 System.out.println("Tail next is "+node.next);
-                 return;
-             } else {
-                 System.out.println(node);
-             }
-             node = node.next;
-             if (node != null)
-                 System.out.println("->next:");
-             if (node == Pattern.accept) {
-                 System.out.println("Accept Node");
-                 node = null;
-             }
-        }
-     }
- 
-     /**
       * Used to accumulate information about a subtree of the object graph
       * so that optimizations can be applied to the subtree.
       */
      static final class TreeInfo {
          int minLength;
--- 1766,1775 ----
*** 2081,2091 ****
                      tail.next = node;
                  // Double return: Tail was returned in root
                  tail = root;
                  continue;
              case '[':
!                 node = clazz(true);
                  break;
              case '\\':
                  ch = nextEscaped();
                  if (ch == 'p' || ch == 'P') {
                      boolean oneLetter = true;
--- 2057,2070 ----
                      tail.next = node;
                  // Double return: Tail was returned in root
                  tail = root;
                  continue;
              case '[':
!                 if (has(CANON_EQ) && !has(LITERAL))
!                     node = new NFCCharProperty(clazz(true));
!                 else
!                     node = newCharProperty(clazz(true));
                  break;
              case '\\':
                  ch = nextEscaped();
                  if (ch == 'p' || ch == 'P') {
                      boolean oneLetter = true;
*** 2094,2104 ****
                      if (ch != '{') {
                          unread();
                      } else {
                          oneLetter = false;
                      }
!                     node = family(oneLetter, comp);
                  } else {
                      unread();
                      node = atom();
                  }
                  break;
--- 2073,2087 ----
                      if (ch != '{') {
                          unread();
                      } else {
                          oneLetter = false;
                      }
!                     // node = newCharProperty(family(oneLetter, comp));
!                     if (has(CANON_EQ) && !has(LITERAL))
!                         node = new NFCCharProperty(family(oneLetter, comp));
!                     else
!                         node = newCharProperty(family(oneLetter, comp));
                  } else {
                      unread();
                      node = atom();
                  }
                  break;
*** 2121,2136 ****
                      node = new Dollar(has(MULTILINE));
                  break;
              case '.':
                  next();
                  if (has(DOTALL)) {
!                     node = new All();
                  } else {
!                     if (has(UNIX_LINES))
!                         node = new UnixDot();
!                     else {
!                         node = new Dot();
                      }
                  }
                  break;
              case '|':
              case ')':
--- 2104,2119 ----
                      node = new Dollar(has(MULTILINE));
                  break;
              case '.':
                  next();
                  if (has(DOTALL)) {
!                     node = new CharProperty(ALL);
                  } else {
!                     if (has(UNIX_LINES)) {
!                         node = new CharProperty(UNIXDOT);
!                     } else {
!                         node = new CharProperty(DOT);
                      }
                  }
                  break;
              case '|':
              case ')':
*** 2153,2163 ****
                  node = atom();
                  break;
              }
  
              node = closure(node);
- 
              if (head == null) {
                  head = tail = node;
              } else {
                  tail.next = node;
                  tail = node;
--- 2136,2145 ----
*** 2211,2221 ****
                          ch = next(); // Consume { if present
                          if (ch != '{')
                              unread();
                          else
                              oneLetter = false;
!                         return family(oneLetter, comp);
                      }
                  }
                  unread();
                  prev = cursor;
                  ch = escape(false, first == 0, false);
--- 2193,2206 ----
                          ch = next(); // Consume { if present
                          if (ch != '{')
                              unread();
                          else
                              oneLetter = false;
!                         if (has(CANON_EQ) && !has(LITERAL))
!                             return new NFCCharProperty(family(oneLetter, comp));
!                         else
!                             return newCharProperty(family(oneLetter, comp));
                      }
                  }
                  unread();
                  prev = cursor;
                  ch = escape(false, first == 0, false);
*** 2249,2259 ****
                  continue;
              }
              break;
          }
          if (first == 1) {
!             return newSingle(buffer[0]);
          } else {
              return newSlice(buffer, first, hasSupplementary);
          }
      }
  
--- 2234,2244 ----
                  continue;
              }
              break;
          }
          if (first == 1) {
!             return newCharProperty(single(buffer[0]));
          } else {
              return newSlice(buffer, first, hasSupplementary);
          }
      }
  
*** 2344,2366 ****
              if (create) root = new Bound(Bound.NONE, has(UNICODE_CHARACTER_CLASS));
              return -1;
          case 'C':
              break;
          case 'D':
!             if (create) root = has(UNICODE_CHARACTER_CLASS)
!                                ? new Utype(UnicodeProp.DIGIT).complement()
!                                : new Ctype(ASCII.DIGIT).complement();
              return -1;
          case 'E':
          case 'F':
              break;
          case 'G':
              if (inclass) break;
              if (create) root = new LastMatch();
              return -1;
          case 'H':
!             if (create) root = new HorizWS().complement();
              return -1;
          case 'I':
          case 'J':
          case 'K':
          case 'L':
--- 2329,2359 ----
              if (create) root = new Bound(Bound.NONE, has(UNICODE_CHARACTER_CLASS));
              return -1;
          case 'C':
              break;
          case 'D':
!             if (create) {
!                 predicate = has(UNICODE_CHARACTER_CLASS) ?
!                             CharPredicates.DIGIT : CharPredicates.ASCII_DIGIT;
!                 predicate = predicate.negate();
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 'E':
          case 'F':
              break;
          case 'G':
              if (inclass) break;
              if (create) root = new LastMatch();
              return -1;
          case 'H':
!             if (create) {
!                 predicate = HorizWS.negate();
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 'I':
          case 'J':
          case 'K':
          case 'L':
*** 2375,2398 ****
          case 'R':
              if (inclass) break;
              if (create) root = new LineEnding();
              return -1;
          case 'S':
!             if (create) root = has(UNICODE_CHARACTER_CLASS)
!                                ? new Utype(UnicodeProp.WHITE_SPACE).complement()
!                                : new Ctype(ASCII.SPACE).complement();
              return -1;
          case 'T':
          case 'U':
              break;
          case 'V':
!             if (create) root = new VertWS().complement();
              return -1;
          case 'W':
!             if (create) root = has(UNICODE_CHARACTER_CLASS)
!                                ? new Utype(UnicodeProp.WORD).complement()
!                                : new Ctype(ASCII.WORD).complement();
              return -1;
          case 'X':
              if (inclass) break;
              if (create) {
                  root = new XGrapheme();
--- 2368,2403 ----
          case 'R':
              if (inclass) break;
              if (create) root = new LineEnding();
              return -1;
          case 'S':
!             if (create) {
!                 predicate = has(UNICODE_CHARACTER_CLASS) ?
!                             CharPredicates.WHITE_SPACE : CharPredicates.ASCII_SPACE;
!                 predicate = predicate.negate();
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 'T':
          case 'U':
              break;
          case 'V':
!             if (create) {
!                 predicate = VertWS.negate();
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 'W':
!             if (create) {
!                 predicate = has(UNICODE_CHARACTER_CLASS) ?
!                             CharPredicates.WORD : CharPredicates.ASCII_WORD;
!                 predicate = predicate.negate();
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 'X':
              if (inclass) break;
              if (create) {
                  root = new XGrapheme();
*** 2428,2449 ****
              }
              return -1;
          case 'c':
              return c();
          case 'd':
!             if (create) root = has(UNICODE_CHARACTER_CLASS)
!                                ? new Utype(UnicodeProp.DIGIT)
!                                : new Ctype(ASCII.DIGIT);
              return -1;
          case 'e':
              return '\033';
          case 'f':
              return '\f';
          case 'g':
              break;
          case 'h':
!             if (create) root = new HorizWS();
              return -1;
          case 'i':
          case 'j':
              break;
          case 'k':
--- 2433,2461 ----
              }
              return -1;
          case 'c':
              return c();
          case 'd':
!             if (create) {
!                 predicate = has(UNICODE_CHARACTER_CLASS) ?
!                             CharPredicates.DIGIT : CharPredicates.ASCII_DIGIT;
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 'e':
              return '\033';
          case 'f':
              return '\f';
          case 'g':
              break;
          case 'h':
!             if (create) {
!                 predicate = HorizWS;
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 'i':
          case 'j':
              break;
          case 'k':
*** 2471,2483 ****
          case 'q':
              break;
          case 'r':
              return '\r';
          case 's':
!             if (create) root = has(UNICODE_CHARACTER_CLASS)
!                                ? new Utype(UnicodeProp.WHITE_SPACE)
!                                : new Ctype(ASCII.SPACE);
              return -1;
          case 't':
              return '\t';
          case 'u':
              return u();
--- 2483,2498 ----
          case 'q':
              break;
          case 'r':
              return '\r';
          case 's':
!             if (create) {
!                 predicate = has(UNICODE_CHARACTER_CLASS) ?
!                             CharPredicates.WHITE_SPACE : CharPredicates.ASCII_SPACE;
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 't':
              return '\t';
          case 'u':
              return u();
*** 2490,2505 ****
              // the start or end value, such as [\v-...] or [...-\v], in
              // which a single definite value (0x0B) is expected. For
              // compatibility concern '\013'/0x0B is returned if isrange.
              if (isrange)
                  return '\013';
!             if (create) root = new VertWS();
              return -1;
          case 'w':
!             if (create) root = has(UNICODE_CHARACTER_CLASS)
!                                ? new Utype(UnicodeProp.WORD)
!                                : new Ctype(ASCII.WORD);
              return -1;
          case 'x':
              return x();
          case 'y':
              break;
--- 2505,2527 ----
              // the start or end value, such as [\v-...] or [...-\v], in
              // which a single definite value (0x0B) is expected. For
              // compatibility concern '\013'/0x0B is returned if isrange.
              if (isrange)
                  return '\013';
!             if (create) {
!                 predicate = VertWS;
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 'w':
!             if (create) {
!                 predicate = has(UNICODE_CHARACTER_CLASS) ?
!                             CharPredicates.WORD : CharPredicates.ASCII_WORD;
!                 if (!inclass)
!                     root = newCharProperty(predicate);
!             }
              return -1;
          case 'x':
              return x();
          case 'y':
              break;
*** 2518,2629 ****
       *
       * Consumes a ] on the way out if consume is true. Usually consume
       * is true except for the case of [abc&&def] where def is a separate
       * right hand node with "understood" brackets.
       */
!     private CharProperty clazz(boolean consume) {
!         CharProperty prev = null;
!         CharProperty node = null;
          BitClass bits = new BitClass();
!         boolean include = true;
!         boolean firstInClass = true;
          int ch = next();
!         for (;;) {
!             switch (ch) {
!                 case '^':
                      // Negates if first char in a class, otherwise literal
!                     if (firstInClass) {
!                         if (temp[cursor-1] != '[')
!                             break;
                          ch = next();
!                         include = !include;
!                         continue;
!                     } else {
!                         // ^ not first in class, treat as literal
!                         break;
                      }
                  case '[':
!                     firstInClass = false;
!                     node = clazz(true);
                      if (prev == null)
!                         prev = node;
                      else
!                         prev = union(prev, node);
                      ch = peek();
                      continue;
                  case '&':
-                     firstInClass = false;
                      ch = next();
                      if (ch == '&') {
                          ch = next();
!                         CharProperty rightNode = null;
                          while (ch != ']' && ch != '&') {
                              if (ch == '[') {
!                                 if (rightNode == null)
!                                     rightNode = clazz(true);
                                  else
!                                     rightNode = union(rightNode, clazz(true));
                              } else { // abc&&def
                                  unread();
!                                 rightNode = clazz(false);
                              }
                              ch = peek();
                          }
!                         if (rightNode != null)
!                             node = rightNode;
                          if (prev == null) {
!                             if (rightNode == null)
                                  throw error("Bad class syntax");
                              else
!                                 prev = rightNode;
                          } else {
!                             prev = intersection(prev, node);
                          }
                      } else {
                          // treat as a literal &
                          unread();
                          break;
                      }
                      continue;
                  case 0:
-                     firstInClass = false;
                      if (cursor >= patternLength)
                          throw error("Unclosed character class");
                      break;
                  case ']':
!                     firstInClass = false;
!                     if (prev != null) {
                          if (consume)
                              next();
                          return prev;
                      }
                      break;
                  default:
-                     firstInClass = false;
                      break;
              }
!             node = range(bits);
!             if (include) {
!                 if (prev == null) {
!                     prev = node;
!                 } else {
!                     if (prev != node)
!                         prev = union(prev, node);
!                 }
              } else {
!                 if (prev == null) {
!                     prev = node.complement();
!                 } else {
!                     if (prev != node)
!                         prev = setDifference(prev, node);
!                 }
              }
              ch = peek();
          }
      }
  
!     private CharProperty bitsOrSingle(BitClass bits, int ch) {
          /* Bits can only handle codepoints in [u+0000-u+00ff] range.
             Use "single" node instead of bits when dealing with unicode
             case folding for codepoints listed below.
             (1)Uppercase out of range: u+00ff, u+00b5
                toUpperCase(u+00ff) -> u+0178
--- 2540,2650 ----
       *
       * Consumes a ] on the way out if consume is true. Usually consume
       * is true except for the case of [abc&&def] where def is a separate
       * right hand node with "understood" brackets.
       */
!     private CharPredicate clazz(boolean consume) {
!         CharPredicate prev = null;
!         CharPredicate curr = null;
          BitClass bits = new BitClass();
!         BmpCharPredicate bitsP = ch -> ch < 256 && bits.bits[ch];
! 
!         boolean isNeg = false;
!         boolean hasBits = false;
          int ch = next();
! 
          // Negates if first char in a class, otherwise literal
!         if (ch == '^' && temp[cursor-1] == '[') {
              ch = next();
!             isNeg = true;
          }
+         for (;;) {
+             switch (ch) {
                  case '[':
!                     curr = clazz(true);
                      if (prev == null)
!                         prev = curr;
                      else
!                         prev = prev.union(curr);
                      ch = peek();
                      continue;
                  case '&':
                      ch = next();
                      if (ch == '&') {
                          ch = next();
!                         CharPredicate right = null;
                          while (ch != ']' && ch != '&') {
                              if (ch == '[') {
!                                 if (right == null)
!                                     right = clazz(true);
                                  else
!                                     right = right.union(clazz(true));
                              } else { // abc&&def
                                  unread();
!                                 right = clazz(false);
                              }
                              ch = peek();
                          }
!                         if (hasBits) {
!                             // bits used, union has high precedence
                              if (prev == null) {
!                                 prev = curr = bitsP;
!                             } else {
!                                 prev = prev.union(bitsP);
!                             }
!                             hasBits = false;
!                         }
!                         if (right != null)
!                             curr = right;
!                         if (prev == null) {
!                             if (right == null)
                                  throw error("Bad class syntax");
                              else
!                                 prev = right;
                          } else {
!                             prev = prev.and(curr);
                          }
                      } else {
                          // treat as a literal &
                          unread();
                          break;
                      }
                      continue;
                  case 0:
                      if (cursor >= patternLength)
                          throw error("Unclosed character class");
                      break;
                  case ']':
!                     if (prev != null || hasBits) {
                          if (consume)
                              next();
+                         if (prev == null)
+                             prev = bitsP;
+                         else if (hasBits)
+                             prev = prev.union(bitsP);
+                         if (isNeg)
+                             return prev.negate();
                          return prev;
                      }
                      break;
                  default:
                      break;
              }
!             curr = range(bits);
!             if (curr == null) {    // the bits used
!                 hasBits = true;
              } else {
!                 if (prev == null)
!                     prev = curr;
!                 else if (prev != curr)
!                     prev = prev.union(curr);
              }
              ch = peek();
          }
      }
  
!     private CharPredicate bitsOrSingle(BitClass bits, int ch) {
          /* Bits can only handle codepoints in [u+0000-u+00ff] range.
             Use "single" node instead of bits when dealing with unicode
             case folding for codepoints listed below.
             (1)Uppercase out of range: u+00ff, u+00b5
                toUpperCase(u+00ff) -> u+0178
*** 2644,2663 ****
              !(has(CASE_INSENSITIVE) && has(UNICODE_CASE) &&
                (ch == 0xff || ch == 0xb5 ||
                 ch == 0x49 || ch == 0x69 ||  //I and i
                 ch == 0x53 || ch == 0x73 ||  //S and s
                 ch == 0x4b || ch == 0x6b ||  //K and k
!                ch == 0xc5 || ch == 0xe5)))  //A+ring
!             return bits.add(ch, flags());
!         return newSingle(ch);
      }
  
      /**
       * Parse a single character or a character range in a character class
       * and return its representative node.
       */
!     private CharProperty range(BitClass bits) {
          int ch = peek();
          if (ch == '\\') {
              ch = nextEscaped();
              if (ch == 'p' || ch == 'P') { // A property
                  boolean comp = (ch == 'P');
--- 2665,2711 ----
              !(has(CASE_INSENSITIVE) && has(UNICODE_CASE) &&
                (ch == 0xff || ch == 0xb5 ||
                 ch == 0x49 || ch == 0x69 ||    //I and i
                 ch == 0x53 || ch == 0x73 ||    //S and s
                 ch == 0x4b || ch == 0x6b ||    //K and k
!                ch == 0xc5 || ch == 0xe5))) {  //A+ring {
!             bits.add(ch, flags());
!             return null;
!         }
!         return single(ch);
!     }
! 
!     /**
!      *  Returns a suitably optimized, single character predicate
!      */
!     private CharPredicate single(final int ch) {
!         if (has(CASE_INSENSITIVE)) {
!             int lower, upper;
!             if (has(UNICODE_CASE)) {
!                 upper = Character.toUpperCase(ch);
!                 lower = Character.toLowerCase(upper);
!                 // Unicode case insensitive matches
!                 if (upper != lower)
!                     return SingleU(lower);
!             } else if (ASCII.isAscii(ch)) {
!                 lower = ASCII.toLower(ch);
!                 upper = ASCII.toUpper(ch);
!                 // Case insensitive matches a given BMP character
!                 if (lower != upper)
!                     return SingleI(lower, upper);
!             }
!         }
!         if (isSupplementary(ch))
!             return SingleS(ch);
!         return Single(ch);  // Match a given BMP character
      }
  
      /**
       * Parse a single character or a character range in a character class
       * and return its representative node.
       */
!     private CharPredicate range(BitClass bits) {
          int ch = peek();
          if (ch == '\\') {
              ch = nextEscaped();
              if (ch == 'p' || ch == 'P') { // A property
                  boolean comp = (ch == 'P');
*** 2672,2682 ****
              } else { // ordinary escape
                  boolean isrange = temp[cursor+1] == '-';
                  unread();
                  ch = escape(true, true, isrange);
                  if (ch == -1)
!                     return (CharProperty) root;
              }
          } else {
              next();
          }
          if (ch >= 0) {
--- 2720,2730 ----
              } else { // ordinary escape
                  boolean isrange = temp[cursor+1] == '-';
                  unread();
                  ch = escape(true, true, isrange);
                  if (ch == -1)
!                     return predicate;
              }
          } else {
              next();
          }
          if (ch >= 0) {
*** 2694,2723 ****
                          next();
                      }
                      if (m < ch) {
                          throw error("Illegal character range");
                      }
!                     if (has(CASE_INSENSITIVE))
!                         return caseInsensitiveRangeFor(ch, m);
!                     else
!                         return rangeFor(ch, m);
                  }
              }
              return bitsOrSingle(bits, ch);
          }
          throw error("Unexpected character '"+((char)ch)+"'");
      }
  
      /**
       * Parses a Unicode character family and returns its representative node.
       */
!     private CharProperty family(boolean singleLetter,
!                                 boolean maybeComplement)
      {
          next();
          String name;
!         CharProperty node = null;
  
          if (singleLetter) {
              int c = temp[cursor];
              if (!Character.isSupplementaryCodePoint(c)) {
                  name = String.valueOf((char)c);
--- 2742,2774 ----
                          next();
                      }
                      if (m < ch) {
                          throw error("Illegal character range");
                      }
!                     if (has(CASE_INSENSITIVE)) {
!                         if (has(UNICODE_CASE))
!                             return CIRangeU(ch, m);
!                         return CIRange(ch, m);
!                     } else {
!                         return Range(ch, m);
!                     }
                  }
              }
              return bitsOrSingle(bits, ch);
          }
          throw error("Unexpected character '"+((char)ch)+"'");
      }
  
      /**
       * Parses a Unicode character family and returns its representative node.
       */
!     private CharPredicate family(boolean singleLetter,
!                                      boolean isComplement)
      {
          next();
          String name;
!         CharPredicate p = null;
  
          if (singleLetter) {
              int c = temp[cursor];
              if (!Character.isSupplementaryCodePoint(c)) {
                  name = String.valueOf((char)c);
*** 2745,2836 ****
              String value = name.substring(i + 1);
              name = name.substring(0, i).toLowerCase(Locale.ENGLISH);
              switch (name) {
                  case "sc":
                  case "script":
!                     node = unicodeScriptPropertyFor(value);
                      break;
                  case "blk":
                  case "block":
!                     node = unicodeBlockPropertyFor(value);
                      break;
                  case "gc":
                  case "general_category":
!                     node = charPropertyNodeFor(value);
                      break;
                  default:
                      throw error("Unknown Unicode property {name=<" + name + ">, "
                                  + "value=<" + value + ">}");
!             }
          } else {
              if (name.startsWith("In")) {
!                 // \p{inBlockName}
!                 node = unicodeBlockPropertyFor(name.substring(2));
              } else if (name.startsWith("Is")) {
!                 // \p{isGeneralCategory} and \p{isScriptName}
                  name = name.substring(2);
!                 UnicodeProp uprop = UnicodeProp.forName(name);
!                 if (uprop != null)
!                     node = new Utype(uprop);
!                 if (node == null)
!                     node = CharPropertyNames.charPropertyFor(name);
!                 if (node == null)
!                     node = unicodeScriptPropertyFor(name);
              } else {
                  if (has(UNICODE_CHARACTER_CLASS)) {
!                     UnicodeProp uprop = UnicodeProp.forPOSIXName(name);
!                     if (uprop != null)
!                         node = new Utype(uprop);
                  }
!                 if (node == null)
!                     node = charPropertyNodeFor(name);
              }
          }
!         if (maybeComplement) {
!             if (node instanceof Category || node instanceof Block)
                  hasSupplementary = true;
!             node = node.complement();
!         }
!         return node;
!     }
! 
! 
!     /**
!      * Returns a CharProperty matching all characters belong to
!      * a UnicodeScript.
!      */
!     private CharProperty unicodeScriptPropertyFor(String name) {
!         final Character.UnicodeScript script;
!         try {
!             script = Character.UnicodeScript.forName(name);
!         } catch (IllegalArgumentException iae) {
!             throw error("Unknown character script name {" + name + "}");
!         }
!         return new Script(script);
!     }
! 
!     /**
!      * Returns a CharProperty matching all characters in a UnicodeBlock.
!      */
!     private CharProperty unicodeBlockPropertyFor(String name) {
!         final Character.UnicodeBlock block;
!         try {
!             block = Character.UnicodeBlock.forName(name);
!         } catch (IllegalArgumentException iae) {
!             throw error("Unknown character block name {" + name + "}");
          }
!         return new Block(block);
      }
  
!     /**
!      * Returns a CharProperty matching all characters in a named property.
!      */
!     private CharProperty charPropertyNodeFor(String name) {
!         CharProperty p = CharPropertyNames.charPropertyFor(name);
          if (p == null)
!             throw error("Unknown character property name {" + name + "}");
!         return p;
      }
  
      /**
       * Parses and returns the name of a "named capturing group", the trailing
       * ">" is consumed after parsing.
--- 2796,2861 ----
              String value = name.substring(i + 1);
              name = name.substring(0, i).toLowerCase(Locale.ENGLISH);
              switch (name) {
                  case "sc":
                  case "script":
!                     p = CharPredicates.forUnicodeScript(value);
                      break;
                  case "blk":
                  case "block":
!                     p = CharPredicates.forUnicodeBlock(value);
                      break;
                  case "gc":
                  case "general_category":
!                     p = CharPredicates.forProperty(value);
                      break;
                  default:
+                     break;
+             }
+             if (p == null)
                  throw error("Unknown Unicode property {name=<" + name + ">, "
                               + "value=<" + value + ">}");
! 
          } else {
              if (name.startsWith("In")) {
!                 // \p{InBlockName}
!                 p = CharPredicates.forUnicodeBlock(name.substring(2));
              } else if (name.startsWith("Is")) {
!                 // \p{IsGeneralCategory} and \p{IsScriptName}
                  name = name.substring(2);
!                 p = CharPredicates.forUnicodeProperty(name);
!                 if (p == null)
!                     p = CharPredicates.forProperty(name);
!                 if (p == null)
!                     p = CharPredicates.forUnicodeScript(name);
              } else {
                  if (has(UNICODE_CHARACTER_CLASS)) {
!                     p = CharPredicates.forPOSIXName(name);
                  }
!                 if (p == null)
!                     p = CharPredicates.forProperty(name);
              }
+             if (p == null)
+                 throw error("Unknown character property name {In/Is" + name + "}");
          }
!         if (isComplement) {
!             // it might be too expensive to detect if a complement of
!             // CharProperty can match "certain" supplementary. So just
!             // go with StartS.
              hasSupplementary = true;
!             p = p.negate();
          }
!         return p;
      }
  
!     private CharProperty newCharProperty(CharPredicate p) {
          if (p == null)
!             return null;
!         if (p instanceof BmpCharPredicate)
!             return new BmpCharProperty((BmpCharPredicate)p);
!         else
!             return new CharProperty(p);
      }
  
      /**
       * Parses and returns the name of a "named capturing group", the trailing
       * ">" is consumed after parsing.
*** 2882,2892 ****
                  break;
              case '>':   // (?>xxx)  independent group
                  head = createGroup(true);
                  tail = root;
                  head.next = expr(tail);
!                 head = tail = new Ques(head, INDEPENDENT);
                  break;
              case '<':   // (?<xxx)  look behind
                  ch = read();
                  if (ASCII.isLower(ch) || ASCII.isUpper(ch)) {
                      // named captured group
--- 2907,2917 ----
                  break;
              case '>':   // (?>xxx)  independent group
                  head = createGroup(true);
                  tail = root;
                  head.next = expr(tail);
!                 head = tail = new Ques(head, Qtype.INDEPENDENT);
                  break;
              case '<':   // (?<xxx)  look behind
                  ch = read();
                  if (ASCII.isLower(ch) || ASCII.isUpper(ch)) {
                      // named captured group
*** 2968,2993 ****
              return node;    // Dual return
          }
  
          if (node instanceof Ques) {
              Ques ques = (Ques) node;
!             if (ques.type == POSSESSIVE) {
                  root = node;
                  return node;
              }
              tail.next = new BranchConn();
              tail = tail.next;
!             if (ques.type == GREEDY) {
                  head = new Branch(head, null, tail);
              } else { // Reluctant quantifier
                  head = new Branch(null, head, tail);
              }
              root = tail;
              return head;
          } else if (node instanceof Curly) {
              Curly curly = (Curly) node;
!             if (curly.type == POSSESSIVE) {
                  root = node;
                  return node;
              }
              // Discover if the group is deterministic
              TreeInfo info = new TreeInfo();
--- 2993,3018 ----
              return node;    // Dual return
          }
  
          if (node instanceof Ques) {
              Ques ques = (Ques) node;
!             if (ques.type == Qtype.POSSESSIVE) {
                  root = node;
                  return node;
              }
              tail.next = new BranchConn();
              tail = tail.next;
!             if (ques.type == Qtype.GREEDY) {
                  head = new Branch(head, null, tail);
              } else { // Reluctant quantifier
                  head = new Branch(null, head, tail);
              }
              root = tail;
              return head;
          } else if (node instanceof Curly) {
              Curly curly = (Curly) node;
!             if (curly.type == Qtype.POSSESSIVE) {
                  root = node;
                  return node;
              }
              // Discover if the group is deterministic
              TreeInfo info = new TreeInfo();
*** 3000,3010 ****
                                               capturingGroup);
                  return head;
              } else { // Non-deterministic
                  int temp = ((GroupHead) head).localIndex;
                  Loop loop;
!                 if (curly.type == GREEDY)
                      loop = new Loop(this.localCount, temp);
                  else  // Reluctant Curly
                      loop = new LazyLoop(this.localCount, temp);
                  Prolog prolog = new Prolog(loop);
                  this.localCount += 1;
--- 3025,3035 ----
                                               capturingGroup);
                  return head;
              } else { // Non-deterministic
                  int temp = ((GroupHead) head).localIndex;
                  Loop loop;
!                 if (curly.type == Qtype.GREEDY)
                      loop = new Loop(this.localCount, temp);
                  else  // Reluctant Curly
                      loop = new LazyLoop(this.localCount, temp);
                  Prolog prolog = new Prolog(loop);
                  this.localCount += 1;
*** 3029,3038 ****
--- 3054,3067 ----
          int groupIndex = 0;
          if (!anonymous)
              groupIndex = capturingGroupCount++;
          GroupHead head = new GroupHead(localIndex);
          root = new GroupTail(localIndex, groupIndex);
+ 
+         // for debug/print only, head.match does NOT need the "tail" info
+         head.tail = (GroupTail)root;
+ 
          if (!anonymous && groupIndex < 10)
              groupNodes[groupIndex] = head;
          return head;
      }
  
*** 3117,3133 ****
          }
      }
  
      static final int MAX_REPS   = 0x7FFFFFFF;
  
!     static final int GREEDY     = 0;
! 
!     static final int LAZY       = 1;
! 
!     static final int POSSESSIVE = 2;
  
!     static final int INDEPENDENT = 3;
  
      /**
       * Processes repetition. If the next character peeked is a quantifier
       * then new nodes must be appended to handle the repetition.
       * Prev could be a single or a group, so it could be a chain of nodes.
--- 3146,3175 ----
          }
      }
  
      static final int MAX_REPS   = 0x7FFFFFFF;
  
!     static enum Qtype {
!         GREEDY, LAZY, POSSESSIVE, INDEPENDENT
!     }
  
!     private Node curly(Node prev, int cmin) {
!         int ch = next();
!         if (ch == '?') {
!             next();
!             return new Curly(prev, cmin, MAX_REPS, Qtype.LAZY);
!         } else if (ch == '+') {
!             next();
!             return new Curly(prev, cmin, MAX_REPS, Qtype.POSSESSIVE);
!         }
!         if (prev instanceof BmpCharProperty) {
!             return new BmpCharPropertyGreedy((BmpCharProperty)prev, cmin);
!         } else if (prev instanceof CharProperty) {
!             return new CharPropertyGreedy((CharProperty)prev, cmin);
!         }
!         return new Curly(prev, cmin, MAX_REPS, Qtype.GREEDY);
!     }
  
      /**
       * Processes repetition. If the next character peeked is a quantifier
       * then new nodes must be appended to handle the repetition.
       * Prev could be a single or a group, so it could be a chain of nodes.
*** 3138,3173 ****
          switch (ch) {
          case '?':
              ch = next();
              if (ch == '?') {
                  next();
!                 return new Ques(prev, LAZY);
              } else if (ch == '+') {
                  next();
!                 return new Ques(prev, POSSESSIVE);
              }
!             return new Ques(prev, GREEDY);
          case '*':
!             ch = next();
!             if (ch == '?') {
!                 next();
!                 return new Curly(prev, 0, MAX_REPS, LAZY);
!             } else if (ch == '+') {
!                 next();
!                 return new Curly(prev, 0, MAX_REPS, POSSESSIVE);
!             }
!             return new Curly(prev, 0, MAX_REPS, GREEDY);
          case '+':
!             ch = next();
!             if (ch == '?') {
!                 next();
!                 return new Curly(prev, 1, MAX_REPS, LAZY);
!             } else if (ch == '+') {
!                 next();
!                 return new Curly(prev, 1, MAX_REPS, POSSESSIVE);
!             }
!             return new Curly(prev, 1, MAX_REPS, GREEDY);
          case '{':
              ch = temp[cursor+1];
              if (ASCII.isDigit(ch)) {
                  skip();
                  int cmin = 0;
--- 3180,3199 ----
          switch (ch) {
          case '?':
              ch = next();
              if (ch == '?') {
                  next();
!                 return new Ques(prev, Qtype.LAZY);
              } else if (ch == '+') {
                  next();
!                 return new Ques(prev, Qtype.POSSESSIVE);
              }
!             return new Ques(prev, Qtype.GREEDY);
          case '*':
!             return curly(prev, 0);
          case '+':
!             return curly(prev, 1);
          case '{':
              ch = temp[cursor+1];
              if (ASCII.isDigit(ch)) {
                  skip();
                  int cmin = 0;
*** 3192,3207 ****
                      throw error("Illegal repetition range");
                  Curly curly;
                  ch = peek();
                  if (ch == '?') {
                      next();
!                     curly = new Curly(prev, cmin, cmax, LAZY);
                  } else if (ch == '+') {
                      next();
!                     curly = new Curly(prev, cmin, cmax, POSSESSIVE);
                  } else {
!                     curly = new Curly(prev, cmin, cmax, GREEDY);
                  }
                  return curly;
              } else {
                  throw error("Illegal repetition");
              }
--- 3218,3233 ----
                      throw error("Illegal repetition range");
                  Curly curly;
                  ch = peek();
                  if (ch == '?') {
                      next();
!                     curly = new Curly(prev, cmin, cmax, Qtype.LAZY);
                  } else if (ch == '+') {
                      next();
!                     curly = new Curly(prev, cmin, cmax, Qtype.POSSESSIVE);
                  } else {
!                     curly = new Curly(prev, cmin, cmax, Qtype.GREEDY);
                  }
                  return curly;
              } else {
                  throw error("Illegal repetition");
              }
*** 3374,3387 ****
      /**
       *  Creates a bit vector for matching Latin-1 values. A normal BitClass
       *  never matches values above Latin-1, and a complemented BitClass always
       *  matches values above Latin-1.
       */
!     private static final class BitClass extends BmpCharProperty {
          final boolean[] bits;
!         BitClass() { bits = new boolean[256]; }
!         private BitClass(boolean[] bits) { this.bits = bits; }
          BitClass add(int c, int flags) {
              assert c >= 0 && c <= 255;
              if ((flags & CASE_INSENSITIVE) != 0) {
                  if (ASCII.isAscii(c)) {
                      bits[ASCII.toUpper(c)] = true;
--- 3400,3418 ----
      /**
       *  Creates a bit vector for matching Latin-1 values. A normal BitClass
       *  never matches values above Latin-1, and a complemented BitClass always
       *  matches values above Latin-1.
       */
!     static final class BitClass extends BmpCharProperty {
          final boolean[] bits;
!         BitClass() {
!             this(new boolean[256]);
!         }
!         private BitClass(boolean[] bits) {
!             super( ch -> ch < 256 && bits[ch]);
!             this.bits = bits;
!         }
          BitClass add(int c, int flags) {
              assert c >= 0 && c <= 255;
              if ((flags & CASE_INSENSITIVE) != 0) {
                  if (ASCII.isAscii(c)) {
                      bits[ASCII.toUpper(c)] = true;
*** 3392,3427 ****
                  }
              }
              bits[c] = true;
              return this;
          }
-         boolean isSatisfiedBy(int ch) {
-             return ch < 256 && bits[ch];
-         }
-     }
- 
-     /**
-      *  Returns a suitably optimized, single character matcher.
-      */
-     private CharProperty newSingle(final int ch) {
-         if (has(CASE_INSENSITIVE)) {
-             int lower, upper;
-             if (has(UNICODE_CASE)) {
-                 upper = Character.toUpperCase(ch);
-                 lower = Character.toLowerCase(upper);
-                 if (upper != lower)
-                     return new SingleU(lower);
-             } else if (ASCII.isAscii(ch)) {
-                 lower = ASCII.toLower(ch);
-                 upper = ASCII.toUpper(ch);
-                 if (lower != upper)
-                     return new SingleI(lower, upper);
-             }
-         }
-         if (isSupplementary(ch))
-             return new SingleS(ch);    // Match a given Unicode character
-         return new Single(ch);         // Match a given BMP character
      }
  
      /**
       *  Utility method for creating a string slice matcher.
       */
--- 3423,3432 ----
*** 3825,3846 ****
  
      /**
       * Abstract node class to match one character satisfying some
       * boolean property.
       */
!     private abstract static class CharProperty extends Node {
!         abstract boolean isSatisfiedBy(int ch);
!         CharProperty complement() {
!             return new CharProperty() {
!                     boolean isSatisfiedBy(int ch) {
!                         return ! CharProperty.this.isSatisfiedBy(ch);}};
          }
          boolean match(Matcher matcher, int i, CharSequence seq) {
              if (i < matcher.to) {
                  int ch = Character.codePointAt(seq, i);
!                 return isSatisfiedBy(ch)
!                     && next.match(matcher, i+Character.charCount(ch), seq);
              } else {
                  matcher.hitEnd = true;
                  return false;
              }
          }
--- 3830,3850 ----
  
      /**
       * Abstract node class to match one character satisfying some
       * boolean property.
       */
!     static class CharProperty extends Node {
!         CharPredicate predicate;
! 
!         CharProperty (CharPredicate predicate) {
!             this.predicate = predicate;
          }
          boolean match(Matcher matcher, int i, CharSequence seq) {
              if (i < matcher.to) {
                  int ch = Character.codePointAt(seq, i);
!                 return predicate.is(ch) &&
!                        next.match(matcher, i + Character.charCount(ch), seq);
              } else {
                  matcher.hitEnd = true;
                  return false;
              }
          }
*** 3853,4003 ****
  
      /**
       * Optimized version of CharProperty that works only for
       * properties never satisfied by Supplementary characters.
       */
!     private abstract static class BmpCharProperty extends CharProperty {
          boolean match(Matcher matcher, int i, CharSequence seq) {
              if (i < matcher.to) {
!                 return isSatisfiedBy(seq.charAt(i))
!                     && next.match(matcher, i+1, seq);
              } else {
                  matcher.hitEnd = true;
                  return false;
              }
          }
      }
  
!     /**
!      * Node class that matches a Supplementary Unicode character
!      */
!     static final class SingleS extends CharProperty {
!         final int c;
!         SingleS(int c) { this.c = c; }
!         boolean isSatisfiedBy(int ch) {
!             return ch == c;
!         }
      }
  
!     /**
!      * Optimization -- matches a given BMP character
!      */
!     static final class Single extends BmpCharProperty {
!         final int c;
!         Single(int c) { this.c = c; }
!         boolean isSatisfiedBy(int ch) {
!             return ch == c;
!         }
!     }
! 
!     /**
!      * Case insensitive matches a given BMP character
!      */
!     static final class SingleI extends BmpCharProperty {
!         final int lower;
!         final int upper;
!         SingleI(int lower, int upper) {
!             this.lower = lower;
!             this.upper = upper;
!         }
!         boolean isSatisfiedBy(int ch) {
!             return ch == lower || ch == upper;
!         }
!     }
! 
!     /**
!      * Unicode case insensitive matches a given Unicode character
!      */
!     static final class SingleU extends CharProperty {
!         final int lower;
!         SingleU(int lower) {
!             this.lower = lower;
!         }
!         boolean isSatisfiedBy(int ch) {
!             return lower == ch ||
!                 lower == Character.toLowerCase(Character.toUpperCase(ch));
!         }
!     }
! 
!     /**
!      * Node class that matches a Unicode block.
!      */
!     static final class Block extends CharProperty {
!         final Character.UnicodeBlock block;
!         Block(Character.UnicodeBlock block) {
!             this.block = block;
!         }
!         boolean isSatisfiedBy(int ch) {
!             return block == Character.UnicodeBlock.of(ch);
!         }
!     }
! 
!     /**
!      * Node class that matches a Unicode script
!      */
!     static final class Script extends CharProperty {
!         final Character.UnicodeScript script;
!         Script(Character.UnicodeScript script) {
!             this.script = script;
!         }
!         boolean isSatisfiedBy(int ch) {
!             return script == Character.UnicodeScript.of(ch);
!         }
!     }
! 
!     /**
!      * Node class that matches a Unicode category.
!      */
!     static final class Category extends CharProperty {
!         final int typeMask;
!         Category(int typeMask) { this.typeMask = typeMask; }
!         boolean isSatisfiedBy(int ch) {
!             return (typeMask & (1 << Character.getType(ch))) != 0;
!         }
      }
! 
!     /**
!      * Node class that matches a Unicode "type"
!      */
!     static final class Utype extends CharProperty {
!         final UnicodeProp uprop;
!         Utype(UnicodeProp uprop) { this.uprop = uprop; }
!         boolean isSatisfiedBy(int ch) {
!             return uprop.is(ch);
          }
      }
  
!     /**
!      * Node class that matches a POSIX type.
!      */
!     static final class Ctype extends BmpCharProperty {
!         final int ctype;
!         Ctype(int ctype) { this.ctype = ctype; }
!         boolean isSatisfiedBy(int ch) {
!             return ch < 128 && ASCII.isType(ch, ctype);
          }
      }
! 
!     /**
!      * Node class that matches a Perl vertical whitespace
!      */
!     static final class VertWS extends BmpCharProperty {
!         boolean isSatisfiedBy(int cp) {
!             return (cp >= 0x0A && cp <= 0x0D) ||
!                    cp == 0x85 || cp == 0x2028 || cp == 0x2029;
          }
      }
  
!     /**
!      * Node class that matches a Perl horizontal whitespace
!      */
!     static final class HorizWS extends BmpCharProperty {
!         boolean isSatisfiedBy(int cp) {
!             return cp == 0x09 || cp == 0x20 || cp == 0xa0 ||
!                    cp == 0x1680 || cp == 0x180e ||
!                    cp >= 0x2000 && cp <= 0x200a ||
!                    cp == 0x202f || cp == 0x205f || cp == 0x3000;
          }
      }
  
      /**
       * Node class that matches an unicode extended grapheme cluster
--- 3857,3928 ----
  
      /**
       * Optimized version of CharProperty that works only for
       * properties never satisfied by Supplementary characters.
       */
!     private static class BmpCharProperty extends CharProperty {
!         BmpCharProperty (BmpCharPredicate predicate) {
!             super(predicate);
!         }
          boolean match(Matcher matcher, int i, CharSequence seq) {
              if (i < matcher.to) {
!                 return predicate.is(seq.charAt(i)) &&
!                        next.match(matcher, i + 1, seq);
              } else {
                  matcher.hitEnd = true;
                  return false;
              }
          }
      }
  
!     private static class NFCCharProperty extends Node {
!         CharPredicate predicate;
!         NFCCharProperty (CharPredicate predicate) {
!             this.predicate = predicate;
          }
  
!         boolean match(Matcher matcher, int i, CharSequence seq) {
!             if (i < matcher.to) {
!                 int ch0 = Character.codePointAt(seq, i);
!                 int n = Character.charCount(ch0);
!                 int j = i + n;
!                 while (j < matcher.to) {
!                     int ch1 = Character.codePointAt(seq, j);
!                     if (Grapheme.isBoundary(ch0, ch1))
!                         break;
!                     ch0 = ch1;
!                     j += Character.charCount(ch1);
                  }
!                 if (i + n == j) {    // single, assume nfc cp
!                     if (predicate.is(ch0))
!                         return next.match(matcher, j, seq);
!                 } else {
!                     while (i + n < j) {
!                         String nfc = Normalizer.normalize(
!                             seq.toString().substring(i, j), Normalizer.Form.NFC);
!                         if (nfc.codePointCount(0, nfc.length()) == 1) {
!                             if (predicate.is(nfc.codePointAt(0)) &&
!                                 next.match(matcher, j, seq)) {
!                                 return true;
                              }
                          }
  
!                         ch0 = Character.codePointBefore(seq, j);
!                         j -= Character.charCount(ch0);
                      }
                  }
!                 if (j < matcher.to)
!                     return false;
              }
+             matcher.hitEnd = true;
+             return false;
          }
  
!         boolean study(TreeInfo info) {
!             info.minLength++;
!             info.deterministic = false;
!             return next.study(info);
          }
      }
  
      /**
       * Node class that matches an unicode extended grapheme cluster
*** 4215,4299 ****
          int toLower(int c) {
              return Character.toLowerCase(Character.toUpperCase(c));
          }
      }
  
-     private static boolean inRange(int lower, int ch, int upper) {
-         return lower <= ch && ch <= upper;
-     }
- 
-     /**
-      * Returns node for matching characters within an explicit value range.
-      */
-     private static CharProperty rangeFor(final int lower,
-                                          final int upper) {
-         return new CharProperty() {
-                 boolean isSatisfiedBy(int ch) {
-                     return inRange(lower, ch, upper);}};
-     }
- 
-     /**
-      * Returns node for matching characters within an explicit value
-      * range in a case insensitive manner.
-      */
-     private CharProperty caseInsensitiveRangeFor(final int lower,
-                                                  final int upper) {
-         if (has(UNICODE_CASE))
-             return new CharProperty() {
-                 boolean isSatisfiedBy(int ch) {
-                     if (inRange(lower, ch, upper))
-                         return true;
-                     int up = Character.toUpperCase(ch);
-                     return inRange(lower, up, upper) ||
-                            inRange(lower, Character.toLowerCase(up), upper);}};
-         return new CharProperty() {
-             boolean isSatisfiedBy(int ch) {
-                 return inRange(lower, ch, upper) ||
-                     ASCII.isAscii(ch) &&
-                         (inRange(lower, ASCII.toUpper(ch), upper) ||
-                          inRange(lower, ASCII.toLower(ch), upper));
-             }};
-     }
- 
-     /**
-      * Implements the Unicode category ALL and the dot metacharacter when
-      * in dotall mode.
-      */
-     static final class All extends CharProperty {
-         boolean isSatisfiedBy(int ch) {
-             return true;
-         }
-     }
- 
-     /**
-      * Node class for the dot metacharacter when dotall is not enabled.
-      */
-     static final class Dot extends CharProperty {
-         boolean isSatisfiedBy(int ch) {
-             return (ch != '\n' && ch != '\r'
-                     && (ch|1) != '\u2029'
-                     && ch != '\u0085');
-         }
-     }
- 
-     /**
-      * Node class for the dot metacharacter when dotall is not enabled
-      * but UNIX_LINES is enabled.
-      */
-     static final class UnixDot extends CharProperty {
-         boolean isSatisfiedBy(int ch) {
-             return ch != '\n';
-         }
-     }
- 
      /**
       * The 0 or 1 quantifier. This one class implements all three types.
       */
      static final class Ques extends Node {
          Node atom;
!         int type;
!         Ques(Node node, int type) {
              this.atom = node;
              this.type = type;
          }
          boolean match(Matcher matcher, int i, CharSequence seq) {
              switch (type) {
--- 4140,4156 ----
          int toLower(int c) {
              return Character.toLowerCase(Character.toUpperCase(c));
          }
      }
  
      /**
       * The 0 or 1 quantifier. This one class implements all three types.
       */
      static final class Ques extends Node {
          Node atom;
!         Qtype type;
!         Ques(Node node, Qtype type) {
              this.atom = node;
              this.type = type;
          }
          boolean match(Matcher matcher, int i, CharSequence seq) {
              switch (type) {
*** 4309,4319 ****
              default:
                  return atom.match(matcher, i, seq) && next.match(matcher, matcher.last, seq);
              }
          }
          boolean study(TreeInfo info) {
!             if (type != INDEPENDENT) {
                  int minL = info.minLength;
                  atom.study(info);
                  info.minLength = minL;
                  info.deterministic = false;
                  return next.study(info);
--- 4166,4176 ----
              default:
                  return atom.match(matcher, i, seq) && next.match(matcher, matcher.last, seq);
              }
          }
          boolean study(TreeInfo info) {
!             if (type != Qtype.INDEPENDENT) {
                  int minL = info.minLength;
                  atom.study(info);
                  info.minLength = minL;
                  info.deterministic = false;
                  return next.study(info);
*** 4323,4343 ****
              }
          }
      }
  
      /**
       * Handles the curly-brace style repetition with a specified minimum and
       * maximum occurrences. The * quantifier is handled as a special case.
       * This class handles the three types.
       */
      static final class Curly extends Node {
          Node atom;
!         int type;
          int cmin;
          int cmax;
  
!         Curly(Node node, int cmin, int cmax, int type) {
              this.atom = node;
              this.type = type;
              this.cmin = cmin;
              this.cmax = cmax;
          }
--- 4180,4273 ----
              }
          }
      }
  
      /**
+      * Handles the greedy style repetition with the minimum either be
+      * 0 or 1 and the maximum be MAX_REPS, for * and + quantifier.
+      */
+     static class CharPropertyGreedy extends Node {
+         final CharPredicate predicate;
+         final int cmin;
+ 
+         CharPropertyGreedy(CharProperty cp, int cmin) {
+             this.predicate = cp.predicate;
+             this.cmin = cmin;
+         }
+         boolean match(Matcher matcher, int i,  CharSequence seq) {
+             int n = 0;
+             int to = matcher.to;
+             // greedy, all the way down
+             while (i < to) {
+                 int ch = Character.codePointAt(seq, i);
+                 if (!predicate.is(ch))
+                    break;
+                 i += Character.charCount(ch);
+                 n++;
+             }
+             if (i >= to) {
+                 matcher.hitEnd = true;
+             }
+             while (n >= cmin) {
+                 if (next.match(matcher, i, seq))
+                     return true;
+                 if (n == cmin)
+                     return false;
+                  // backing off if match fails
+                 int ch = Character.codePointBefore(seq, i);
+                 i -= Character.charCount(ch);
+                 n--;
+             }
+             return false; 
+         }
+ 
+         boolean study(TreeInfo info) {
+             info.minLength += cmin;
+             if (info.maxValid) {
+                 info.maxLength += MAX_REPS;
+             }
+             info.deterministic = false;
+             return next.study(info);
+         }
+     }
+ 
+     static final class BmpCharPropertyGreedy extends CharPropertyGreedy {
+ 
+         BmpCharPropertyGreedy(BmpCharProperty bcp, int cmin) {
+             super(bcp, cmin);
+         }
+ 
+         boolean match(Matcher matcher, int i,  CharSequence seq) {
+             int n = 0;
+             int to = matcher.to;
+             while (i < to && predicate.is(seq.charAt(i))) {
+                 i++; n++;
+             }
+             if (i >= to) {
+                 matcher.hitEnd = true;
+             }
+             while (n >= cmin) {
+                 if (next.match(matcher, i, seq))
+                     return true;
+                 i--; n--;  // backing off if match fails
+             }
+             return false; 
+         }
+     }
+ 
+     /**
       * Handles the curly-brace style repetition with a specified minimum and
       * maximum occurrences. The * quantifier is handled as a special case.
       * This class handles the three types.
       */
      static final class Curly extends Node {
          Node atom;
!         Qtype type;
          int cmin;
          int cmax;
  
!         Curly(Node node, int cmin, int cmax, Qtype type) {
              this.atom = node;
              this.type = type;
              this.cmin = cmin;
              this.cmax = cmax;
          }
*** 4348,4360 ****
                      i = matcher.last;
                      continue;
                  }
                  return false;
              }
!             if (type == GREEDY)
                  return match0(matcher, i, j, seq);
!             else if (type == LAZY)
                  return match1(matcher, i, j, seq);
              else
                  return match2(matcher, i, j, seq);
          }
          // Greedy match.
--- 4278,4290 ----
                      i = matcher.last;
                      continue;
                  }
                  return false;
              }
!             if (type == Qtype.GREEDY)
                  return match0(matcher, i, j, seq);
!             else if (type == Qtype.LAZY)
                  return match1(matcher, i, j, seq);
              else
                  return match2(matcher, i, j, seq);
          }
          // Greedy match.
*** 4472,4489 ****
       * If capture is true then this class saves group settings and ensures
       * that groups are unset when backing off of a group match.
       */
      static final class GroupCurly extends Node {
          Node atom;
!         int type;
          int cmin;
          int cmax;
          int localIndex;
          int groupIndex;
          boolean capture;
  
!         GroupCurly(Node node, int cmin, int cmax, int type, int local,
                     int group, boolean capture) {
              this.atom = node;
              this.type = type;
              this.cmin = cmin;
              this.cmax = cmax;
--- 4402,4419 ----
       * If capture is true then this class saves group settings and ensures
       * that groups are unset when backing off of a group match.
       */
      static final class GroupCurly extends Node {
          Node atom;
!         Qtype type;
          int cmin;
          int cmax;
          int localIndex;
          int groupIndex;
          boolean capture;
  
!         GroupCurly(Node node, int cmin, int cmax, Qtype type, int local,
                     int group, boolean capture) {
              this.atom = node;
              this.type = type;
              this.cmin = cmin;
              this.cmax = cmax;
*** 4519,4531 ****
                      ret = false;
                      break;
                  }
              }
              if (ret) {
!                 if (type == GREEDY) {
                      ret = match0(matcher, i, cmin, seq);
!                 } else if (type == LAZY) {
                      ret = match1(matcher, i, cmin, seq);
                  } else {
                      ret = match2(matcher, i, cmin, seq);
                  }
              }
--- 4449,4461 ----
                      ret = false;
                      break;
                  }
              }
              if (ret) {
!                 if (type == Qtype.GREEDY) {
                      ret = match0(matcher, i, cmin, seq);
!                 } else if (type == Qtype.LAZY) {
                      ret = match1(matcher, i, cmin, seq);
                  } else {
                      ret = match2(matcher, i, cmin, seq);
                  }
              }
*** 4767,4776 ****
--- 4697,4707 ----
       * indicate that we do not want to unset the group if the reference
       * doesn't match.
       */
      static final class GroupHead extends Node {
          int localIndex;
+         GroupTail tail;    // for debug/print only, match does not need to know
          GroupHead(int localCount) {
              localIndex = localCount;
          }
          boolean match(Matcher matcher, int i, CharSequence seq) {
              int save = matcher.locals[localIndex];
*** 5360,5399 ****
              return !conditionMatched && next.match(matcher, i, seq);
          }
      }
  
      /**
-      * Returns the set union of two CharProperty nodes.
-      */
-     private static CharProperty union(final CharProperty lhs,
-                                       final CharProperty rhs) {
-         return new CharProperty() {
-                 boolean isSatisfiedBy(int ch) {
-                     return lhs.isSatisfiedBy(ch) || rhs.isSatisfiedBy(ch);}};
-     }
- 
-     /**
-      * Returns the set intersection of two CharProperty nodes.
-      */
-     private static CharProperty intersection(final CharProperty lhs,
-                                              final CharProperty rhs) {
-         return new CharProperty() {
-                 boolean isSatisfiedBy(int ch) {
-                     return lhs.isSatisfiedBy(ch) && rhs.isSatisfiedBy(ch);}};
-     }
- 
-     /**
-      * Returns the set difference of two CharProperty nodes.
-      */
-     private static CharProperty setDifference(final CharProperty lhs,
-                                               final CharProperty rhs) {
-         return new CharProperty() {
-                 boolean isSatisfiedBy(int ch) {
-                     return ! rhs.isSatisfiedBy(ch) && lhs.isSatisfiedBy(ch);}};
-     }
- 
-     /**
       * Handles word boundaries. Includes a field to allow this one class to
       * deal with the different types of word boundaries we can match. The word
       * characters include underscores, letters, and digits. Non spacing marks
       * can are also part of a word if they have a base character, otherwise
       * they are ignored for purposes of finding word boundaries.
--- 5291,5300 ----
*** 5409,5419 ****
              type = n;
              this.useUWORD = useUWORD;
          }
  
          boolean isWord(int ch) {
!             return useUWORD ? UnicodeProp.WORD.is(ch)
                              : (ch == '_' || Character.isLetterOrDigit(ch));
          }
  
          int check(Matcher matcher, int i, CharSequence seq) {
              int ch;
--- 5310,5320 ----
              type = n;
              this.useUWORD = useUWORD;
          }
  
          boolean isWord(int ch) {
!             return useUWORD ? CharPredicates.WORD.is(ch)
                              : (ch == '_' || Character.isLetterOrDigit(ch));
          }
  
          int check(Matcher matcher, int i, CharSequence seq) {
              int ch;
*** 5655,5873 ****
              matcher.hitEnd = true;
              return false;
          }
      }
  
! ///////////////////////////////////////////////////////////////////////////////
! ///////////////////////////////////////////////////////////////////////////////
  
      /**
!      *  This must be the very first initializer.
       */
!     static Node accept = new Node();
  
!     static Node lastAccept = new LastNode();
  
!     private static class CharPropertyNames {
  
!         static CharProperty charPropertyFor(String name) {
!             CharPropertyFactory m = map.get(name);
!             return m == null ? null : m.make();
          }
  
!         private abstract static class CharPropertyFactory {
!             abstract CharProperty make();
          }
  
!         private static void defCategory(String name,
!                                         final int typeMask) {
!             map.put(name, new CharPropertyFactory() {
!                     CharProperty make() { return new Category(typeMask);}});
          }
  
!         private static void defRange(String name,
!                                      final int lower, final int upper) {
!             map.put(name, new CharPropertyFactory() {
!                     CharProperty make() { return rangeFor(lower, upper);}});
          }
  
!         private static void defCtype(String name,
!                                      final int ctype) {
!             map.put(name, new CharPropertyFactory() {
!                     CharProperty make() { return new Ctype(ctype);}});
          }
  
!         private abstract static class CloneableProperty
!             extends CharProperty implements Cloneable
!         {
!             public CloneableProperty clone() {
!                 try {
!                     return (CloneableProperty) super.clone();
!                 } catch (CloneNotSupportedException e) {
!                     throw new AssertionError(e);
!                 }
!             }
!         }
! 
!         private static void defClone(String name,
!                                      final CloneableProperty p) {
!             map.put(name, new CharPropertyFactory() {
!                     CharProperty make() { return p.clone();}});
!         }
! 
!         private static final HashMap<String, CharPropertyFactory> map
!             = new HashMap<>();
! 
!         static {
!             // Unicode character property aliases, defined in
!             // http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt
!             defCategory("Cn", 1<<Character.UNASSIGNED);
!             defCategory("Lu", 1<<Character.UPPERCASE_LETTER);
!             defCategory("Ll", 1<<Character.LOWERCASE_LETTER);
!             defCategory("Lt", 1<<Character.TITLECASE_LETTER);
!             defCategory("Lm", 1<<Character.MODIFIER_LETTER);
!             defCategory("Lo", 1<<Character.OTHER_LETTER);
!             defCategory("Mn", 1<<Character.NON_SPACING_MARK);
!             defCategory("Me", 1<<Character.ENCLOSING_MARK);
!             defCategory("Mc", 1<<Character.COMBINING_SPACING_MARK);
!             defCategory("Nd", 1<<Character.DECIMAL_DIGIT_NUMBER);
!             defCategory("Nl", 1<<Character.LETTER_NUMBER);
!             defCategory("No", 1<<Character.OTHER_NUMBER);
!             defCategory("Zs", 1<<Character.SPACE_SEPARATOR);
!             defCategory("Zl", 1<<Character.LINE_SEPARATOR);
!             defCategory("Zp", 1<<Character.PARAGRAPH_SEPARATOR);
!             defCategory("Cc", 1<<Character.CONTROL);
!             defCategory("Cf", 1<<Character.FORMAT);
!             defCategory("Co", 1<<Character.PRIVATE_USE);
!             defCategory("Cs", 1<<Character.SURROGATE);
!             defCategory("Pd", 1<<Character.DASH_PUNCTUATION);
!             defCategory("Ps", 1<<Character.START_PUNCTUATION);
!             defCategory("Pe", 1<<Character.END_PUNCTUATION);
!             defCategory("Pc", 1<<Character.CONNECTOR_PUNCTUATION);
!             defCategory("Po", 1<<Character.OTHER_PUNCTUATION);
!             defCategory("Sm", 1<<Character.MATH_SYMBOL);
!             defCategory("Sc", 1<<Character.CURRENCY_SYMBOL);
!             defCategory("Sk", 1<<Character.MODIFIER_SYMBOL);
!             defCategory("So", 1<<Character.OTHER_SYMBOL);
!             defCategory("Pi", 1<<Character.INITIAL_QUOTE_PUNCTUATION);
!             defCategory("Pf", 1<<Character.FINAL_QUOTE_PUNCTUATION);
!             defCategory("L", ((1<<Character.UPPERCASE_LETTER) |
!                               (1<<Character.LOWERCASE_LETTER) |
!                               (1<<Character.TITLECASE_LETTER) |
!                               (1<<Character.MODIFIER_LETTER)  |
!                               (1<<Character.OTHER_LETTER)));
!             defCategory("M", ((1<<Character.NON_SPACING_MARK) |
!                               (1<<Character.ENCLOSING_MARK)   |
!                               (1<<Character.COMBINING_SPACING_MARK)));
!             defCategory("N", ((1<<Character.DECIMAL_DIGIT_NUMBER) |
!                               (1<<Character.LETTER_NUMBER)        |
!                               (1<<Character.OTHER_NUMBER)));
!             defCategory("Z", ((1<<Character.SPACE_SEPARATOR) |
!                               (1<<Character.LINE_SEPARATOR)  |
!                               (1<<Character.PARAGRAPH_SEPARATOR)));
!             defCategory("C", ((1<<Character.CONTROL)     |
!                               (1<<Character.FORMAT)      |
!                               (1<<Character.PRIVATE_USE) |
!                               (1<<Character.SURROGATE))); // Other
!             defCategory("P", ((1<<Character.DASH_PUNCTUATION)      |
!                               (1<<Character.START_PUNCTUATION)     |
!                               (1<<Character.END_PUNCTUATION)       |
!                               (1<<Character.CONNECTOR_PUNCTUATION) |
!                               (1<<Character.OTHER_PUNCTUATION)     |
!                               (1<<Character.INITIAL_QUOTE_PUNCTUATION) |
!                               (1<<Character.FINAL_QUOTE_PUNCTUATION)));
!             defCategory("S", ((1<<Character.MATH_SYMBOL)     |
!                               (1<<Character.CURRENCY_SYMBOL) |
!                               (1<<Character.MODIFIER_SYMBOL) |
!                               (1<<Character.OTHER_SYMBOL)));
!             defCategory("LC", ((1<<Character.UPPERCASE_LETTER) |
!                                (1<<Character.LOWERCASE_LETTER) |
!                                (1<<Character.TITLECASE_LETTER)));
!             defCategory("LD", ((1<<Character.UPPERCASE_LETTER) |
!                                (1<<Character.LOWERCASE_LETTER) |
!                                (1<<Character.TITLECASE_LETTER) |
!                                (1<<Character.MODIFIER_LETTER)  |
!                                (1<<Character.OTHER_LETTER)     |
!                                (1<<Character.DECIMAL_DIGIT_NUMBER)));
!             defRange("L1", 0x00, 0xFF); // Latin-1
!             map.put("all", new CharPropertyFactory() {
!                     CharProperty make() { return new All(); }});
! 
!             // Posix regular expression character classes, defined in
!             // http://www.unix.org/onlinepubs/009695399/basedefs/xbd_chap09.html
!             defRange("ASCII", 0x00, 0x7F);   // ASCII
!             defCtype("Alnum", ASCII.ALNUM);  // Alphanumeric characters
!             defCtype("Alpha", ASCII.ALPHA);  // Alphabetic characters
!             defCtype("Blank", ASCII.BLANK);  // Space and tab characters
!             defCtype("Cntrl", ASCII.CNTRL);  // Control characters
!             defRange("Digit", '0', '9');     // Numeric characters
!             defCtype("Graph", ASCII.GRAPH);  // printable and visible
!             defRange("Lower", 'a', 'z');     // Lower-case alphabetic
!             defRange("Print", 0x20, 0x7E);   // Printable characters
!             defCtype("Punct", ASCII.PUNCT);  // Punctuation characters
!             defCtype("Space", ASCII.SPACE);  // Space characters
!             defRange("Upper", 'A', 'Z');     // Upper-case alphabetic
!             defCtype("XDigit",ASCII.XDIGIT); // hexadecimal digits
! 
!             // Java character properties, defined by methods in Character.java
!             defClone("javaLowerCase", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isLowerCase(ch);}});
!             defClone("javaUpperCase", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isUpperCase(ch);}});
!             defClone("javaAlphabetic", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isAlphabetic(ch);}});
!             defClone("javaIdeographic", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isIdeographic(ch);}});
!             defClone("javaTitleCase", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isTitleCase(ch);}});
!             defClone("javaDigit", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isDigit(ch);}});
!             defClone("javaDefined", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isDefined(ch);}});
!             defClone("javaLetter", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isLetter(ch);}});
!             defClone("javaLetterOrDigit", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isLetterOrDigit(ch);}});
!             defClone("javaJavaIdentifierStart", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isJavaIdentifierStart(ch);}});
!             defClone("javaJavaIdentifierPart", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isJavaIdentifierPart(ch);}});
!             defClone("javaUnicodeIdentifierStart", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isUnicodeIdentifierStart(ch);}});
!             defClone("javaUnicodeIdentifierPart", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isUnicodeIdentifierPart(ch);}});
!             defClone("javaIdentifierIgnorable", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isIdentifierIgnorable(ch);}});
!             defClone("javaSpaceChar", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isSpaceChar(ch);}});
!             defClone("javaWhitespace", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isWhitespace(ch);}});
!             defClone("javaISOControl", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isISOControl(ch);}});
!             defClone("javaMirrored", new CloneableProperty() {
!                 boolean isSatisfiedBy(int ch) {
!                     return Character.isMirrored(ch);}});
          }
      }
  
      /**
       * Creates a predicate which can be used to match a string.
       *
       * @return  The predicate which can be used for matching on a string
       * @since   1.8
--- 5556,5714 ----
              matcher.hitEnd = true;
              return false;
          }
      }
  
!     @FunctionalInterface
!     static interface CharPredicate {
!         boolean is(int ch);
! 
!         default CharPredicate and(CharPredicate p) {
!             return ch -> is(ch) && p.is(ch);
!         }
!         default CharPredicate union(CharPredicate p) {
!             return ch -> is(ch) || p.is(ch);
!         }
!         default CharPredicate union(CharPredicate p1,
!                                     CharPredicate p2 ) {
!             return ch -> is(ch) || p1.is(ch) || p2.is(ch);
!         }
!         default CharPredicate negate() {
!             return ch -> !is(ch);
!         }
!     }
! 
!     static interface BmpCharPredicate extends CharPredicate {
! 
!         default CharPredicate and(CharPredicate p) {
!             if(p instanceof BmpCharPredicate)
!                 return (BmpCharPredicate)((ch) -> is(ch) && p.is(ch));
!             return ch -> is(ch) && p.is(ch);
!         }
!         default CharPredicate union(CharPredicate p) {
!             if (p instanceof BmpCharPredicate)
!                 return (BmpCharPredicate)((ch) -> is(ch) || p.is(ch));
!             return ch -> is(ch) || p.is(ch);
!         }
!         static CharPredicate union(CharPredicate... predicates) {
!             CharPredicate cp = ch -> {
!                 for (CharPredicate p : predicates) {
!                     if (!p.is(ch))
!                         return false;
!                 }
!                 return true;
!             };
!             for (CharPredicate p : predicates) {
!                 if (! (p instanceof BmpCharPredicate))
!                     return cp;
!             }
!             return (BmpCharPredicate)cp;
!         }
!     }
! 
!     /**
!      * matches a Perl vertical whitespace
!      */
!     static BmpCharPredicate VertWS = cp ->
!         (cp >= 0x0A && cp <= 0x0D) || cp == 0x85 || cp == 0x2028 || cp == 0x2029;
  
      /**
!      * matches a Perl horizontal whitespace
       */
!     static BmpCharPredicate HorizWS = cp ->
!         cp == 0x09 || cp == 0x20 || cp == 0xa0 || cp == 0x1680 ||
!         cp == 0x180e || cp >= 0x2000 && cp <= 0x200a ||  cp == 0x202f ||
!         cp == 0x205f || cp == 0x3000;
  
!     /**
!      *  for the Unicode category ALL and the dot metacharacter when
!      *  in dotall mode.
!      */
!     static CharPredicate ALL = ch -> true;
  
!     /**
!      * for the dot metacharacter when dotall is not enabled.
!      */
!     static CharPredicate DOT = ch -> (ch != '\n' && ch != '\r'
!                                           && (ch|1) != '\u2029'
!                                           && ch != '\u0085');
!     /**
!      *  the dot metacharacter when dotall is not enabled but UNIX_LINES is enabled.
!      */
!     static CharPredicate UNIXDOT = ch ->  ch != '\n';
  
!     /**
!      * Indicate that matches a Supplementary Unicode character
!      */
!     static CharPredicate SingleS(int c) {
!         return ch -> ch == c;
      }
  
!     /**
!      * A bmp/optimized predicate of single
!      */
!     static BmpCharPredicate Single(int c) {
!         return ch -> ch == c;
      }
  
!     /**
!      * Case insensitive matches a given BMP character
!      */
!     static BmpCharPredicate SingleI(int lower, int upper) {
!         return ch -> ch == lower || ch == upper;
      }
  
!     /**
!      * Unicode case insensitive matches a given Unicode character
!      */
!     static CharPredicate SingleU(int lower) {
!         return ch -> lower == ch ||
!                      lower == Character.toLowerCase(Character.toUpperCase(ch));
      }
  
!     private static boolean inRange(int lower, int ch, int upper) {
!         return lower <= ch && ch <= upper;
      }
  
!     /**
!      * Charactrs within a explicit value range
!      */
!     static CharPredicate Range(int lower, int upper) {
!         if (upper < Character.MIN_HIGH_SURROGATE ||
!             lower > Character.MAX_HIGH_SURROGATE &&
!             upper < Character.MIN_SUPPLEMENTARY_CODE_POINT)
!             return (BmpCharPredicate)(ch -> inRange(lower, ch, upper));
!         return ch -> inRange(lower, ch, upper);
      }
+ 
+    /**
+     * Charactrs within a explicit value range in a case insensitive manner.
+     */
+     static CharPredicate CIRange(int lower, int upper) {
+         return ch -> inRange(lower, ch, upper) ||
+                      ASCII.isAscii(ch) &&
+                      (inRange(lower, ASCII.toUpper(ch), upper) ||
+                       inRange(lower, ASCII.toLower(ch), upper));
      }
  
+     static CharPredicate CIRangeU(int lower, int upper) {
+         return ch -> {
+             if (inRange(lower, ch, upper))
+                 return true;
+             int up = Character.toUpperCase(ch);
+             return inRange(lower, up, upper) ||
+                    inRange(lower, Character.toLowerCase(up), upper);
+         };
+     }
+ 
+     /**
+      *  This must be the very first initializer.
+      */
+     static Node accept = new Node();
+ 
+     static Node lastAccept = new LastNode();
+ 
      /**
       * Creates a predicate which can be used to match a string.
       *
       * @return  The predicate which can be used for matching on a string
       * @since   1.8