< prev index next >

src/java.base/share/classes/java/util/regex/Pattern.java

Print this page
rev 54580 : [mq]: 8214245-Case-insensitive-matching-doesnt-work-correctly-for-POSIX-character-classes


2855                 throw error("Empty character family");
2856             name = new String(temp, i, j-i-1);
2857         }
2858 
2859         int i = name.indexOf('=');
2860         if (i != -1) {
2861             // property construct \p{name=value}
2862             String value = name.substring(i + 1);
2863             name = name.substring(0, i).toLowerCase(Locale.ENGLISH);
2864             switch (name) {
2865                 case "sc":
2866                 case "script":
2867                     p = CharPredicates.forUnicodeScript(value);
2868                     break;
2869                 case "blk":
2870                 case "block":
2871                     p = CharPredicates.forUnicodeBlock(value);
2872                     break;
2873                 case "gc":
2874                 case "general_category":
2875                     p = CharPredicates.forProperty(value);
2876                     break;
2877                 default:
2878                     break;
2879             }
2880             if (p == null)
2881                 throw error("Unknown Unicode property {name=<" + name + ">, "
2882                              + "value=<" + value + ">}");
2883 
2884         } else {
2885             if (name.startsWith("In")) {
2886                 // \p{InBlockName}
2887                 p = CharPredicates.forUnicodeBlock(name.substring(2));
2888             } else if (name.startsWith("Is")) {
2889                 // \p{IsGeneralCategory} and \p{IsScriptName}
2890                 name = name.substring(2);
2891                 p = CharPredicates.forUnicodeProperty(name);
2892                 if (p == null)
2893                     p = CharPredicates.forProperty(name);
2894                 if (p == null)
2895                     p = CharPredicates.forUnicodeScript(name);
2896             } else {
2897                 if (has(UNICODE_CHARACTER_CLASS)) {
2898                     p = CharPredicates.forPOSIXName(name);
2899                 }
2900                 if (p == null)
2901                     p = CharPredicates.forProperty(name);
2902             }
2903             if (p == null)
2904                 throw error("Unknown character property name {In/Is" + name + "}");
2905         }
2906         if (isComplement) {
2907             // it might be too expensive to detect if a complement of
2908             // CharProperty can match "certain" supplementary. So just
2909             // go with StartS.
2910             hasSupplementary = true;
2911             p = p.negate();
2912         }
2913         return p;
2914     }
2915 
2916     private CharProperty newCharProperty(CharPredicate p) {
2917         if (p == null)
2918             return null;
2919         if (p instanceof BmpCharPredicate)
2920             return new BmpCharProperty((BmpCharPredicate)p);
2921         else


5650                     return true;
5651                 }
5652                 i += countChars(seq, i, 1);
5653             }
5654             matcher.hitEnd = true;
5655             return false;
5656         }
5657     }
5658 
5659     @FunctionalInterface
         // Single-method predicate over an int value. The default methods
         // declared here derive new predicates by combining this one with
         // others; each returns a fresh lambda and leaves the operands as-is.
5660     static interface CharPredicate {
             /**
              * Tests the given value against this predicate.
              *
              * @param ch the value to test (presumably a character or code
              *           point -- confirm against callers)
              * @return {@code true} if this predicate accepts {@code ch}
              */
5661         boolean is(int ch);
5662 
             /**
              * Returns a predicate that accepts a value only when both this
              * predicate and {@code p} accept it. {@code p} is consulted only
              * if this predicate accepts (short-circuit {@code &&}).
              *
              * @param p the other predicate; not null-checked here, it is
              *          dereferenced only when the result is evaluated
              * @return the conjunction of this predicate and {@code p}
              */
5663         default CharPredicate and(CharPredicate p) {
5664             return ch -> is(ch) && p.is(ch);
5665         }
             /**
              * Returns a predicate that accepts a value when this predicate or
              * {@code p} accepts it. {@code p} is consulted only if this
              * predicate rejects (short-circuit {@code ||}).
              *
              * @param p the other predicate
              * @return the disjunction of this predicate and {@code p}
              */
5666         default CharPredicate union(CharPredicate p) {
5667             return ch -> is(ch) || p.is(ch);
5668         }
             /**
              * Three-way form of {@link #union(CharPredicate)}: accepts a value
              * when this predicate, {@code p1} or {@code p2} accepts it, tested
              * in that order with short-circuit evaluation.
              *
              * @param p1 the second predicate to try
              * @param p2 the third predicate to try
              * @return the disjunction of this predicate, {@code p1} and {@code p2}
              */
5669         default CharPredicate union(CharPredicate p1,
5670                                     CharPredicate p2 ) {
5671             return ch -> is(ch) || p1.is(ch) || p2.is(ch);
5672         }
             /**
              * Returns a predicate that accepts exactly the values this
              * predicate rejects.
              *
              * @return the logical complement of this predicate
              */
5673         default CharPredicate negate() {
5674             return ch -> !is(ch);
5675         }
5676     }
5677 
5678     static interface BmpCharPredicate extends CharPredicate {
5679 
5680         default CharPredicate and(CharPredicate p) {
5681             if(p instanceof BmpCharPredicate)
5682                 return (BmpCharPredicate)(ch -> is(ch) && p.is(ch));
5683             return ch -> is(ch) && p.is(ch);
5684         }
5685         default CharPredicate union(CharPredicate p) {
5686             if (p instanceof BmpCharPredicate)
5687                 return (BmpCharPredicate)(ch -> is(ch) || p.is(ch));
5688             return ch -> is(ch) || p.is(ch);
5689         }
5690         static CharPredicate union(CharPredicate... predicates) {
5691             CharPredicate cp = ch -> {
5692                 for (CharPredicate p : predicates) {
5693                     if (!p.is(ch))
5694                         return false;
5695                 }
5696                 return true;
5697             };
5698             for (CharPredicate p : predicates) {
5699                 if (! (p instanceof BmpCharPredicate))
5700                     return cp;
5701             }




2855                 throw error("Empty character family");
2856             name = new String(temp, i, j-i-1);
2857         }
2858 
2859         int i = name.indexOf('=');
2860         if (i != -1) {
2861             // property construct \p{name=value}
2862             String value = name.substring(i + 1);
2863             name = name.substring(0, i).toLowerCase(Locale.ENGLISH);
2864             switch (name) {
2865                 case "sc":
2866                 case "script":
2867                     p = CharPredicates.forUnicodeScript(value);
2868                     break;
2869                 case "blk":
2870                 case "block":
2871                     p = CharPredicates.forUnicodeBlock(value);
2872                     break;
2873                 case "gc":
2874                 case "general_category":
2875                     p = CharPredicates.forProperty(value, has(CASE_INSENSITIVE));
2876                     break;
2877                 default:
2878                     break;
2879             }
2880             if (p == null)
2881                 throw error("Unknown Unicode property {name=<" + name + ">, "
2882                              + "value=<" + value + ">}");
2883 
2884         } else {
2885             if (name.startsWith("In")) {
2886                 // \p{InBlockName}
2887                 p = CharPredicates.forUnicodeBlock(name.substring(2));
2888             } else if (name.startsWith("Is")) {
2889                 // \p{IsGeneralCategory} and \p{IsScriptName}
2890                 name = name.substring(2);
2891                 p = CharPredicates.forUnicodeProperty(name, has(CASE_INSENSITIVE));
2892                 if (p == null)
2893                     p = CharPredicates.forProperty(name, has(CASE_INSENSITIVE));
2894                 if (p == null)
2895                     p = CharPredicates.forUnicodeScript(name);
2896             } else {
2897                 if (has(UNICODE_CHARACTER_CLASS)) {
2898                     p = CharPredicates.forPOSIXName(name, has(CASE_INSENSITIVE));
2899                 }
2900                 if (p == null)
2901                     p = CharPredicates.forProperty(name, has(CASE_INSENSITIVE));
2902             }
2903             if (p == null)
2904                 throw error("Unknown character property name {In/Is" + name + "}");
2905         }
2906         if (isComplement) {
2907             // it might be too expensive to detect if a complement of
2908             // CharProperty can match "certain" supplementary. So just
2909             // go with StartS.
2910             hasSupplementary = true;
2911             p = p.negate();
2912         }
2913         return p;
2914     }
2915 
2916     private CharProperty newCharProperty(CharPredicate p) {
2917         if (p == null)
2918             return null;
2919         if (p instanceof BmpCharPredicate)
2920             return new BmpCharProperty((BmpCharPredicate)p);
2921         else


5650                     return true;
5651                 }
5652                 i += countChars(seq, i, 1);
5653             }
5654             matcher.hitEnd = true;
5655             return false;
5656         }
5657     }
5658 
5659     @FunctionalInterface
         // Single-method predicate over an int value. The default methods
         // declared here derive new predicates by combining this one with
         // others; each returns a fresh lambda and leaves the operands as-is.
5660     static interface CharPredicate {
             /**
              * Tests the given value against this predicate.
              *
              * @param ch the value to test (presumably a character or code
              *           point -- confirm against callers)
              * @return {@code true} if this predicate accepts {@code ch}
              */
5661         boolean is(int ch);
5662 
             /**
              * Returns a predicate that accepts a value only when both this
              * predicate and {@code p} accept it. {@code p} is consulted only
              * if this predicate accepts (short-circuit {@code &&}).
              *
              * @param p the other predicate; not null-checked here, it is
              *          dereferenced only when the result is evaluated
              * @return the conjunction of this predicate and {@code p}
              */
5663         default CharPredicate and(CharPredicate p) {
5664             return ch -> is(ch) && p.is(ch);
5665         }
             /**
              * Returns a predicate that accepts a value when this predicate or
              * {@code p} accepts it. {@code p} is consulted only if this
              * predicate rejects (short-circuit {@code ||}).
              *
              * @param p the other predicate
              * @return the disjunction of this predicate and {@code p}
              */
5666         default CharPredicate union(CharPredicate p) {
5667             return ch -> is(ch) || p.is(ch);
5668         }
             /**
              * Three-way form of {@link #union(CharPredicate)}: accepts a value
              * when this predicate, {@code p1} or {@code p2} accepts it, tested
              * in that order with short-circuit evaluation.
              *
              * @param p1 the second predicate to try
              * @param p2 the third predicate to try
              * @return the disjunction of this predicate, {@code p1} and {@code p2}
              */
5669         default CharPredicate union(CharPredicate p1,
5670                                     CharPredicate p2) {
5671             return ch -> is(ch) || p1.is(ch) || p2.is(ch);
5672         }
             /**
              * Returns a predicate that accepts exactly the values this
              * predicate rejects.
              *
              * @return the logical complement of this predicate
              */
5673         default CharPredicate negate() {
5674             return ch -> !is(ch);
5675         }
5676     }
5677 
5678     static interface BmpCharPredicate extends CharPredicate {
5679 
5680         default CharPredicate and(CharPredicate p) {
5681             if (p instanceof BmpCharPredicate)
5682                 return (BmpCharPredicate)(ch -> is(ch) && p.is(ch));
5683             return ch -> is(ch) && p.is(ch);
5684         }
5685         default CharPredicate union(CharPredicate p) {
5686             if (p instanceof BmpCharPredicate)
5687                 return (BmpCharPredicate)(ch -> is(ch) || p.is(ch));
5688             return ch -> is(ch) || p.is(ch);
5689         }
5690         static CharPredicate union(CharPredicate... predicates) {
5691             CharPredicate cp = ch -> {
5692                 for (CharPredicate p : predicates) {
5693                     if (!p.is(ch))
5694                         return false;
5695                 }
5696                 return true;
5697             };
5698             for (CharPredicate p : predicates) {
5699                 if (! (p instanceof BmpCharPredicate))
5700                     return cp;
5701             }


< prev index next >