src/share/classes/java/util/regex/Pattern.java

Print this page




2480                                 prev = rightNode;
2481                         } else {
2482                             prev = intersection(prev, node);
2483                         }
2484                     } else {
2485                         // treat as a literal &
2486                         unread();
2487                         break;
2488                     }
2489                     continue;
2490                 case 0:
2491                     firstInClass = false;
2492                     if (cursor >= patternLength)
2493                         throw error("Unclosed character class");
2494                     break;
2495                 case ']':
2496                     firstInClass = false;
2497                     if (prev != null) {
2498                         if (consume)
2499                             next();

2500                         return prev;


2501                     }
2502                     break;
2503                 default:
2504                     firstInClass = false;
2505                     break;
2506             }
2507             node = range(bits);
2508             if (include) {
2509                 if (prev == null) {
2510                     prev = node;
2511                 } else {
2512                     if (prev != node)
2513                         prev = union(prev, node);
2514                 }
2515             } else {
2516                 if (prev == null) {
2517                     prev = node.complement();
2518                 } else {
2519                     if (prev != node)
2520                         prev = setDifference(prev, node);
2521                 }
2522             }
2523             ch = peek();
2524         }
2525     }
2526 
2527     private CharProperty bitsOrSingle(BitClass bits, int ch) {
2528         /* Bits can only handle codepoints in [u+0000-u+00ff] range.
2529            Use "single" node instead of bits when dealing with unicode
2530            case folding for codepoints listed below.
2531            (1)Uppercase out of range: u+00ff, u+00b5
2532               toUpperCase(u+00ff) -> u+0178
2533               toUpperCase(u+00b5) -> u+039c
2534            (2)LatinSmallLetterLongS u+17f
2535               toUpperCase(u+017f) -> u+0053
2536            (3)LatinSmallLetterDotlessI u+131
2537               toUpperCase(u+0131) -> u+0049
2538            (4)LatinCapitalLetterIWithDotAbove u+0130
2539               toLowerCase(u+0130) -> u+0069
2540            (5)KelvinSign u+212a
2541               toLowerCase(u+212a) ==> u+006B
2542            (6)AngstromSign u+212b


5145      * Returns the set union of two CharProperty nodes.
5146      */
5147     private static CharProperty union(final CharProperty lhs,
5148                                       final CharProperty rhs) {
5149         return new CharProperty() { // new node; lhs and rhs are consulted each time a ch is tested
5150                 boolean isSatisfiedBy(int ch) { // || short-circuits: rhs is consulted only when lhs is not satisfied
5151                     return lhs.isSatisfiedBy(ch) || rhs.isSatisfiedBy(ch);}};
5152     }
5153 
5154     /**
5155      * Returns the set intersection of two CharProperty nodes: a new node that is satisfied by ch only when both lhs and rhs are.
5156      */
5157     private static CharProperty intersection(final CharProperty lhs,
5158                                              final CharProperty rhs) {
5159         return new CharProperty() { // new node; lhs and rhs are consulted each time a ch is tested
5160                 boolean isSatisfiedBy(int ch) { // && short-circuits: rhs is consulted only when lhs is satisfied
5161                     return lhs.isSatisfiedBy(ch) && rhs.isSatisfiedBy(ch);}};
5162     }
5163 
5164     /**
5165      * Returns the set difference of two CharProperty nodes: a new node that is satisfied by ch when lhs is and rhs is not.
5166      */
5167     private static CharProperty setDifference(final CharProperty lhs,
5168                                               final CharProperty rhs) {
5169         return new CharProperty() { // new node; lhs and rhs are consulted each time a ch is tested
5170                 boolean isSatisfiedBy(int ch) { // rhs is tested first; lhs is consulted only when rhs is not satisfied
5171                     return ! rhs.isSatisfiedBy(ch) && lhs.isSatisfiedBy(ch);}};
5172     }
5173 
5174     /**
5175      * Handles word boundaries. Includes a field to allow this one class to
5176      * deal with the different types of word boundaries we can match. The word
5177      * characters include underscores, letters, and digits. Non-spacing marks
5178      * are also part of a word if they have a base character, otherwise
5179      * they are ignored for purposes of finding word boundaries.
5180      */
5181     static final class Bound extends Node {
5182         static int LEFT = 0x1;
5183         static int RIGHT= 0x2;
5184         static int BOTH = 0x3;
5185         static int NONE = 0x4;
5186         int type;
5187         boolean useUWORD;
5188         Bound(int n, boolean useUWORD) {
5189             type = n;
5190             this.useUWORD = useUWORD;
5191         }
5192 
5193         boolean isWord(int ch) {
5194             return useUWORD ? UnicodeProp.WORD.is(ch)




2480                                 prev = rightNode;
2481                         } else {
2482                             prev = intersection(prev, node);
2483                         }
2484                     } else {
2485                         // treat as a literal &
2486                         unread();
2487                         break;
2488                     }
2489                     continue;
2490                 case 0:
2491                     firstInClass = false;
2492                     if (cursor >= patternLength)
2493                         throw error("Unclosed character class");
2494                     break;
2495                 case ']':
2496                     firstInClass = false;
2497                     if (prev != null) {
2498                         if (consume)
2499                             next();
2500                        if (include)
2501                            return prev;
2502                        else
2503                            return prev.complement();
2504                     }
2505                     break;
2506                 default:
2507                     firstInClass = false;
2508                     break;
2509             }
2510             node = range(bits);

2511             if (prev == null) {
2512                 prev = node;
2513             } else {
2514                 if (prev != node)
2515                     prev = union(prev, node);
2516             }








2517             ch = peek();
2518         }
2519     }
2520 
2521     private CharProperty bitsOrSingle(BitClass bits, int ch) {
2522         /* Bits can only handle codepoints in [u+0000-u+00ff] range.
2523            Use "single" node instead of bits when dealing with unicode
2524            case folding for codepoints listed below.
2525            (1)Uppercase out of range: u+00ff, u+00b5
2526               toUpperCase(u+00ff) -> u+0178
2527               toUpperCase(u+00b5) -> u+039c
2528            (2)LatinSmallLetterLongS u+17f
2529               toUpperCase(u+017f) -> u+0053
2530            (3)LatinSmallLetterDotlessI u+131
2531               toUpperCase(u+0131) -> u+0049
2532            (4)LatinCapitalLetterIWithDotAbove u+0130
2533               toLowerCase(u+0130) -> u+0069
2534            (5)KelvinSign u+212a
2535               toLowerCase(u+212a) ==> u+006B
2536            (6)AngstromSign u+212b


5139      * Returns the set union of two CharProperty nodes.
5140      */
5141     private static CharProperty union(final CharProperty lhs,
5142                                       final CharProperty rhs) {
5143         return new CharProperty() { // new node; lhs and rhs are consulted each time a ch is tested
5144                 boolean isSatisfiedBy(int ch) { // || short-circuits: rhs is consulted only when lhs is not satisfied
5145                     return lhs.isSatisfiedBy(ch) || rhs.isSatisfiedBy(ch);}};
5146     }
5147 
5148     /**
5149      * Returns the set intersection of two CharProperty nodes: a new node that is satisfied by ch only when both lhs and rhs are.
5150      */
5151     private static CharProperty intersection(final CharProperty lhs,
5152                                              final CharProperty rhs) {
5153         return new CharProperty() { // new node; lhs and rhs are consulted each time a ch is tested
5154                 boolean isSatisfiedBy(int ch) { // && short-circuits: rhs is consulted only when lhs is satisfied
5155                     return lhs.isSatisfiedBy(ch) && rhs.isSatisfiedBy(ch);}};
5156     }
5157 
5158     /**










5159      * Handles word boundaries. Includes a field to allow this one class to
5160      * deal with the different types of word boundaries we can match. The word
5161      * characters include underscores, letters, and digits. Non-spacing marks
5162      * are also part of a word if they have a base character, otherwise
5163      * they are ignored for purposes of finding word boundaries.
5164      */
5165     static final class Bound extends Node {
5166         static int LEFT = 0x1;
5167         static int RIGHT= 0x2;
5168         static int BOTH = 0x3;
5169         static int NONE = 0x4;
5170         int type;
5171         boolean useUWORD;
5172         Bound(int n, boolean useUWORD) {
5173             type = n;
5174             this.useUWORD = useUWORD;
5175         }
5176 
5177         boolean isWord(int ch) {
5178             return useUWORD ? UnicodeProp.WORD.is(ch)