26 package java.util.regex;
27
28 import java.text.Normalizer;
29 import java.text.Normalizer.Form;
30 import java.util.Locale;
31 import java.util.Iterator;
32 import java.util.Map;
33 import java.util.ArrayList;
34 import java.util.HashMap;
35 import java.util.LinkedHashSet;
36 import java.util.List;
37 import java.util.Set;
38 import java.util.Arrays;
39 import java.util.NoSuchElementException;
40 import java.util.Spliterator;
41 import java.util.Spliterators;
42 import java.util.function.Predicate;
43 import java.util.stream.Stream;
44 import java.util.stream.StreamSupport;
45
46
47 /**
48 * A compiled representation of a regular expression.
49 *
50 * <p> A regular expression, specified as a string, must first be compiled into
51 * an instance of this class. The resulting pattern can then be used to create
52 * a {@link Matcher} object that can match arbitrary {@linkplain
53 * java.lang.CharSequence character sequences} against the regular
54 * expression. All of the state involved in performing a match resides in the
55 * matcher, so many matchers can share the same pattern.
56 *
57 * <p> A typical invocation sequence is thus
58 *
59 * <blockquote><pre>
60 * Pattern p = Pattern.{@link #compile compile}("a*b");
61 * Matcher m = p.{@link #matcher matcher}("aaaaab");
62 * boolean b = m.{@link Matcher#matches matches}();</pre></blockquote>
63 *
64 * <p> A {@link #matches matches} method is defined by this class as a
65 * convenience for when a regular expression is used just once. This method
2298 // Fall through
2299 default:
2300 prev = cursor;
2301 append(ch, first);
2302 first++;
2303 if (isSupplementary(ch)) {
2304 hasSupplementary = true;
2305 }
2306 ch = next();
2307 continue;
2308 }
2309 break;
2310 }
2311 if (first == 1) {
2312 return newCharProperty(single(buffer[0]));
2313 } else {
2314 return newSlice(buffer, first, hasSupplementary);
2315 }
2316 }
2317
2318 private void append(int ch, int len) {
2319 if (len >= buffer.length) {
2320 int[] tmp = new int[len+len];
2321 System.arraycopy(buffer, 0, tmp, 0, len);
2322 buffer = tmp;
2323 }
2324 buffer[len] = ch;
2325 }
2326
2327 /**
2328 * Parses a backref greedily, taking as many numbers as it
2329 * can. The first digit is always treated as a backref, but
2330 * multi digit numbers are only treated as a backref if at
2331 * least that many backrefs exist at this point in the regex.
2332 */
2333 private Node ref(int refNum) {
2334 boolean done = false;
2335 while(!done) {
2336 int ch = peek();
2337 switch(ch) {
2338 case '0':
2339 case '1':
2340 case '2':
2341 case '3':
2342 case '4':
2343 case '5':
2344 case '6':
|
26 package java.util.regex;
27
28 import java.text.Normalizer;
29 import java.text.Normalizer.Form;
30 import java.util.Locale;
31 import java.util.Iterator;
32 import java.util.Map;
33 import java.util.ArrayList;
34 import java.util.HashMap;
35 import java.util.LinkedHashSet;
36 import java.util.List;
37 import java.util.Set;
38 import java.util.Arrays;
39 import java.util.NoSuchElementException;
40 import java.util.Spliterator;
41 import java.util.Spliterators;
42 import java.util.function.Predicate;
43 import java.util.stream.Stream;
44 import java.util.stream.StreamSupport;
45
46 import jdk.internal.util.ArraysSupport;
47
48 /**
49 * A compiled representation of a regular expression.
50 *
51 * <p> A regular expression, specified as a string, must first be compiled into
52 * an instance of this class. The resulting pattern can then be used to create
53 * a {@link Matcher} object that can match arbitrary {@linkplain
54 * java.lang.CharSequence character sequences} against the regular
55 * expression. All of the state involved in performing a match resides in the
56 * matcher, so many matchers can share the same pattern.
57 *
58 * <p> A typical invocation sequence is thus
59 *
60 * <blockquote><pre>
61 * Pattern p = Pattern.{@link #compile compile}("a*b");
62 * Matcher m = p.{@link #matcher matcher}("aaaaab");
63 * boolean b = m.{@link Matcher#matches matches}();</pre></blockquote>
64 *
65 * <p> A {@link #matches matches} method is defined by this class as a
66 * convenience for when a regular expression is used just once. This method
2299 // Fall through
2300 default:
2301 prev = cursor;
2302 append(ch, first);
2303 first++;
2304 if (isSupplementary(ch)) {
2305 hasSupplementary = true;
2306 }
2307 ch = next();
2308 continue;
2309 }
2310 break;
2311 }
2312 if (first == 1) {
2313 return newCharProperty(single(buffer[0]));
2314 } else {
2315 return newSlice(buffer, first, hasSupplementary);
2316 }
2317 }
2318
2319 private void append(int ch, int index) {
2320 int oldCapacity = buffer.length;
2321 if (index - oldCapacity >= 0) {
2322 int newCapacity = ArraysSupport.newCapacity(oldCapacity,
2323 1 + index - oldCapacity, oldCapacity);
2324 buffer = Arrays.copyOf(buffer, newCapacity);
2325 }
2326 buffer[index] = ch;
2327 }
2328
2329 /**
2330 * Parses a backref greedily, taking as many numbers as it
2331 * can. The first digit is always treated as a backref, but
2332 * multi digit numbers are only treated as a backref if at
2333 * least that many backrefs exist at this point in the regex.
2334 */
2335 private Node ref(int refNum) {
2336 boolean done = false;
2337 while(!done) {
2338 int ch = peek();
2339 switch(ch) {
2340 case '0':
2341 case '1':
2342 case '2':
2343 case '3':
2344 case '4':
2345 case '5':
2346 case '6':
|