< prev index next >

src/java.base/share/classes/java/util/regex/Pattern.java

Print this page




  64  * <p> A {@link #matches matches} method is defined by this class as a
  65  * convenience for when a regular expression is used just once.  This method
  66  * compiles an expression and matches an input sequence against it in a single
  67  * invocation.  The statement
  68  *
  69  * <blockquote><pre>
  70  * boolean b = Pattern.matches("a*b", "aaaaab");</pre></blockquote>
  71  *
  72  * is equivalent to the three statements above, though for repeated matches it
  73  * is less efficient since it does not allow the compiled pattern to be reused.
  74  *
  75  * <p> Instances of this class are immutable and are safe for use by multiple
  76  * concurrent threads.  Instances of the {@link Matcher} class are not safe for
  77  * such use.
  78  *
  79  *
  80  * <h3><a id="sum">Summary of regular-expression constructs</a></h3>
  81  *
  82  * <table class="borderless">
  83  * <caption style="display:none">Regular expression constructs, and what they match</caption>
  84  * <thead>
  85  * <tr style="text-align:left">
  86  * <th style="text-align:left" id="construct">Construct</th>
  87  * <th style="text-align:left" id="matches">Matches</th>
  88  * </tr>
  89  * </thead>
  90  * <tbody>
  91  *
  92  * <tr><th>&nbsp;</th></tr>
  93  * <tr style="text-align:left"><th colspan="2" id="characters">Characters</th></tr>
  94  *
  95  * <tr><td style="vertical-align:top" headers="construct characters"><i>x</i></td>
  96  *     <td headers="matches">The character <i>x</i></td></tr>
  97  * <tr><td style="vertical-align:top" headers="construct characters">{@code \\}</td>
  98  *     <td headers="matches">The backslash character</td></tr>
  99  * <tr><td style="vertical-align:top" headers="construct characters">{@code \0}<i>n</i></td>
 100  *     <td headers="matches">The character with octal value {@code 0}<i>n</i>
 101  *         (0&nbsp;{@code <=}&nbsp;<i>n</i>&nbsp;{@code <=}&nbsp;7)</td></tr>
 102  * <tr><td style="vertical-align:top" headers="construct characters">{@code \0}<i>nn</i></td>
 103  *     <td headers="matches">The character with octal value {@code 0}<i>nn</i>
 104  *         (0&nbsp;{@code <=}&nbsp;<i>n</i>&nbsp;{@code <=}&nbsp;7)</td></tr>
 105  * <tr><td style="vertical-align:top" headers="construct characters">{@code \0}<i>mnn</i></td>
 106  *     <td headers="matches">The character with octal value {@code 0}<i>mnn</i>
 107  *         (0&nbsp;{@code <=}&nbsp;<i>m</i>&nbsp;{@code <=}&nbsp;3,
 108  *         0&nbsp;{@code <=}&nbsp;<i>n</i>&nbsp;{@code <=}&nbsp;7)</td></tr>
 109  * <tr><td style="vertical-align:top" headers="construct characters">{@code \x}<i>hh</i></td>
 110  *     <td headers="matches">The character with hexadecimal&nbsp;value&nbsp;{@code 0x}<i>hh</i></td></tr>
 111  * <tr><td style="vertical-align:top" headers="construct characters"><code>\u</code><i>hhhh</i></td>
 112  *     <td headers="matches">The character with hexadecimal&nbsp;value&nbsp;{@code 0x}<i>hhhh</i></td></tr>
 113  * <tr><td style="vertical-align:top" headers="construct characters"><code>\x</code><i>{h...h}</i></td>
 114  *     <td headers="matches">The character with hexadecimal&nbsp;value&nbsp;{@code 0x}<i>h...h</i>
 115  *         ({@link java.lang.Character#MIN_CODE_POINT Character.MIN_CODE_POINT}
 116  *         &nbsp;&lt;=&nbsp;{@code 0x}<i>h...h</i>&nbsp;&lt;=&nbsp;
 117  *          {@link java.lang.Character#MAX_CODE_POINT Character.MAX_CODE_POINT})</td></tr>
 118  * <tr><td style="vertical-align:top" headers="construct characters"><code>\N{</code><i>name</i><code>}</code></td>
 119  *     <td headers="matches">The character with Unicode character name <i>'name'</i></td></tr>
 120  * <tr><td style="vertical-align:top" headers="construct characters">{@code \t}</td>
 121  *     <td headers="matches">The tab character (<code>'\u0009'</code>)</td></tr>
 122  * <tr><td style="vertical-align:top" headers="construct characters">{@code \n}</td>
 123  *     <td headers="matches">The newline (line feed) character (<code>'\u000A'</code>)</td></tr>
 124  * <tr><td style="vertical-align:top" headers="construct characters">{@code \r}</td>
 125  *     <td headers="matches">The carriage-return character (<code>'\u000D'</code>)</td></tr>
 126  * <tr><td style="vertical-align:top" headers="construct characters">{@code \f}</td>
 127  *     <td headers="matches">The form-feed character (<code>'\u000C'</code>)</td></tr>
 128  * <tr><td style="vertical-align:top" headers="construct characters">{@code \a}</td>
 129  *     <td headers="matches">The alert (bell) character (<code>'\u0007'</code>)</td></tr>
 130  * <tr><td style="vertical-align:top" headers="construct characters">{@code \e}</td>
 131  *     <td headers="matches">The escape character (<code>'\u001B'</code>)</td></tr>
 132  * <tr><td style="vertical-align:top" headers="construct characters">{@code \c}<i>x</i></td>
 133  *     <td headers="matches">The control character corresponding to <i>x</i></td></tr>
 134  *
 135  * <tr><th>&nbsp;</th></tr>
 136  * <tr style="text-align:left"><th colspan="2" id="classes">Character classes</th></tr>
 137  *
 138  * <tr><td style="vertical-align:top" headers="construct classes">{@code [abc]}</td>
 139  *     <td headers="matches">{@code a}, {@code b}, or {@code c} (simple class)</td></tr>
 140  * <tr><td style="vertical-align:top" headers="construct classes">{@code [^abc]}</td>
 141  *     <td headers="matches">Any character except {@code a}, {@code b}, or {@code c} (negation)</td></tr>
 142  * <tr><td style="vertical-align:top" headers="construct classes">{@code [a-zA-Z]}</td>
 143  *     <td headers="matches">{@code a} through {@code z}
 144  *         or {@code A} through {@code Z}, inclusive (range)</td></tr>
 145  * <tr><td style="vertical-align:top" headers="construct classes">{@code [a-d[m-p]]}</td>
 146  *     <td headers="matches">{@code a} through {@code d},
 147  *      or {@code m} through {@code p}: {@code [a-dm-p]} (union)</td></tr>
 148  * <tr><td style="vertical-align:top" headers="construct classes">{@code [a-z&&[def]]}</td>
 149  *     <td headers="matches">{@code d}, {@code e}, or {@code f} (intersection)</td></tr>
 150  * <tr><td style="vertical-align:top" headers="construct classes">{@code [a-z&&[^bc]]}</td>
 151  *     <td headers="matches">{@code a} through {@code z},
 152  *         except for {@code b} and {@code c}: {@code [ad-z]} (subtraction)</td></tr>
 153  * <tr><td style="vertical-align:top" headers="construct classes">{@code [a-z&&[^m-p]]}</td>
 154  *     <td headers="matches">{@code a} through {@code z},
 155  *          and not {@code m} through {@code p}: {@code [a-lq-z]} (subtraction)</td></tr>
 156  * <tr><th>&nbsp;</th></tr>
 157  *
 158  * <tr style="text-align:left"><th colspan="2" id="predef">Predefined character classes</th></tr>
 159  *
 160  * <tr><td style="vertical-align:top" headers="construct predef">{@code .}</td>
 161  *     <td headers="matches">Any character (may or may not match <a href="#lt">line terminators</a>)</td></tr>
 162  * <tr><td style="vertical-align:top" headers="construct predef">{@code \d}</td>
 163  *     <td headers="matches">A digit: {@code [0-9]}</td></tr>
 164  * <tr><td style="vertical-align:top" headers="construct predef">{@code \D}</td>
 165  *     <td headers="matches">A non-digit: {@code [^0-9]}</td></tr>
 166  * <tr><td style="vertical-align:top" headers="construct predef">{@code \h}</td>
 167  *     <td headers="matches">A horizontal whitespace character:
 168  *     <code>[ \t\xA0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]</code></td></tr>
 169  * <tr><td style="vertical-align:top" headers="construct predef">{@code \H}</td>
 170  *     <td headers="matches">A non-horizontal whitespace character: {@code [^\h]}</td></tr>
 171  * <tr><td style="vertical-align:top" headers="construct predef">{@code \s}</td>
 172  *     <td headers="matches">A whitespace character: {@code [ \t\n\x0B\f\r]}</td></tr>
 173  * <tr><td style="vertical-align:top" headers="construct predef">{@code \S}</td>
 174  *     <td headers="matches">A non-whitespace character: {@code [^\s]}</td></tr>
 175  * <tr><td style="vertical-align:top" headers="construct predef">{@code \v}</td>
 176  *     <td headers="matches">A vertical whitespace character: <code>[\n\x0B\f\r\x85\u2028\u2029]</code>
 177  *     </td></tr>
 178  * <tr><td style="vertical-align:top" headers="construct predef">{@code \V}</td>
 179  *     <td headers="matches">A non-vertical whitespace character: {@code [^\v]}</td></tr>
 180  * <tr><td style="vertical-align:top" headers="construct predef">{@code \w}</td>
 181  *     <td headers="matches">A word character: {@code [a-zA-Z_0-9]}</td></tr>
 182  * <tr><td style="vertical-align:top" headers="construct predef">{@code \W}</td>
 183  *     <td headers="matches">A non-word character: {@code [^\w]}</td></tr>
 184  * <tr><th>&nbsp;</th></tr>
 185  * <tr style="text-align:left"><th colspan="2" id="posix"><b>POSIX character classes (US-ASCII only)</b></th></tr>
 186  *
 187  * <tr><td style="vertical-align:top" headers="construct posix">{@code \p{Lower}}</td>
 188  *     <td headers="matches">A lower-case alphabetic character: {@code [a-z]}</td></tr>
 189  * <tr><td style="vertical-align:top" headers="construct posix">{@code \p{Upper}}</td>
 190  *     <td headers="matches">An upper-case alphabetic character: {@code [A-Z]}</td></tr>
 191  * <tr><td style="vertical-align:top" headers="construct posix">{@code \p{ASCII}}</td>
 192  *     <td headers="matches">All ASCII: {@code [\x00-\x7F]}</td></tr>
 193  * <tr><td style="vertical-align:top" headers="construct posix">{@code \p{Alpha}}</td>
 194  *     <td headers="matches">An alphabetic character: {@code [\p{Lower}\p{Upper}]}</td></tr>
 195  * <tr><td style="vertical-align:top" headers="construct posix">{@code \p{Digit}}</td>
 196  *     <td headers="matches">A decimal digit: {@code [0-9]}</td></tr>
 197  * <tr><td style="vertical-align:top" headers="construct posix">{@code \p{Alnum}}</td>
 198  *     <td headers="matches">An alphanumeric character: {@code [\p{Alpha}\p{Digit}]}</td></tr>
 199  * <tr><td style="vertical-align:top" headers="construct posix">{@code \p{Punct}}</td>
 200  *     <td headers="matches">Punctuation: One of {@code !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~}</td></tr>
 201  *     <!-- {@code [\!"#\$%&'\(\)\*\+,\-\./:;\<=\>\?@\[\\\]\^_`\{\|\}~]}
 202  *          {@code [\X21-\X2F\X3A-\X40\X5B-\X60\X7B-\X7E]} -->
 203  * <tr><td style="vertical-align:top" headers="construct posix">{@code \p{Graph}}</td>
 204  *     <td headers="matches">A visible character: {@code [\p{Alnum}\p{Punct}]}</td></tr>
 205  * <tr><td style="vertical-align:top" headers="construct posix">{@code \p{Print}}</td>
 206  *     <td headers="matches">A printable character: {@code [\p{Graph}\x20]}</td></tr>
 207  * <tr><td style="vertical-align:top" headers="construct posix">{@code \p{Blank}}</td>
 208  *     <td headers="matches">A space or a tab: {@code [ \t]}</td></tr>
 209  * <tr><td style="vertical-align:top" headers="construct posix">{@code \p{Cntrl}}</td>
 210  *     <td headers="matches">A control character: {@code [\x00-\x1F\x7F]}</td></tr>
 211  * <tr><td style="vertical-align:top" headers="construct posix">{@code \p{XDigit}}</td>
 212  *     <td headers="matches">A hexadecimal digit: {@code [0-9a-fA-F]}</td></tr>
 213  * <tr><td style="vertical-align:top" headers="construct posix">{@code \p{Space}}</td>
 214  *     <td headers="matches">A whitespace character: {@code [ \t\n\x0B\f\r]}</td></tr>
 215  *
 216  * <tr><th>&nbsp;</th></tr>
 217  * <tr style="text-align:left"><th colspan="2" id="java">java.lang.Character classes (simple <a href="#jcc">java character type</a>)</th></tr>
 218  *
 219  * <tr><td style="vertical-align:top" headers="construct java">{@code \p{javaLowerCase}}</td>
 220  *     <td headers="matches">Equivalent to java.lang.Character.isLowerCase()</td></tr>
 221  * <tr><td style="vertical-align:top" headers="construct java">{@code \p{javaUpperCase}}</td>
 222  *     <td headers="matches">Equivalent to java.lang.Character.isUpperCase()</td></tr>
 223  * <tr><td style="vertical-align:top" headers="construct java">{@code \p{javaWhitespace}}</td>
 224  *     <td headers="matches">Equivalent to java.lang.Character.isWhitespace()</td></tr>
 225  * <tr><td style="vertical-align:top" headers="construct java">{@code \p{javaMirrored}}</td>
 226  *     <td headers="matches">Equivalent to java.lang.Character.isMirrored()</td></tr>
 227  *
 228  * <tr><th>&nbsp;</th></tr>
 229  * <tr style="text-align:left"><th colspan="2" id="unicode">Classes for Unicode scripts, blocks, categories and binary properties</th></tr>
 230  * <tr><td style="vertical-align:top" headers="construct unicode">{@code \p{IsLatin}}</td>
 231  *     <td headers="matches">A Latin&nbsp;script character (<a href="#usc">script</a>)</td></tr>
 232  * <tr><td style="vertical-align:top" headers="construct unicode">{@code \p{InGreek}}</td>
 233  *     <td headers="matches">A character in the Greek&nbsp;block (<a href="#ubc">block</a>)</td></tr>
 234  * <tr><td style="vertical-align:top" headers="construct unicode">{@code \p{Lu}}</td>
 235  *     <td headers="matches">An uppercase letter (<a href="#ucc">category</a>)</td></tr>
 236  * <tr><td style="vertical-align:top" headers="construct unicode">{@code \p{IsAlphabetic}}</td>
 237  *     <td headers="matches">An alphabetic character (<a href="#ubpc">binary property</a>)</td></tr>
 238  * <tr><td style="vertical-align:top" headers="construct unicode">{@code \p{Sc}}</td>
 239  *     <td headers="matches">A currency symbol</td></tr>
 240  * <tr><td style="vertical-align:top" headers="construct unicode">{@code \P{InGreek}}</td>
 241  *     <td headers="matches">Any character except one in the Greek block (negation)</td></tr>
 242  * <tr><td style="vertical-align:top" headers="construct unicode">{@code [\p{L}&&[^\p{Lu}]]}</td>
 243  *     <td headers="matches">Any letter except an uppercase letter (subtraction)</td></tr>
 244  *
 245  * <tr><th>&nbsp;</th></tr>
 246  * <tr style="text-align:left"><th colspan="2" id="bounds">Boundary matchers</th></tr>
 247  *
 248  * <tr><td style="vertical-align:top" headers="construct bounds">{@code ^}</td>
 249  *     <td headers="matches">The beginning of a line</td></tr>
 250  * <tr><td style="vertical-align:top" headers="construct bounds">{@code $}</td>
 251  *     <td headers="matches">The end of a line</td></tr>
 252  * <tr><td style="vertical-align:top" headers="construct bounds">{@code \b}</td>
 253  *     <td headers="matches">A word boundary</td></tr>
 254  * <tr><td style="vertical-align:top" headers="construct bounds">{@code \b{g}}</td>
 255  *     <td headers="matches">A Unicode extended grapheme cluster boundary</td></tr>
 256  * <tr><td style="vertical-align:top" headers="construct bounds">{@code \B}</td>
 257  *     <td headers="matches">A non-word boundary</td></tr>
 258  * <tr><td style="vertical-align:top" headers="construct bounds">{@code \A}</td>
 259  *     <td headers="matches">The beginning of the input</td></tr>
 260  * <tr><td style="vertical-align:top" headers="construct bounds">{@code \G}</td>
 261  *     <td headers="matches">The end of the previous match</td></tr>
 262  * <tr><td style="vertical-align:top" headers="construct bounds">{@code \Z}</td>
 263  *     <td headers="matches">The end of the input but for the final
 264  *         <a href="#lt">terminator</a>, if&nbsp;any</td></tr>
 265  * <tr><td style="vertical-align:top" headers="construct bounds">{@code \z}</td>
 266  *     <td headers="matches">The end of the input</td></tr>
 267  *
 268  * <tr><th>&nbsp;</th></tr>
 269  * <tr style="text-align:left"><th colspan="2" id="lineending">Linebreak matcher</th></tr>
 270  * <tr><td style="vertical-align:top" headers="construct lineending">{@code \R}</td>
 271  *     <td headers="matches">Any Unicode linebreak sequence; equivalent to
 272  *     <code>\u000D\u000A|[\u000A\u000B\u000C\u000D\u0085\u2028\u2029]
 273  *     </code></td></tr>
 274  *
 275  * <tr><th>&nbsp;</th></tr>
 276  * <tr style="text-align:left"><th colspan="2" id="grapheme">Unicode Extended Grapheme matcher</th></tr>
 277  * <tr><td style="vertical-align:top" headers="construct grapheme">{@code \X}</td>
 278  *     <td headers="matches">Any Unicode extended grapheme cluster</td></tr>
 279  *
 280  * <tr><th>&nbsp;</th></tr>
 281  * <tr style="text-align:left"><th colspan="2" id="greedy">Greedy quantifiers</th></tr>
 282  *
 283  * <tr><td style="vertical-align:top" headers="construct greedy"><i>X</i>{@code ?}</td>
 284  *     <td headers="matches"><i>X</i>, once or not at all</td></tr>
 285  * <tr><td style="vertical-align:top" headers="construct greedy"><i>X</i>{@code *}</td>
 286  *     <td headers="matches"><i>X</i>, zero or more times</td></tr>
 287  * <tr><td style="vertical-align:top" headers="construct greedy"><i>X</i>{@code +}</td>
 288  *     <td headers="matches"><i>X</i>, one or more times</td></tr>
 289  * <tr><td style="vertical-align:top" headers="construct greedy"><i>X</i><code>{</code><i>n</i><code>}</code></td>
 290  *     <td headers="matches"><i>X</i>, exactly <i>n</i> times</td></tr>
 291  * <tr><td style="vertical-align:top" headers="construct greedy"><i>X</i><code>{</code><i>n</i><code>,}</code></td>
 292  *     <td headers="matches"><i>X</i>, at least <i>n</i> times</td></tr>
 293  * <tr><td style="vertical-align:top" headers="construct greedy"><i>X</i><code>{</code><i>n</i>{@code ,}<i>m</i><code>}</code></td>
 294  *     <td headers="matches"><i>X</i>, at least <i>n</i> but not more than <i>m</i> times</td></tr>
 295  *
 296  * <tr><th>&nbsp;</th></tr>
 297  * <tr style="text-align:left"><th colspan="2" id="reluc">Reluctant quantifiers</th></tr>
 298  *
 299  * <tr><td style="vertical-align:top" headers="construct reluc"><i>X</i>{@code ??}</td>
 300  *     <td headers="matches"><i>X</i>, once or not at all</td></tr>
 301  * <tr><td style="vertical-align:top" headers="construct reluc"><i>X</i>{@code *?}</td>
 302  *     <td headers="matches"><i>X</i>, zero or more times</td></tr>
 303  * <tr><td style="vertical-align:top" headers="construct reluc"><i>X</i>{@code +?}</td>
 304  *     <td headers="matches"><i>X</i>, one or more times</td></tr>
 305  * <tr><td style="vertical-align:top" headers="construct reluc"><i>X</i><code>{</code><i>n</i><code>}?</code></td>
 306  *     <td headers="matches"><i>X</i>, exactly <i>n</i> times</td></tr>
 307  * <tr><td style="vertical-align:top" headers="construct reluc"><i>X</i><code>{</code><i>n</i><code>,}?</code></td>
 308  *     <td headers="matches"><i>X</i>, at least <i>n</i> times</td></tr>
 309  * <tr><td style="vertical-align:top" headers="construct reluc"><i>X</i><code>{</code><i>n</i>{@code ,}<i>m</i><code>}?</code></td>
 310  *     <td headers="matches"><i>X</i>, at least <i>n</i> but not more than <i>m</i> times</td></tr>
 311  *
 312  * <tr><th>&nbsp;</th></tr>
 313  * <tr style="text-align:left"><th colspan="2" id="poss">Possessive quantifiers</th></tr>
 314  *
 315  * <tr><td style="vertical-align:top" headers="construct poss"><i>X</i>{@code ?+}</td>
 316  *     <td headers="matches"><i>X</i>, once or not at all</td></tr>
 317  * <tr><td style="vertical-align:top" headers="construct poss"><i>X</i>{@code *+}</td>
 318  *     <td headers="matches"><i>X</i>, zero or more times</td></tr>
 319  * <tr><td style="vertical-align:top" headers="construct poss"><i>X</i>{@code ++}</td>
 320  *     <td headers="matches"><i>X</i>, one or more times</td></tr>
 321  * <tr><td style="vertical-align:top" headers="construct poss"><i>X</i><code>{</code><i>n</i><code>}+</code></td>
 322  *     <td headers="matches"><i>X</i>, exactly <i>n</i> times</td></tr>
 323  * <tr><td style="vertical-align:top" headers="construct poss"><i>X</i><code>{</code><i>n</i><code>,}+</code></td>
 324  *     <td headers="matches"><i>X</i>, at least <i>n</i> times</td></tr>
 325  * <tr><td style="vertical-align:top" headers="construct poss"><i>X</i><code>{</code><i>n</i>{@code ,}<i>m</i><code>}+</code></td>
 326  *     <td headers="matches"><i>X</i>, at least <i>n</i> but not more than <i>m</i> times</td></tr>
 327  *
 328  * <tr><th>&nbsp;</th></tr>
 329  * <tr style="text-align:left"><th colspan="2" id="logical">Logical operators</th></tr>
 330  *
 331  * <tr><td style="vertical-align:top" headers="construct logical"><i>XY</i></td>
 332  *     <td headers="matches"><i>X</i> followed by <i>Y</i></td></tr>
 333  * <tr><td style="vertical-align:top" headers="construct logical"><i>X</i>{@code |}<i>Y</i></td>
 334  *     <td headers="matches">Either <i>X</i> or <i>Y</i></td></tr>
 335  * <tr><td style="vertical-align:top" headers="construct logical">{@code (}<i>X</i>{@code )}</td>
 336  *     <td headers="matches"><i>X</i>, as a <a href="#cg">capturing group</a></td></tr>
 337  *
 338  * <tr><th>&nbsp;</th></tr>
 339  * <tr style="text-align:left"><th colspan="2" id="backref">Back references</th></tr>
 340  *
 341  * <tr><td style="vertical-align:bottom" headers="construct backref">{@code \}<i>n</i></td>
 342  *     <td style="vertical-align:bottom" headers="matches">Whatever the <i>n</i><sup>th</sup>
 343  *     <a href="#cg">capturing group</a> matched</td></tr>
 344  *
 345  * <tr><td style="vertical-align:bottom" headers="construct backref">{@code \}<i>k</i>&lt;<i>name</i>&gt;</td>
 346  *     <td style="vertical-align:bottom" headers="matches">Whatever the

























































 347  *     <a href="#groupname">named-capturing group</a> "name" matched</td></tr>
 348  *
 349  * <tr><th>&nbsp;</th></tr>
 350  * <tr style="text-align:left"><th colspan="2" id="quot">Quotation</th></tr>
 351  *
 352  * <tr><td style="vertical-align:top" headers="construct quot">{@code \}</td>
 353  *     <td headers="matches">Nothing, but quotes the following character</td></tr>
 354  * <tr><td style="vertical-align:top" headers="construct quot">{@code \Q}</td>
 355  *     <td headers="matches">Nothing, but quotes all characters until {@code \E}</td></tr>
 356  * <tr><td style="vertical-align:top" headers="construct quot">{@code \E}</td>
 357  *     <td headers="matches">Nothing, but ends quoting started by {@code \Q}</td></tr>
 358  *     <!-- Metachars: !$()*+.<>?[\]^{|} -->
 359  *
 360  * <tr><th>&nbsp;</th></tr>
 361  * <tr style="text-align:left"><th colspan="2" id="special">Special constructs (named-capturing and non-capturing)</th></tr>
 362  *
 363  * <tr><td style="vertical-align:top" headers="construct special"><code>(?&lt;<a href="#groupname">name</a>&gt;</code><i>X</i>{@code )}</td>
 364  *     <td headers="matches"><i>X</i>, as a named-capturing group</td></tr>
 365  * <tr><td style="vertical-align:top" headers="construct special">{@code (?:}<i>X</i>{@code )}</td>
 366  *     <td headers="matches"><i>X</i>, as a non-capturing group</td></tr>
 367  * <tr><td style="vertical-align:top" headers="construct special"><code>(?idmsuxU-idmsuxU)&nbsp;</code></td>
 368  *     <td headers="matches">Nothing, but turns match flags <a href="#CASE_INSENSITIVE">i</a>
 369  * <a href="#UNIX_LINES">d</a> <a href="#MULTILINE">m</a> <a href="#DOTALL">s</a>
 370  * <a href="#UNICODE_CASE">u</a> <a href="#COMMENTS">x</a> <a href="#UNICODE_CHARACTER_CLASS">U</a>
 371  * on - off</td></tr>
 372  * <tr><td style="vertical-align:top" headers="construct special"><code>(?idmsux-idmsux:</code><i>X</i>{@code )}&nbsp;&nbsp;</td>
 373  *     <td headers="matches"><i>X</i>, as a <a href="#cg">non-capturing group</a> with the
 374  *         given flags <a href="#CASE_INSENSITIVE">i</a> <a href="#UNIX_LINES">d</a>
 375  * <a href="#MULTILINE">m</a> <a href="#DOTALL">s</a> <a href="#UNICODE_CASE">u</a>
 376  * <a href="#COMMENTS">x</a> on - off</td></tr>
 377  * <tr><td style="vertical-align:top" headers="construct special">{@code (?=}<i>X</i>{@code )}</td>
 378  *     <td headers="matches"><i>X</i>, via zero-width positive lookahead</td></tr>
 379  * <tr><td style="vertical-align:top" headers="construct special">{@code (?!}<i>X</i>{@code )}</td>
 380  *     <td headers="matches"><i>X</i>, via zero-width negative lookahead</td></tr>
 381  * <tr><td style="vertical-align:top" headers="construct special">{@code (?<=}<i>X</i>{@code )}</td>
 382  *     <td headers="matches"><i>X</i>, via zero-width positive lookbehind</td></tr>
 383  * <tr><td style="vertical-align:top" headers="construct special">{@code (?<!}<i>X</i>{@code )}</td>
 384  *     <td headers="matches"><i>X</i>, via zero-width negative lookbehind</td></tr>
 385  * <tr><td style="vertical-align:top" headers="construct special">{@code (?>}<i>X</i>{@code )}</td>
 386  *     <td headers="matches"><i>X</i>, as an independent, non-capturing group</td></tr>
 387  *
 388  * </tbody>
 389  * </table>
 390  *
 391  * <hr>
 392  *
 393  *
 394  * <h3><a id="bs">Backslashes, escapes, and quoting</a></h3>
 395  *
 396  * <p> The backslash character ({@code '\'}) serves to introduce escaped
 397  * constructs, as defined in the table above, as well as to quote characters
 398  * that otherwise would be interpreted as unescaped constructs.  Thus the
 399  * expression {@code \\} matches a single backslash and <code>\{</code> matches a
 400  * left brace.
 401  *
 402  * <p> It is an error to use a backslash prior to any alphabetic character that
 403  * does not denote an escaped construct; these are reserved for future
 404  * extensions to the regular-expression language.  A backslash may be used
 405  * prior to a non-alphabetic character regardless of whether that character is
 406  * part of an unescaped construct.


 415  * <code>"\b"</code>, for example, matches a single backspace character when
 416  * interpreted as a regular expression, while {@code "\\b"} matches a
 417  * word boundary.  The string literal {@code "\(hello\)"} is illegal
 418  * and leads to a compile-time error; in order to match the string
 419  * {@code (hello)} the string literal {@code "\\(hello\\)"}
 420  * must be used.
 421  *
 422  * <h3><a id="cc">Character Classes</a></h3>
 423  *
 424  *    <p> Character classes may appear within other character classes, and
 425  *    may be composed by the union operator (implicit) and the intersection
 426  *    operator ({@code &&}).
 427  *    The union operator denotes a class that contains every character that is
 428  *    in at least one of its operand classes.  The intersection operator
 429  *    denotes a class that contains every character that is in both of its
 430  *    operand classes.
 431  *
 432  *    <p> The precedence of character-class operators is as follows, from
 433  *    highest to lowest:
 434  *
 435  *    <blockquote><table>
 436  *      <caption style="display:none">Precedence of character class operators.</caption>



 437  *      <tbody>
 438  *      <tr><th>1&nbsp;&nbsp;&nbsp;&nbsp;</th>
 439  *        <td>Literal escape&nbsp;&nbsp;&nbsp;&nbsp;</td>
 440  *        <td>{@code \x}</td></tr>
 441  *     <tr><th>2&nbsp;&nbsp;&nbsp;&nbsp;</th>
 442  *        <td>Grouping</td>
 443  *        <td>{@code [...]}</td></tr>
 444  *     <tr><th>3&nbsp;&nbsp;&nbsp;&nbsp;</th>
 445  *        <td>Range</td>
 446  *        <td>{@code a-z}</td></tr>
 447  *      <tr><th>4&nbsp;&nbsp;&nbsp;&nbsp;</th>
 448  *        <td>Union</td>
 449  *        <td>{@code [a-e][i-u]}</td></tr>
 450  *      <tr><th>5&nbsp;&nbsp;&nbsp;&nbsp;</th>
 451  *        <td>Intersection</td>
 452  *        <td>{@code [a-z&&[aeiou]]}</td></tr>
 453  *      </tbody>
 454  *    </table></blockquote>
 455  *
 456  *    <p> Note that a different set of metacharacters is in effect inside
 457  *    a character class than outside a character class. For instance, the
 458  *    regular expression {@code .} loses its special meaning inside a
 459  *    character class, while the expression {@code -} becomes a
 460  *    range-forming metacharacter.
 461  *
 462  * <h3><a id="lt">Line terminators</a></h3>
 463  *
 464  * <p> A <i>line terminator</i> is a one- or two-character sequence that marks
 465  * the end of a line of the input character sequence.  The following are
 466  * recognized as line terminators:
 467  *
 468  * <ul>
 469  *
 470  *   <li> A newline (line feed) character&nbsp;({@code '\n'}),
 471  *
 472  *   <li> A carriage-return character followed immediately by a newline
 473  *   character&nbsp;({@code "\r\n"}),
 474  *
 475  *   <li> A standalone carriage-return character&nbsp;({@code '\r'}),
 476  *
 477  *   <li> A next-line character&nbsp;(<code>'\u0085'</code>),
 478  *
 479  *   <li> A line-separator character&nbsp;(<code>'\u2028'</code>), or
 480  *
 481  *   <li> A paragraph-separator character&nbsp;(<code>'\u2029'</code>).
 482  *
 483  * </ul>
 484  * <p>If {@link #UNIX_LINES} mode is activated, then the only line terminators
 485  * recognized are newline characters.
 486  *
 487  * <p> The regular expression {@code .} matches any character except a line
 488  * terminator unless the {@link #DOTALL} flag is specified.
 489  *
 490  * <p> By default, the regular expressions {@code ^} and {@code $} ignore
 491  * line terminators and only match at the beginning and the end, respectively,
 492  * of the entire input sequence. If {@link #MULTILINE} mode is activated then
 493  * {@code ^} matches at the beginning of input and after any line terminator
 494  * except at the end of input. When in {@link #MULTILINE} mode {@code $}
 495  * matches just before a line terminator or the end of the input sequence.
 496  *
 497  * <h3><a id="cg">Groups and capturing</a></h3>
 498  *
 499  * <h4><a id="gnumber">Group number</a></h4>
 500  * <p> Capturing groups are numbered by counting their opening parentheses from
 501  * left to right.  In the expression {@code ((A)(B(C)))}, for example, there
 502  * are four such groups: </p>
 503  *
 504  * <blockquote><table>
 505  * <caption style="display:none">Capturing group numberings</caption>
 506  * <tbody>
 507  * <tr><th>1&nbsp;&nbsp;&nbsp;&nbsp;</th>
 508  *     <td>{@code ((A)(B(C)))}</td></tr>
 509  * <tr><th>2&nbsp;&nbsp;&nbsp;&nbsp;</th>
 510  *     <td>{@code (A)}</td></tr>
 511  * <tr><th>3&nbsp;&nbsp;&nbsp;&nbsp;</th>
 512  *     <td>{@code (B(C))}</td></tr>
 513  * <tr><th>4&nbsp;&nbsp;&nbsp;&nbsp;</th>
 514  *     <td>{@code (C)}</td></tr>
 515  * </tbody>
 516  * </table></blockquote>
 517  *
 518  * <p> Group zero always stands for the entire expression.
 519  *
 520  * <p> Capturing groups are so named because, during a match, each subsequence
 521  * of the input sequence that matches such a group is saved.  The captured
 522  * subsequence may be used later in the expression, via a back reference, and
 523  * may also be retrieved from the matcher once the match operation is complete.
 524  *
 525  * <h4><a id="groupname">Group name</a></h4>
 526  * <p>A capturing group can also be assigned a "name", a {@code named-capturing group},
 527  * and then be back-referenced later by the "name". Group names are composed of
 528  * the following characters. The first character must be a {@code letter}.
 529  *
 530  * <ul>
 531  *   <li> The uppercase letters {@code 'A'} through {@code 'Z'}
 532  *        (<code>'\u0041'</code>&nbsp;through&nbsp;<code>'\u005a'</code>),
 533  *   <li> The lowercase letters {@code 'a'} through {@code 'z'}
 534  *        (<code>'\u0061'</code>&nbsp;through&nbsp;<code>'\u007a'</code>),
 535  *   <li> The digits {@code '0'} through {@code '9'}
 536  *        (<code>'\u0030'</code>&nbsp;through&nbsp;<code>'\u0039'</code>),


 632  *   <li> Ideographic
 633  *   <li> Letter
 634  *   <li> Lowercase
 635  *   <li> Uppercase
 636  *   <li> Titlecase
 637  *   <li> Punctuation
 638  *   <li> Control
 639  *   <li> White_Space
 640  *   <li> Digit
 641  *   <li> Hex_Digit
 642  *   <li> Join_Control
 643  *   <li> Noncharacter_Code_Point
 644  *   <li> Assigned
 645  * </ul>
 646  * <p>
 647  * The following <b>Predefined Character classes</b> and <b>POSIX character classes</b>
 648  * are in conformance with the recommendation of <i>Annex C: Compatibility Properties</i>
 649  * of <a href="http://www.unicode.org/reports/tr18/"><i>Unicode Regular Expressions</i></a>,
 650  * when the {@link #UNICODE_CHARACTER_CLASS} flag is specified.
 651  *
 652  * <table>
 653  * <caption style="display:none">predefined and posix character classes in Unicode mode</caption>
 654  * <thead>
 655  * <tr style="text-align:left">
 656  * <th style="text-align:left" id="predef_classes">Classes</th>
 657  * <th style="text-align:left" id="predef_matches">Matches</th>
 658  * </tr>
 659  * </thead>
 660  * <tbody>
 661  * <tr><td>{@code \p{Lower}}</td>
 662  *     <td>A lowercase character: {@code \p{IsLowercase}}</td></tr>
 663  * <tr><td>{@code \p{Upper}}</td>
 664  *     <td>An uppercase character: {@code \p{IsUppercase}}</td></tr>
 665  * <tr><td>{@code \p{ASCII}}</td>
 666  *     <td>All ASCII: {@code [\x00-\x7F]}</td></tr>
 667  * <tr><td>{@code \p{Alpha}}</td>
 668  *     <td>An alphabetic character: {@code \p{IsAlphabetic}}</td></tr>
 669  * <tr><td>{@code \p{Digit}}</td>
 670  *     <td>A decimal digit character: {@code \p{IsDigit}}</td></tr>
 671  * <tr><td>{@code \p{Alnum}}</td>
 672  *     <td>An alphanumeric character: {@code [\p{IsAlphabetic}\p{IsDigit}]}</td></tr>
 673  * <tr><td>{@code \p{Punct}}</td>
 674  *     <td>A punctuation character: {@code \p{IsPunctuation}}</td></tr>
 675  * <tr><td>{@code \p{Graph}}</td>
 676  *     <td>A visible character: {@code [^\p{IsWhite_Space}\p{gc=Cc}\p{gc=Cs}\p{gc=Cn}]}</td></tr>
 677  * <tr><td>{@code \p{Print}}</td>
 678  *     <td>A printable character: {@code [\p{Graph}\p{Blank}&&[^\p{Cntrl}]]}</td></tr>
 679  * <tr><td>{@code \p{Blank}}</td>
 680  *     <td>A space or a tab: {@code [\p{IsWhite_Space}&&[^\p{gc=Zl}\p{gc=Zp}\x0a\x0b\x0c\x0d\x85]]}</td></tr>
 681  * <tr><td>{@code \p{Cntrl}}</td>
 682  *     <td>A control character: {@code \p{gc=Cc}}</td></tr>
 683  * <tr><td>{@code \p{XDigit}}</td>
 684  *     <td>A hexadecimal digit: {@code [\p{gc=Nd}\p{IsHex_Digit}]}</td></tr>
 685  * <tr><td>{@code \p{Space}}</td>
 686  *     <td>A whitespace character: {@code \p{IsWhite_Space}}</td></tr>
 687  * <tr><td>{@code \d}</td>
 688  *     <td>A digit: {@code \p{IsDigit}}</td></tr>
 689  * <tr><td>{@code \D}</td>
 690  *     <td>A non-digit: {@code [^\d]}</td></tr>
 691  * <tr><td>{@code \s}</td>
 692  *     <td>A whitespace character: {@code \p{IsWhite_Space}}</td></tr>
 693  * <tr><td>{@code \S}</td>
 694  *     <td>A non-whitespace character: {@code [^\s]}</td></tr>
 695  * <tr><td>{@code \w}</td>
 696  *     <td>A word character: {@code [\p{Alpha}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{Digit}\p{gc=Pc}\p{IsJoin_Control}]}</td></tr>
 697  * <tr><td>{@code \W}</td>
 698  *     <td>A non-word character: {@code [^\w]}</td></tr>
 699  * </tbody>
 700  * </table>
 701  * <p>
 702  * <a id="jcc">
 703  * Categories that behave like the java.lang.Character
 704  * boolean is<i>methodname</i> methods (except for the deprecated ones) are
 705  * available through the same <code>\p{</code><i>prop</i><code>}</code> syntax where
 706  * the specified property has the name <code>java<i>methodname</i></code></a>.
 707  *
 708  * <h3> Comparison to Perl 5 </h3>
 709  *
 710  * <p>The {@code Pattern} engine performs traditional NFA-based matching
 711  * with ordered alternation as occurs in Perl 5.
 712  *
 713  * <p> Perl constructs not supported by this class: </p>
 714  *
 715  * <ul>
 716  *    <li><p> The backreference constructs, <code>\g{</code><i>n</i><code>}</code> for
 717  *    the <i>n</i><sup>th</sup> <a href="#cg">capturing group</a> and


1202      *
1203      * <p> When there is a positive-width match at the beginning of the input
1204      * sequence then an empty leading substring is included at the beginning
1205      * of the resulting array. A zero-width match at the beginning, however,
1206      * never produces such an empty leading substring.
1207      *
1208      * <p> The {@code limit} parameter controls the number of times the
1209      * pattern is applied and therefore affects the length of the resulting
1210      * array.  If the limit <i>n</i> is greater than zero then the pattern
1211      * will be applied at most <i>n</i>&nbsp;-&nbsp;1 times, the array's
1212      * length will be no greater than <i>n</i>, and the array's last entry
1213      * will contain all input beyond the last matched delimiter.  If <i>n</i>
1214      * is non-positive then the pattern will be applied as many times as
1215      * possible and the array can have any length.  If <i>n</i> is zero then
1216      * the pattern will be applied as many times as possible, the array can
1217      * have any length, and trailing empty strings will be discarded.
1218      *
1219      * <p> The input {@code "boo:and:foo"}, for example, yields the following
1220      * results with these parameters:
1221      *
1222      * <blockquote><table>
1223      * <caption>Split examples showing regex, limit, and result</caption>
1224      * <thead>
1225      * <tr><th style="text-align:left"><i>Regex&nbsp;&nbsp;&nbsp;&nbsp;</i></th>
1226      *     <th style="text-align:left"><i>Limit&nbsp;&nbsp;&nbsp;&nbsp;</i></th>
1227      *     <th style="text-align:left"><i>Result&nbsp;&nbsp;&nbsp;&nbsp;</i></th></tr>


1228      * </thead>
1229      * <tbody>
1230      * <tr><td style="text-align:center">:</td>
1231      *     <td style="text-align:center">2</td>
1232      *     <td>{@code { "boo", "and:foo" }}</td></tr>
1233      * <tr><td style="text-align:center">:</td>
1234      *     <td style="text-align:center">5</td>
1235      *     <td>{@code { "boo", "and", "foo" }}</td></tr>
1236      * <tr><td style="text-align:center">:</td>
1237      *     <td style="text-align:center">-2</td>
1238      *     <td>{@code { "boo", "and", "foo" }}</td></tr>
1239      * <tr><td style="text-align:center">o</td>
1240      *     <td style="text-align:center">5</td>
1241      *     <td>{@code { "b", "", ":and:f", "", "" }}</td></tr>
1242      * <tr><td style="text-align:center">o</td>
1243      *     <td style="text-align:center">-2</td>
1244      *     <td>{@code { "b", "", ":and:f", "", "" }}</td></tr>
1245      * <tr><td style="text-align:center">o</td>
1246      *     <td style="text-align:center">0</td>
1247      *     <td>{@code { "b", "", ":and:f" }}</td></tr>
1248      * </tbody>
1249      * </table></blockquote>
1250      *
1251      * @param  input
1252      *         The character sequence to be split
1253      *
1254      * @param  limit
1255      *         The result threshold, as described above
1256      *
1257      * @return  The array of strings computed by splitting the input
1258      *          around matches of this pattern
1259      */
1260     public String[] split(CharSequence input, int limit) {
1261         int index = 0;
1262         boolean matchLimited = limit > 0;
1263         ArrayList<String> matchList = new ArrayList<>();
1264         Matcher m = matcher(input);
1265 
1266         // Add segments before each match found
1267         while(m.find()) {
1268             if (!matchLimited || matchList.size() < limit - 1) {
1269                 if (index == 0 && index == m.start() && m.start() == m.end()) {


1293         // Construct result
1294         int resultSize = matchList.size();
1295         if (limit == 0)
1296             while (resultSize > 0 && matchList.get(resultSize-1).equals(""))
1297                 resultSize--;
1298         String[] result = new String[resultSize];
1299         return matchList.subList(0, resultSize).toArray(result);
1300     }
1301 
1302     /**
1303      * Splits the given input sequence around matches of this pattern.
1304      *
1305      * <p> This method works as if by invoking the two-argument {@link
1306      * #split(java.lang.CharSequence, int) split} method with the given input
1307      * sequence and a limit argument of zero.  Trailing empty strings are
1308      * therefore not included in the resulting array. </p>
1309      *
1310      * <p> The input {@code "boo:and:foo"}, for example, yields the following
1311      * results with these expressions:
1312      *
1313      * <blockquote><table>
1314      * <caption style="display:none">Split examples showing regex and result</caption>
1315      * <thead>
1316      * <tr><th style="text-align:left"><i>Regex&nbsp;&nbsp;&nbsp;&nbsp;</i></th>
1317      *     <th style="text-align:left"><i>Result</i></th></tr>


1318      * </thead>
1319      * <tbody>
1320      * <tr><td style="text-align:center">:</td>
1321      *     <td>{@code { "boo", "and", "foo" }}</td></tr>
1322      * <tr><td style="text-align:center">o</td>
1323      *     <td>{@code { "b", "", ":and:f" }}</td></tr>
1324      * </tbody>
1325      * </table></blockquote>
1326      *
1327      *
1328      * @param  input
1329      *         The character sequence to be split
1330      *
1331      * @return  The array of strings computed by splitting the input
1332      *          around matches of this pattern
1333      */
1334     public String[] split(CharSequence input) {
1335         return split(input, 0); // limit 0: pattern applied as often as possible, trailing empty strings discarded
1336     }
1337 
1338     /**
1339      * Returns a literal pattern {@code String} for the specified
1340      * {@code String}.
1341      *
1342      * <p>This method produces a {@code String} that can be used to
1343      * create a {@code Pattern} that would match the string
1344      * {@code s} as if it were a literal pattern. Metacharacters
1345      * or escape sequences in the input sequence will be given no special




  64  * <p> A {@link #matches matches} method is defined by this class as a
  65  * convenience for when a regular expression is used just once.  This method
  66  * compiles an expression and matches an input sequence against it in a single
  67  * invocation.  The statement
  68  *
  69  * <blockquote><pre>
  70  * boolean b = Pattern.matches("a*b", "aaaaab");</pre></blockquote>
  71  *
  72  * is equivalent to the three statements above, though for repeated matches it
  73  * is less efficient since it does not allow the compiled pattern to be reused.
  74  *
  75  * <p> Instances of this class are immutable and are safe for use by multiple
  76  * concurrent threads.  Instances of the {@link Matcher} class are not safe for
  77  * such use.
  78  *
  79  *
  80  * <h3><a id="sum">Summary of regular-expression constructs</a></h3>
  81  *
  82  * <table class="borderless">
  83  * <caption style="display:none">Regular expression constructs, and what they match</caption>
  84  * <thead style="text-align:left">
  85  * <tr>
  86  * <th id="construct">Construct</th>
  87  * <th id="matches">Matches</th>
  88  * </tr>
  89  * </thead>
  90  * <tbody style="text-align:left">
  91  *
  92  * <tr><th colspan="2" style="padding-top:20px" id="characters">Characters</th></tr>

  93  *
  94  * <tr><th style="vertical-align:top; font-weight: normal" id="x"><i>x</i></th>
  95  *     <td headers="matches characters x">The character <i>x</i></td></tr>
  96  * <tr><th style="vertical-align:top; font-weight: normal" id="backslash">{@code \\}</th>
  97  *     <td headers="matches characters backslash">The backslash character</td></tr>
  98  * <tr><th style="vertical-align:top; font-weight: normal" id="octal_n">{@code \0}<i>n</i></th>
  99  *     <td headers="matches characters octal_n">The character with octal value {@code 0}<i>n</i>
 100  *         (0&nbsp;{@code <=}&nbsp;<i>n</i>&nbsp;{@code <=}&nbsp;7)</td></tr>
 101  * <tr><th style="vertical-align:top; font-weight: normal" id="octal_nn">{@code \0}<i>nn</i></th>
 102  *     <td headers="matches characters octal_nn">The character with octal value {@code 0}<i>nn</i>
 103  *         (0&nbsp;{@code <=}&nbsp;<i>n</i>&nbsp;{@code <=}&nbsp;7)</td></tr>
 104  * <tr><th style="vertical-align:top; font-weight: normal" id="octal_nnn">{@code \0}<i>mnn</i></th>
 105  *     <td headers="matches characters octal_nnn">The character with octal value {@code 0}<i>mnn</i>
 106  *         (0&nbsp;{@code <=}&nbsp;<i>m</i>&nbsp;{@code <=}&nbsp;3,
 107  *         0&nbsp;{@code <=}&nbsp;<i>n</i>&nbsp;{@code <=}&nbsp;7)</td></tr>
 108  * <tr><th style="vertical-align:top; font-weight: normal" id="hex_hh">{@code \x}<i>hh</i></th>
 109  *     <td headers="matches characters hex_hh">The character with hexadecimal value {@code 0x}<i>hh</i></td></tr>
 110  * <tr><th style="vertical-align:top; font-weight: normal" id="hex_hhhh"><code>\u</code><i>hhhh</i></th>
 111  *     <td headers="matches characters hex_hhhh">The character with hexadecimal&nbsp;value&nbsp;{@code 0x}<i>hhhh</i></td></tr>
 112  * <tr><th style="vertical-align:top; font-weight: normal" id="hex_h_h"><code>\x</code><i>{h...h}</i></th>
 113  *     <td headers="matches characters hex_h_h">The character with hexadecimal value {@code 0x}<i>h...h</i>
 114  *         ({@link java.lang.Character#MIN_CODE_POINT Character.MIN_CODE_POINT}
 115  *         &nbsp;&lt;=&nbsp;{@code 0x}<i>h...h</i>&nbsp;&lt;=&nbsp;
 116  *          {@link java.lang.Character#MAX_CODE_POINT Character.MAX_CODE_POINT})</td></tr>
 117  * <tr><th style="vertical-align:top; font-weight: normal" id="unicode_name"><code>\N{</code><i>name</i><code>}</code></th>
 118  *     <td headers="matches characters unicode_name">The character with Unicode character name <i>'name'</i></td></tr>
 119  * <tr><th style="vertical-align:top; font-weight:normal" id="tab">{@code \t}</th>
 120  *     <td headers="matches characters tab">The tab character (<code>'\u0009'</code>)</td></tr>
 121  * <tr><th style="vertical-align:top; font-weight:normal" id="newline">{@code \n}</th>
 122  *     <td headers="matches characters newline">The newline (line feed) character (<code>'\u000A'</code>)</td></tr>
 123  * <tr><th style="vertical-align:top; font-weight:normal" id="return">{@code \r}</th>
 124  *     <td headers="matches characters return">The carriage-return character (<code>'\u000D'</code>)</td></tr>
 125  * <tr><th style="vertical-align:top; font-weight:normal" id="form_feed">{@code \f}</th>
 126  *     <td headers="matches characters form_feed">The form-feed character (<code>'\u000C'</code>)</td></tr>
 127  * <tr><th style="vertical-align:top; font-weight:normal" id="bell">{@code \a}</th>
 128  *     <td headers="matches characters bell">The alert (bell) character (<code>'\u0007'</code>)</td></tr>
 129  * <tr><th style="vertical-align:top; font-weight:normal" id="escape">{@code \e}</th>
 130  *     <td headers="matches characters escape">The escape character (<code>'\u001B'</code>)</td></tr>
 131  * <tr><th style="vertical-align:top; font-weight:normal" id="ctrl_x">{@code \c}<i>x</i></th>
 132  *     <td headers="matches characters ctrl_x">The control character corresponding to <i>x</i></td></tr>
 133  *
 134  *  <tr><th colspan="2" style="padding-top:20px" id="classes">Character classes</th></tr>
 135  *
 136  * <tr><th style="vertical-align:top; font-weight:normal" id="simple">{@code [abc]}</th>
 137  *     <td headers="matches classes simple">{@code a}, {@code b}, or {@code c} (simple class)</td></tr>
 138  * <tr><th style="vertical-align:top; font-weight:normal" id="negation">{@code [^abc]}</th>
 139  *     <td headers="matches classes negation">Any character except {@code a}, {@code b}, or {@code c} (negation)</td></tr>
 140  * <tr><th style="vertical-align:top; font-weight:normal" id="range">{@code [a-zA-Z]}</th>
 141  *     <td headers="matches classes range">{@code a} through {@code z}

 142  *         or {@code A} through {@code Z}, inclusive (range)</td></tr>
 143  * <tr><th style="vertical-align:top; font-weight:normal" id="union">{@code [a-d[m-p]]}</th>
 144  *     <td headers="matches classes union">{@code a} through {@code d},
 145  *      or {@code m} through {@code p}: {@code [a-dm-p]} (union)</td></tr>
 146  * <tr><th style="vertical-align:top; font-weight:normal" id="intersection">{@code [a-z&&[def]]}</th>
 147  *     <td headers="matches classes intersection">{@code d}, {@code e}, or {@code f} (intersection)</td></tr>
 148  * <tr><th style="vertical-align:top; font-weight:normal" id="subtraction1">{@code [a-z&&[^bc]]}</th>
 149  *     <td headers="matches classes subtraction1">{@code a} through {@code z},
 150  *         except for {@code b} and {@code c}: {@code [ad-z]} (subtraction)</td></tr>
 151  * <tr><th style="vertical-align:top; font-weight:normal" id="subtraction2">{@code [a-z&&[^m-p]]}</th>
 152  *     <td headers="matches classes subtraction2">{@code a} through {@code z},
 153  *          and not {@code m} through {@code p}: {@code [a-lq-z]} (subtraction)</td></tr>

 154  *
 155  * <tr><th colspan="2" style="padding-top:20px" id="predef">Predefined character classes</th></tr>
 156  *
 157  * <tr><th style="vertical-align:top; font-weight:normal" id="any">{@code .}</th>
 158  *     <td headers="matches predef any">Any character (may or may not match <a href="#lt">line terminators</a>)</td></tr>
 159  * <tr><th style="vertical-align:top; font-weight:normal" id="digit">{@code \d}</th>
 160  *     <td headers="matches predef digit">A digit: {@code [0-9]}</td></tr>
 161  * <tr><th style="vertical-align:top; font-weight:normal" id="non_digit">{@code \D}</th>
 162  *     <td headers="matches predef non_digit">A non-digit: {@code [^0-9]}</td></tr>
 163  * <tr><th style="vertical-align:top; font-weight:normal" id="horiz_white">{@code \h}</th>
 164  *     <td headers="matches predef horiz_white">A horizontal whitespace character:
 165  *     <code>[ \t\xA0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]</code></td></tr>
 166  * <tr><th style="vertical-align:top; font-weight:normal" id="non_horiz_white">{@code \H}</th>
 167  *     <td headers="matches predef non_horiz_white">A non-horizontal whitespace character: {@code [^\h]}</td></tr>
 168  * <tr><th style="vertical-align:top; font-weight:normal" id="white">{@code \s}</th>
 169  *     <td headers="matches predef white">A whitespace character: {@code [ \t\n\x0B\f\r]}</td></tr>
 170  * <tr><th style="vertical-align:top; font-weight:normal" id="non_white">{@code \S}</th>
 171  *     <td headers="matches predef non_white">A non-whitespace character: {@code [^\s]}</td></tr>
 172  * <tr><th style="vertical-align:top; font-weight:normal" id="vert_white">{@code \v}</th>
 173  *     <td headers="matches predef vert_white">A vertical whitespace character: <code>[\n\x0B\f\r\x85\u2028\u2029]</code>
 174  *     </td></tr>
 175  * <tr><th style="vertical-align:top; font-weight:normal" id="non_vert_white">{@code \V}</th>
 176  *     <td headers="matches predef non_vert_white">A non-vertical whitespace character: {@code [^\v]}</td></tr>
 177  * <tr><th style="vertical-align:top; font-weight:normal" id="word">{@code \w}</th>
 178  *     <td headers="matches predef word">A word character: {@code [a-zA-Z_0-9]}</td></tr>
 179  * <tr><th style="vertical-align:top; font-weight:normal" id="non_word">{@code \W}</th>
 180  *     <td headers="matches predef non_word">A non-word character: {@code [^\w]}</td></tr>
 181  *
 182  * <tr><th colspan="2" style="padding-top:20px" id="posix"><b>POSIX character classes (US-ASCII only)</b></th></tr>
 183  *
 184  * <tr><th style="vertical-align:top; font-weight:normal" id="Lower">{@code \p{Lower}}</th>
 185  *     <td headers="matches posix Lower">A lower-case alphabetic character: {@code [a-z]}</td></tr>
 186  * <tr><th style="vertical-align:top; font-weight:normal" id="Upper">{@code \p{Upper}}</th>
 187  *     <td headers="matches posix Upper">An upper-case alphabetic character: {@code [A-Z]}</td></tr>
 188  * <tr><th style="vertical-align:top; font-weight:normal" id="ASCII">{@code \p{ASCII}}</th>
 189  *     <td headers="matches posix ASCII">All ASCII: {@code [\x00-\x7F]}</td></tr>
 190  * <tr><th style="vertical-align:top; font-weight:normal" id="Alpha">{@code \p{Alpha}}</th>
 191  *     <td headers="matches posix Alpha">An alphabetic character: {@code [\p{Lower}\p{Upper}]}</td></tr>
 192  * <tr><th style="vertical-align:top; font-weight:normal" id="Digit">{@code \p{Digit}}</th>
 193  *     <td headers="matches posix Digit">A decimal digit: {@code [0-9]}</td></tr>
 194  * <tr><th style="vertical-align:top; font-weight:normal" id="Alnum">{@code \p{Alnum}}</th>
 195  *     <td headers="matches posix Alnum">An alphanumeric character: {@code [\p{Alpha}\p{Digit}]}</td></tr>
 196  * <tr><th style="vertical-align:top; font-weight:normal" id="Punct">{@code \p{Punct}}</th>
 197  *     <td headers="matches posix Punct">Punctuation: One of {@code !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~}</td></tr>
 198  *     <!-- {@code [\!"#\$%&'\(\)\*\+,\-\./:;\<=\>\?@\[\\\]\^_`\{\|\}~]}
 199  *          {@code [\x21-\x2F\x3A-\x40\x5B-\x60\x7B-\x7E]} -->
 200  * <tr><th style="vertical-align:top; font-weight:normal" id="Graph">{@code \p{Graph}}</th>
 201  *     <td headers="matches posix Graph">A visible character: {@code [\p{Alnum}\p{Punct}]}</td></tr>
 202  * <tr><th style="vertical-align:top; font-weight:normal" id="Print">{@code \p{Print}}</th>
 203  *     <td headers="matches posix Print">A printable character: {@code [\p{Graph}\x20]}</td></tr>
 204  * <tr><th style="vertical-align:top; font-weight:normal" id="Blank">{@code \p{Blank}}</th>
 205  *     <td headers="matches posix Blank">A space or a tab: {@code [ \t]}</td></tr>
 206  * <tr><th style="vertical-align:top; font-weight:normal" id="Cntrl">{@code \p{Cntrl}}</th>
 207  *     <td headers="matches posix Cntrl">A control character: {@code [\x00-\x1F\x7F]}</td></tr>
 208  * <tr><th style="vertical-align:top; font-weight:normal" id="XDigit">{@code \p{XDigit}}</th>
 209  *     <td headers="matches posix XDigit">A hexadecimal digit: {@code [0-9a-fA-F]}</td></tr>
 210  * <tr><th style="vertical-align:top; font-weight:normal" id="Space">{@code \p{Space}}</th>
 211  *     <td headers="matches posix Space">A whitespace character: {@code [ \t\n\x0B\f\r]}</td></tr>
 212  *
 213  * <tr><th colspan="2" style="padding-top:20px" id="java">java.lang.Character classes (simple <a href="#jcc">java character type</a>)</th></tr>
 214  *
 215  * <tr><th style="vertical-align:top; font-weight:normal" id="javaLowerCase">{@code \p{javaLowerCase}}</th>
 216  *     <td headers="matches java javaLowerCase">Equivalent to java.lang.Character.isLowerCase()</td></tr>
 217  * <tr><th style="vertical-align:top; font-weight:normal" id="javaUpperCase">{@code \p{javaUpperCase}}</th>
 218  *     <td headers="matches java javaUpperCase">Equivalent to java.lang.Character.isUpperCase()</td></tr>
 219  * <tr><th style="vertical-align:top; font-weight:normal" id="javaWhitespace">{@code \p{javaWhitespace}}</th>
 220  *     <td headers="matches java javaWhitespace">Equivalent to java.lang.Character.isWhitespace()</td></tr>
 221  * <tr><th style="vertical-align:top; font-weight:normal" id="javaMirrored">{@code \p{javaMirrored}}</th>
 222  *     <td headers="matches java javaMirrored">Equivalent to java.lang.Character.isMirrored()</td></tr>
 223  *
 224  * <tr><th colspan="2" style="padding-top:20px"  id="unicode">Classes for Unicode scripts, blocks, categories and binary properties</th></tr>
 225  *
 226  * <tr><th style="vertical-align:top; font-weight:normal" id="IsLatin">{@code \p{IsLatin}}</th>
 227  *     <td headers="matches unicode IsLatin">A Latin&nbsp;script character (<a href="#usc">script</a>)</td></tr>
 228  * <tr><th style="vertical-align:top; font-weight:normal" id="InGreek">{@code \p{InGreek}}</th>
 229  *     <td headers="matches unicode InGreek">A character in the Greek&nbsp;block (<a href="#ubc">block</a>)</td></tr>
 230  * <tr><th style="vertical-align:top; font-weight:normal" id="Lu">{@code \p{Lu}}</th>
 231  *     <td headers="matches unicode Lu">An uppercase letter (<a href="#ucc">category</a>)</td></tr>
 232  * <tr><th style="vertical-align:top; font-weight:normal" id="IsAlphabetic">{@code \p{IsAlphabetic}}</th>
 233  *     <td headers="matches unicode IsAlphabetic">An alphabetic character (<a href="#ubpc">binary property</a>)</td></tr>
 234  * <tr><th style="vertical-align:top; font-weight:normal" id="Sc">{@code \p{Sc}}</th>
 235  *     <td headers="matches unicode Sc">A currency symbol</td></tr>
 236  * <tr><th style="vertical-align:top; font-weight:normal" id="not_InGreek">{@code \P{InGreek}}</th>
 237  *     <td headers="matches unicode not_InGreek">Any character except one in the Greek block (negation)</td></tr>
 238  * <tr><th style="vertical-align:top; font-weight:normal" id="not_uppercase">{@code [\p{L}&&[^\p{Lu}]]}</th>
 239  *     <td headers="matches unicode not_uppercase">Any letter except an uppercase letter (subtraction)</td></tr>
 240  *
 241  * <tr><th colspan="2" style="padding-top:20px" id="bounds">Boundary matchers</th></tr>
 242  *
 243  * <tr><th style="vertical-align:top; font-weight:normal" id="begin_line">{@code ^}</th>
 244  *     <td headers="matches bounds begin_line">The beginning of a line</td></tr>
 245  * <tr><th style="vertical-align:top; font-weight:normal" id="end_line">{@code $}</th>
 246  *     <td headers="matches bounds end_line">The end of a line</td></tr>
 247  * <tr><th style="vertical-align:top; font-weight:normal" id="word_boundary">{@code \b}</th>
 248  *     <td headers="matches bounds word_boundary">A word boundary</td></tr>
 249  * <tr><th style="vertical-align:top; font-weight:normal" id="grapheme_cluster_boundary">{@code \b{g}}</th>
 250  *     <td headers="matches bounds grapheme_cluster_boundary">A Unicode extended grapheme cluster boundary</td></tr>
 251  * <tr><th style="vertical-align:top; font-weight:normal" id="non_word_boundary">{@code \B}</th>
 252  *     <td headers="matches bounds non_word_boundary">A non-word boundary</td></tr>
 253  * <tr><th style="vertical-align:top; font-weight:normal" id="begin_input">{@code \A}</th>
 254  *     <td headers="matches bounds begin_input">The beginning of the input</td></tr>
 255  * <tr><th style="vertical-align:top; font-weight:normal" id="end_prev_match">{@code \G}</th>
 256  *     <td headers="matches bounds end_prev_match">The end of the previous match</td></tr>
 257  * <tr><th style="vertical-align:top; font-weight:normal" id="end_input_except_term">{@code \Z}</th>
 258  *     <td headers="matches bounds end_input_except_term">The end of the input but for the final


 259  *         <a href="#lt">terminator</a>, if&nbsp;any</td></tr>
 260  * <tr><th style="vertical-align:top; font-weight:normal" id="end_input">{@code \z}</th>
 261  *     <td headers="matches bounds end_input">The end of the input</td></tr>
 262  *
 263  * <tr><th colspan="2" style="padding-top:20px" id="linebreak">Linebreak matcher</th></tr>
 264  *
 265  * <tr><th style="vertical-align:top; font-weight:normal" id="any_unicode_linebreak">{@code \R}</th>
 266  *     <td headers="matches linebreak any_unicode_linebreak">Any Unicode linebreak sequence, is equivalent to
 267  *     <code>\u000D\u000A|[\u000A\u000B\u000C\u000D\u0085\u2028\u2029]
 268  *     </code></td></tr>
 269  *
 270  * <tr><th colspan="2" style="padding-top:20px" id="grapheme">Unicode Extended Grapheme matcher</th></tr>





























































 271  *
 272  * <tr><th style="vertical-align:top; font-weight:normal" id="grapheme_any">{@code \X}</th>
 273  *     <td headers="matches grapheme grapheme_any">Any Unicode extended grapheme cluster</td></tr>
 274  *
 275  * <tr><th colspan="2" style="padding-top:20px" id="greedy">Greedy quantifiers</th></tr>


 276  *
 277  * <tr><th style="vertical-align:top; font-weight:normal" id="greedy_once_or_not"><i>X</i>{@code ?}</th>
 278  *     <td headers="matches greedy greedy_once_or_not"><i>X</i>, once or not at all</td></tr>
 279  * <tr><th style="vertical-align:top; font-weight:normal" id="greedy_zero_or_more"><i>X</i>{@code *}</th>
 280  *     <td headers="matches greedy greedy_zero_or_more"><i>X</i>, zero or more times</td></tr>
 281  * <tr><th style="vertical-align:top; font-weight:normal" id="greedy_one_or_more"><i>X</i>{@code +}</th>
 282  *     <td headers="matches greedy greedy_one_or_more"><i>X</i>, one or more times</td></tr>
 283  * <tr><th style="vertical-align:top; font-weight:normal" id="greedy_exactly"><i>X</i><code>{</code><i>n</i><code>}</code></th>
 284  *     <td headers="matches greedy greedy_exactly"><i>X</i>, exactly <i>n</i> times</td></tr>
 285  * <tr><th style="vertical-align:top; font-weight:normal" id="greedy_at_least"><i>X</i><code>{</code><i>n</i>{@code ,}}</th>
 286  *     <td headers="matches greedy greedy_at_least"><i>X</i>, at least <i>n</i> times</td></tr>
 287  * <tr><th style="vertical-align:top; font-weight:normal" id="greedy_at_least_up_to"><i>X</i><code>{</code><i>n</i>{@code ,}<i>m</i><code>}</code></th>
 288  *     <td headers="matches greedy greedy_at_least_up_to"><i>X</i>, at least <i>n</i> but not more than <i>m</i> times</td></tr>
 289  *
 290  * <tr><th colspan="2" style="padding-top:20px" id="reluc">Reluctant quantifiers</th></tr>
 291  *
 292  * <tr><th style="vertical-align:top; font-weight:normal" id="reluc_once_or_not"><i>X</i>{@code ??}</th>
 293  *     <td headers="matches reluc reluc_once_or_not"><i>X</i>, once or not at all</td></tr>
 294  * <tr><th style="vertical-align:top; font-weight:normal" id="reluc_zero_or_more"><i>X</i>{@code *?}</th>
 295  *     <td headers="matches reluc reluc_zero_or_more"><i>X</i>, zero or more times</td></tr>
 296  * <tr><th style="vertical-align:top; font-weight:normal" id="reluc_one_or_more"><i>X</i>{@code +?}</th>
 297  *     <td headers="matches reluc reluc_one_or_more"><i>X</i>, one or more times</td></tr>
 298  * <tr><th style="vertical-align:top; font-weight:normal" id="reluc_exactly"><i>X</i><code>{</code><i>n</i><code>}?</code></th>
 299  *     <td headers="matches reluc reluc_exactly"><i>X</i>, exactly <i>n</i> times</td></tr>
 300  * <tr><th style="vertical-align:top; font-weight:normal" id="reluc_at_least"><i>X</i><code>{</code><i>n</i><code>,}?</code></th>
 301  *     <td headers="matches reluc reluc_at_least"><i>X</i>, at least <i>n</i> times</td></tr>
 302  * <tr><th style="vertical-align:top; font-weight:normal" id="reluc_at_least_up_to"><i>X</i><code>{</code><i>n</i>{@code ,}<i>m</i><code>}?</code></th>
 303  *     <td headers="matches reluc reluc_at_least_up_to"><i>X</i>, at least <i>n</i> but not more than <i>m</i> times</td></tr>
 304  *
 305  * <tr><th colspan="2" style="padding-top:20px" id="poss">Possessive quantifiers</th></tr>
 306  *
 307  * <tr><th style="vertical-align:top; font-weight:normal" id="poss_once_or_not"><i>X</i>{@code ?+}</th>
 308  *     <td headers="matches poss poss_once_or_not"><i>X</i>, once or not at all</td></tr>
 309  * <tr><th style="vertical-align:top; font-weight:normal" id="poss_zero_or_more"><i>X</i>{@code *+}</th>
 310  *     <td headers="matches poss poss_zero_or_more"><i>X</i>, zero or more times</td></tr>
 311  * <tr><th style="vertical-align:top; font-weight:normal" id="poss_one_or_more"><i>X</i>{@code ++}</th>
 312  *     <td headers="matches poss poss_one_or_more"><i>X</i>, one or more times</td></tr>
 313  * <tr><th style="vertical-align:top; font-weight:normal" id="poss_exactly"><i>X</i><code>{</code><i>n</i><code>}+</code></th>
 314  *     <td headers="matches poss poss_exactly"><i>X</i>, exactly <i>n</i> times</td></tr>
 315  * <tr><th style="vertical-align:top; font-weight:normal" id="poss_at_least"><i>X</i><code>{</code><i>n</i><code>,}+</code></th>
 316  *     <td headers="matches poss poss_at_least"><i>X</i>, at least <i>n</i> times</td></tr>
 317  * <tr><th style="vertical-align:top; font-weight:normal" id="poss_at_least_up_to"><i>X</i><code>{</code><i>n</i>{@code ,}<i>m</i><code>}+</code></th>
 318  *     <td headers="matches poss poss_at_least_up_to"><i>X</i>, at least <i>n</i> but not more than <i>m</i> times</td></tr>
 319  *
 320  * <tr><th colspan="2" style="padding-top:20px" id="logical">Logical operators</th></tr>
 321  *
 322  * <tr><th style="vertical-align:top; font-weight:normal" id="concat"><i>XY</i></th>
 323  *     <td headers="matches logical concat"><i>X</i> followed by <i>Y</i></td></tr>
 324  * <tr><th style="vertical-align:top; font-weight:normal" id="alternate"><i>X</i>{@code |}<i>Y</i></th>
 325  *     <td headers="matches logical alternate">Either <i>X</i> or <i>Y</i></td></tr>
 326  * <tr><th style="vertical-align:top; font-weight:normal" id="group">{@code (}<i>X</i>{@code )}</th>
 327  *     <td headers="matches logical group"><i>X</i>, as a <a href="#cg">capturing group</a></td></tr>
 328  *
 329  * <tr><th colspan="2" style="padding-top:20px" id="backref">Back references</th></tr>
 330  *
 331  * <tr><th style="vertical-align:top; font-weight:normal" id="back_nth">{@code \}<i>n</i></th>
 332  *     <td headers="matches backref back_nth">Whatever the <i>n</i><sup>th</sup>
 333  *     <a href="#cg">capturing group</a> matched</td></tr>
 334  * <tr><th style="vertical-align:top; font-weight:normal" id="back_named">{@code \k<}<i>name</i>{@code >}</th>
 335  *     <td headers="matches backref back_named">Whatever the
 336  *     <a href="#groupname">named-capturing group</a> "name" matched</td></tr>
 337  *
 338  * <tr><th colspan="2" style="padding-top:20px" id="quote">Quotation</th></tr>

 339  *
 340  * <tr><th style="vertical-align:top; font-weight:normal" id="quote_follow">{@code \}</th>
 341  *     <td headers="matches quote quote_follow">Nothing, but quotes the following character</td></tr>
 342  * <tr><th style="vertical-align:top; font-weight:normal" id="quote_begin">{@code \Q}</th>
 343  *     <td headers="matches quote quote_begin">Nothing, but quotes all characters until {@code \E}</td></tr>
 344  * <tr><th style="vertical-align:top; font-weight:normal" id="quote_end">{@code \E}</th>
 345  *     <td headers="matches quote quote_end">Nothing, but ends quoting started by {@code \Q}</td></tr>
 346  *     <!-- Metachars: !$()*+.<>?[\]^{|} -->
 347  *
 348  * <tr><th colspan="2" style="padding-top:20px" id="special">Special constructs (named-capturing and non-capturing)</th></tr>

 349  *
 350  * <tr><th style="vertical-align:top; font-weight:normal" id="named_group"><code>(?&lt;<a href="#groupname">name</a>&gt;</code><i>X</i>{@code )}</th>
 351  *     <td headers="matches special named_group"><i>X</i>, as a named-capturing group</td></tr>
 352  * <tr><th style="vertical-align:top; font-weight:normal" id="non_capture_group">{@code (?:}<i>X</i>{@code )}</th>
 353  *     <td headers="matches special non_capture_group"><i>X</i>, as a non-capturing group</td></tr>
 354  * <tr><th style="vertical-align:top; font-weight:normal" id="flags"><code>(?idmsuxU-idmsuxU)&nbsp;</code></th>
 355  *     <td headers="matches special flags">Nothing, but turns match flags <a href="#CASE_INSENSITIVE">i</a>
 356  * <a href="#UNIX_LINES">d</a> <a href="#MULTILINE">m</a> <a href="#DOTALL">s</a>
 357  * <a href="#UNICODE_CASE">u</a> <a href="#COMMENTS">x</a> <a href="#UNICODE_CHARACTER_CLASS">U</a>
 358  * on - off</td></tr>
 359  * <tr><th style="vertical-align:top; font-weight:normal" id="non_capture_group_flags"><code>(?idmsux-idmsux:</code><i>X</i>{@code )}&nbsp;&nbsp;</th>
 360  *     <td headers="matches special non_capture_group_flags"><i>X</i>, as a <a href="#cg">non-capturing group</a> with the
 361  *         given flags <a href="#CASE_INSENSITIVE">i</a> <a href="#UNIX_LINES">d</a>
 362  * <a href="#MULTILINE">m</a> <a href="#DOTALL">s</a> <a href="#UNICODE_CASE">u</a>
 363  * <a href="#COMMENTS">x</a> on - off</td></tr>
 364  * <tr><th style="vertical-align:top; font-weight:normal" id="pos_lookahead">{@code (?=}<i>X</i>{@code )}</th>
 365  *     <td headers="matches special pos_lookahead"><i>X</i>, via zero-width positive lookahead</td></tr>
 366  * <tr><th style="vertical-align:top; font-weight:normal" id="neg_lookahead">{@code (?!}<i>X</i>{@code )}</th>
 367  *     <td headers="matches special neg_lookahead"><i>X</i>, via zero-width negative lookahead</td></tr>
 368  * <tr><th style="vertical-align:top; font-weight:normal" id="pos_lookbehind">{@code (?<=}<i>X</i>{@code )}</th>
 369  *     <td headers="matches special pos_lookbehind"><i>X</i>, via zero-width positive lookbehind</td></tr>
 370  * <tr><th style="vertical-align:top; font-weight:normal" id="neg_lookbehind">{@code (?<!}<i>X</i>{@code )}</th>
 371  *     <td headers="matches special neg_lookbehind"><i>X</i>, via zero-width negative lookbehind</td></tr>
 372  * <tr><th style="vertical-align:top; font-weight:normal" id="indep_non_capture_group">{@code (?>}<i>X</i>{@code )}</th>
 373  *     <td headers="matches special indep_non_capture_group"><i>X</i>, as an independent, non-capturing group</td></tr>
 374  *
 375  * </tbody>
 376  * </table>
 377  *
 378  * <hr>
 379  *
 380  *
 381  * <h3><a id="bs">Backslashes, escapes, and quoting</a></h3>
 382  *
 383  * <p> The backslash character ({@code '\'}) serves to introduce escaped
 384  * constructs, as defined in the table above, as well as to quote characters
 385  * that otherwise would be interpreted as unescaped constructs.  Thus the
 386  * expression {@code \\} matches a single backslash and <code>\{</code> matches a
 387  * left brace.
 388  *
 389  * <p> It is an error to use a backslash prior to any alphabetic character that
 390  * does not denote an escaped construct; these are reserved for future
 391  * extensions to the regular-expression language.  A backslash may be used
 392  * prior to a non-alphabetic character regardless of whether that character is
 393  * part of an unescaped construct.


 402  * <code>"\b"</code>, for example, matches a single backspace character when
 403  * interpreted as a regular expression, while {@code "\\b"} matches a
 404  * word boundary.  The string literal {@code "\(hello\)"} is illegal
 405  * and leads to a compile-time error; in order to match the string
 406  * {@code (hello)} the string literal {@code "\\(hello\\)"}
 407  * must be used.
 408  *
 409  * <h3><a id="cc">Character Classes</a></h3>
 410  *
 411  *    <p> Character classes may appear within other character classes, and
 412  *    may be composed by the union operator (implicit) and the intersection
 413  *    operator ({@code &&}).
 414  *    The union operator denotes a class that contains every character that is
 415  *    in at least one of its operand classes.  The intersection operator
 416  *    denotes a class that contains every character that is in both of its
 417  *    operand classes.
 418  *
 419  *    <p> The precedence of character-class operators is as follows, from
 420  *    highest to lowest:
 421  *
 422  *    <table class="striped" style="margin-left: 2em;">
 423  *      <caption style="display:none">Precedence of character class operators.</caption>
 424  *      <thead>
 425  *      <tr><th scope="col">Precedence</th><th scope="col">Name</th><th scope="col">Example</th></tr>
 426  *      </thead>
 427  *      <tbody>
 428  *      <tr><th scope="row">1</th>
 429  *        <td>Literal escape&nbsp;&nbsp;&nbsp;&nbsp;</td>
 430  *        <td>{@code \x}</td></tr>
 431  *     <tr><th scope="row">2</th>
 432  *        <td>Grouping</td>
 433  *        <td>{@code [...]}</td></tr>
 434  *     <tr><th scope="row">3</th>
 435  *        <td>Range</td>
 436  *        <td>{@code a-z}</td></tr>
 437  *      <tr><th scope="row">4</th>
 438  *        <td>Union</td>
 439  *        <td>{@code [a-e][i-u]}</td></tr>
 440  *      <tr><th scope="row">5</th>
 441  *        <td>Intersection</td>
 442  *        <td>{@code [a-z&&[aeiou]]}</td></tr>
 443  *      </tbody>
 444  *    </table>
 445  *
 446  *    <p> Note that a different set of metacharacters is in effect inside
 447  *    a character class than outside a character class. For instance, the
 448  *    regular expression {@code .} loses its special meaning inside a
 449  *    character class, while the expression {@code -} becomes a
 450  *    range-forming metacharacter.
 451  *
 452  * <h3><a id="lt">Line terminators</a></h3>
 453  *
 454  * <p> A <i>line terminator</i> is a one- or two-character sequence that marks
 455  * the end of a line of the input character sequence.  The following are
 456  * recognized as line terminators:
 457  *
 458  * <ul>
 459  *
 460  *   <li> A newline (line feed) character ({@code '\n'}),
 461  *
 462  *   <li> A carriage-return character followed immediately by a newline
 463  *   character ({@code "\r\n"}),
 464  *
 465  *   <li> A standalone carriage-return character ({@code '\r'}),
 466  *
 467  *   <li> A next-line character (<code>'\u0085'</code>),
 468  *
 469  *   <li> A line-separator character (<code>'\u2028'</code>), or
 470  *
 471  *   <li> A paragraph-separator character (<code>'\u2029'</code>).
 472  *
 473  * </ul>
 474  * <p>If {@link #UNIX_LINES} mode is activated, then the only line terminators
 475  * recognized are newline characters.
 476  *
 477  * <p> The regular expression {@code .} matches any character except a line
 478  * terminator unless the {@link #DOTALL} flag is specified.
 479  *
 480  * <p> By default, the regular expressions {@code ^} and {@code $} ignore
 481  * line terminators and only match at the beginning and the end, respectively,
 482  * of the entire input sequence. If {@link #MULTILINE} mode is activated then
 483  * {@code ^} matches at the beginning of input and after any line terminator
 484  * except at the end of input. When in {@link #MULTILINE} mode {@code $}
 485  * matches just before a line terminator or the end of the input sequence.
 486  *
 487  * <h3><a id="cg">Groups and capturing</a></h3>
 488  *
 489  * <h4><a id="gnumber">Group number</a></h4>
 490  * <p> Capturing groups are numbered by counting their opening parentheses from
 491  * left to right.  In the expression {@code ((A)(B(C)))}, for example, there
 492  * are four such groups: </p>
 493  *
 494  * <ol style="margin-left:2em;">
 495  *   <li> {@code ((A)(B(C)))}
 496  *   <li> {@code (A)}
 497  *   <li> {@code (B(C))}
 498  *   <li> {@code (C)}
 499  * </ol>







 500  *
 501  * <p> Group zero always stands for the entire expression.
 502  *
 503  * <p> Capturing groups are so named because, during a match, each subsequence
 504  * of the input sequence that matches such a group is saved.  The captured
 505  * subsequence may be used later in the expression, via a back reference, and
 506  * may also be retrieved from the matcher once the match operation is complete.
 507  *
 508  * <h4><a id="groupname">Group name</a></h4>
 509  * <p>A capturing group can also be assigned a "name", making it a
 510  * <i>named-capturing group</i> that can be back-referenced later by that "name".
 511  * Group names are composed of the following characters. The first character must be a letter.
 512  *
 513  * <ul>
 514  *   <li> The uppercase letters {@code 'A'} through {@code 'Z'}
 515  *        (<code>'\u0041'</code>&nbsp;through&nbsp;<code>'\u005a'</code>),
 516  *   <li> The lowercase letters {@code 'a'} through {@code 'z'}
 517  *        (<code>'\u0061'</code>&nbsp;through&nbsp;<code>'\u007a'</code>),
 518  *   <li> The digits {@code '0'} through {@code '9'}
 519  *        (<code>'\u0030'</code>&nbsp;through&nbsp;<code>'\u0039'</code>),


 615  *   <li> Ideographic
 616  *   <li> Letter
 617  *   <li> Lowercase
 618  *   <li> Uppercase
 619  *   <li> Titlecase
 620  *   <li> Punctuation
 621  *   <li> Control
 622  *   <li> White_Space
 623  *   <li> Digit
 624  *   <li> Hex_Digit
 625  *   <li> Join_Control
 626  *   <li> Noncharacter_Code_Point
 627  *   <li> Assigned
 628  * </ul>
 629  * <p>
 630  * The following <b>Predefined Character classes</b> and <b>POSIX character classes</b>
 631  * are in conformance with the recommendation of <i>Annex C: Compatibility Properties</i>
 632  * of <a href="http://www.unicode.org/reports/tr18/"><i>Unicode Regular
 633  * Expressions</i></a>, when the {@link #UNICODE_CHARACTER_CLASS} flag is specified.
 634  *
 635  * <table class="striped">
 636  * <caption style="display:none">predefined and posix character classes in Unicode mode</caption>
 637  * <thead>
 638  * <tr>
 639  * <th scope="col" id="predef_classes">Classes</th>
 640  * <th scope="col" id="predef_matches">Matches</th>
 641  * </tr>
 642  * </thead>
 643  * <tbody>
 644  * <tr><th scope="row">{@code \p{Lower}}</th>
 645  *     <td>A lowercase character: {@code \p{IsLowercase}}</td></tr>
 646  * <tr><th scope="row">{@code \p{Upper}}</th>
 647  *     <td>An uppercase character: {@code \p{IsUppercase}}</td></tr>
 648  * <tr><th scope="row">{@code \p{ASCII}}</th>
 649  *     <td>All ASCII: {@code [\x00-\x7F]}</td></tr>
 650  * <tr><th scope="row">{@code \p{Alpha}}</th>
 651  *     <td>An alphabetic character: {@code \p{IsAlphabetic}}</td></tr>
 652  * <tr><th scope="row">{@code \p{Digit}}</th>
 653  *     <td>A decimal digit character: {@code \p{IsDigit}}</td></tr>
 654  * <tr><th scope="row">{@code \p{Alnum}}</th>
 655  *     <td>An alphanumeric character: {@code [\p{IsAlphabetic}\p{IsDigit}]}</td></tr>
 656  * <tr><th scope="row">{@code \p{Punct}}</th>
 657  *     <td>A punctuation character: {@code \p{IsPunctuation}}</td></tr>
 658  * <tr><th scope="row">{@code \p{Graph}}</th>
 659  *     <td>A visible character: {@code [^\p{IsWhite_Space}\p{gc=Cc}\p{gc=Cs}\p{gc=Cn}]}</td></tr>
 660  * <tr><th scope="row">{@code \p{Print}}</th>
 661  *     <td>A printable character: {@code [\p{Graph}\p{Blank}&&[^\p{Cntrl}]]}</td></tr>
 662  * <tr><th scope="row">{@code \p{Blank}}</th>
 663  *     <td>A space or a tab: {@code [\p{IsWhite_Space}&&[^\p{gc=Zl}\p{gc=Zp}\x0a\x0b\x0c\x0d\x85]]}</td></tr>
 664  * <tr><th scope="row">{@code \p{Cntrl}}</th>
 665  *     <td>A control character: {@code \p{gc=Cc}}</td></tr>
 666  * <tr><th scope="row">{@code \p{XDigit}}</th>
 667  *     <td>A hexadecimal digit: {@code [\p{gc=Nd}\p{IsHex_Digit}]}</td></tr>
 668  * <tr><th scope="row">{@code \p{Space}}</th>
 669  *     <td>A whitespace character: {@code \p{IsWhite_Space}}</td></tr>
 670  * <tr><th scope="row">{@code \d}</th>
 671  *     <td>A digit: {@code \p{IsDigit}}</td></tr>
 672  * <tr><th scope="row">{@code \D}</th>
 673  *     <td>A non-digit: {@code [^\d]}</td></tr>
 674  * <tr><th scope="row">{@code \s}</th>
 675  *     <td>A whitespace character: {@code \p{IsWhite_Space}}</td></tr>
 676  * <tr><th scope="row">{@code \S}</th>
 677  *     <td>A non-whitespace character: {@code [^\s]}</td></tr>
 678  * <tr><th scope="row">{@code \w}</th>
 679  *     <td>A word character: {@code [\p{Alpha}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{Digit}\p{gc=Pc}\p{IsJoin_Control}]}</td></tr>
 680  * <tr><th scope="row">{@code \W}</th>
 681  *     <td>A non-word character: {@code [^\w]}</td></tr>
 682  * </tbody>
 683  * </table>
 684  * <p>
 685  * <a id="jcc">
 686  * Categories that behave like the java.lang.Character
 687  * boolean is<i>methodname</i> methods (except for the deprecated ones) are
 688  * available through the same <code>\p{</code><i>prop</i><code>}</code> syntax where
 689  * the specified property has the name <code>java<i>methodname</i></code></a>.
 690  *
 691  * <h3> Comparison to Perl 5 </h3>
 692  *
 693  * <p>The {@code Pattern} engine performs traditional NFA-based matching
 694  * with ordered alternation as occurs in Perl 5.
 695  *
 696  * <p> Perl constructs not supported by this class: </p>
 697  *
 698  * <ul>
 699  *    <li><p> The backreference constructs, <code>\g{</code><i>n</i><code>}</code> for
 700  *    the <i>n</i><sup>th</sup> <a href="#cg">capturing group</a> and


1185      *
1186      * <p> When there is a positive-width match at the beginning of the input
1187      * sequence then an empty leading substring is included at the beginning
1188      * of the resulting array. A zero-width match at the beginning, however,
1189      * never produces such an empty leading substring.
1190      *
1191      * <p> The {@code limit} parameter controls the number of times the
1192      * pattern is applied and therefore affects the length of the resulting
1193      * array.  If the limit <i>n</i> is greater than zero then the pattern
1194      * will be applied at most <i>n</i>&nbsp;-&nbsp;1 times, the array's
1195      * length will be no greater than <i>n</i>, and the array's last entry
1196      * will contain all input beyond the last matched delimiter.  If <i>n</i>
1197      * is non-positive then the pattern will be applied as many times as
1198      * possible and the array can have any length.  If <i>n</i> is zero then
1199      * the pattern will be applied as many times as possible, the array can
1200      * have any length, and trailing empty strings will be discarded.
1201      *
1202      * <p> The input {@code "boo:and:foo"}, for example, yields the following
1203      * results with these parameters:
1204      *
1205      * <table class="plain" style="margin-left:2em;">
1206      * <caption style="display:none">Split example showing regex, limit, and result</caption>
1207      * <thead>
1208      * <tr>
1209      *     <th scope="col">Regex</th>
1210      *     <th scope="col">Limit</th>
1211      *     <th scope="col">Result</th>
1212      * </tr>
1213      * </thead>
1214      * <tbody>
1215      * <tr><th scope="row" rowspan="3" style="font-weight:normal">:</th>
1216      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">2</th>
1217      *     <td>{@code { "boo", "and:foo" }}</td></tr>
1218      * <tr><!-- : -->
1219      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
1220      *     <td>{@code { "boo", "and", "foo" }}</td></tr>
1221      * <tr><!-- : -->
1222      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th>
1223      *     <td>{@code { "boo", "and", "foo" }}</td></tr>
1224      * <tr><th scope="row" rowspan="3" style="font-weight:normal">o</th>
1225      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
1226      *     <td>{@code { "b", "", ":and:f", "", "" }}</td></tr>
1227      * <tr><!-- o -->
1228      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th>
1229      *     <td>{@code { "b", "", ":and:f", "", "" }}</td></tr>
1230      * <tr><!-- o -->
1231      *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">0</th>
1232      *     <td>{@code { "b", "", ":and:f" }}</td></tr>
1233      * </tbody>
1234      * </table>
1235      *
1236      * @param  input
1237      *         The character sequence to be split
1238      *
1239      * @param  limit
1240      *         The result threshold, as described above
1241      *
1242      * @return  The array of strings computed by splitting the input
1243      *          around matches of this pattern
1244      */
1245     public String[] split(CharSequence input, int limit) {
1246         int index = 0;
1247         boolean matchLimited = limit > 0;
1248         ArrayList<String> matchList = new ArrayList<>();
1249         Matcher m = matcher(input);
1250 
1251         // Add segments before each match found
1252         while(m.find()) {
1253             if (!matchLimited || matchList.size() < limit - 1) {
1254                 if (index == 0 && index == m.start() && m.start() == m.end()) {


1278         // Construct result
1279         int resultSize = matchList.size();
1280         if (limit == 0)
1281             while (resultSize > 0 && matchList.get(resultSize-1).equals(""))
1282                 resultSize--;
1283         String[] result = new String[resultSize];
1284         return matchList.subList(0, resultSize).toArray(result);
1285     }
1286 
1287     /**
1288      * Splits the given input sequence around matches of this pattern.
1289      *
1290      * <p> This method works as if by invoking the two-argument {@link
1291      * #split(java.lang.CharSequence, int) split} method with the given input
1292      * sequence and a limit argument of zero.  Trailing empty strings are
1293      * therefore not included in the resulting array. </p>
1294      *
1295      * <p> The input {@code "boo:and:foo"}, for example, yields the following
1296      * results with these expressions:
1297      *
1298      * <table class="plain" style="margin-left:2em">
1299      * <caption style="display:none">Split examples showing regex and result</caption>
1300      * <thead>
1301      * <tr>
1302      *  <th scope="col">Regex</th>
1303      *  <th scope="col">Result</th>
1304      * </tr>
1305      * </thead>
1306      * <tbody>
1307      * <tr><th scope="row" style="font-weight:normal">:</th>
1308      *     <td>{@code { "boo", "and", "foo" }}</td></tr>
1309      * <tr><th scope="row" style="font-weight:normal">o</th>
1310      *     <td>{@code { "b", "", ":and:f" }}</td></tr>
1311      * </tbody>
1312      * </table>
1313      *
1314      *
1315      * @param  input
1316      *         The character sequence to be split
1317      *
1318      * @return  The array of strings computed by splitting the input
1319      *          around matches of this pattern
1320      */
1321     public String[] split(CharSequence input) {
1322         return split(input, 0);
1323     }
1324 
1325     /**
1326      * Returns a literal pattern {@code String} for the specified
1327      * {@code String}.
1328      *
1329      * <p>This method produces a {@code String} that can be used to
1330      * create a {@code Pattern} that would match the string
1331      * {@code s} as if it were a literal pattern.</p> Metacharacters
1332      * or escape sequences in the input sequence will be given no special


< prev index next >