--- old/src/java.base/share/classes/java/util/regex/Pattern.java 2015-08-07 21:15:28.182583611 +0400 +++ new/src/java.base/share/classes/java/util/regex/Pattern.java 2015-08-07 21:15:27.974583620 +0400 @@ -88,40 +88,40 @@ * * x * The character x - * \\ + * {@code \\} * The backslash character - * \0n - * The character with octal value 0n - * (0 <= n <= 7) - * \0nn - * The character with octal value 0nn - * (0 <= n <= 7) - * \0mnn - * The character with octal value 0mnn - * (0 <= m <= 3, - * 0 <= n <= 7) - * \xhh - * The character with hexadecimal value 0xhh - * \uhhhh - * The character with hexadecimal value 0xhhhh - * \x{h...h} - * The character with hexadecimal value 0xh...h + * {@code \0}n + * The character with octal value {@code 0}n + * (0 {@code <=} n {@code <=} 7) + * {@code \0}nn + * The character with octal value {@code 0}nn + * (0 {@code <=} n {@code <=} 7) + * {@code \0}mnn + * The character with octal value {@code 0}mnn + * (0 {@code <=} m {@code <=} 3, + * 0 {@code <=} n {@code <=} 7) + * {@code \x}hh + * The character with hexadecimal value {@code 0x}hh + * \uhhhh + * The character with hexadecimal value {@code 0x}hhhh + * \x{h...h} + * The character with hexadecimal value {@code 0x}h...h * ({@link java.lang.Character#MIN_CODE_POINT Character.MIN_CODE_POINT} - *  <= 0xh...h <=  + *  <= {@code 0x}h...h <=  * {@link java.lang.Character#MAX_CODE_POINT Character.MAX_CODE_POINT}) - * \t - * The tab character ('\u0009') - * \n - * The newline (line feed) character ('\u000A') - * \r - * The carriage-return character ('\u000D') - * \f - * The form-feed character ('\u000C') - * \a - * The alert (bell) character ('\u0007') - * \e - * The escape character ('\u001B') - * \cx + * {@code \t} + * The tab character ('\u0009') + * {@code \n} + * The newline (line feed) character ('\u000A') + * {@code \r} + * The carriage-return character ('\u000D') + * {@code \f} + * The form-feed character ('\u000C') + * {@code \a} + * The alert (bell) character ('\u0007') + * {@code 
\e} + * The escape character ('\u001B') + * {@code \c}x * The control character corresponding to x * *   @@ -149,30 +149,30 @@ * * Predefined character classes * - * . + * {@code .} * Any character (may or may not match line terminators) - * \d - * A digit: [0-9] - * \D - * A non-digit: [^0-9] - * \h + * {@code \d} + * A digit: {@code [0-9]} + * {@code \D} + * A non-digit: {@code [^0-9]} + * {@code \h} * A horizontal whitespace character: - * [ \t\xA0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000] - * \H - * A non-horizontal whitespace character: [^\h] - * \s - * A whitespace character: [ \t\n\x0B\f\r] - * \S - * A non-whitespace character: [^\s] - * \v - * A vertical whitespace character: [\n\x0B\f\r\x85\u2028\u2029] + * [ \t\xA0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000] + * {@code \H} + * A non-horizontal whitespace character: {@code [^\h]} + * {@code \s} + * A whitespace character: {@code [ \t\n\x0B\f\r]} + * {@code \S} + * A non-whitespace character: {@code [^\s]} + * {@code \v} + * A vertical whitespace character: [\n\x0B\f\r\x85\u2028\u2029] * - * \V - * A non-vertical whitespace character: [^\v] - * \w - * A word character: [a-zA-Z_0-9] - * \W - * A non-word character: [^\w] + * {@code \V} + * A non-vertical whitespace character: {@code [^\v]} + * {@code \w} + * A word character: {@code [a-zA-Z_0-9]} + * {@code \W} + * A non-word character: {@code [^\w]} *   * POSIX character classes (US-ASCII only) * @@ -208,13 +208,13 @@ *   * java.lang.Character classes (simple java character type) * - * \p{javaLowerCase} + * {@code \p{javaLowerCase}} * Equivalent to java.lang.Character.isLowerCase() - * \p{javaUpperCase} + * {@code \p{javaUpperCase}} * Equivalent to java.lang.Character.isUpperCase() - * \p{javaWhitespace} + * {@code \p{javaWhitespace}} * Equivalent to java.lang.Character.isWhitespace() - * \p{javaMirrored} + * {@code \p{javaMirrored}} * Equivalent to java.lang.Character.isMirrored() * *   @@ -237,77 +237,77 @@ *   * Boundary matchers * - * ^ + * {@code ^} 
* The beginning of a line - * $ + * {@code $} * The end of a line - * \b + * {@code \b} * A word boundary - * \B + * {@code \B} * A non-word boundary - * \A + * {@code \A} * The beginning of the input - * \G + * {@code \G} * The end of the previous match - * \Z + * {@code \Z} * The end of the input but for the final * terminator, if any - * \z + * {@code \z} * The end of the input * *   * Linebreak matcher - * \R + * {@code \R} * Any Unicode linebreak sequence, is equivalent to - * \u000D\u000A|[\u000A\u000B\u000C\u000D\u0085\u2028\u2029] - * + * \u000D\u000A|[\u000A\u000B\u000C\u000D\u0085\u2028\u2029] + * * *   * Greedy quantifiers * - * X? + * X{@code ?} * X, once or not at all - * X* + * X{@code *} * X, zero or more times - * X+ + * X{@code +} * X, one or more times - * X{n} + * X{n} * X, exactly n times - * X{n,} + * X{n{@code ,}} * X, at least n times - * X{n,m} + * X{n{@code ,}m} * X, at least n but not more than m times * *   * Reluctant quantifiers * - * X?? + * X{@code ??} * X, once or not at all - * X*? + * X{@code *?} * X, zero or more times - * X+? + * X{@code +?} * X, one or more times - * X{n}? + * X{n}? * X, exactly n times - * X{n,}? + * X{n,}? * X, at least n times - * X{n,m}? + * X{n{@code ,}m}? 
* X, at least n but not more than m times * *   * Possessive quantifiers * - * X?+ + * X{@code ?+} * X, once or not at all - * X*+ + * X{@code *+} * X, zero or more times - * X++ + * X{@code ++} * X, one or more times - * X{n}+ + * X{n}+ * X, exactly n times - * X{n,}+ + * X{n,}+ * X, at least n times - * X{n,m}+ + * X{n{@code ,}m}+ * X, at least n but not more than m times * *   @@ -315,59 +315,59 @@ * * XY * X followed by Y - * X|Y + * X{@code |}Y * Either X or Y - * (X) + * {@code (}X{@code )} * X, as a capturing group * *   * Back references * - * \n + * {@code \}n * Whatever the nth * capturing group matched * - * \k<name> + * {@code \}k<name> * Whatever the * named-capturing group "name" matched * *   * Quotation * - * \ + * {@code \} * Nothing, but quotes the following character - * \Q - * Nothing, but quotes all characters until \E - * \E - * Nothing, but ends quoting started by \Q + * {@code \Q} + * Nothing, but quotes all characters until {@code \E} + * {@code \E} + * Nothing, but ends quoting started by {@code \Q} * * *   * Special constructs (named-capturing and non-capturing) * - * (?<name>X) + * (?<name>X{@code )} * X, as a named-capturing group - * (?:X) + * {@code (?:}X{@code )} * X, as a non-capturing group - * (?idmsuxU-idmsuxU)  + * (?idmsuxU-idmsuxU)  * Nothing, but turns match flags i * d m s * u x U * on - off - * (?idmsux-idmsux:X)   + * (?idmsux-idmsux:X{@code )}   * X, as a non-capturing group with the * given flags i d * m s u * x on - off - * (?=X) + * {@code (?=}X{@code )} * X, via zero-width positive lookahead - * (?!X) + * {@code (?!}X{@code )} * X, via zero-width negative lookahead - * (?<=X) + * {@code (?<=}X{@code )} * X, via zero-width positive lookbehind - * (?<!X) + * {@code (?<!}X{@code )} * X, via zero-width negative lookbehind - * (?>X) + * {@code (?>}X{@code )} * X, as an independent, non-capturing group * * @@ -377,10 +377,10 @@ * *

Backslashes, escapes, and quoting

* - *

The backslash character ('\') serves to introduce escaped + *

The backslash character ({@code '\'}) serves to introduce escaped * constructs, as defined in the table above, as well as to quote characters * that otherwise would be interpreted as unescaped constructs. Thus the - * expression \\ matches a single backslash and \{ matches a + * expression {@code \\} matches a single backslash and \{ matches a * left brace. * *

It is an error to use a backslash prior to any alphabetic character that @@ -396,18 +396,18 @@ * It is therefore necessary to double backslashes in string * literals that represent regular expressions to protect them from * interpretation by the Java bytecode compiler. The string literal - * "\b", for example, matches a single backspace character when - * interpreted as a regular expression, while "\\b" matches a - * word boundary. The string literal "\(hello\)" is illegal + * "\b", for example, matches a single backspace character when + * interpreted as a regular expression, while {@code "\\b"} matches a + * word boundary. The string literal {@code "\(hello\)"} is illegal * and leads to a compile-time error; in order to match the string - * (hello) the string literal "\\(hello\\)" + * {@code (hello)} the string literal {@code "\\(hello\\)"} * must be used. * *

Character Classes

* *

Character classes may appear within other character classes, and * may be composed by the union operator (implicit) and the intersection - * operator (&&). + * operator ({@code &&}). * The union operator denotes a class that contains every character that is * in at least one of its operand classes. The intersection operator * denotes a class that contains every character that is in both of its @@ -420,16 +420,16 @@ * summary="Precedence of character class operators."> * 1     * Literal escape     - * \x + * {@code \x} * 2     * Grouping - * [...] + * {@code [...]} * 3     * Range - * a-z + * {@code a-z} * 4     * Union - * [a-e][i-u] + * {@code [a-e][i-u]} * 5     * Intersection * {@code [a-z&&[aeiou]]} @@ -437,8 +437,8 @@ * *

Note that a different set of metacharacters is in effect inside * a character class than outside a character class. For instance, the - * regular expression . loses its special meaning inside a - * character class, while the expression - becomes a range + * regular expression {@code .} loses its special meaning inside a + * character class, while the expression {@code -} becomes a range * forming metacharacter. * *

Line terminators

@@ -449,49 +449,49 @@ * * *

If {@link #UNIX_LINES} mode is activated, then the only line terminators * recognized are newline characters. * - *

The regular expression . matches any character except a line + *

The regular expression {@code .} matches any character except a line * terminator unless the {@link #DOTALL} flag is specified. * - *

By default, the regular expressions ^ and $ ignore + *

By default, the regular expressions {@code ^} and {@code $} ignore * line terminators and only match at the beginning and the end, respectively, * of the entire input sequence. If {@link #MULTILINE} mode is activated then - * ^ matches at the beginning of input and after any line terminator - * except at the end of input. When in {@link #MULTILINE} mode $ + * {@code ^} matches at the beginning of input and after any line terminator + * except at the end of input. When in {@link #MULTILINE} mode {@code $} * matches just before a line terminator or the end of the input sequence. * *

Groups and capturing

* *

Group number

*

Capturing groups are numbered by counting their opening parentheses from - * left to right. In the expression ((A)(B(C))), for example, there + * left to right. In the expression {@code ((A)(B(C)))}, for example, there * are four such groups:

* *
* - * + * * - * + * * - * + * * - * + * *
1    ((A)(B(C)))
{@code ((A)(B(C)))}
2    (A)
{@code (A)}
3    (B(C))
{@code (B(C))}
4    (C)
{@code (C)}
* *

Group zero always stands for the entire expression. @@ -502,31 +502,31 @@ * may also be retrieved from the matcher once the match operation is complete. * *

Group name

- *

A capturing group can also be assigned a "name", a named-capturing group, + *

A capturing group can also be assigned a "name", a {@code named-capturing group}, * and then be back-referenced later by the "name". Group names are composed of - * the following characters. The first character must be a letter. + * the following characters. The first character must be a {@code letter}. * *

* - *

A named-capturing group is still numbered as described in + *

A {@code named-capturing group} is still numbered as described in * Group number. * *

The captured input associated with a group is always the subsequence * that the group most recently matched. If a group is evaluated a second time * because of quantification then its previously-captured value, if any, will * be retained if the second evaluation fails. Matching the string - * "aba" against the expression (a(b)?)+, for example, leaves - * group two set to "b". All captured input is discarded at the + * {@code "aba"} against the expression {@code (a(b)?)+}, for example, leaves + * group two set to {@code "b"}. All captured input is discarded at the * beginning of each match. * - *

Groups beginning with (? are either pure, non-capturing groups + *

Groups beginning with {@code (?} are either pure, non-capturing groups * that do not capture text and do not count towards the group total, or * named-capturing groups. * @@ -537,26 +537,26 @@ * Standard #18: Unicode Regular Expression, plus RL2.1 * Canonical Equivalents. *

- * Unicode escape sequences such as \u2014 in Java source code + * Unicode escape sequences such as \u2014 in Java source code * are processed as described in section 3.3 of * The Java™ Language Specification. * Such escape sequences are also implemented directly by the regular-expression * parser so that Unicode escapes can be used in expressions that are read from - * files or from the keyboard. Thus the strings "\u2014" and - * "\\u2014", while not equal, compile into the same pattern, which - * matches the character with hexadecimal value 0x2014. + * files or from the keyboard. Thus the strings "\u2014" and + * {@code "\\u2014"}, while not equal, compile into the same pattern, which + * matches the character with hexadecimal value {@code 0x2014}. *

* A Unicode character can also be represented in a regular-expression by * using its Hex notation (hexadecimal code point value) directly as described in construct - * \x{...}, for example a supplementary character U+2011F - * can be specified as \x{2011F}, instead of two consecutive + * \x{...}, for example a supplementary character U+2011F + * can be specified as \x{2011F}, instead of two consecutive * Unicode escape sequences of the surrogate pair - * \uD840\uDD1F. + * \uD840\uDD1F. *

* Unicode scripts, blocks, categories and binary properties are written with - * the \p and \P constructs as in Perl. - * \p{prop} matches if - * the input has the property prop, while \P{prop} + * the {@code \p} and {@code \P} constructs as in Perl. + * \p{prop} matches if + * the input has the property prop, while \P{prop} * does not match if the input has that property. *

* Scripts, blocks, categories and binary properties can be used both inside @@ -567,7 +567,7 @@ * {@code IsHiragana}, or by using the {@code script} keyword (or its short * form {@code sc}) as in {@code script=Hiragana} or {@code sc=Hiragana}. *

- * The script names supported by Pattern are the valid script names + * The script names supported by {@code Pattern} are the valid script names * accepted and defined by * {@link java.lang.Character.UnicodeScript#forName(String) UnicodeScript.forName}. * @@ -576,7 +576,7 @@ * {@code InMongolian}, or by using the keyword {@code block} (or its short * form {@code blk}) as in {@code block=Mongolian} or {@code blk=Mongolian}. *

- * The block names supported by Pattern are the valid block names + * The block names supported by {@code Pattern} are the valid block names * accepted and defined by * {@link java.lang.Character.UnicodeBlock#forName(String) UnicodeBlock.forName}. *

@@ -595,7 +595,7 @@ *

* * Binary properties are specified with the prefix {@code Is}, as in - * {@code IsAlphabetic}. The binary properties supported by Pattern + * {@code IsAlphabetic}. The binary properties supported by {@code Pattern} * are *