New src/share/classes/java/lang/Character.java

   1 /*
   2  * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Map;
  30 import java.util.HashMap;
  31 import java.util.Locale;
  32 
  33 /**
  34  * The {@code Character} class wraps a value of the primitive
  35  * type {@code char} in an object. An object of class
  36  * {@code Character} contains a single field whose type is
  37  * {@code char}.
  38  * <p>
  39  * In addition, this class provides a large number of static methods for
  40  * determining a character's category (lowercase letter, digit, etc.)
  41  * and for converting characters from uppercase to lowercase and vice
  42  * versa.
  43  *
  44  * <h3><a id="conformance">Unicode Conformance</a></h3>
  45  * <p>
  46  * The fields and methods of class {@code Character} are defined in terms
  47  * of character information from the Unicode Standard, specifically the
  48  * <i>UnicodeData</i> file that is part of the Unicode Character Database.
  49  * This file specifies properties including name and category for every
  50  * assigned Unicode code point or character range. The file is available
  51  * from the Unicode Consortium at
  52  * <a href="http://www.unicode.org">http://www.unicode.org</a>.
  53  * <p>
  54  * The Java SE 8 Platform uses character information from version 6.2
  55  * of the Unicode Standard, with two extensions. First, the Java SE 8 Platform
  56  * allows an implementation of class {@code Character} to use the Japanese Era
  57  * code point, {@code U+32FF}, from the first version of the Unicode Standard
  58  * after 6.2 that assigns the code point. Second, in recognition of the fact
  59  * that new currencies appear frequently, the Java SE 8 Platform allows an
  60  * implementation of class {@code Character} to use the Currency Symbols
  61  * block from version 10.0 of the Unicode Standard. Consequently, the
  62  * behavior of fields and methods of class {@code Character} may vary across
  63  * implementations of the Java SE 8 Platform when processing the aforementioned
  64  * code points (outside of version 6.2), except for the following methods
  65  * that define Java identifiers:
  66  * {@link #isJavaIdentifierStart(int)}, {@link #isJavaIdentifierStart(char)},
  67  * {@link #isJavaIdentifierPart(int)}, and {@link #isJavaIdentifierPart(char)}.
  68  * Code points in Java identifiers must be drawn from version 6.2 of
  69  * the Unicode Standard.
  70  *
  71  * <h3><a name="unicode">Unicode Character Representations</a></h3>
  72  *
  73  * <p>The {@code char} data type (and therefore the value that a
  74  * {@code Character} object encapsulates) is based on the
  75  * original Unicode specification, which defined characters as
  76  * fixed-width 16-bit entities. The Unicode Standard has since been
  77  * changed to allow for characters whose representation requires more
  78  * than 16 bits.  The range of legal <em>code point</em>s is now
  79  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
  80  * (Refer to the <a
  81  * href="http://www.unicode.org/reports/tr27/#notation"><i>
  82  * definition</i></a> of the U+<i>n</i> notation in the Unicode
  83  * Standard.)
  84  *
  85  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
  86  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
  87  * <a name="supplementary">Characters</a> whose code points are greater
  88  * than U+FFFF are called <em>supplementary character</em>s.  The Java
  89  * platform uses the UTF-16 representation in {@code char} arrays and
  90  * in the {@code String} and {@code StringBuffer} classes. In
  91  * this representation, supplementary characters are represented as a pair
  92  * of {@code char} values, the first from the <em>high-surrogates</em>
  93  * range (\uD800-\uDBFF), the second from the
  94  * <em>low-surrogates</em> range (\uDC00-\uDFFF).
  95  *
  96  * <p>A {@code char} value, therefore, represents Basic
  97  * Multilingual Plane (BMP) code points, including the surrogate
  98  * code points, or code units of the UTF-16 encoding. An
  99  * {@code int} value represents all Unicode code points,
 100  * including supplementary code points. The lower (least significant)
 101  * 21 bits of {@code int} are used to represent Unicode code
 102  * points and the upper (most significant) 11 bits must be zero.
 103  * Unless otherwise specified, the behavior with respect to
 104  * supplementary characters and surrogate {@code char} values is
 105  * as follows:
 106  *
 107  * <ul>
 108  * <li>The methods that only accept a {@code char} value cannot support
 109  * supplementary characters. They treat {@code char} values from the
 110  * surrogate ranges as undefined characters. For example,
 111  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
 112  * this specific value if followed by any low-surrogate value in a string
 113  * would represent a letter.
 114  *
 115  * <li>The methods that accept an {@code int} value support all
 116  * Unicode characters, including supplementary characters. For
 117  * example, {@code Character.isLetter(0x2F81A)} returns
 118  * {@code true} because the code point value represents a letter
 119  * (a CJK ideograph).
 120  * </ul>
 121  *
 122  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
 123  * used for character values in the range between U+0000 and U+10FFFF,
 124  * and <em>Unicode code unit</em> is used for 16-bit
 125  * {@code char} values that are code units of the <em>UTF-16</em>
 126  * encoding. For more information on Unicode terminology, refer to the
 127  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
 128  *
 129  * @author  Lee Boynton
 130  * @author  Guy Steele
 131  * @author  Akira Tanaka
 132  * @author  Martin Buchholz
 133  * @author  Ulf Zibis
 134  * @since   1.0
 135  */
 136 public final
 137 class Character implements java.io.Serializable, Comparable<Character> {
 138     /**
 139      * The minimum radix available for conversion to and from strings.
 140      * The constant value of this field is the smallest value permitted
 141      * for the radix argument in radix-conversion methods such as the
 142      * {@code digit} method, the {@code forDigit} method, and the
 143      * {@code toString} method of class {@code Integer}.
 144      *
 145      * @see     Character#digit(char, int)
 146      * @see     Character#forDigit(int, int)
 147      * @see     Integer#toString(int, int)
 148      * @see     Integer#valueOf(String)
 149      */
 150     public static final int MIN_RADIX = 2;
 151 
 152     /**
 153      * The maximum radix available for conversion to and from strings.
 154      * The constant value of this field is the largest value permitted
 155      * for the radix argument in radix-conversion methods such as the
 156      * {@code digit} method, the {@code forDigit} method, and the
 157      * {@code toString} method of class {@code Integer}.
 158      *
 159      * @see     Character#digit(char, int)
 160      * @see     Character#forDigit(int, int)
 161      * @see     Integer#toString(int, int)
 162      * @see     Integer#valueOf(String)
 163      */
 164     public static final int MAX_RADIX = 36;
 165 
 166     /**
 167      * The constant value of this field is the smallest value of type
 168      * {@code char}, {@code '\u005Cu0000'}.
 169      *
 170      * @since   1.0.2
 171      */
 172     public static final char MIN_VALUE = '\u0000';
 173 
 174     /**
 175      * The constant value of this field is the largest value of type
 176      * {@code char}, {@code '\u005CuFFFF'}.
 177      *
 178      * @since   1.0.2
 179      */
 180     public static final char MAX_VALUE = '\uFFFF';
 181 
 182     /**
 183      * The {@code Class} instance representing the primitive type
 184      * {@code char}.
 185      *
 186      * @since   1.1
 187      */
 188     @SuppressWarnings("unchecked")
 189     public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
 190 
 191     /*
 192      * Normative general types
 193      */
 194 
 195     /*
 196      * General character types
 197      */
 198 
 199     /**
 200      * General category "Cn" in the Unicode specification.
 201      * @since   1.1
 202      */
 203     public static final byte UNASSIGNED = 0;
 204 
 205     /**
 206      * General category "Lu" in the Unicode specification.
 207      * @since   1.1
 208      */
 209     public static final byte UPPERCASE_LETTER = 1;
 210 
 211     /**
 212      * General category "Ll" in the Unicode specification.
 213      * @since   1.1
 214      */
 215     public static final byte LOWERCASE_LETTER = 2;
 216 
 217     /**
 218      * General category "Lt" in the Unicode specification.
 219      * @since   1.1
 220      */
 221     public static final byte TITLECASE_LETTER = 3;
 222 
 223     /**
 224      * General category "Lm" in the Unicode specification.
 225      * @since   1.1
 226      */
 227     public static final byte MODIFIER_LETTER = 4;
 228 
 229     /**
 230      * General category "Lo" in the Unicode specification.
 231      * @since   1.1
 232      */
 233     public static final byte OTHER_LETTER = 5;
 234 
 235     /**
 236      * General category "Mn" in the Unicode specification.
 237      * @since   1.1
 238      */
 239     public static final byte NON_SPACING_MARK = 6;
 240 
 241     /**
 242      * General category "Me" in the Unicode specification.
 243      * @since   1.1
 244      */
 245     public static final byte ENCLOSING_MARK = 7;
 246 
 247     /**
 248      * General category "Mc" in the Unicode specification.
 249      * @since   1.1
 250      */
 251     public static final byte COMBINING_SPACING_MARK = 8;
 252 
 253     /**
 254      * General category "Nd" in the Unicode specification.
 255      * @since   1.1
 256      */
 257     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
 258 
 259     /**
 260      * General category "Nl" in the Unicode specification.
 261      * @since   1.1
 262      */
 263     public static final byte LETTER_NUMBER = 10;
 264 
 265     /**
 266      * General category "No" in the Unicode specification.
 267      * @since   1.1
 268      */
 269     public static final byte OTHER_NUMBER = 11;
 270 
 271     /**
 272      * General category "Zs" in the Unicode specification.
 273      * @since   1.1
 274      */
 275     public static final byte SPACE_SEPARATOR = 12;
 276 
 277     /**
 278      * General category "Zl" in the Unicode specification.
 279      * @since   1.1
 280      */
 281     public static final byte LINE_SEPARATOR = 13;
 282 
 283     /**
 284      * General category "Zp" in the Unicode specification.
 285      * @since   1.1
 286      */
 287     public static final byte PARAGRAPH_SEPARATOR = 14;
 288 
 289     /**
 290      * General category "Cc" in the Unicode specification.
 291      * @since   1.1
 292      */
 293     public static final byte CONTROL = 15;
 294 
 295     /**
 296      * General category "Cf" in the Unicode specification.
 297      * @since   1.1
 298      */
 299     public static final byte FORMAT = 16;
 300 
 301     /**
 302      * General category "Co" in the Unicode specification.
 303      * @since   1.1
 304      */
 305     public static final byte PRIVATE_USE = 18;
 306 
 307     /**
 308      * General category "Cs" in the Unicode specification.
 309      * @since   1.1
 310      */
 311     public static final byte SURROGATE = 19;
 312 
 313     /**
 314      * General category "Pd" in the Unicode specification.
 315      * @since   1.1
 316      */
 317     public static final byte DASH_PUNCTUATION = 20;
 318 
 319     /**
 320      * General category "Ps" in the Unicode specification.
 321      * @since   1.1
 322      */
 323     public static final byte START_PUNCTUATION = 21;
 324 
 325     /**
 326      * General category "Pe" in the Unicode specification.
 327      * @since   1.1
 328      */
 329     public static final byte END_PUNCTUATION = 22;
 330 
 331     /**
 332      * General category "Pc" in the Unicode specification.
 333      * @since   1.1
 334      */
 335     public static final byte CONNECTOR_PUNCTUATION = 23;
 336 
 337     /**
 338      * General category "Po" in the Unicode specification.
 339      * @since   1.1
 340      */
 341     public static final byte OTHER_PUNCTUATION = 24;
 342 
 343     /**
 344      * General category "Sm" in the Unicode specification.
 345      * @since   1.1
 346      */
 347     public static final byte MATH_SYMBOL = 25;
 348 
 349     /**
 350      * General category "Sc" in the Unicode specification.
 351      * @since   1.1
 352      */
 353     public static final byte CURRENCY_SYMBOL = 26;
 354 
 355     /**
 356      * General category "Sk" in the Unicode specification.
 357      * @since   1.1
 358      */
 359     public static final byte MODIFIER_SYMBOL = 27;
 360 
 361     /**
 362      * General category "So" in the Unicode specification.
 363      * @since   1.1
 364      */
 365     public static final byte OTHER_SYMBOL = 28;
 366 
 367     /**
 368      * General category "Pi" in the Unicode specification.
 369      * @since   1.4
 370      */
 371     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
 372 
 373     /**
 374      * General category "Pf" in the Unicode specification.
 375      * @since   1.4
 376      */
 377     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
 378 
 379     /**
 380      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
 381      */
 382     static final int ERROR = 0xFFFFFFFF;
 383 
 384 
 385     /**
 386      * Undefined bidirectional character type. Undefined {@code char}
 387      * values have undefined directionality in the Unicode specification.
 388      * @since 1.4
 389      */
 390     public static final byte DIRECTIONALITY_UNDEFINED = -1;
 391 
 392     /**
 393      * Strong bidirectional character type "L" in the Unicode specification.
 394      * @since 1.4
 395      */
 396     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
 397 
 398     /**
 399      * Strong bidirectional character type "R" in the Unicode specification.
 400      * @since 1.4
 401      */
 402     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
 403 
 404     /**
 405      * Strong bidirectional character type "AL" in the Unicode specification.
 406      * @since 1.4
 407      */
 408     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
 409 
 410     /**
 411      * Weak bidirectional character type "EN" in the Unicode specification.
 412      * @since 1.4
 413      */
 414     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
 415 
 416     /**
 417      * Weak bidirectional character type "ES" in the Unicode specification.
 418      * @since 1.4
 419      */
 420     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
 421 
 422     /**
 423      * Weak bidirectional character type "ET" in the Unicode specification.
 424      * @since 1.4
 425      */
 426     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
 427 
 428     /**
 429      * Weak bidirectional character type "AN" in the Unicode specification.
 430      * @since 1.4
 431      */
 432     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
 433 
 434     /**
 435      * Weak bidirectional character type "CS" in the Unicode specification.
 436      * @since 1.4
 437      */
 438     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
 439 
 440     /**
 441      * Weak bidirectional character type "NSM" in the Unicode specification.
 442      * @since 1.4
 443      */
 444     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
 445 
 446     /**
 447      * Weak bidirectional character type "BN" in the Unicode specification.
 448      * @since 1.4
 449      */
 450     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
 451 
 452     /**
 453      * Neutral bidirectional character type "B" in the Unicode specification.
 454      * @since 1.4
 455      */
 456     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
 457 
 458     /**
 459      * Neutral bidirectional character type "S" in the Unicode specification.
 460      * @since 1.4
 461      */
 462     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
 463 
 464     /**
 465      * Neutral bidirectional character type "WS" in the Unicode specification.
 466      * @since 1.4
 467      */
 468     public static final byte DIRECTIONALITY_WHITESPACE = 12;
 469 
 470     /**
 471      * Neutral bidirectional character type "ON" in the Unicode specification.
 472      * @since 1.4
 473      */
 474     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
 475 
 476     /**
 477      * Strong bidirectional character type "LRE" in the Unicode specification.
 478      * @since 1.4
 479      */
 480     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
 481 
 482     /**
 483      * Strong bidirectional character type "LRO" in the Unicode specification.
 484      * @since 1.4
 485      */
 486     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
 487 
 488     /**
 489      * Strong bidirectional character type "RLE" in the Unicode specification.
 490      * @since 1.4
 491      */
 492     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
 493 
 494     /**
 495      * Strong bidirectional character type "RLO" in the Unicode specification.
 496      * @since 1.4
 497      */
 498     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
 499 
 500     /**
 501      * Weak bidirectional character type "PDF" in the Unicode specification.
 502      * @since 1.4
 503      */
 504     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 505 
 506     /**
 507      * The minimum value of a
 508      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 509      * Unicode high-surrogate code unit</a>
 510      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
 511      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 512      *
 513      * @since 1.5
 514      */
 515     public static final char MIN_HIGH_SURROGATE = '\uD800';
 516 
 517     /**
 518      * The maximum value of a
 519      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 520      * Unicode high-surrogate code unit</a>
 521      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
 522      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 523      *
 524      * @since 1.5
 525      */
 526     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
 527 
 528     /**
 529      * The minimum value of a
 530      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 531      * Unicode low-surrogate code unit</a>
 532      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
 533      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 534      *
 535      * @since 1.5
 536      */
 537     public static final char MIN_LOW_SURROGATE  = '\uDC00';
 538 
 539     /**
 540      * The maximum value of a
 541      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 542      * Unicode low-surrogate code unit</a>
 543      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 544      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 545      *
 546      * @since 1.5
 547      */
 548     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
 549 
 550     /**
 551      * The minimum value of a Unicode surrogate code unit in the
 552      * UTF-16 encoding, constant {@code '\u005CuD800'}.
 553      *
 554      * @since 1.5
 555      */
 556     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
 557 
 558     /**
 559      * The maximum value of a Unicode surrogate code unit in the
 560      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 561      *
 562      * @since 1.5
 563      */
 564     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
 565 
 566     /**
 567      * The minimum value of a
 568      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
 569      * Unicode supplementary code point</a>, constant {@code U+10000}.
 570      *
 571      * @since 1.5
 572      */
 573     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
 574 
 575     /**
 576      * The minimum value of a
 577      * <a href="http://www.unicode.org/glossary/#code_point">
 578      * Unicode code point</a>, constant {@code U+0000}.
 579      *
 580      * @since 1.5
 581      */
 582     public static final int MIN_CODE_POINT = 0x000000;
 583 
 584     /**
 585      * The maximum value of a
 586      * <a href="http://www.unicode.org/glossary/#code_point">
 587      * Unicode code point</a>, constant {@code U+10FFFF}.
 588      *
 589      * @since 1.5
 590      */
 591     public static final int MAX_CODE_POINT = 0X10FFFF;
 592 
 593 
 594     /**
 595      * Instances of this class represent particular subsets of the Unicode
 596      * character set.  The only family of subsets defined in the
 597      * {@code Character} class is {@link Character.UnicodeBlock}.
 598      * Other portions of the Java API may define other subsets for their
 599      * own purposes.
 600      *
 601      * @since 1.2
 602      */
 603     public static class Subset  {
 604 
 605         private String name;
 606 
 607         /**
 608          * Constructs a new {@code Subset} instance.
 609          *
 610          * @param  name  The name of this subset
 611          * @exception NullPointerException if name is {@code null}
 612          */
 613         protected Subset(String name) {
 614             if (name == null) {
 615                 throw new NullPointerException("name");
 616             }
 617             this.name = name;
 618         }
 619 
 620         /**
 621          * Compares two {@code Subset} objects for equality.
 622          * This method returns {@code true} if and only if
 623          * {@code this} and the argument refer to the same
 624          * object; since this method is {@code final}, this
 625          * guarantee holds for all subclasses.
 626          */
 627         public final boolean equals(Object obj) {
 628             return (this == obj);
 629         }
 630 
 631         /**
 632          * Returns the standard hash code as defined by the
 633          * {@link Object#hashCode} method.  This method
 634          * is {@code final} in order to ensure that the
 635          * {@code equals} and {@code hashCode} methods will
 636          * be consistent in all subclasses.
 637          */
 638         public final int hashCode() {
 639             return super.hashCode();
 640         }
 641 
 642         /**
 643          * Returns the name of this subset.
 644          */
 645         public final String toString() {
 646             return name;
 647         }
 648     }
 649 
 650     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
 651     // for the latest specification of Unicode Blocks.
 652 
 653     /**
 654      * A family of character subsets representing the character blocks in the
 655      * Unicode specification. Character blocks generally define characters
 656      * used for a specific script or purpose. A character is contained by
 657      * at most one Unicode block.
 658      *
 659      * @since 1.2
 660      */
 661     public static final class UnicodeBlock extends Subset {
 662 
 663         private static Map<String, UnicodeBlock> map = new HashMap<>(256);
 664 
 665         /**
 666          * Creates a UnicodeBlock with the given identifier name.
 667          * This name must be the same as the block identifier.
 668          */
 669         private UnicodeBlock(String idName) {
 670             super(idName);
 671             map.put(idName, this);
 672         }
 673 
 674         /**
 675          * Creates a UnicodeBlock with the given identifier name and
 676          * alias name.
 677          */
 678         private UnicodeBlock(String idName, String alias) {
 679             this(idName);
 680             map.put(alias, this);
 681         }
 682 
 683         /**
 684          * Creates a UnicodeBlock with the given identifier name and
 685          * alias names.
 686          */
 687         private UnicodeBlock(String idName, String... aliases) {
 688             this(idName);
 689             for (String alias : aliases)
 690                 map.put(alias, this);
 691         }
 692 
 693         /**
 694          * Constant for the "Basic Latin" Unicode character block.
 695          * @since 1.2
 696          */
 697         public static final UnicodeBlock  BASIC_LATIN =
 698             new UnicodeBlock("BASIC_LATIN",
 699                              "BASIC LATIN",
 700                              "BASICLATIN");
 701 
 702         /**
 703          * Constant for the "Latin-1 Supplement" Unicode character block.
 704          * @since 1.2
 705          */
 706         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
 707             new UnicodeBlock("LATIN_1_SUPPLEMENT",
 708                              "LATIN-1 SUPPLEMENT",
 709                              "LATIN-1SUPPLEMENT");
 710 
 711         /**
 712          * Constant for the "Latin Extended-A" Unicode character block.
 713          * @since 1.2
 714          */
 715         public static final UnicodeBlock LATIN_EXTENDED_A =
 716             new UnicodeBlock("LATIN_EXTENDED_A",
 717                              "LATIN EXTENDED-A",
 718                              "LATINEXTENDED-A");
 719 
 720         /**
 721          * Constant for the "Latin Extended-B" Unicode character block.
 722          * @since 1.2
 723          */
 724         public static final UnicodeBlock LATIN_EXTENDED_B =
 725             new UnicodeBlock("LATIN_EXTENDED_B",
 726                              "LATIN EXTENDED-B",
 727                              "LATINEXTENDED-B");
 728 
 729         /**
 730          * Constant for the "IPA Extensions" Unicode character block.
 731          * @since 1.2
 732          */
 733         public static final UnicodeBlock IPA_EXTENSIONS =
 734             new UnicodeBlock("IPA_EXTENSIONS",
 735                              "IPA EXTENSIONS",
 736                              "IPAEXTENSIONS");
 737 
 738         /**
 739          * Constant for the "Spacing Modifier Letters" Unicode character block.
 740          * @since 1.2
 741          */
 742         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
 743             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
 744                              "SPACING MODIFIER LETTERS",
 745                              "SPACINGMODIFIERLETTERS");
 746 
 747         /**
 748          * Constant for the "Combining Diacritical Marks" Unicode character block.
 749          * @since 1.2
 750          */
 751         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
 752             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
 753                              "COMBINING DIACRITICAL MARKS",
 754                              "COMBININGDIACRITICALMARKS");
 755 
 756         /**
 757          * Constant for the "Greek and Coptic" Unicode character block.
 758          * <p>
 759          * This block was previously known as the "Greek" block.
 760          *
 761          * @since 1.2
 762          */
 763         public static final UnicodeBlock GREEK =
 764             new UnicodeBlock("GREEK",
 765                              "GREEK AND COPTIC",
 766                              "GREEKANDCOPTIC");
 767 
 768         /**
 769          * Constant for the "Cyrillic" Unicode character block.
 770          * @since 1.2
 771          */
 772         public static final UnicodeBlock CYRILLIC =
 773             new UnicodeBlock("CYRILLIC");
 774 
 775         /**
 776          * Constant for the "Armenian" Unicode character block.
 777          * @since 1.2
 778          */
 779         public static final UnicodeBlock ARMENIAN =
 780             new UnicodeBlock("ARMENIAN");
 781 
 782         /**
 783          * Constant for the "Hebrew" Unicode character block.
 784          * @since 1.2
 785          */
 786         public static final UnicodeBlock HEBREW =
 787             new UnicodeBlock("HEBREW");
 788 
 789         /**
 790          * Constant for the "Arabic" Unicode character block.
 791          * @since 1.2
 792          */
 793         public static final UnicodeBlock ARABIC =
 794             new UnicodeBlock("ARABIC");
 795 
 796         /**
 797          * Constant for the "Devanagari" Unicode character block.
 798          * @since 1.2
 799          */
 800         public static final UnicodeBlock DEVANAGARI =
 801             new UnicodeBlock("DEVANAGARI");
 802 
 803         /**
 804          * Constant for the "Bengali" Unicode character block.
 805          * @since 1.2
 806          */
 807         public static final UnicodeBlock BENGALI =
 808             new UnicodeBlock("BENGALI");
 809 
 810         /**
 811          * Constant for the "Gurmukhi" Unicode character block.
 812          * @since 1.2
 813          */
 814         public static final UnicodeBlock GURMUKHI =
 815             new UnicodeBlock("GURMUKHI");
 816 
 817         /**
 818          * Constant for the "Gujarati" Unicode character block.
 819          * @since 1.2
 820          */
 821         public static final UnicodeBlock GUJARATI =
 822             new UnicodeBlock("GUJARATI");
 823 
 824         /**
 825          * Constant for the "Oriya" Unicode character block.
 826          * @since 1.2
 827          */
 828         public static final UnicodeBlock ORIYA =
 829             new UnicodeBlock("ORIYA");
 830 
 831         /**
 832          * Constant for the "Tamil" Unicode character block.
 833          * @since 1.2
 834          */
 835         public static final UnicodeBlock TAMIL =
 836             new UnicodeBlock("TAMIL");
 837 
 838         /**
 839          * Constant for the "Telugu" Unicode character block.
 840          * @since 1.2
 841          */
 842         public static final UnicodeBlock TELUGU =
 843             new UnicodeBlock("TELUGU");
 844 
 845         /**
 846          * Constant for the "Kannada" Unicode character block.
 847          * @since 1.2
 848          */
 849         public static final UnicodeBlock KANNADA =
 850             new UnicodeBlock("KANNADA");
 851 
 852         /**
 853          * Constant for the "Malayalam" Unicode character block.
 854          * @since 1.2
 855          */
 856         public static final UnicodeBlock MALAYALAM =
 857             new UnicodeBlock("MALAYALAM");
 858 
 859         /**
 860          * Constant for the "Thai" Unicode character block.
 861          * @since 1.2
 862          */
 863         public static final UnicodeBlock THAI =
 864             new UnicodeBlock("THAI");
 865 
 866         /**
 867          * Constant for the "Lao" Unicode character block.
 868          * @since 1.2
 869          */
 870         public static final UnicodeBlock LAO =
 871             new UnicodeBlock("LAO");
 872 
 873         /**
 874          * Constant for the "Tibetan" Unicode character block.
 875          * @since 1.2
 876          */
 877         public static final UnicodeBlock TIBETAN =
 878             new UnicodeBlock("TIBETAN");
 879 
 880         /**
 881          * Constant for the "Georgian" Unicode character block.
 882          * @since 1.2
 883          */
 884         public static final UnicodeBlock GEORGIAN =
 885             new UnicodeBlock("GEORGIAN");
 886 
 887         /**
 888          * Constant for the "Hangul Jamo" Unicode character block.
 889          * @since 1.2
 890          */
 891         public static final UnicodeBlock HANGUL_JAMO =
 892             new UnicodeBlock("HANGUL_JAMO",
 893                              "HANGUL JAMO",
 894                              "HANGULJAMO");
 895 
 896         /**
 897          * Constant for the "Latin Extended Additional" Unicode character block.
 898          * @since 1.2
 899          */
 900         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
 901             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
 902                              "LATIN EXTENDED ADDITIONAL",
 903                              "LATINEXTENDEDADDITIONAL");
 904 
 905         /**
 906          * Constant for the "Greek Extended" Unicode character block.
 907          * @since 1.2
 908          */
 909         public static final UnicodeBlock GREEK_EXTENDED =
 910             new UnicodeBlock("GREEK_EXTENDED",
 911                              "GREEK EXTENDED",
 912                              "GREEKEXTENDED");
 913 
 914         /**
 915          * Constant for the "General Punctuation" Unicode character block.
 916          * @since 1.2
 917          */
 918         public static final UnicodeBlock GENERAL_PUNCTUATION =
 919             new UnicodeBlock("GENERAL_PUNCTUATION",
 920                              "GENERAL PUNCTUATION",
 921                              "GENERALPUNCTUATION");
 922 
 923         /**
 924          * Constant for the "Superscripts and Subscripts" Unicode character
 925          * block.
 926          * @since 1.2
 927          */
 928         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
 929             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
 930                              "SUPERSCRIPTS AND SUBSCRIPTS",
 931                              "SUPERSCRIPTSANDSUBSCRIPTS");
 932 
 933         /**
 934          * Constant for the "Currency Symbols" Unicode character block.
 935          * @since 1.2
 936          */
 937         public static final UnicodeBlock CURRENCY_SYMBOLS =
 938             new UnicodeBlock("CURRENCY_SYMBOLS",
 939                              "CURRENCY SYMBOLS",
 940                              "CURRENCYSYMBOLS");
 941 
 942         /**
 943          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
 944          * character block.
 945          * <p>
 946          * This block was previously known as "Combining Marks for Symbols".
 947          * @since 1.2
 948          */
 949         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
 950             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
 951                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
 952                              "COMBININGDIACRITICALMARKSFORSYMBOLS",
 953                              "COMBINING MARKS FOR SYMBOLS",
 954                              "COMBININGMARKSFORSYMBOLS");
 955 
 956         /**
 957          * Constant for the "Letterlike Symbols" Unicode character block.
 958          * @since 1.2
 959          */
 960         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
 961             new UnicodeBlock("LETTERLIKE_SYMBOLS",
 962                              "LETTERLIKE SYMBOLS",
 963                              "LETTERLIKESYMBOLS");
 964 
 965         /**
 966          * Constant for the "Number Forms" Unicode character block.
 967          * @since 1.2
 968          */
 969         public static final UnicodeBlock NUMBER_FORMS =
 970             new UnicodeBlock("NUMBER_FORMS",
 971                              "NUMBER FORMS",
 972                              "NUMBERFORMS");
 973 
 974         /**
 975          * Constant for the "Arrows" Unicode character block.
 976          * @since 1.2
 977          */
 978         public static final UnicodeBlock ARROWS =
 979             new UnicodeBlock("ARROWS");
 980 
 981         /**
 982          * Constant for the "Mathematical Operators" Unicode character block.
 983          * @since 1.2
 984          */
 985         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
 986             new UnicodeBlock("MATHEMATICAL_OPERATORS",
 987                              "MATHEMATICAL OPERATORS",
 988                              "MATHEMATICALOPERATORS");
 989 
 990         /**
 991          * Constant for the "Miscellaneous Technical" Unicode character block.
 992          * @since 1.2
 993          */
 994         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
 995             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
 996                              "MISCELLANEOUS TECHNICAL",
 997                              "MISCELLANEOUSTECHNICAL");
 998 
 999         /**
1000          * Constant for the "Control Pictures" Unicode character block.
1001          * @since 1.2
1002          */
1003         public static final UnicodeBlock CONTROL_PICTURES =
1004             new UnicodeBlock("CONTROL_PICTURES",
1005                              "CONTROL PICTURES",
1006                              "CONTROLPICTURES");
1007 
1008         /**
1009          * Constant for the "Optical Character Recognition" Unicode character block.
1010          * @since 1.2
1011          */
1012         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1013             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1014                              "OPTICAL CHARACTER RECOGNITION",
1015                              "OPTICALCHARACTERRECOGNITION");
1016 
1017         /**
1018          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1019          * @since 1.2
1020          */
1021         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1022             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1023                              "ENCLOSED ALPHANUMERICS",
1024                              "ENCLOSEDALPHANUMERICS");
1025 
1026         /**
1027          * Constant for the "Box Drawing" Unicode character block.
1028          * @since 1.2
1029          */
1030         public static final UnicodeBlock BOX_DRAWING =
1031             new UnicodeBlock("BOX_DRAWING",
1032                              "BOX DRAWING",
1033                              "BOXDRAWING");
1034 
1035         /**
1036          * Constant for the "Block Elements" Unicode character block.
1037          * @since 1.2
1038          */
1039         public static final UnicodeBlock BLOCK_ELEMENTS =
1040             new UnicodeBlock("BLOCK_ELEMENTS",
1041                              "BLOCK ELEMENTS",
1042                              "BLOCKELEMENTS");
1043 
1044         /**
1045          * Constant for the "Geometric Shapes" Unicode character block.
1046          * @since 1.2
1047          */
1048         public static final UnicodeBlock GEOMETRIC_SHAPES =
1049             new UnicodeBlock("GEOMETRIC_SHAPES",
1050                              "GEOMETRIC SHAPES",
1051                              "GEOMETRICSHAPES");
1052 
1053         /**
1054          * Constant for the "Miscellaneous Symbols" Unicode character block.
1055          * @since 1.2
1056          */
1057         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1058             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1059                              "MISCELLANEOUS SYMBOLS",
1060                              "MISCELLANEOUSSYMBOLS");
1061 
1062         /**
1063          * Constant for the "Dingbats" Unicode character block.
1064          * @since 1.2
1065          */
1066         public static final UnicodeBlock DINGBATS =
1067             new UnicodeBlock("DINGBATS");
1068 
1069         /**
1070          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1071          * @since 1.2
1072          */
1073         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1074             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1075                              "CJK SYMBOLS AND PUNCTUATION",
1076                              "CJKSYMBOLSANDPUNCTUATION");
1077 
1078         /**
1079          * Constant for the "Hiragana" Unicode character block.
1080          * @since 1.2
1081          */
1082         public static final UnicodeBlock HIRAGANA =
1083             new UnicodeBlock("HIRAGANA");
1084 
1085         /**
1086          * Constant for the "Katakana" Unicode character block.
1087          * @since 1.2
1088          */
1089         public static final UnicodeBlock KATAKANA =
1090             new UnicodeBlock("KATAKANA");
1091 
1092         /**
1093          * Constant for the "Bopomofo" Unicode character block.
1094          * @since 1.2
1095          */
1096         public static final UnicodeBlock BOPOMOFO =
1097             new UnicodeBlock("BOPOMOFO");
1098 
1099         /**
1100          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1101          * @since 1.2
1102          */
1103         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1104             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1105                              "HANGUL COMPATIBILITY JAMO",
1106                              "HANGULCOMPATIBILITYJAMO");
1107 
1108         /**
1109          * Constant for the "Kanbun" Unicode character block.
1110          * @since 1.2
1111          */
1112         public static final UnicodeBlock KANBUN =
1113             new UnicodeBlock("KANBUN");
1114 
1115         /**
1116          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1117          * @since 1.2
1118          */
1119         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1120             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1121                              "ENCLOSED CJK LETTERS AND MONTHS",
1122                              "ENCLOSEDCJKLETTERSANDMONTHS");
1123 
1124         /**
1125          * Constant for the "CJK Compatibility" Unicode character block.
1126          * @since 1.2
1127          */
1128         public static final UnicodeBlock CJK_COMPATIBILITY =
1129             new UnicodeBlock("CJK_COMPATIBILITY",
1130                              "CJK COMPATIBILITY",
1131                              "CJKCOMPATIBILITY");
1132 
1133         /**
1134          * Constant for the "CJK Unified Ideographs" Unicode character block.
1135          * @since 1.2
1136          */
1137         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1138             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1139                              "CJK UNIFIED IDEOGRAPHS",
1140                              "CJKUNIFIEDIDEOGRAPHS");
1141 
1142         /**
1143          * Constant for the "Hangul Syllables" Unicode character block.
1144          * @since 1.2
1145          */
1146         public static final UnicodeBlock HANGUL_SYLLABLES =
1147             new UnicodeBlock("HANGUL_SYLLABLES",
1148                              "HANGUL SYLLABLES",
1149                              "HANGULSYLLABLES");
1150 
1151         /**
1152          * Constant for the "Private Use Area" Unicode character block.
1153          * @since 1.2
1154          */
1155         public static final UnicodeBlock PRIVATE_USE_AREA =
1156             new UnicodeBlock("PRIVATE_USE_AREA",
1157                              "PRIVATE USE AREA",
1158                              "PRIVATEUSEAREA");
1159 
1160         /**
1161          * Constant for the "CJK Compatibility Ideographs" Unicode character
1162          * block.
1163          * @since 1.2
1164          */
1165         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1166             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1167                              "CJK COMPATIBILITY IDEOGRAPHS",
1168                              "CJKCOMPATIBILITYIDEOGRAPHS");
1169 
1170         /**
1171          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1172          * @since 1.2
1173          */
1174         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1175             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1176                              "ALPHABETIC PRESENTATION FORMS",
1177                              "ALPHABETICPRESENTATIONFORMS");
1178 
1179         /**
1180          * Constant for the "Arabic Presentation Forms-A" Unicode character
1181          * block.
1182          * @since 1.2
1183          */
1184         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1185             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1186                              "ARABIC PRESENTATION FORMS-A",
1187                              "ARABICPRESENTATIONFORMS-A");
1188 
1189         /**
1190          * Constant for the "Combining Half Marks" Unicode character block.
1191          * @since 1.2
1192          */
1193         public static final UnicodeBlock COMBINING_HALF_MARKS =
1194             new UnicodeBlock("COMBINING_HALF_MARKS",
1195                              "COMBINING HALF MARKS",
1196                              "COMBININGHALFMARKS");
1197 
1198         /**
1199          * Constant for the "CJK Compatibility Forms" Unicode character block.
1200          * @since 1.2
1201          */
1202         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1203             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1204                              "CJK COMPATIBILITY FORMS",
1205                              "CJKCOMPATIBILITYFORMS");
1206 
1207         /**
1208          * Constant for the "Small Form Variants" Unicode character block.
1209          * @since 1.2
1210          */
1211         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1212             new UnicodeBlock("SMALL_FORM_VARIANTS",
1213                              "SMALL FORM VARIANTS",
1214                              "SMALLFORMVARIANTS");
1215 
1216         /**
1217          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1218          * @since 1.2
1219          */
1220         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1221             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1222                              "ARABIC PRESENTATION FORMS-B",
1223                              "ARABICPRESENTATIONFORMS-B");
1224 
1225         /**
1226          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1227          * block.
1228          * @since 1.2
1229          */
1230         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1231             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1232                              "HALFWIDTH AND FULLWIDTH FORMS",
1233                              "HALFWIDTHANDFULLWIDTHFORMS");
1234 
1235         /**
1236          * Constant for the "Specials" Unicode character block.
1237          * @since 1.2
1238          */
1239         public static final UnicodeBlock SPECIALS =
1240             new UnicodeBlock("SPECIALS");
1241 
1242         /**
1243          * Constant for the "Surrogates Area" character block, superseded by the constants named below.
1244          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1245          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}.
1246          *             These new constants match the block definitions of the Unicode
1247          *             Standard. The {@link #of(char)} and {@link #of(int)} methods
1248          *             return the new constants, not {@code SURROGATES_AREA}.
1249          */
1250         @Deprecated
1251         public static final UnicodeBlock SURROGATES_AREA =
1252             new UnicodeBlock("SURROGATES_AREA");
1253 
1254         /**
1255          * Constant for the "Syriac" Unicode character block.
1256          * @since 1.4
1257          */
1258         public static final UnicodeBlock SYRIAC =
1259             new UnicodeBlock("SYRIAC");
1260 
1261         /**
1262          * Constant for the "Thaana" Unicode character block.
1263          * @since 1.4
1264          */
1265         public static final UnicodeBlock THAANA =
1266             new UnicodeBlock("THAANA");
1267 
1268         /**
1269          * Constant for the "Sinhala" Unicode character block.
1270          * @since 1.4
1271          */
1272         public static final UnicodeBlock SINHALA =
1273             new UnicodeBlock("SINHALA");
1274 
1275         /**
1276          * Constant for the "Myanmar" Unicode character block.
1277          * @since 1.4
1278          */
1279         public static final UnicodeBlock MYANMAR =
1280             new UnicodeBlock("MYANMAR");
1281 
1282         /**
1283          * Constant for the "Ethiopic" Unicode character block.
1284          * @since 1.4
1285          */
1286         public static final UnicodeBlock ETHIOPIC =
1287             new UnicodeBlock("ETHIOPIC");
1288 
1289         /**
1290          * Constant for the "Cherokee" Unicode character block.
1291          * @since 1.4
1292          */
1293         public static final UnicodeBlock CHEROKEE =
1294             new UnicodeBlock("CHEROKEE");
1295 
1296         /**
1297          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1298          * @since 1.4
1299          */
1300         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1301             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1302                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1303                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
1304 
1305         /**
1306          * Constant for the "Ogham" Unicode character block.
1307          * @since 1.4
1308          */
1309         public static final UnicodeBlock OGHAM =
1310             new UnicodeBlock("OGHAM");
1311 
1312         /**
1313          * Constant for the "Runic" Unicode character block.
1314          * @since 1.4
1315          */
1316         public static final UnicodeBlock RUNIC =
1317             new UnicodeBlock("RUNIC");
1318 
1319         /**
1320          * Constant for the "Khmer" Unicode character block.
1321          * @since 1.4
1322          */
1323         public static final UnicodeBlock KHMER =
1324             new UnicodeBlock("KHMER");
1325 
1326         /**
1327          * Constant for the "Mongolian" Unicode character block.
1328          * @since 1.4
1329          */
1330         public static final UnicodeBlock MONGOLIAN =
1331             new UnicodeBlock("MONGOLIAN");
1332 
1333         /**
1334          * Constant for the "Braille Patterns" Unicode character block.
1335          * @since 1.4
1336          */
1337         public static final UnicodeBlock BRAILLE_PATTERNS =
1338             new UnicodeBlock("BRAILLE_PATTERNS",
1339                              "BRAILLE PATTERNS",
1340                              "BRAILLEPATTERNS");
1341 
1342         /**
1343          * Constant for the "CJK Radicals Supplement" Unicode character block.
1344          * @since 1.4
1345          */
1346         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1347             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1348                              "CJK RADICALS SUPPLEMENT",
1349                              "CJKRADICALSSUPPLEMENT");
1350 
1351         /**
1352          * Constant for the "Kangxi Radicals" Unicode character block.
1353          * @since 1.4
1354          */
1355         public static final UnicodeBlock KANGXI_RADICALS =
1356             new UnicodeBlock("KANGXI_RADICALS",
1357                              "KANGXI RADICALS",
1358                              "KANGXIRADICALS");
1359 
1360         /**
1361          * Constant for the "Ideographic Description Characters" Unicode character block.
1362          * @since 1.4
1363          */
1364         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1365             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1366                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1367                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1368 
1369         /**
1370          * Constant for the "Bopomofo Extended" Unicode character block.
1371          * @since 1.4
1372          */
1373         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1374             new UnicodeBlock("BOPOMOFO_EXTENDED",
1375                              "BOPOMOFO EXTENDED",
1376                              "BOPOMOFOEXTENDED");
1377 
1378         /**
1379          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1380          * @since 1.4
1381          */
1382         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1383             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1384                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1385                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1386 
1387         /**
1388          * Constant for the "Yi Syllables" Unicode character block.
1389          * @since 1.4
1390          */
1391         public static final UnicodeBlock YI_SYLLABLES =
1392             new UnicodeBlock("YI_SYLLABLES",
1393                              "YI SYLLABLES",
1394                              "YISYLLABLES");
1395 
1396         /**
1397          * Constant for the "Yi Radicals" Unicode character block.
1398          * @since 1.4
1399          */
1400         public static final UnicodeBlock YI_RADICALS =
1401             new UnicodeBlock("YI_RADICALS",
1402                              "YI RADICALS",
1403                              "YIRADICALS");
1404 
1405         /**
1406          * Constant for the "Cyrillic Supplement" Unicode character block, previously known as "Cyrillic Supplementary".
1407          * @since 1.5
1408          */
1409         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1410             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1411                              "CYRILLIC SUPPLEMENTARY",
1412                              "CYRILLICSUPPLEMENTARY",
1413                              "CYRILLIC SUPPLEMENT",
1414                              "CYRILLICSUPPLEMENT");
1415 
1416         /**
1417          * Constant for the "Tagalog" Unicode character block.
1418          * @since 1.5
1419          */
1420         public static final UnicodeBlock TAGALOG =
1421             new UnicodeBlock("TAGALOG");
1422 
1423         /**
1424          * Constant for the "Hanunoo" Unicode character block.
1425          * @since 1.5
1426          */
1427         public static final UnicodeBlock HANUNOO =
1428             new UnicodeBlock("HANUNOO");
1429 
1430         /**
1431          * Constant for the "Buhid" Unicode character block.
1432          * @since 1.5
1433          */
1434         public static final UnicodeBlock BUHID =
1435             new UnicodeBlock("BUHID");
1436 
1437         /**
1438          * Constant for the "Tagbanwa" Unicode character block.
1439          * @since 1.5
1440          */
1441         public static final UnicodeBlock TAGBANWA =
1442             new UnicodeBlock("TAGBANWA");
1443 
1444         /**
1445          * Constant for the "Limbu" Unicode character block.
1446          * @since 1.5
1447          */
1448         public static final UnicodeBlock LIMBU =
1449             new UnicodeBlock("LIMBU");
1450 
1451         /**
1452          * Constant for the "Tai Le" Unicode character block.
1453          * @since 1.5
1454          */
1455         public static final UnicodeBlock TAI_LE =
1456             new UnicodeBlock("TAI_LE",
1457                              "TAI LE",
1458                              "TAILE");
1459 
1460         /**
1461          * Constant for the "Khmer Symbols" Unicode character block.
1462          * @since 1.5
1463          */
1464         public static final UnicodeBlock KHMER_SYMBOLS =
1465             new UnicodeBlock("KHMER_SYMBOLS",
1466                              "KHMER SYMBOLS",
1467                              "KHMERSYMBOLS");
1468 
1469         /**
1470          * Constant for the "Phonetic Extensions" Unicode character block.
1471          * @since 1.5
1472          */
1473         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1474             new UnicodeBlock("PHONETIC_EXTENSIONS",
1475                              "PHONETIC EXTENSIONS",
1476                              "PHONETICEXTENSIONS");
1477 
1478         /**
1479          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1480          * @since 1.5
1481          */
1482         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1483             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1484                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1485                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1486 
1487         /**
1488          * Constant for the "Supplemental Arrows-A" Unicode character block.
1489          * @since 1.5
1490          */
1491         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1492             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1493                              "SUPPLEMENTAL ARROWS-A",
1494                              "SUPPLEMENTALARROWS-A");
1495 
1496         /**
1497          * Constant for the "Supplemental Arrows-B" Unicode character block.
1498          * @since 1.5
1499          */
1500         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1501             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1502                              "SUPPLEMENTAL ARROWS-B",
1503                              "SUPPLEMENTALARROWS-B");
1504 
1505         /**
1506          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1507          * character block.
1508          * @since 1.5
1509          */
1510         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1511             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1512                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1513                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1514 
1515         /**
1516          * Constant for the "Supplemental Mathematical Operators" Unicode
1517          * character block.
1518          * @since 1.5
1519          */
1520         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1521             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1522                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1523                              "SUPPLEMENTALMATHEMATICALOPERATORS");
1524 
1525         /**
1526          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1527          * block.
1528          * @since 1.5
1529          */
1530         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1531             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1532                              "MISCELLANEOUS SYMBOLS AND ARROWS",
1533                              "MISCELLANEOUSSYMBOLSANDARROWS");
1534 
1535         /**
1536          * Constant for the "Katakana Phonetic Extensions" Unicode character
1537          * block.
1538          * @since 1.5
1539          */
1540         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1541             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1542                              "KATAKANA PHONETIC EXTENSIONS",
1543                              "KATAKANAPHONETICEXTENSIONS");
1544 
1545         /**
1546          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1547          * @since 1.5
1548          */
1549         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1550             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1551                              "YIJING HEXAGRAM SYMBOLS",
1552                              "YIJINGHEXAGRAMSYMBOLS");
1553 
1554         /**
1555          * Constant for the "Variation Selectors" Unicode character block.
1556          * @since 1.5
1557          */
1558         public static final UnicodeBlock VARIATION_SELECTORS =
1559             new UnicodeBlock("VARIATION_SELECTORS",
1560                              "VARIATION SELECTORS",
1561                              "VARIATIONSELECTORS");
1562 
1563         /**
1564          * Constant for the "Linear B Syllabary" Unicode character block.
1565          * @since 1.5
1566          */
1567         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1568             new UnicodeBlock("LINEAR_B_SYLLABARY",
1569                              "LINEAR B SYLLABARY",
1570                              "LINEARBSYLLABARY");
1571 
1572         /**
1573          * Constant for the "Linear B Ideograms" Unicode character block.
1574          * @since 1.5
1575          */
1576         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1577             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1578                              "LINEAR B IDEOGRAMS",
1579                              "LINEARBIDEOGRAMS");
1580 
1581         /**
1582          * Constant for the "Aegean Numbers" Unicode character block.
1583          * @since 1.5
1584          */
1585         public static final UnicodeBlock AEGEAN_NUMBERS =
1586             new UnicodeBlock("AEGEAN_NUMBERS",
1587                              "AEGEAN NUMBERS",
1588                              "AEGEANNUMBERS");
1589 
1590         /**
1591          * Constant for the "Old Italic" Unicode character block.
1592          * @since 1.5
1593          */
1594         public static final UnicodeBlock OLD_ITALIC =
1595             new UnicodeBlock("OLD_ITALIC",
1596                              "OLD ITALIC",
1597                              "OLDITALIC");
1598 
1599         /**
1600          * Constant for the "Gothic" Unicode character block.
1601          * @since 1.5
1602          */
1603         public static final UnicodeBlock GOTHIC =
1604             new UnicodeBlock("GOTHIC");
1605 
1606         /**
1607          * Constant for the "Ugaritic" Unicode character block.
1608          * @since 1.5
1609          */
1610         public static final UnicodeBlock UGARITIC =
1611             new UnicodeBlock("UGARITIC");
1612 
1613         /**
1614          * Constant for the "Deseret" Unicode character block.
1615          * @since 1.5
1616          */
1617         public static final UnicodeBlock DESERET =
1618             new UnicodeBlock("DESERET");
1619 
1620         /**
1621          * Represents the "Shavian" Unicode character block.
1622          * @since 1.5
1623          */
1624         public static final UnicodeBlock SHAVIAN
1625             = new UnicodeBlock("SHAVIAN");
1626 
1627         /**
1628          * Represents the "Osmanya" Unicode character block.
1629          * @since 1.5
1630          */
1631         public static final UnicodeBlock OSMANYA
1632             = new UnicodeBlock("OSMANYA");
1633 
1634         /**
1635          * Represents the "Cypriot Syllabary" Unicode character block.
1636          * @since 1.5
1637          */
1638         public static final UnicodeBlock CYPRIOT_SYLLABARY
1639             = new UnicodeBlock("CYPRIOT_SYLLABARY",
1640                                "CYPRIOT SYLLABARY",
1641                                "CYPRIOTSYLLABARY");
1642 
1643         /**
1644          * Represents the "Byzantine Musical Symbols" Unicode character block.
1645          * @since 1.5
1646          */
1647         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1648             = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1649                                "BYZANTINE MUSICAL SYMBOLS",
1650                                "BYZANTINEMUSICALSYMBOLS");
1651 
1652         /**
1653          * Represents the "Musical Symbols" Unicode character block.
1654          * @since 1.5
1655          */
1656         public static final UnicodeBlock MUSICAL_SYMBOLS
1657             = new UnicodeBlock("MUSICAL_SYMBOLS",
1658                                "MUSICAL SYMBOLS",
1659                                "MUSICALSYMBOLS");
1660 
1661         /**
1662          * Represents the "Tai Xuan Jing Symbols" Unicode character block.
1663          * @since 1.5
1664          */
1665         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1666             = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1667                                "TAI XUAN JING SYMBOLS",
1668                                "TAIXUANJINGSYMBOLS");
1669 
1670         /**
1671          * Represents the "Mathematical Alphanumeric Symbols" Unicode
1672          * character block.
1673          * @since 1.5
1674          */
1675         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1676             = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1677                                "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1678                                "MATHEMATICALALPHANUMERICSYMBOLS");
1679 
1680         /**
1681          * Represents the "CJK Unified Ideographs Extension B" Unicode
1682          * character block.
1683          * @since 1.5
1684          */
1685         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1686             = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1687                                "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1688                                "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1689 
1690         /**
1691          * Represents the "CJK Compatibility Ideographs Supplement" Unicode character block.
1692          * @since 1.5
1693          */
1694         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1695             = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1696                                "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1697                                "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1698 
1699         /**
1700          * Represents the "Tags" Unicode character block.
1701          * @since 1.5
1702          */
1703         public static final UnicodeBlock TAGS
1704             = new UnicodeBlock("TAGS");
1705 
1706         /**
1707          * Represents the "Variation Selectors Supplement" Unicode character
1708          * block.
1709          * @since 1.5
1710          */
1711         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1712             = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1713                                "VARIATION SELECTORS SUPPLEMENT",
1714                                "VARIATIONSELECTORSSUPPLEMENT");
1715 
1716         /**
1717          * Represents the "Supplementary Private Use Area-A" Unicode character
1718          * block.
1719          * @since 1.5
1720          */
1721         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1722             = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1723                                "SUPPLEMENTARY PRIVATE USE AREA-A",
1724                                "SUPPLEMENTARYPRIVATEUSEAREA-A");
1725 
1726         /**
1727          * Represents the "Supplementary Private Use Area-B" Unicode character
1728          * block.
1729          * @since 1.5
1730          */
1731         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1732             = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1733                                "SUPPLEMENTARY PRIVATE USE AREA-B",
1734                                "SUPPLEMENTARYPRIVATEUSEAREA-B");
1735 
1736         /**
1737          * Represents the "High Surrogates" Unicode character block.
1738          * This block covers the code point values in the high surrogate
1739          * range, U+D800 through U+DB7F.
1740          *
1741          * @since 1.5
1742          */
1743         public static final UnicodeBlock HIGH_SURROGATES
1744             = new UnicodeBlock("HIGH_SURROGATES",
1745                                "HIGH SURROGATES",
1746                                "HIGHSURROGATES");
1747 
1748         /**
1749          * Represents the "High Private Use Surrogates" Unicode character
1750          * block.
1751          * This block covers the code point values in the private use high
1752          * surrogate range, U+DB80 through U+DBFF.
1753          *
1754          * @since 1.5
1755          */
1756         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1757             = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1758                                "HIGH PRIVATE USE SURROGATES",
1759                                "HIGHPRIVATEUSESURROGATES");
1760 
1761         /**
1762          * Represents the "Low Surrogates" Unicode character block.
1763          * This block covers the code point values in the low surrogate
1764          * range, U+DC00 through U+DFFF.
1765          *
1766          * @since 1.5
1767          */
1768         public static final UnicodeBlock LOW_SURROGATES
1769             = new UnicodeBlock("LOW_SURROGATES",
1770                                "LOW SURROGATES",
1771                                "LOWSURROGATES");
1772 
1773         /**
1774          * Represents the "Arabic Supplement" Unicode character block.
1775          * @since 1.7
1776          */
1777         public static final UnicodeBlock ARABIC_SUPPLEMENT
1778             = new UnicodeBlock("ARABIC_SUPPLEMENT",
1779                                "ARABIC SUPPLEMENT",
1780                                "ARABICSUPPLEMENT");
1781 
1782         /**
1783          * Represents the "NKo" Unicode character block.
1784          * @since 1.7
1785          */
1786         public static final UnicodeBlock NKO
1787             = new UnicodeBlock("NKO");
1788 
1789         /**
1790          * Represents the "Samaritan" Unicode character block.
1791          * @since 1.7
1792          */
1793         public static final UnicodeBlock SAMARITAN
1794             = new UnicodeBlock("SAMARITAN");
1795 
1796         /**
1797          * Represents the "Mandaic" Unicode character block.
1798          * @since 1.7
1799          */
1800         public static final UnicodeBlock MANDAIC
1801             = new UnicodeBlock("MANDAIC");
1802 
1803         /**
1804          * Represents the "Ethiopic Supplement" Unicode character block.
1805          * @since 1.7
1806          */
1807         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT
1808             = new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1809                                "ETHIOPIC SUPPLEMENT",
1810                                "ETHIOPICSUPPLEMENT");
1811 
1812         /**
1813          * Represents the "Unified Canadian Aboriginal Syllabics Extended"
1814          * Unicode character block.
1815          * @since 1.7
1816          */
1817         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED
1818             = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1819                                "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1820                                "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1821 
1822         /**
1823          * Represents the "New Tai Lue" Unicode character block.
1824          * @since 1.7
1825          */
1826         public static final UnicodeBlock NEW_TAI_LUE
1827             = new UnicodeBlock("NEW_TAI_LUE",
1828                                "NEW TAI LUE",
1829                                "NEWTAILUE");
1830 
1831         /**
1832          * Represents the "Buginese" Unicode character block.
1833          * @since 1.7
1834          */
1835         public static final UnicodeBlock BUGINESE
1836             = new UnicodeBlock("BUGINESE");
1837 
1838         /**
1839          * Represents the "Tai Tham" Unicode character block.
1840          * @since 1.7
1841          */
1842         public static final UnicodeBlock TAI_THAM
1843             = new UnicodeBlock("TAI_THAM",
1844                                "TAI THAM",
1845                                "TAITHAM");
1846 
1847         /**
1848          * Represents the "Balinese" Unicode character block.
1849          * @since 1.7
1850          */
1851         public static final UnicodeBlock BALINESE
1852             = new UnicodeBlock("BALINESE");
1853 
1854         /**
1855          * Represents the "Sundanese" Unicode character block.
1856          * @since 1.7
1857          */
1858         public static final UnicodeBlock SUNDANESE
1859             = new UnicodeBlock("SUNDANESE");
1860 
1861         /**
1862          * Represents the "Batak" Unicode character block.
1863          * @since 1.7
1864          */
1865         public static final UnicodeBlock BATAK
1866             = new UnicodeBlock("BATAK");
1867 
1868         /**
1869          * Represents the "Lepcha" Unicode character block.
1870          * @since 1.7
1871          */
1872         public static final UnicodeBlock LEPCHA
1873             = new UnicodeBlock("LEPCHA");
1874 
1875         /**
1876          * Represents the "Ol Chiki" Unicode character block.
1877          * @since 1.7
1878          */
1879         public static final UnicodeBlock OL_CHIKI
1880             = new UnicodeBlock("OL_CHIKI",
1881                                "OL CHIKI",
1882                                "OLCHIKI");
1883 
1884         /**
1885          * Represents the "Vedic Extensions" Unicode character block.
1886          * @since 1.7
1887          */
1888         public static final UnicodeBlock VEDIC_EXTENSIONS
1889             = new UnicodeBlock("VEDIC_EXTENSIONS",
1890                                "VEDIC EXTENSIONS",
1891                                "VEDICEXTENSIONS");
1892 
1893         /**
1894          * Represents the "Phonetic Extensions Supplement" Unicode character
1895          * block.
1896          * @since 1.7
1897          */
1898         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT
1899             = new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1900                                "PHONETIC EXTENSIONS SUPPLEMENT",
1901                                "PHONETICEXTENSIONSSUPPLEMENT");
1902 
1903         /**
1904          * Represents the "Combining Diacritical Marks Supplement" Unicode
1905          * character block.
1906          * @since 1.7
1907          */
1908         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT
1909             = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1910                                "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1911                                "COMBININGDIACRITICALMARKSSUPPLEMENT");
1912 
1913         /**
1914          * Represents the "Glagolitic" Unicode character block.
1915          * @since 1.7
1916          */
1917         public static final UnicodeBlock GLAGOLITIC
1918             = new UnicodeBlock("GLAGOLITIC");
1919 
1920         /**
1921          * Represents the "Latin Extended-C" Unicode character block.
1922          * @since 1.7
1923          */
1924         public static final UnicodeBlock LATIN_EXTENDED_C
1925             = new UnicodeBlock("LATIN_EXTENDED_C",
1926                                "LATIN EXTENDED-C",
1927                                "LATINEXTENDED-C");
1928 
1929         /**
1930          * Represents the "Coptic" Unicode character block.
1931          * @since 1.7
1932          */
1933         public static final UnicodeBlock COPTIC
1934             = new UnicodeBlock("COPTIC");
1935 
1936         /**
1937          * Represents the "Georgian Supplement" Unicode character block.
1938          * @since 1.7
1939          */
1940         public static final UnicodeBlock GEORGIAN_SUPPLEMENT
1941             = new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1942                                "GEORGIAN SUPPLEMENT",
1943                                "GEORGIANSUPPLEMENT");
1944 
1945         /**
1946          * Represents the "Tifinagh" Unicode character block.
1947          * @since 1.7
1948          */
1949         public static final UnicodeBlock TIFINAGH
1950             = new UnicodeBlock("TIFINAGH");
1951 
1952         /**
1953          * Represents the "Ethiopic Extended" Unicode character block.
1954          * @since 1.7
1955          */
1956         public static final UnicodeBlock ETHIOPIC_EXTENDED
1957             = new UnicodeBlock("ETHIOPIC_EXTENDED",
1958                                "ETHIOPIC EXTENDED",
1959                                "ETHIOPICEXTENDED");
1960 
1961         /**
1962          * Represents the "Cyrillic Extended-A" Unicode character block.
1963          * @since 1.7
1964          */
1965         public static final UnicodeBlock CYRILLIC_EXTENDED_A
1966             = new UnicodeBlock("CYRILLIC_EXTENDED_A",
1967                                "CYRILLIC EXTENDED-A",
1968                                "CYRILLICEXTENDED-A");
1969 
1970         /**
1971          * Represents the "Supplemental Punctuation" Unicode character block.
1972          * @since 1.7
1973          */
1974         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION
1975             = new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1976                                "SUPPLEMENTAL PUNCTUATION",
1977                                "SUPPLEMENTALPUNCTUATION");
1978 
1979         /**
1980          * Represents the "CJK Strokes" Unicode character block.
1981          * @since 1.7
1982          */
1983         public static final UnicodeBlock CJK_STROKES
1984             = new UnicodeBlock("CJK_STROKES",
1985                                "CJK STROKES",
1986                                "CJKSTROKES");
1987 
1988         /**
1989          * Represents the "Lisu" Unicode character block.
1990          * @since 1.7
1991          */
1992         public static final UnicodeBlock LISU
1993             = new UnicodeBlock("LISU");
1994 
1995         /**
1996          * Represents the "Vai" Unicode character block.
1997          * @since 1.7
1998          */
1999         public static final UnicodeBlock VAI
2000             = new UnicodeBlock("VAI");
2001 
2002         /**
2003          * Represents the "Cyrillic Extended-B" Unicode character block.
2004          * @since 1.7
2005          */
2006         public static final UnicodeBlock CYRILLIC_EXTENDED_B
2007             = new UnicodeBlock("CYRILLIC_EXTENDED_B",
2008                                "CYRILLIC EXTENDED-B",
2009                                "CYRILLICEXTENDED-B");
2010 
2011         /**
2012          * Represents the "Bamum" Unicode character block.
2013          * @since 1.7
2014          */
2015         public static final UnicodeBlock BAMUM
2016             = new UnicodeBlock("BAMUM");
2017 
2018         /**
2019          * Represents the "Modifier Tone Letters" Unicode character block.
2020          * @since 1.7
2021          */
2022         public static final UnicodeBlock MODIFIER_TONE_LETTERS
2023             = new UnicodeBlock("MODIFIER_TONE_LETTERS",
2024                                "MODIFIER TONE LETTERS",
2025                                "MODIFIERTONELETTERS");
2026 
2027         /**
2028          * Represents the "Latin Extended-D" Unicode character block.
2029          * @since 1.7
2030          */
2031         public static final UnicodeBlock LATIN_EXTENDED_D
2032             = new UnicodeBlock("LATIN_EXTENDED_D",
2033                                "LATIN EXTENDED-D",
2034                                "LATINEXTENDED-D");
2035 
2036         /**
2037          * Represents the "Syloti Nagri" Unicode character block.
2038          * @since 1.7
2039          */
2040         public static final UnicodeBlock SYLOTI_NAGRI
2041             = new UnicodeBlock("SYLOTI_NAGRI",
2042                                "SYLOTI NAGRI",
2043                                "SYLOTINAGRI");
2044 
2045         /**
2046          * Represents the "Common Indic Number Forms" Unicode character block.
2047          * @since 1.7
2048          */
2049         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS
2050             = new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2051                                "COMMON INDIC NUMBER FORMS",
2052                                "COMMONINDICNUMBERFORMS");
2053 
2054         /**
2055          * Represents the "Phags-pa" Unicode character block.
2056          * @since 1.7
2057          */
2058         public static final UnicodeBlock PHAGS_PA
2059             = new UnicodeBlock("PHAGS_PA",
2060                                "PHAGS-PA");
2061 
2062         /**
2063          * Represents the "Saurashtra" Unicode character block.
2064          * @since 1.7
2065          */
2066         public static final UnicodeBlock SAURASHTRA
2067             = new UnicodeBlock("SAURASHTRA");
2068 
2069         /**
2070          * Represents the "Devanagari Extended" Unicode character block.
2071          * @since 1.7
2072          */
2073         public static final UnicodeBlock DEVANAGARI_EXTENDED
2074             = new UnicodeBlock("DEVANAGARI_EXTENDED",
2075                                "DEVANAGARI EXTENDED",
2076                                "DEVANAGARIEXTENDED");
2077 
2078         /**
2079          * Represents the "Kayah Li" Unicode character block.
2080          * @since 1.7
2081          */
2082         public static final UnicodeBlock KAYAH_LI
2083             = new UnicodeBlock("KAYAH_LI",
2084                                "KAYAH LI",
2085                                "KAYAHLI");
2086 
2087         /**
2088          * Represents the "Rejang" Unicode character block.
2089          * @since 1.7
2090          */
2091         public static final UnicodeBlock REJANG
2092             = new UnicodeBlock("REJANG");
2093 
2094         /**
2095          * Represents the "Hangul Jamo Extended-A" Unicode character block.
2096          * @since 1.7
2097          */
2098         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A
2099             = new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2100                                "HANGUL JAMO EXTENDED-A",
2101                                "HANGULJAMOEXTENDED-A");
2102 
2103         /**
2104          * Represents the "Javanese" Unicode character block.
2105          * @since 1.7
2106          */
2107         public static final UnicodeBlock JAVANESE
2108             = new UnicodeBlock("JAVANESE");
2109 
2110         /**
2111          * Represents the "Cham" Unicode character block.
2112          * @since 1.7
2113          */
2114         public static final UnicodeBlock CHAM
2115             = new UnicodeBlock("CHAM");
2116 
2117         /**
2118          * Represents the "Myanmar Extended-A" Unicode character block.
2119          * @since 1.7
2120          */
2121         public static final UnicodeBlock MYANMAR_EXTENDED_A
2122             = new UnicodeBlock("MYANMAR_EXTENDED_A",
2123                                "MYANMAR EXTENDED-A",
2124                                "MYANMAREXTENDED-A");
2125 
2126         /**
2127          * Represents the "Tai Viet" Unicode character block.
2128          * @since 1.7
2129          */
2130         public static final UnicodeBlock TAI_VIET
2131             = new UnicodeBlock("TAI_VIET",
2132                                "TAI VIET",
2133                                "TAIVIET");
2134 
2135         /**
2136          * Represents the "Ethiopic Extended-A" Unicode character block.
2137          * @since 1.7
2138          */
2139         public static final UnicodeBlock ETHIOPIC_EXTENDED_A
2140             = new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2141                                "ETHIOPIC EXTENDED-A",
2142                                "ETHIOPICEXTENDED-A");
2143 
2144         /**
2145          * Represents the "Meetei Mayek" Unicode character block.
2146          * @since 1.7
2147          */
2148         public static final UnicodeBlock MEETEI_MAYEK
2149             = new UnicodeBlock("MEETEI_MAYEK",
2150                                "MEETEI MAYEK",
2151                                "MEETEIMAYEK");
2152 
2153         /**
2154          * Represents the "Hangul Jamo Extended-B" Unicode character block.
2155          * @since 1.7
2156          */
2157         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B
2158             = new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2159                                "HANGUL JAMO EXTENDED-B",
2160                                "HANGULJAMOEXTENDED-B");
2161 
2162         /**
2163          * Represents the "Vertical Forms" Unicode character block.
2164          * @since 1.7
2165          */
2166         public static final UnicodeBlock VERTICAL_FORMS
2167             = new UnicodeBlock("VERTICAL_FORMS",
2168                                "VERTICAL FORMS",
2169                                "VERTICALFORMS");
2170 
2171         /**
2172          * Represents the "Ancient Greek Numbers" Unicode character block.
2173          * @since 1.7
2174          */
2175         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS
2176             = new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2177                                "ANCIENT GREEK NUMBERS",
2178                                "ANCIENTGREEKNUMBERS");
2179 
2180         /**
2181          * Represents the "Ancient Symbols" Unicode character block.
2182          * @since 1.7
2183          */
2184         public static final UnicodeBlock ANCIENT_SYMBOLS
2185             = new UnicodeBlock("ANCIENT_SYMBOLS",
2186                                "ANCIENT SYMBOLS",
2187                                "ANCIENTSYMBOLS");
2188 
2189         /**
2190          * Represents the "Phaistos Disc" Unicode character block.
2191          * @since 1.7
2192          */
2193         public static final UnicodeBlock PHAISTOS_DISC
2194             = new UnicodeBlock("PHAISTOS_DISC",
2195                                "PHAISTOS DISC",
2196                                "PHAISTOSDISC");
2197 
2198         /**
2199          * Represents the "Lycian" Unicode character block.
2200          * @since 1.7
2201          */
2202         public static final UnicodeBlock LYCIAN
2203             = new UnicodeBlock("LYCIAN");
2204 
2205         /**
2206          * Represents the "Carian" Unicode character block.
2207          * @since 1.7
2208          */
2209         public static final UnicodeBlock CARIAN
2210             = new UnicodeBlock("CARIAN");
2211 
2212         /**
2213          * Represents the "Old Persian" Unicode character block.
2214          * @since 1.7
2215          */
2216         public static final UnicodeBlock OLD_PERSIAN
2217             = new UnicodeBlock("OLD_PERSIAN",
2218                                "OLD PERSIAN",
2219                                "OLDPERSIAN");
2220 
2221         /**
2222          * Represents the "Imperial Aramaic" Unicode character block.
2223          * @since 1.7
2224          */
2225         public static final UnicodeBlock IMPERIAL_ARAMAIC
2226             = new UnicodeBlock("IMPERIAL_ARAMAIC",
2227                                "IMPERIAL ARAMAIC",
2228                                "IMPERIALARAMAIC");
2229 
2230         /**
2231          * Represents the "Phoenician" Unicode character block.
2232          * @since 1.7
2233          */
2234         public static final UnicodeBlock PHOENICIAN
2235             = new UnicodeBlock("PHOENICIAN");
2236 
2237         /**
2238          * Represents the "Lydian" Unicode character block.
2239          * @since 1.7
2240          */
2241         public static final UnicodeBlock LYDIAN
2242             = new UnicodeBlock("LYDIAN");
2243 
2244         /**
2245          * Represents the "Kharoshthi" Unicode character block.
2246          * @since 1.7
2247          */
2248         public static final UnicodeBlock KHAROSHTHI
2249             = new UnicodeBlock("KHAROSHTHI");
2250 
2251         /**
2252          * Represents the "Old South Arabian" Unicode character block.
2253          * @since 1.7
2254          */
2255         public static final UnicodeBlock OLD_SOUTH_ARABIAN
2256             = new UnicodeBlock("OLD_SOUTH_ARABIAN",
2257                                "OLD SOUTH ARABIAN",
2258                                "OLDSOUTHARABIAN");
2259 
2260         /**
2261          * Represents the "Avestan" Unicode character block.
2262          * @since 1.7
2263          */
2264         public static final UnicodeBlock AVESTAN
2265             = new UnicodeBlock("AVESTAN");
2266 
2267         /**
2268          * Represents the "Inscriptional Parthian" Unicode character block.
2269          * @since 1.7
2270          */
2271         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN
2272             = new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2273                                "INSCRIPTIONAL PARTHIAN",
2274                                "INSCRIPTIONALPARTHIAN");
2275 
2276         /**
2277          * Represents the "Inscriptional Pahlavi" Unicode character block.
2278          * @since 1.7
2279          */
2280         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI
2281             = new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2282                                "INSCRIPTIONAL PAHLAVI",
2283                                "INSCRIPTIONALPAHLAVI");
2284 
2285         /**
2286          * Represents the "Old Turkic" Unicode character block.
2287          * @since 1.7
2288          */
2289         public static final UnicodeBlock OLD_TURKIC
2290             = new UnicodeBlock("OLD_TURKIC",
2291                                "OLD TURKIC",
2292                                "OLDTURKIC");
2293 
2294         /**
2295          * Represents the "Rumi Numeral Symbols" Unicode character block.
2296          * @since 1.7
2297          */
2298         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS
2299             = new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2300                                "RUMI NUMERAL SYMBOLS",
2301                                "RUMINUMERALSYMBOLS");
2302 
2303         /**
2304          * Represents the "Brahmi" Unicode character block.
2305          * @since 1.7
2306          */
2307         public static final UnicodeBlock BRAHMI
2308             = new UnicodeBlock("BRAHMI");
2309 
2310         /**
2311          * Represents the "Kaithi" Unicode character block.
2312          * @since 1.7
2313          */
2314         public static final UnicodeBlock KAITHI
2315             = new UnicodeBlock("KAITHI");
2316 
2317         /**
2318          * Represents the "Cuneiform" Unicode character block.
2319          * @since 1.7
2320          */
2321         public static final UnicodeBlock CUNEIFORM
2322             = new UnicodeBlock("CUNEIFORM");
2323 
2324         /**
2325          * Represents the "Cuneiform Numbers and Punctuation" Unicode
2326          * character block.
2327          * @since 1.7
2328          */
2329         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION
2330             = new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2331                                "CUNEIFORM NUMBERS AND PUNCTUATION",
2332                                "CUNEIFORMNUMBERSANDPUNCTUATION");
2333 
2334         /**
2335          * Represents the "Egyptian Hieroglyphs" Unicode character block.
2336          * @since 1.7
2337          */
2338         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS
2339             = new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2340                                "EGYPTIAN HIEROGLYPHS",
2341                                "EGYPTIANHIEROGLYPHS");
2342 
2343         /**
2344          * Represents the "Bamum Supplement" Unicode character block.
2345          * @since 1.7
2346          */
2347         public static final UnicodeBlock BAMUM_SUPPLEMENT
2348             = new UnicodeBlock("BAMUM_SUPPLEMENT",
2349                                "BAMUM SUPPLEMENT",
2350                                "BAMUMSUPPLEMENT");
2351 
2352         /**
2353          * Represents the "Kana Supplement" Unicode character block.
2354          * @since 1.7
2355          */
2356         public static final UnicodeBlock KANA_SUPPLEMENT
2357             = new UnicodeBlock("KANA_SUPPLEMENT",
2358                                "KANA SUPPLEMENT",
2359                                "KANASUPPLEMENT");
2360 
2361         /**
2362          * Represents the "Ancient Greek Musical Notation" Unicode character
2363          * block.
2364          * @since 1.7
2365          */
2366         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION
2367             = new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2368                                "ANCIENT GREEK MUSICAL NOTATION",
2369                                "ANCIENTGREEKMUSICALNOTATION");
2370 
2371         /**
2372          * Represents the "Counting Rod Numerals" Unicode character block.
2373          * @since 1.7
2374          */
2375         public static final UnicodeBlock COUNTING_ROD_NUMERALS
2376             = new UnicodeBlock("COUNTING_ROD_NUMERALS",
2377                                "COUNTING ROD NUMERALS",
2378                                "COUNTINGRODNUMERALS");
2379 
2380         /**
2381          * Represents the "Mahjong Tiles" Unicode character block.
2382          * @since 1.7
2383          */
2384         public static final UnicodeBlock MAHJONG_TILES
2385             = new UnicodeBlock("MAHJONG_TILES",
2386                                "MAHJONG TILES",
2387                                "MAHJONGTILES");
2388 
2389         /**
2390          * Represents the "Domino Tiles" Unicode character block.
2391          * @since 1.7
2392          */
2393         public static final UnicodeBlock DOMINO_TILES
2394             = new UnicodeBlock("DOMINO_TILES",
2395                                "DOMINO TILES",
2396                                "DOMINOTILES");
2397 
2398         /**
2399          * Represents the "Playing Cards" Unicode character block.
2400          * @since 1.7
2401          */
2402         public static final UnicodeBlock PLAYING_CARDS
2403             = new UnicodeBlock("PLAYING_CARDS",
2404                                "PLAYING CARDS",
2405                                "PLAYINGCARDS");
2406 
2407         /**
2408          * Represents the "Enclosed Alphanumeric Supplement" Unicode character
2409          * block.
2410          * @since 1.7
2411          */
2412         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT
2413             = new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2414                                "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2415                                "ENCLOSEDALPHANUMERICSUPPLEMENT");
2416 
2417         /**
2418          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2419          * block (U+1F200..U+1F2FF).
2420          * @since 1.7
2421          */
2422         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2423             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2424                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2425                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2426 
2427         /**
2428          * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2429          * character block (U+1F300..U+1F5FF).
2430          * @since 1.7
2431          */
2432         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2433             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2434                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2435                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2436 
2437         /**
2438          * Constant for the "Emoticons" Unicode character block (U+1F600..U+1F64F).
2439          * @since 1.7
2440          */
2441         public static final UnicodeBlock EMOTICONS =
2442             new UnicodeBlock("EMOTICONS");
2443 
2444         /**
2445          * Constant for the "Transport And Map Symbols" Unicode character block (U+1F680..U+1F6FF).
2446          * @since 1.7
2447          */
2448         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2449             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2450                              "TRANSPORT AND MAP SYMBOLS",
2451                              "TRANSPORTANDMAPSYMBOLS");
2452 
2453         /**
2454          * Constant for the "Alchemical Symbols" Unicode character block (U+1F700..U+1F77F).
2455          * @since 1.7
2456          */
2457         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2458             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2459                              "ALCHEMICAL SYMBOLS",
2460                              "ALCHEMICALSYMBOLS");
2461 
2462         /**
2463          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2464          * character block (U+2A700..U+2B73F).
2465          * @since 1.7
2466          */
2467         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2468             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2469                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2470                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2471 
2472         /**
2473          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2474          * character block (U+2B740..U+2B81F).
2475          * @since 1.7
2476          */
2477         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2478             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2479                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2480                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2481 
2482         /**
2483          * Constant for the "Arabic Extended-A" Unicode character block (U+08A0..U+08FF).
2484          * @since 1.8
2485          */
2486         public static final UnicodeBlock ARABIC_EXTENDED_A =
2487             new UnicodeBlock("ARABIC_EXTENDED_A",
2488                              "ARABIC EXTENDED-A",
2489                              "ARABICEXTENDED-A");
2490 
2491         /**
2492          * Constant for the "Sundanese Supplement" Unicode character block (U+1CC0..U+1CCF).
2493          * @since 1.8
2494          */
2495         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2496             new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2497                              "SUNDANESE SUPPLEMENT",
2498                              "SUNDANESESUPPLEMENT");
2499 
2500         /**
2501          * Constant for the "Meetei Mayek Extensions" Unicode character block (U+AAE0..U+AAFF).
2502          * @since 1.8
2503          */
2504         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2505             new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2506                              "MEETEI MAYEK EXTENSIONS",
2507                              "MEETEIMAYEKEXTENSIONS");
2508 
2509         /**
2510          * Constant for the "Meroitic Hieroglyphs" Unicode character block (U+10980..U+1099F).
2511          * @since 1.8
2512          */
2513         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2514             new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2515                              "MEROITIC HIEROGLYPHS",
2516                              "MEROITICHIEROGLYPHS");
2517 
2518         /**
2519          * Constant for the "Meroitic Cursive" Unicode character block (U+109A0..U+109FF).
2520          * @since 1.8
2521          */
2522         public static final UnicodeBlock MEROITIC_CURSIVE =
2523             new UnicodeBlock("MEROITIC_CURSIVE",
2524                              "MEROITIC CURSIVE",
2525                              "MEROITICCURSIVE");
2526 
2527         /**
2528          * Constant for the "Sora Sompeng" Unicode character block (U+110D0..U+110FF).
2529          * @since 1.8
2530          */
2531         public static final UnicodeBlock SORA_SOMPENG =
2532             new UnicodeBlock("SORA_SOMPENG",
2533                              "SORA SOMPENG",
2534                              "SORASOMPENG");
2535 
2536         /**
2537          * Constant for the "Chakma" Unicode character block (U+11100..U+1114F).
2538          * @since 1.8
2539          */
2540         public static final UnicodeBlock CHAKMA =
2541             new UnicodeBlock("CHAKMA");
2542 
2543         /**
2544          * Constant for the "Sharada" Unicode character block (U+11180..U+111DF).
2545          * @since 1.8
2546          */
2547         public static final UnicodeBlock SHARADA =
2548             new UnicodeBlock("SHARADA");
2549 
2550         /**
2551          * Constant for the "Takri" Unicode character block (U+11680..U+116CF).
2552          * @since 1.8
2553          */
2554         public static final UnicodeBlock TAKRI =
2555             new UnicodeBlock("TAKRI");
2556 
2557         /**
2558          * Constant for the "Miao" Unicode character block (U+16F00..U+16F9F).
2559          * @since 1.8
2560          */
2561         public static final UnicodeBlock MIAO =
2562             new UnicodeBlock("MIAO");
2563 
2564         /**
2565          * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2566          * character block (U+1EE00..U+1EEFF).
2567          * @since 1.8
2568          */
2569         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2570             new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2571                              "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2572                              "ARABICMATHEMATICALALPHABETICSYMBOLS");
2573 
2574         private static final int blockStarts[] = {   // ascending; entry i is the first code point of blocks[i]
2575             0x0000,   // 0000..007F; Basic Latin
2576             0x0080,   // 0080..00FF; Latin-1 Supplement
2577             0x0100,   // 0100..017F; Latin Extended-A
2578             0x0180,   // 0180..024F; Latin Extended-B
2579             0x0250,   // 0250..02AF; IPA Extensions
2580             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2581             0x0300,   // 0300..036F; Combining Diacritical Marks
2582             0x0370,   // 0370..03FF; Greek and Coptic
2583             0x0400,   // 0400..04FF; Cyrillic
2584             0x0500,   // 0500..052F; Cyrillic Supplement
2585             0x0530,   // 0530..058F; Armenian
2586             0x0590,   // 0590..05FF; Hebrew
2587             0x0600,   // 0600..06FF; Arabic
2588             0x0700,   // 0700..074F; Syriac
2589             0x0750,   // 0750..077F; Arabic Supplement
2590             0x0780,   // 0780..07BF; Thaana
2591             0x07C0,   // 07C0..07FF; NKo
2592             0x0800,   // 0800..083F; Samaritan
2593             0x0840,   // 0840..085F; Mandaic
2594             0x0860,   //             unassigned
2595             0x08A0,   // 08A0..08FF; Arabic Extended-A
2596             0x0900,   // 0900..097F; Devanagari
2597             0x0980,   // 0980..09FF; Bengali
2598             0x0A00,   // 0A00..0A7F; Gurmukhi
2599             0x0A80,   // 0A80..0AFF; Gujarati
2600             0x0B00,   // 0B00..0B7F; Oriya
2601             0x0B80,   // 0B80..0BFF; Tamil
2602             0x0C00,   // 0C00..0C7F; Telugu
2603             0x0C80,   // 0C80..0CFF; Kannada
2604             0x0D00,   // 0D00..0D7F; Malayalam
2605             0x0D80,   // 0D80..0DFF; Sinhala
2606             0x0E00,   // 0E00..0E7F; Thai
2607             0x0E80,   // 0E80..0EFF; Lao
2608             0x0F00,   // 0F00..0FFF; Tibetan
2609             0x1000,   // 1000..109F; Myanmar
2610             0x10A0,   // 10A0..10FF; Georgian
2611             0x1100,   // 1100..11FF; Hangul Jamo
2612             0x1200,   // 1200..137F; Ethiopic
2613             0x1380,   // 1380..139F; Ethiopic Supplement
2614             0x13A0,   // 13A0..13FF; Cherokee
2615             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2616             0x1680,   // 1680..169F; Ogham
2617             0x16A0,   // 16A0..16FF; Runic
2618             0x1700,   // 1700..171F; Tagalog
2619             0x1720,   // 1720..173F; Hanunoo
2620             0x1740,   // 1740..175F; Buhid
2621             0x1760,   // 1760..177F; Tagbanwa
2622             0x1780,   // 1780..17FF; Khmer
2623             0x1800,   // 1800..18AF; Mongolian
2624             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2625             0x1900,   // 1900..194F; Limbu
2626             0x1950,   // 1950..197F; Tai Le
2627             0x1980,   // 1980..19DF; New Tai Lue
2628             0x19E0,   // 19E0..19FF; Khmer Symbols
2629             0x1A00,   // 1A00..1A1F; Buginese
2630             0x1A20,   // 1A20..1AAF; Tai Tham
2631             0x1AB0,   //             unassigned
2632             0x1B00,   // 1B00..1B7F; Balinese
2633             0x1B80,   // 1B80..1BBF; Sundanese
2634             0x1BC0,   // 1BC0..1BFF; Batak
2635             0x1C00,   // 1C00..1C4F; Lepcha
2636             0x1C50,   // 1C50..1C7F; Ol Chiki
2637             0x1C80,   //             unassigned
2638             0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
2639             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
2640             0x1D00,   // 1D00..1D7F; Phonetic Extensions
2641             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2642             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2643             0x1E00,   // 1E00..1EFF; Latin Extended Additional
2644             0x1F00,   // 1F00..1FFF; Greek Extended
2645             0x2000,   // 2000..206F; General Punctuation
2646             0x2070,   // 2070..209F; Superscripts and Subscripts
2647             0x20A0,   // 20A0..20CF; Currency Symbols
2648             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2649             0x2100,   // 2100..214F; Letterlike Symbols
2650             0x2150,   // 2150..218F; Number Forms
2651             0x2190,   // 2190..21FF; Arrows
2652             0x2200,   // 2200..22FF; Mathematical Operators
2653             0x2300,   // 2300..23FF; Miscellaneous Technical
2654             0x2400,   // 2400..243F; Control Pictures
2655             0x2440,   // 2440..245F; Optical Character Recognition
2656             0x2460,   // 2460..24FF; Enclosed Alphanumerics
2657             0x2500,   // 2500..257F; Box Drawing
2658             0x2580,   // 2580..259F; Block Elements
2659             0x25A0,   // 25A0..25FF; Geometric Shapes
2660             0x2600,   // 2600..26FF; Miscellaneous Symbols
2661             0x2700,   // 2700..27BF; Dingbats
2662             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2663             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2664             0x2800,   // 2800..28FF; Braille Patterns
2665             0x2900,   // 2900..297F; Supplemental Arrows-B
2666             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2667             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2668             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2669             0x2C00,   // 2C00..2C5F; Glagolitic
2670             0x2C60,   // 2C60..2C7F; Latin Extended-C
2671             0x2C80,   // 2C80..2CFF; Coptic
2672             0x2D00,   // 2D00..2D2F; Georgian Supplement
2673             0x2D30,   // 2D30..2D7F; Tifinagh
2674             0x2D80,   // 2D80..2DDF; Ethiopic Extended
2675             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2676             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2677             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2678             0x2F00,   // 2F00..2FDF; Kangxi Radicals
2679             0x2FE0,   //             unassigned
2680             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2681             0x3000,   // 3000..303F; CJK Symbols and Punctuation
2682             0x3040,   // 3040..309F; Hiragana
2683             0x30A0,   // 30A0..30FF; Katakana
2684             0x3100,   // 3100..312F; Bopomofo
2685             0x3130,   // 3130..318F; Hangul Compatibility Jamo
2686             0x3190,   // 3190..319F; Kanbun
2687             0x31A0,   // 31A0..31BF; Bopomofo Extended
2688             0x31C0,   // 31C0..31EF; CJK Strokes
2689             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2690             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2691             0x3300,   // 3300..33FF; CJK Compatibility
2692             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2693             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2694             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2695             0xA000,   // A000..A48F; Yi Syllables
2696             0xA490,   // A490..A4CF; Yi Radicals
2697             0xA4D0,   // A4D0..A4FF; Lisu
2698             0xA500,   // A500..A63F; Vai
2699             0xA640,   // A640..A69F; Cyrillic Extended-B
2700             0xA6A0,   // A6A0..A6FF; Bamum
2701             0xA700,   // A700..A71F; Modifier Tone Letters
2702             0xA720,   // A720..A7FF; Latin Extended-D
2703             0xA800,   // A800..A82F; Syloti Nagri
2704             0xA830,   // A830..A83F; Common Indic Number Forms
2705             0xA840,   // A840..A87F; Phags-pa
2706             0xA880,   // A880..A8DF; Saurashtra
2707             0xA8E0,   // A8E0..A8FF; Devanagari Extended
2708             0xA900,   // A900..A92F; Kayah Li
2709             0xA930,   // A930..A95F; Rejang
2710             0xA960,   // A960..A97F; Hangul Jamo Extended-A
2711             0xA980,   // A980..A9DF; Javanese
2712             0xA9E0,   //             unassigned
2713             0xAA00,   // AA00..AA5F; Cham
2714             0xAA60,   // AA60..AA7F; Myanmar Extended-A
2715             0xAA80,   // AA80..AADF; Tai Viet
2716             0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
2717             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
2718             0xAB30,   //             unassigned
2719             0xABC0,   // ABC0..ABFF; Meetei Mayek
2720             0xAC00,   // AC00..D7AF; Hangul Syllables
2721             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
2722             0xD800,   // D800..DB7F; High Surrogates
2723             0xDB80,   // DB80..DBFF; High Private Use Surrogates
2724             0xDC00,   // DC00..DFFF; Low Surrogates
2725             0xE000,   // E000..F8FF; Private Use Area
2726             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2727             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2728             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2729             0xFE00,   // FE00..FE0F; Variation Selectors
2730             0xFE10,   // FE10..FE1F; Vertical Forms
2731             0xFE20,   // FE20..FE2F; Combining Half Marks
2732             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2733             0xFE50,   // FE50..FE6F; Small Form Variants
2734             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2735             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2736             0xFFF0,   // FFF0..FFFF; Specials
2737             0x10000,  // 10000..1007F; Linear B Syllabary
2738             0x10080,  // 10080..100FF; Linear B Ideograms
2739             0x10100,  // 10100..1013F; Aegean Numbers
2740             0x10140,  // 10140..1018F; Ancient Greek Numbers
2741             0x10190,  // 10190..101CF; Ancient Symbols
2742             0x101D0,  // 101D0..101FF; Phaistos Disc
2743             0x10200,  //               unassigned
2744             0x10280,  // 10280..1029F; Lycian
2745             0x102A0,  // 102A0..102DF; Carian
2746             0x102E0,  //               unassigned
2747             0x10300,  // 10300..1032F; Old Italic
2748             0x10330,  // 10330..1034F; Gothic
2749             0x10350,  //               unassigned
2750             0x10380,  // 10380..1039F; Ugaritic
2751             0x103A0,  // 103A0..103DF; Old Persian
2752             0x103E0,  //               unassigned
2753             0x10400,  // 10400..1044F; Deseret
2754             0x10450,  // 10450..1047F; Shavian
2755             0x10480,  // 10480..104AF; Osmanya
2756             0x104B0,  //               unassigned
2757             0x10800,  // 10800..1083F; Cypriot Syllabary
2758             0x10840,  // 10840..1085F; Imperial Aramaic
2759             0x10860,  //               unassigned
2760             0x10900,  // 10900..1091F; Phoenician
2761             0x10920,  // 10920..1093F; Lydian
2762             0x10940,  //               unassigned
2763             0x10980,  // 10980..1099F; Meroitic Hieroglyphs
2764             0x109A0,  // 109A0..109FF; Meroitic Cursive
2765             0x10A00,  // 10A00..10A5F; Kharoshthi
2766             0x10A60,  // 10A60..10A7F; Old South Arabian
2767             0x10A80,  //               unassigned
2768             0x10B00,  // 10B00..10B3F; Avestan
2769             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
2770             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
2771             0x10B80,  //               unassigned
2772             0x10C00,  // 10C00..10C4F; Old Turkic
2773             0x10C50,  //               unassigned
2774             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
2775             0x10E80,  //               unassigned
2776             0x11000,  // 11000..1107F; Brahmi
2777             0x11080,  // 11080..110CF; Kaithi
2778             0x110D0,  // 110D0..110FF; Sora Sompeng
2779             0x11100,  // 11100..1114F; Chakma
2780             0x11150,  //               unassigned
2781             0x11180,  // 11180..111DF; Sharada
2782             0x111E0,  //               unassigned
2783             0x11680,  // 11680..116CF; Takri
2784             0x116D0,  //               unassigned
2785             0x12000,  // 12000..123FF; Cuneiform
2786             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2787             0x12480,  //               unassigned
2788             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
2789             0x13430,  //               unassigned
2790             0x16800,  // 16800..16A3F; Bamum Supplement
2791             0x16A40,  //               unassigned
2792             0x16F00,  // 16F00..16F9F; Miao
2793             0x16FA0,  //               unassigned
2794             0x1B000,  // 1B000..1B0FF; Kana Supplement
2795             0x1B100,  //               unassigned
2796             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2797             0x1D100,  // 1D100..1D1FF; Musical Symbols
2798             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2799             0x1D250,  //               unassigned
2800             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2801             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2802             0x1D380,  //               unassigned
2803             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2804             0x1D800,  //               unassigned
2805             0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
2806             0x1EF00,  //               unassigned
2807             0x1F000,  // 1F000..1F02F; Mahjong Tiles
2808             0x1F030,  // 1F030..1F09F; Domino Tiles
2809             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
2810             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2811             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
2812             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2813             0x1F600,  // 1F600..1F64F; Emoticons
2814             0x1F650,  //               unassigned
2815             0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
2816             0x1F700,  // 1F700..1F77F; Alchemical Symbols
2817             0x1F780,  //               unassigned
2818             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2819             0x2A6E0,  //               unassigned
2820             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
2821             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
2822             0x2B820,  //               unassigned
2823             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2824             0x2FA20,  //               unassigned
2825             0xE0000,  // E0000..E007F; Tags
2826             0xE0080,  //               unassigned
2827             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2828             0xE01F0,  //               unassigned
2829             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2830             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
2831         };  // entries commented "unassigned" pair with a null slot in blocks[]
2832 
2833         private static final UnicodeBlock[] blocks = {   // index-aligned with blockStarts[]
2834             BASIC_LATIN,
2835             LATIN_1_SUPPLEMENT,
2836             LATIN_EXTENDED_A,
2837             LATIN_EXTENDED_B,
2838             IPA_EXTENSIONS,
2839             SPACING_MODIFIER_LETTERS,
2840             COMBINING_DIACRITICAL_MARKS,
2841             GREEK,
2842             CYRILLIC,
2843             CYRILLIC_SUPPLEMENTARY,
2844             ARMENIAN,
2845             HEBREW,
2846             ARABIC,
2847             SYRIAC,
2848             ARABIC_SUPPLEMENT,
2849             THAANA,
2850             NKO,
2851             SAMARITAN,
2852             MANDAIC,
2853             null,
2854             ARABIC_EXTENDED_A,
2855             DEVANAGARI,
2856             BENGALI,
2857             GURMUKHI,
2858             GUJARATI,
2859             ORIYA,
2860             TAMIL,
2861             TELUGU,
2862             KANNADA,
2863             MALAYALAM,
2864             SINHALA,
2865             THAI,
2866             LAO,
2867             TIBETAN,
2868             MYANMAR,
2869             GEORGIAN,
2870             HANGUL_JAMO,
2871             ETHIOPIC,
2872             ETHIOPIC_SUPPLEMENT,
2873             CHEROKEE,
2874             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2875             OGHAM,
2876             RUNIC,
2877             TAGALOG,
2878             HANUNOO,
2879             BUHID,
2880             TAGBANWA,
2881             KHMER,
2882             MONGOLIAN,
2883             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
2884             LIMBU,
2885             TAI_LE,
2886             NEW_TAI_LUE,
2887             KHMER_SYMBOLS,
2888             BUGINESE,
2889             TAI_THAM,
2890             null,
2891             BALINESE,
2892             SUNDANESE,
2893             BATAK,
2894             LEPCHA,
2895             OL_CHIKI,
2896             null,
2897             SUNDANESE_SUPPLEMENT,
2898             VEDIC_EXTENSIONS,
2899             PHONETIC_EXTENSIONS,
2900             PHONETIC_EXTENSIONS_SUPPLEMENT,
2901             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2902             LATIN_EXTENDED_ADDITIONAL,
2903             GREEK_EXTENDED,
2904             GENERAL_PUNCTUATION,
2905             SUPERSCRIPTS_AND_SUBSCRIPTS,
2906             CURRENCY_SYMBOLS,
2907             COMBINING_MARKS_FOR_SYMBOLS,
2908             LETTERLIKE_SYMBOLS,
2909             NUMBER_FORMS,
2910             ARROWS,
2911             MATHEMATICAL_OPERATORS,
2912             MISCELLANEOUS_TECHNICAL,
2913             CONTROL_PICTURES,
2914             OPTICAL_CHARACTER_RECOGNITION,
2915             ENCLOSED_ALPHANUMERICS,
2916             BOX_DRAWING,
2917             BLOCK_ELEMENTS,
2918             GEOMETRIC_SHAPES,
2919             MISCELLANEOUS_SYMBOLS,
2920             DINGBATS,
2921             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
2922             SUPPLEMENTAL_ARROWS_A,
2923             BRAILLE_PATTERNS,
2924             SUPPLEMENTAL_ARROWS_B,
2925             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
2926             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
2927             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
2928             GLAGOLITIC,
2929             LATIN_EXTENDED_C,
2930             COPTIC,
2931             GEORGIAN_SUPPLEMENT,
2932             TIFINAGH,
2933             ETHIOPIC_EXTENDED,
2934             CYRILLIC_EXTENDED_A,
2935             SUPPLEMENTAL_PUNCTUATION,
2936             CJK_RADICALS_SUPPLEMENT,
2937             KANGXI_RADICALS,
2938             null,
2939             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
2940             CJK_SYMBOLS_AND_PUNCTUATION,
2941             HIRAGANA,
2942             KATAKANA,
2943             BOPOMOFO,
2944             HANGUL_COMPATIBILITY_JAMO,
2945             KANBUN,
2946             BOPOMOFO_EXTENDED,
2947             CJK_STROKES,
2948             KATAKANA_PHONETIC_EXTENSIONS,
2949             ENCLOSED_CJK_LETTERS_AND_MONTHS,
2950             CJK_COMPATIBILITY,
2951             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2952             YIJING_HEXAGRAM_SYMBOLS,
2953             CJK_UNIFIED_IDEOGRAPHS,
2954             YI_SYLLABLES,
2955             YI_RADICALS,
2956             LISU,
2957             VAI,
2958             CYRILLIC_EXTENDED_B,
2959             BAMUM,
2960             MODIFIER_TONE_LETTERS,
2961             LATIN_EXTENDED_D,
2962             SYLOTI_NAGRI,
2963             COMMON_INDIC_NUMBER_FORMS,
2964             PHAGS_PA,
2965             SAURASHTRA,
2966             DEVANAGARI_EXTENDED,
2967             KAYAH_LI,
2968             REJANG,
2969             HANGUL_JAMO_EXTENDED_A,
2970             JAVANESE,
2971             null,
2972             CHAM,
2973             MYANMAR_EXTENDED_A,
2974             TAI_VIET,
2975             MEETEI_MAYEK_EXTENSIONS,
2976             ETHIOPIC_EXTENDED_A,
2977             null,
2978             MEETEI_MAYEK,
2979             HANGUL_SYLLABLES,
2980             HANGUL_JAMO_EXTENDED_B,
2981             HIGH_SURROGATES,
2982             HIGH_PRIVATE_USE_SURROGATES,
2983             LOW_SURROGATES,
2984             PRIVATE_USE_AREA,
2985             CJK_COMPATIBILITY_IDEOGRAPHS,
2986             ALPHABETIC_PRESENTATION_FORMS,
2987             ARABIC_PRESENTATION_FORMS_A,
2988             VARIATION_SELECTORS,
2989             VERTICAL_FORMS,
2990             COMBINING_HALF_MARKS,
2991             CJK_COMPATIBILITY_FORMS,
2992             SMALL_FORM_VARIANTS,
2993             ARABIC_PRESENTATION_FORMS_B,
2994             HALFWIDTH_AND_FULLWIDTH_FORMS,
2995             SPECIALS,
2996             LINEAR_B_SYLLABARY,
2997             LINEAR_B_IDEOGRAMS,
2998             AEGEAN_NUMBERS,
2999             ANCIENT_GREEK_NUMBERS,
3000             ANCIENT_SYMBOLS,
3001             PHAISTOS_DISC,
3002             null,
3003             LYCIAN,
3004             CARIAN,
3005             null,
3006             OLD_ITALIC,
3007             GOTHIC,
3008             null,
3009             UGARITIC,
3010             OLD_PERSIAN,
3011             null,
3012             DESERET,
3013             SHAVIAN,
3014             OSMANYA,
3015             null,
3016             CYPRIOT_SYLLABARY,
3017             IMPERIAL_ARAMAIC,
3018             null,
3019             PHOENICIAN,
3020             LYDIAN,
3021             null,
3022             MEROITIC_HIEROGLYPHS,
3023             MEROITIC_CURSIVE,
3024             KHAROSHTHI,
3025             OLD_SOUTH_ARABIAN,
3026             null,
3027             AVESTAN,
3028             INSCRIPTIONAL_PARTHIAN,
3029             INSCRIPTIONAL_PAHLAVI,
3030             null,
3031             OLD_TURKIC,
3032             null,
3033             RUMI_NUMERAL_SYMBOLS,
3034             null,
3035             BRAHMI,
3036             KAITHI,
3037             SORA_SOMPENG,
3038             CHAKMA,
3039             null,
3040             SHARADA,
3041             null,
3042             TAKRI,
3043             null,
3044             CUNEIFORM,
3045             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
3046             null,
3047             EGYPTIAN_HIEROGLYPHS,
3048             null,
3049             BAMUM_SUPPLEMENT,
3050             null,
3051             MIAO,
3052             null,
3053             KANA_SUPPLEMENT,
3054             null,
3055             BYZANTINE_MUSICAL_SYMBOLS,
3056             MUSICAL_SYMBOLS,
3057             ANCIENT_GREEK_MUSICAL_NOTATION,
3058             null,
3059             TAI_XUAN_JING_SYMBOLS,
3060             COUNTING_ROD_NUMERALS,
3061             null,
3062             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
3063             null,
3064             ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
3065             null,
3066             MAHJONG_TILES,
3067             DOMINO_TILES,
3068             PLAYING_CARDS,
3069             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
3070             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
3071             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
3072             EMOTICONS,
3073             null,
3074             TRANSPORT_AND_MAP_SYMBOLS,
3075             ALCHEMICAL_SYMBOLS,
3076             null,
3077             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
3078             null,
3079             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
3080             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
3081             null,
3082             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
3083             null,
3084             TAGS,
3085             null,
3086             VARIATION_SELECTORS_SUPPLEMENT,
3087             null,
3088             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
3089             SUPPLEMENTARY_PRIVATE_USE_AREA_B
3090         };  // a null slot is what of(int) returns for a range belonging to no block
3091 
3092 
3093         /**
3094          * Looks up the Unicode block that contains the given {@code char}
3095          * value, yielding {@code null} when the value lies outside every
3096          * defined block.
3097          *
3098          * <p><b>Note:</b> A {@code char} cannot represent
3099          * <a href="Character.html#supplementary">supplementary
3100          * characters</a>; call {@link #of(int)} when the full Unicode
3101          * code point range must be supported.
3102          *
3103          * @param   c  The character in question
3104          * @return  The {@code UnicodeBlock} instance for the block that
3105          *          contains {@code c}, or {@code null} if {@code c}
3106          *          belongs to no Unicode block
3107          */
3108         public static UnicodeBlock of(char c) {
3109             final int codePoint = c;   // widen so the int overload is selected
3110             return of(codePoint);
3111         }
3112 
3113         /**
3114          * Returns the object representing the Unicode block
3115          * containing the given character (Unicode code point), or
3116          * {@code null} if the character is not a member of a
3117          * defined block.
3118          *
3119          * @param   codePoint the character (Unicode code point) in question.
3120          * @return  The {@code UnicodeBlock} instance representing the
3121          *          Unicode block of which this character is a member, or
3122          *          {@code null} if the character is not a member of any
3123          *          Unicode block
3124          * @throws  IllegalArgumentException if the specified
3125          *          {@code codePoint} is an invalid Unicode code point.
3126          * @see Character#isValidCodePoint(int)
3127          * @since   1.5
3128          */
3129         public static UnicodeBlock of(int codePoint) {
3130             if (!isValidCodePoint(codePoint)) {
3131                 throw new IllegalArgumentException("Not a valid Unicode code point: 0x" + Integer.toHexString(codePoint));
3132             }
3133             // Binary search blockStarts for the last entry that is <= codePoint.
3134             int top, bottom, current;
3135             bottom = 0;
3136             top = blockStarts.length;
3137             current = top/2;
3138 
3139             // invariant: top > current >= bottom && codePoint >= blockStarts[bottom]
3140             while (top - bottom > 1) {
3141                 if (codePoint >= blockStarts[current]) {
3142                     bottom = current;
3143                 } else {
3144                     top = current;
3145                 }
3146                 current = (top + bottom) / 2;
3147             }
3148             return blocks[current];  // same index as the matching blockStarts entry
3149         }
3150 
3151         /**
3152          * Returns the UnicodeBlock with the given name. Block
3153          * names are determined by The Unicode Standard. The file
3154          * Blocks-&lt;version&gt;.txt defines blocks for a particular
3155          * version of the standard. The {@link Character} class specifies
3156          * the version of the standard that it supports.
3157          * <p>
3158          * This method accepts block names in the following forms:
3159          * <ol>
3160          * <li> Canonical block names as defined by the Unicode Standard.
3161          * For example, the standard defines a "Basic Latin" block. Therefore, this
3162          * method accepts "Basic Latin" as a valid block name. The documentation of
3163          * each UnicodeBlock provides the canonical name.
3164          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3165          * is a valid block name for the "Basic Latin" block.
3166          * <li>The text representation of each constant UnicodeBlock identifier.
3167          * For example, this method will return the {@link #BASIC_LATIN} block if
3168          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3169          * hyphens in the canonical name with underscores.
3170          * </ol>
3171          * Finally, character case is ignored for all of the valid block name forms.
3172          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3173          * The en_US locale's case mapping rules are used to provide case-insensitive
3174          * string comparisons for block name validation.
3175          * <p>
3176          * If the Unicode Standard changes block names, both the previous and
3177          * current names will be accepted.
3178          *
3179          * @param blockName A {@code UnicodeBlock} name.
3180          * @return The {@code UnicodeBlock} instance identified
3181          *         by {@code blockName}
3182          * @throws IllegalArgumentException if {@code blockName} is an
3183          *         invalid name; the exception message includes the rejected name
3184          * @throws NullPointerException if {@code blockName} is null
3185          * @since 1.5
3186          */
3187         public static final UnicodeBlock forName(String blockName) {
3188             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));  // NPE here when blockName is null
3189             if (block == null) {
3190                 throw new IllegalArgumentException("Not a valid block name: " + blockName);
3191             }
3192             return block;
3193         }
3194     }
3195 
3196 
3197     /**
3198      * A family of character subsets representing the character scripts
3199      * defined in the <a href="http://www.unicode.org/reports/tr24/">
3200      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3201      * character is assigned to a single Unicode script, either a specific
3202      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3203      * one of the following three special values,
3204      * {@link Character.UnicodeScript#INHERITED Inherited},
3205      * {@link Character.UnicodeScript#COMMON Common} or
3206      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3207      *
3208      * @since 1.7
3209      */
3210     public static enum UnicodeScript {
3211         /**
3212          * Unicode script "Common".
3213          */
3214         COMMON,
3215 
3216         /**
3217          * Unicode script "Latin".
3218          */
3219         LATIN,
3220 
3221         /**
3222          * Unicode script "Greek".
3223          */
3224         GREEK,
3225 
3226         /**
3227          * Unicode script "Cyrillic".
3228          */
3229         CYRILLIC,
3230 
3231         /**
3232          * Unicode script "Armenian".
3233          */
3234         ARMENIAN,
3235 
3236         /**
3237          * Unicode script "Hebrew".
3238          */
3239         HEBREW,
3240 
3241         /**
3242          * Unicode script "Arabic".
3243          */
3244         ARABIC,
3245 
3246         /**
3247          * Unicode script "Syriac".
3248          */
3249         SYRIAC,
3250 
3251         /**
3252          * Unicode script "Thaana".
3253          */
3254         THAANA,
3255 
3256         /**
3257          * Unicode script "Devanagari".
3258          */
3259         DEVANAGARI,
3260 
3261         /**
3262          * Unicode script "Bengali".
3263          */
3264         BENGALI,
3265 
3266         /**
3267          * Unicode script "Gurmukhi".
3268          */
3269         GURMUKHI,
3270 
3271         /**
3272          * Unicode script "Gujarati".
3273          */
3274         GUJARATI,
3275 
3276         /**
3277          * Unicode script "Oriya".
3278          */
3279         ORIYA,
3280 
3281         /**
3282          * Unicode script "Tamil".
3283          */
3284         TAMIL,
3285 
3286         /**
3287          * Unicode script "Telugu".
3288          */
3289         TELUGU,
3290 
3291         /**
3292          * Unicode script "Kannada".
3293          */
3294         KANNADA,
3295 
3296         /**
3297          * Unicode script "Malayalam".
3298          */
3299         MALAYALAM,
3300 
3301         /**
3302          * Unicode script "Sinhala".
3303          */
3304         SINHALA,
3305 
3306         /**
3307          * Unicode script "Thai".
3308          */
3309         THAI,
3310 
3311         /**
3312          * Unicode script "Lao".
3313          */
3314         LAO,
3315 
3316         /**
3317          * Unicode script "Tibetan".
3318          */
3319         TIBETAN,
3320 
3321         /**
3322          * Unicode script "Myanmar".
3323          */
3324         MYANMAR,
3325 
3326         /**
3327          * Unicode script "Georgian".
3328          */
3329         GEORGIAN,
3330 
3331         /**
3332          * Unicode script "Hangul".
3333          */
3334         HANGUL,
3335 
3336         /**
3337          * Unicode script "Ethiopic".
3338          */
3339         ETHIOPIC,
3340 
3341         /**
3342          * Unicode script "Cherokee".
3343          */
3344         CHEROKEE,
3345 
3346         /**
3347          * Unicode script "Canadian_Aboriginal".
3348          */
3349         CANADIAN_ABORIGINAL,
3350 
3351         /**
3352          * Unicode script "Ogham".
3353          */
3354         OGHAM,
3355 
3356         /**
3357          * Unicode script "Runic".
3358          */
3359         RUNIC,
3360 
3361         /**
3362          * Unicode script "Khmer".
3363          */
3364         KHMER,
3365 
3366         /**
3367          * Unicode script "Mongolian".
3368          */
3369         MONGOLIAN,
3370 
3371         /**
3372          * Unicode script "Hiragana".
3373          */
3374         HIRAGANA,
3375 
3376         /**
3377          * Unicode script "Katakana".
3378          */
3379         KATAKANA,
3380 
3381         /**
3382          * Unicode script "Bopomofo".
3383          */
3384         BOPOMOFO,
3385 
3386         /**
3387          * Unicode script "Han".
3388          */
3389         HAN,
3390 
3391         /**
3392          * Unicode script "Yi".
3393          */
3394         YI,
3395 
3396         /**
3397          * Unicode script "Old_Italic".
3398          */
3399         OLD_ITALIC,
3400 
3401         /**
3402          * Unicode script "Gothic".
3403          */
3404         GOTHIC,
3405 
3406         /**
3407          * Unicode script "Deseret".
3408          */
3409         DESERET,
3410 
3411         /**
3412          * Unicode script "Inherited".
3413          */
3414         INHERITED,
3415 
3416         /**
3417          * Unicode script "Tagalog".
3418          */
3419         TAGALOG,
3420 
3421         /**
3422          * Unicode script "Hanunoo".
3423          */
3424         HANUNOO,
3425 
3426         /**
3427          * Unicode script "Buhid".
3428          */
3429         BUHID,
3430 
3431         /**
3432          * Unicode script "Tagbanwa".
3433          */
3434         TAGBANWA,
3435 
3436         /**
3437          * Unicode script "Limbu".
3438          */
3439         LIMBU,
3440 
3441         /**
3442          * Unicode script "Tai_Le".
3443          */
3444         TAI_LE,
3445 
3446         /**
3447          * Unicode script "Linear_B".
3448          */
3449         LINEAR_B,
3450 
3451         /**
3452          * Unicode script "Ugaritic".
3453          */
3454         UGARITIC,
3455 
3456         /**
3457          * Unicode script "Shavian".
3458          */
3459         SHAVIAN,
3460 
3461         /**
3462          * Unicode script "Osmanya".
3463          */
3464         OSMANYA,
3465 
3466         /**
3467          * Unicode script "Cypriot".
3468          */
3469         CYPRIOT,
3470 
3471         /**
3472          * Unicode script "Braille".
3473          */
3474         BRAILLE,
3475 
3476         /**
3477          * Unicode script "Buginese".
3478          */
3479         BUGINESE,
3480 
3481         /**
3482          * Unicode script "Coptic".
3483          */
3484         COPTIC,
3485 
3486         /**
3487          * Unicode script "New_Tai_Lue".
3488          */
3489         NEW_TAI_LUE,
3490 
3491         /**
3492          * Unicode script "Glagolitic".
3493          */
3494         GLAGOLITIC,
3495 
3496         /**
3497          * Unicode script "Tifinagh".
3498          */
3499         TIFINAGH,
3500 
3501         /**
3502          * Unicode script "Syloti_Nagri".
3503          */
3504         SYLOTI_NAGRI,
3505 
3506         /**
3507          * Unicode script "Old_Persian".
3508          */
3509         OLD_PERSIAN,
3510 
3511         /**
3512          * Unicode script "Kharoshthi".
3513          */
3514         KHAROSHTHI,
3515 
3516         /**
3517          * Unicode script "Balinese".
3518          */
3519         BALINESE,
3520 
3521         /**
3522          * Unicode script "Cuneiform".
3523          */
3524         CUNEIFORM,
3525 
3526         /**
3527          * Unicode script "Phoenician".
3528          */
3529         PHOENICIAN,
3530 
3531         /**
3532          * Unicode script "Phags_Pa".
3533          */
3534         PHAGS_PA,
3535 
3536         /**
3537          * Unicode script "Nko".
3538          */
3539         NKO,
3540 
3541         /**
3542          * Unicode script "Sundanese".
3543          */
3544         SUNDANESE,
3545 
3546         /**
3547          * Unicode script "Batak".
3548          */
3549         BATAK,
3550 
3551         /**
3552          * Unicode script "Lepcha".
3553          */
3554         LEPCHA,
3555 
3556         /**
3557          * Unicode script "Ol_Chiki".
3558          */
3559         OL_CHIKI,
3560 
3561         /**
3562          * Unicode script "Vai".
3563          */
3564         VAI,
3565 
3566         /**
3567          * Unicode script "Saurashtra".
3568          */
3569         SAURASHTRA,
3570 
3571         /**
3572          * Unicode script "Kayah_Li".
3573          */
3574         KAYAH_LI,
3575 
3576         /**
3577          * Unicode script "Rejang".
3578          */
3579         REJANG,
3580 
3581         /**
3582          * Unicode script "Lycian".
3583          */
3584         LYCIAN,
3585 
3586         /**
3587          * Unicode script "Carian".
3588          */
3589         CARIAN,
3590 
3591         /**
3592          * Unicode script "Lydian".
3593          */
3594         LYDIAN,
3595 
3596         /**
3597          * Unicode script "Cham".
3598          */
3599         CHAM,
3600 
3601         /**
3602          * Unicode script "Tai_Tham".
3603          */
3604         TAI_THAM,
3605 
3606         /**
3607          * Unicode script "Tai_Viet".
3608          */
3609         TAI_VIET,
3610 
3611         /**
3612          * Unicode script "Avestan".
3613          */
3614         AVESTAN,
3615 
3616         /**
3617          * Unicode script "Egyptian_Hieroglyphs".
3618          */
3619         EGYPTIAN_HIEROGLYPHS,
3620 
3621         /**
3622          * Unicode script "Samaritan".
3623          */
3624         SAMARITAN,
3625 
3626         /**
3627          * Unicode script "Mandaic".
3628          */
3629         MANDAIC,
3630 
3631         /**
3632          * Unicode script "Lisu".
3633          */
3634         LISU,
3635 
3636         /**
3637          * Unicode script "Bamum".
3638          */
3639         BAMUM,
3640 
3641         /**
3642          * Unicode script "Javanese".
3643          */
3644         JAVANESE,
3645 
3646         /**
3647          * Unicode script "Meetei_Mayek".
3648          */
3649         MEETEI_MAYEK,
3650 
3651         /**
3652          * Unicode script "Imperial_Aramaic".
3653          */
3654         IMPERIAL_ARAMAIC,
3655 
3656         /**
3657          * Unicode script "Old_South_Arabian".
3658          */
3659         OLD_SOUTH_ARABIAN,
3660 
3661         /**
3662          * Unicode script "Inscriptional_Parthian".
3663          */
3664         INSCRIPTIONAL_PARTHIAN,
3665 
3666         /**
3667          * Unicode script "Inscriptional_Pahlavi".
3668          */
3669         INSCRIPTIONAL_PAHLAVI,
3670 
3671         /**
3672          * Unicode script "Old_Turkic".
3673          */
3674         OLD_TURKIC,
3675 
3676         /**
3677          * Unicode script "Brahmi".
3678          */
3679         BRAHMI,
3680 
3681         /**
3682          * Unicode script "Kaithi".
3683          */
3684         KAITHI,
3685 
3686         /**
3687          * Unicode script "Meroitic_Hieroglyphs".
3688          */
3689         MEROITIC_HIEROGLYPHS,
3690 
3691         /**
3692          * Unicode script "Meroitic_Cursive".
3693          */
3694         MEROITIC_CURSIVE,
3695 
3696         /**
3697          * Unicode script "Sora_Sompeng".
3698          */
3699         SORA_SOMPENG,
3700 
3701         /**
3702          * Unicode script "Chakma".
3703          */
3704         CHAKMA,
3705 
3706         /**
3707          * Unicode script "Sharada".
3708          */
3709         SHARADA,
3710 
3711         /**
3712          * Unicode script "Takri".
3713          */
3714         TAKRI,
3715 
3716         /**
3717          * Unicode script "Miao".
3718          */
3719         MIAO,
3720 
3721         /**
3722          * Unicode script "Unknown".
3723          */
3724         UNKNOWN;
3725 
3726         private static final int[] scriptStarts = {  // ascending start code point of each run; comments read "first..last; SCRIPT"
3727             0x0000,   // 0000..0040; COMMON
3728             0x0041,   // 0041..005A; LATIN
3729             0x005B,   // 005B..0060; COMMON
3730             0x0061,   // 0061..007A; LATIN
3731             0x007B,   // 007B..00A9; COMMON
3732             0x00AA,   // 00AA..00AA; LATIN
3733             0x00AB,   // 00AB..00B9; COMMON
3734             0x00BA,   // 00BA..00BA; LATIN
3735             0x00BB,   // 00BB..00BF; COMMON
3736             0x00C0,   // 00C0..00D6; LATIN
3737             0x00D7,   // 00D7..00D7; COMMON
3738             0x00D8,   // 00D8..00F6; LATIN
3739             0x00F7,   // 00F7..00F7; COMMON
3740             0x00F8,   // 00F8..02B8; LATIN
3741             0x02B9,   // 02B9..02DF; COMMON
3742             0x02E0,   // 02E0..02E4; LATIN
3743             0x02E5,   // 02E5..02E9; COMMON
3744             0x02EA,   // 02EA..02EB; BOPOMOFO
3745             0x02EC,   // 02EC..02FF; COMMON
3746             0x0300,   // 0300..036F; INHERITED
3747             0x0370,   // 0370..0373; GREEK
3748             0x0374,   // 0374..0374; COMMON
3749             0x0375,   // 0375..037D; GREEK
3750             0x037E,   // 037E..0383; COMMON
3751             0x0384,   // 0384..0384; GREEK
3752             0x0385,   // 0385..0385; COMMON
3753             0x0386,   // 0386..0386; GREEK
3754             0x0387,   // 0387..0387; COMMON
3755             0x0388,   // 0388..03E1; GREEK
3756             0x03E2,   // 03E2..03EF; COPTIC
3757             0x03F0,   // 03F0..03FF; GREEK
3758             0x0400,   // 0400..0484; CYRILLIC
3759             0x0485,   // 0485..0486; INHERITED
3760             0x0487,   // 0487..0530; CYRILLIC
3761             0x0531,   // 0531..0588; ARMENIAN
3762             0x0589,   // 0589..0589; COMMON
3763             0x058A,   // 058A..0590; ARMENIAN
3764             0x0591,   // 0591..05FF; HEBREW
3765             0x0600,   // 0600..060B; ARABIC
3766             0x060C,   // 060C..060C; COMMON
3767             0x060D,   // 060D..061A; ARABIC
3768             0x061B,   // 061B..061D; COMMON
3769             0x061E,   // 061E..061E; ARABIC
3770             0x061F,   // 061F..061F; COMMON
3771             0x0620,   // 0620..063F; ARABIC
3772             0x0640,   // 0640..0640; COMMON
3773             0x0641,   // 0641..064A; ARABIC
3774             0x064B,   // 064B..0655; INHERITED
3775             0x0656,   // 0656..065F; ARABIC
3776             0x0660,   // 0660..0669; COMMON
3777             0x066A,   // 066A..066F; ARABIC
3778             0x0670,   // 0670..0670; INHERITED
3779             0x0671,   // 0671..06DC; ARABIC
3780             0x06DD,   // 06DD..06DD; COMMON
3781             0x06DE,   // 06DE..06FF; ARABIC
3782             0x0700,   // 0700..074F; SYRIAC
3783             0x0750,   // 0750..077F; ARABIC
3784             0x0780,   // 0780..07BF; THAANA
3785             0x07C0,   // 07C0..07FF; NKO
3786             0x0800,   // 0800..083F; SAMARITAN
3787             0x0840,   // 0840..089F; MANDAIC
3788             0x08A0,   // 08A0..08FF; ARABIC
3789             0x0900,   // 0900..0950; DEVANAGARI
3790             0x0951,   // 0951..0952; INHERITED
3791             0x0953,   // 0953..0963; DEVANAGARI
3792             0x0964,   // 0964..0965; COMMON
3793             0x0966,   // 0966..0980; DEVANAGARI
3794             0x0981,   // 0981..0A00; BENGALI
3795             0x0A01,   // 0A01..0A80; GURMUKHI
3796             0x0A81,   // 0A81..0B00; GUJARATI
3797             0x0B01,   // 0B01..0B81; ORIYA
3798             0x0B82,   // 0B82..0C00; TAMIL
3799             0x0C01,   // 0C01..0C81; TELUGU
3800             0x0C82,   // 0C82..0CF0; KANNADA
3801             0x0D02,   // 0D02..0D81; MALAYALAM
3802             0x0D82,   // 0D82..0E00; SINHALA
3803             0x0E01,   // 0E01..0E3E; THAI
3804             0x0E3F,   // 0E3F..0E3F; COMMON
3805             0x0E40,   // 0E40..0E80; THAI
3806             0x0E81,   // 0E81..0EFF; LAO
3807             0x0F00,   // 0F00..0FD4; TIBETAN
3808             0x0FD5,   // 0FD5..0FD8; COMMON
3809             0x0FD9,   // 0FD9..0FFF; TIBETAN
3810             0x1000,   // 1000..109F; MYANMAR
3811             0x10A0,   // 10A0..10FA; GEORGIAN
3812             0x10FB,   // 10FB..10FB; COMMON
3813             0x10FC,   // 10FC..10FF; GEORGIAN
3814             0x1100,   // 1100..11FF; HANGUL
3815             0x1200,   // 1200..139F; ETHIOPIC
3816             0x13A0,   // 13A0..13FF; CHEROKEE
3817             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
3818             0x1680,   // 1680..169F; OGHAM
3819             0x16A0,   // 16A0..16EA; RUNIC
3820             0x16EB,   // 16EB..16ED; COMMON
3821             0x16EE,   // 16EE..16FF; RUNIC
3822             0x1700,   // 1700..171F; TAGALOG
3823             0x1720,   // 1720..1734; HANUNOO
3824             0x1735,   // 1735..173F; COMMON
3825             0x1740,   // 1740..175F; BUHID
3826             0x1760,   // 1760..177F; TAGBANWA
3827             0x1780,   // 1780..17FF; KHMER
3828             0x1800,   // 1800..1801; MONGOLIAN
3829             0x1802,   // 1802..1803; COMMON
3830             0x1804,   // 1804..1804; MONGOLIAN
3831             0x1805,   // 1805..1805; COMMON
3832             0x1806,   // 1806..18AF; MONGOLIAN
3833             0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
3834             0x1900,   // 1900..194F; LIMBU
3835             0x1950,   // 1950..197F; TAI_LE
3836             0x1980,   // 1980..19DF; NEW_TAI_LUE
3837             0x19E0,   // 19E0..19FF; KHMER
3838             0x1A00,   // 1A00..1A1F; BUGINESE
3839             0x1A20,   // 1A20..1AFF; TAI_THAM
3840             0x1B00,   // 1B00..1B7F; BALINESE
3841             0x1B80,   // 1B80..1BBF; SUNDANESE
3842             0x1BC0,   // 1BC0..1BFF; BATAK
3843             0x1C00,   // 1C00..1C4F; LEPCHA
3844             0x1C50,   // 1C50..1CBF; OL_CHIKI
3845             0x1CC0,   // 1CC0..1CCF; SUNDANESE
3846             0x1CD0,   // 1CD0..1CD2; INHERITED
3847             0x1CD3,   // 1CD3..1CD3; COMMON
3848             0x1CD4,   // 1CD4..1CE0; INHERITED
3849             0x1CE1,   // 1CE1..1CE1; COMMON
3850             0x1CE2,   // 1CE2..1CE8; INHERITED
3851             0x1CE9,   // 1CE9..1CEC; COMMON
3852             0x1CED,   // 1CED..1CED; INHERITED
3853             0x1CEE,   // 1CEE..1CF3; COMMON
3854             0x1CF4,   // 1CF4..1CF4; INHERITED
3855             0x1CF5,   // 1CF5..1CFF; COMMON
3856             0x1D00,   // 1D00..1D25; LATIN
3857             0x1D26,   // 1D26..1D2A; GREEK
3858             0x1D2B,   // 1D2B..1D2B; CYRILLIC
3859             0x1D2C,   // 1D2C..1D5C; LATIN
3860             0x1D5D,   // 1D5D..1D61; GREEK
3861             0x1D62,   // 1D62..1D65; LATIN
3862             0x1D66,   // 1D66..1D6A; GREEK
3863             0x1D6B,   // 1D6B..1D77; LATIN
3864             0x1D78,   // 1D78..1D78; CYRILLIC
3865             0x1D79,   // 1D79..1DBE; LATIN
3866             0x1DBF,   // 1DBF..1DBF; GREEK
3867             0x1DC0,   // 1DC0..1DFF; INHERITED
3868             0x1E00,   // 1E00..1EFF; LATIN
3869             0x1F00,   // 1F00..1FFF; GREEK
3870             0x2000,   // 2000..200B; COMMON
3871             0x200C,   // 200C..200D; INHERITED
3872             0x200E,   // 200E..2070; COMMON
3873             0x2071,   // 2071..2073; LATIN
3874             0x2074,   // 2074..207E; COMMON
3875             0x207F,   // 207F..207F; LATIN
3876             0x2080,   // 2080..208F; COMMON
3877             0x2090,   // 2090..209F; LATIN
3878             0x20A0,   // 20A0..20CF; COMMON
3879             0x20D0,   // 20D0..20FF; INHERITED
3880             0x2100,   // 2100..2125; COMMON
3881             0x2126,   // 2126..2126; GREEK
3882             0x2127,   // 2127..2129; COMMON
3883             0x212A,   // 212A..212B; LATIN
3884             0x212C,   // 212C..2131; COMMON
3885             0x2132,   // 2132..2132; LATIN
3886             0x2133,   // 2133..214D; COMMON
3887             0x214E,   // 214E..214E; LATIN
3888             0x214F,   // 214F..215F; COMMON
3889             0x2160,   // 2160..2188; LATIN
3890             0x2189,   // 2189..27FF; COMMON
3891             0x2800,   // 2800..28FF; BRAILLE
3892             0x2900,   // 2900..2BFF; COMMON
3893             0x2C00,   // 2C00..2C5F; GLAGOLITIC
3894             0x2C60,   // 2C60..2C7F; LATIN
3895             0x2C80,   // 2C80..2CFF; COPTIC
3896             0x2D00,   // 2D00..2D2F; GEORGIAN
3897             0x2D30,   // 2D30..2D7F; TIFINAGH
3898             0x2D80,   // 2D80..2DDF; ETHIOPIC
3899             0x2DE0,   // 2DE0..2DFF; CYRILLIC
3900             0x2E00,   // 2E00..2E7F; COMMON
3901             0x2E80,   // 2E80..2FEF; HAN
3902             0x2FF0,   // 2FF0..3004; COMMON
3903             0x3005,   // 3005..3005; HAN
3904             0x3006,   // 3006..3006; COMMON
3905             0x3007,   // 3007..3007; HAN
3906             0x3008,   // 3008..3020; COMMON
3907             0x3021,   // 3021..3029; HAN
3908             0x302A,   // 302A..302D; INHERITED
3909             0x302E,   // 302E..302F; HANGUL
3910             0x3030,   // 3030..3037; COMMON
3911             0x3038,   // 3038..303B; HAN
3912             0x303C,   // 303C..3040; COMMON
3913             0x3041,   // 3041..3098; HIRAGANA
3914             0x3099,   // 3099..309A; INHERITED
3915             0x309B,   // 309B..309C; COMMON
3916             0x309D,   // 309D..309F; HIRAGANA
3917             0x30A0,   // 30A0..30A0; COMMON
3918             0x30A1,   // 30A1..30FA; KATAKANA
3919             0x30FB,   // 30FB..30FC; COMMON
3920             0x30FD,   // 30FD..3104; KATAKANA
3921             0x3105,   // 3105..3130; BOPOMOFO
3922             0x3131,   // 3131..318F; HANGUL
3923             0x3190,   // 3190..319F; COMMON
3924             0x31A0,   // 31A0..31BF; BOPOMOFO
3925             0x31C0,   // 31C0..31EF; COMMON
3926             0x31F0,   // 31F0..31FF; KATAKANA
3927             0x3200,   // 3200..321F; HANGUL
3928             0x3220,   // 3220..325F; COMMON
3929             0x3260,   // 3260..327E; HANGUL
3930             0x327F,   // 327F..32CF; COMMON
3931             0x32D0,   // 32D0..32FE; KATAKANA
3932             0x32FF,   // 32FF..32FF; COMMON
3933             0x3300,   // 3300..3357; KATAKANA
3934             0x3358,   // 3358..33FF; COMMON
3935             0x3400,   // 3400..4DBF; HAN
3936             0x4DC0,   // 4DC0..4DFF; COMMON
3937             0x4E00,   // 4E00..9FFF; HAN
3938             0xA000,   // A000..A4CF; YI
3939             0xA4D0,   // A4D0..A4FF; LISU
3940             0xA500,   // A500..A63F; VAI
3941             0xA640,   // A640..A69F; CYRILLIC
3942             0xA6A0,   // A6A0..A6FF; BAMUM
3943             0xA700,   // A700..A721; COMMON
3944             0xA722,   // A722..A787; LATIN
3945             0xA788,   // A788..A78A; COMMON
3946             0xA78B,   // A78B..A7FF; LATIN
3947             0xA800,   // A800..A82F; SYLOTI_NAGRI
3948             0xA830,   // A830..A83F; COMMON
3949             0xA840,   // A840..A87F; PHAGS_PA
3950             0xA880,   // A880..A8DF; SAURASHTRA
3951             0xA8E0,   // A8E0..A8FF; DEVANAGARI
3952             0xA900,   // A900..A92F; KAYAH_LI
3953             0xA930,   // A930..A95F; REJANG
3954             0xA960,   // A960..A97F; HANGUL
3955             0xA980,   // A980..A9FF; JAVANESE
3956             0xAA00,   // AA00..AA5F; CHAM
3957             0xAA60,   // AA60..AA7F; MYANMAR
3958             0xAA80,   // AA80..AADF; TAI_VIET
3959             0xAAE0,   // AAE0..AB00; MEETEI_MAYEK
3960             0xAB01,   // AB01..ABBF; ETHIOPIC
3961             0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
3962             0xAC00,   // AC00..D7FB; HANGUL
3963             0xD7FC,   // D7FC..F8FF; UNKNOWN
3964             0xF900,   // F900..FAFF; HAN
3965             0xFB00,   // FB00..FB12; LATIN
3966             0xFB13,   // FB13..FB1C; ARMENIAN
3967             0xFB1D,   // FB1D..FB4F; HEBREW
3968             0xFB50,   // FB50..FD3D; ARABIC
3969             0xFD3E,   // FD3E..FD4F; COMMON
3970             0xFD50,   // FD50..FDFC; ARABIC
3971             0xFDFD,   // FDFD..FDFF; COMMON
3972             0xFE00,   // FE00..FE0F; INHERITED
3973             0xFE10,   // FE10..FE1F; COMMON
3974             0xFE20,   // FE20..FE2F; INHERITED
3975             0xFE30,   // FE30..FE6F; COMMON
3976             0xFE70,   // FE70..FEFE; ARABIC
3977             0xFEFF,   // FEFF..FF20; COMMON
3978             0xFF21,   // FF21..FF3A; LATIN
3979             0xFF3B,   // FF3B..FF40; COMMON
3980             0xFF41,   // FF41..FF5A; LATIN
3981             0xFF5B,   // FF5B..FF65; COMMON
3982             0xFF66,   // FF66..FF6F; KATAKANA
3983             0xFF70,   // FF70..FF70; COMMON
3984             0xFF71,   // FF71..FF9D; KATAKANA
3985             0xFF9E,   // FF9E..FF9F; COMMON
3986             0xFFA0,   // FFA0..FFDF; HANGUL
3987             0xFFE0,   // FFE0..FFFF; COMMON
3988             0x10000,  // 10000..100FF; LINEAR_B
3989             0x10100,  // 10100..1013F; COMMON
3990             0x10140,  // 10140..1018F; GREEK
3991             0x10190,  // 10190..101FC; COMMON
3992             0x101FD,  // 101FD..1027F; INHERITED
3993             0x10280,  // 10280..1029F; LYCIAN
3994             0x102A0,  // 102A0..102FF; CARIAN
3995             0x10300,  // 10300..1032F; OLD_ITALIC
3996             0x10330,  // 10330..1037F; GOTHIC
3997             0x10380,  // 10380..1039F; UGARITIC
3998             0x103A0,  // 103A0..103FF; OLD_PERSIAN
3999             0x10400,  // 10400..1044F; DESERET
4000             0x10450,  // 10450..1047F; SHAVIAN
4001             0x10480,  // 10480..107FF; OSMANYA
4002             0x10800,  // 10800..1083F; CYPRIOT
4003             0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
4004             0x10900,  // 10900..1091F; PHOENICIAN
4005             0x10920,  // 10920..1097F; LYDIAN
4006             0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
4007             0x109A0,  // 109A0..109FF; MEROITIC_CURSIVE
4008             0x10A00,  // 10A00..10A5F; KHAROSHTHI
4009             0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
4010             0x10B00,  // 10B00..10B3F; AVESTAN
4011             0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
4012             0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
4013             0x10C00,  // 10C00..10E5F; OLD_TURKIC
4014             0x10E60,  // 10E60..10FFF; ARABIC
4015             0x11000,  // 11000..1107F; BRAHMI
4016             0x11080,  // 11080..110CF; KAITHI
4017             0x110D0,  // 110D0..110FF; SORA_SOMPENG
4018             0x11100,  // 11100..1117F; CHAKMA
4019             0x11180,  // 11180..1167F; SHARADA
4020             0x11680,  // 11680..116CF; TAKRI
4021             0x12000,  // 12000..12FFF; CUNEIFORM
4022             0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
4023             0x16800,  // 16800..16A38; BAMUM
4024             0x16F00,  // 16F00..16F9F; MIAO
4025             0x1B000,  // 1B000..1B000; KATAKANA
4026             0x1B001,  // 1B001..1CFFF; HIRAGANA
4027             0x1D000,  // 1D000..1D166; COMMON
4028             0x1D167,  // 1D167..1D169; INHERITED
4029             0x1D16A,  // 1D16A..1D17A; COMMON
4030             0x1D17B,  // 1D17B..1D182; INHERITED
4031             0x1D183,  // 1D183..1D184; COMMON
4032             0x1D185,  // 1D185..1D18B; INHERITED
4033             0x1D18C,  // 1D18C..1D1A9; COMMON
4034             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
4035             0x1D1AE,  // 1D1AE..1D1FF; COMMON
4036             0x1D200,  // 1D200..1D2FF; GREEK
4037             0x1D300,  // 1D300..1EDFF; COMMON
4038             0x1EE00,  // 1EE00..1EFFF; ARABIC
4039             0x1F000,  // 1F000..1F1FF; COMMON
4040             0x1F200,  // 1F200..1F200; HIRAGANA
4041             0x1F201,  // 1F201..1FFFF; COMMON
4042             0x20000,  // 20000..E0000; HAN
4043             0xE0001,  // E0001..E00FF; COMMON
4044             0xE0100,  // E0100..E01EF; INHERITED
4045             0xE01F0   // E01F0..10FFFF; UNKNOWN
4046 
4047         };
4048 
        // Script table parallel to scriptStarts: scripts[i] is the script
        // reported for the range of code points that begins at scriptStarts[i].
        // of(int) binary-searches scriptStarts and uses the resulting index
        // directly into this array, so both arrays must list the same ranges
        // in the same order.
        private static final UnicodeScript[] scripts = {
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            BOPOMOFO,
            COMMON,
            INHERITED,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COPTIC,
            GREEK,
            CYRILLIC,
            INHERITED,
            CYRILLIC,
            ARMENIAN,
            COMMON,
            ARMENIAN,
            HEBREW,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            INHERITED,
            ARABIC,
            COMMON,
            ARABIC,
            INHERITED,
            ARABIC,
            COMMON,
            ARABIC,
            SYRIAC,
            ARABIC,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            ARABIC,
            DEVANAGARI,
            INHERITED,
            DEVANAGARI,
            COMMON,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            COMMON,
            THAI,
            LAO,
            TIBETAN,
            COMMON,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            COMMON,
            GEORGIAN,
            HANGUL,
            ETHIOPIC,
            CHEROKEE,
            CANADIAN_ABORIGINAL,
            OGHAM,
            RUNIC,
            COMMON,
            RUNIC,
            TAGALOG,
            HANUNOO,
            COMMON,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            COMMON,
            MONGOLIAN,
            COMMON,
            MONGOLIAN,
            CANADIAN_ABORIGINAL,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER,
            BUGINESE,
            TAI_THAM,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            SUNDANESE,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            LATIN,
            GREEK,
            CYRILLIC,
            LATIN,
            GREEK,
            LATIN,
            GREEK,
            LATIN,
            CYRILLIC,
            LATIN,
            GREEK,
            INHERITED,
            LATIN,
            GREEK,
            COMMON,
            INHERITED,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            INHERITED,
            COMMON,
            GREEK,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            BRAILLE,
            COMMON,
            GLAGOLITIC,
            LATIN,
            COPTIC,
            GEORGIAN,
            TIFINAGH,
            ETHIOPIC,
            CYRILLIC,
            COMMON,
            HAN,
            COMMON,
            HAN,
            COMMON,
            HAN,
            COMMON,
            HAN,
            INHERITED,
            HANGUL,
            COMMON,
            HAN,
            COMMON,
            HIRAGANA,
            INHERITED,
            COMMON,
            HIRAGANA,
            COMMON,
            KATAKANA,
            COMMON,
            KATAKANA,
            BOPOMOFO,
            HANGUL,
            COMMON,
            BOPOMOFO,
            COMMON,
            KATAKANA,
            HANGUL,
            COMMON,
            HANGUL,
            COMMON,
            KATAKANA,  // 32D0..32FE
            COMMON,    // 32FF
            KATAKANA,  // 3300..3357
            COMMON,
            HAN,
            COMMON,
            HAN,
            YI,
            LISU,
            VAI,
            CYRILLIC,
            BAMUM,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            SYLOTI_NAGRI,
            COMMON,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI,
            KAYAH_LI,
            REJANG,
            HANGUL,
            JAVANESE,
            CHAM,
            MYANMAR,
            TAI_VIET,
            MEETEI_MAYEK,
            ETHIOPIC,
            MEETEI_MAYEK,
            HANGUL,
            UNKNOWN     ,
            HAN,
            LATIN,
            ARMENIAN,
            HEBREW,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            ARABIC,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            KATAKANA,
            COMMON,
            KATAKANA,
            COMMON,
            HANGUL,
            COMMON,
            LINEAR_B,
            COMMON,
            GREEK,
            COMMON,
            INHERITED,
            LYCIAN,
            CARIAN,
            OLD_ITALIC,
            GOTHIC,
            UGARITIC,
            OLD_PERSIAN,
            DESERET,
            SHAVIAN,
            OSMANYA,
            CYPRIOT,
            IMPERIAL_ARAMAIC,
            PHOENICIAN,
            LYDIAN,
            MEROITIC_HIEROGLYPHS,
            MEROITIC_CURSIVE,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            OLD_TURKIC,
            ARABIC,
            BRAHMI,
            KAITHI,
            SORA_SOMPENG,
            CHAKMA,
            SHARADA,
            TAKRI,
            CUNEIFORM,
            EGYPTIAN_HIEROGLYPHS,
            BAMUM,
            MIAO,
            KATAKANA,
            HIRAGANA,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            GREEK,
            COMMON,
            ARABIC,
            COMMON,
            HIRAGANA,
            COMMON,
            HAN,
            COMMON,
            INHERITED,
            UNKNOWN
        };
4370 
4371         private static HashMap<String, Character.UnicodeScript> aliases;
4372         static {
4373             aliases = new HashMap<>(128);
4374             aliases.put("ARAB", ARABIC);
4375             aliases.put("ARMI", IMPERIAL_ARAMAIC);
4376             aliases.put("ARMN", ARMENIAN);
4377             aliases.put("AVST", AVESTAN);
4378             aliases.put("BALI", BALINESE);
4379             aliases.put("BAMU", BAMUM);
4380             aliases.put("BATK", BATAK);
4381             aliases.put("BENG", BENGALI);
4382             aliases.put("BOPO", BOPOMOFO);
4383             aliases.put("BRAI", BRAILLE);
4384             aliases.put("BRAH", BRAHMI);
4385             aliases.put("BUGI", BUGINESE);
4386             aliases.put("BUHD", BUHID);
4387             aliases.put("CAKM", CHAKMA);
4388             aliases.put("CANS", CANADIAN_ABORIGINAL);
4389             aliases.put("CARI", CARIAN);
4390             aliases.put("CHAM", CHAM);
4391             aliases.put("CHER", CHEROKEE);
4392             aliases.put("COPT", COPTIC);
4393             aliases.put("CPRT", CYPRIOT);
4394             aliases.put("CYRL", CYRILLIC);
4395             aliases.put("DEVA", DEVANAGARI);
4396             aliases.put("DSRT", DESERET);
4397             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4398             aliases.put("ETHI", ETHIOPIC);
4399             aliases.put("GEOR", GEORGIAN);
4400             aliases.put("GLAG", GLAGOLITIC);
4401             aliases.put("GOTH", GOTHIC);
4402             aliases.put("GREK", GREEK);
4403             aliases.put("GUJR", GUJARATI);
4404             aliases.put("GURU", GURMUKHI);
4405             aliases.put("HANG", HANGUL);
4406             aliases.put("HANI", HAN);
4407             aliases.put("HANO", HANUNOO);
4408             aliases.put("HEBR", HEBREW);
4409             aliases.put("HIRA", HIRAGANA);
4410             // it appears we don't have the KATAKANA_OR_HIRAGANA
4411             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4412             aliases.put("ITAL", OLD_ITALIC);
4413             aliases.put("JAVA", JAVANESE);
4414             aliases.put("KALI", KAYAH_LI);
4415             aliases.put("KANA", KATAKANA);
4416             aliases.put("KHAR", KHAROSHTHI);
4417             aliases.put("KHMR", KHMER);
4418             aliases.put("KNDA", KANNADA);
4419             aliases.put("KTHI", KAITHI);
4420             aliases.put("LANA", TAI_THAM);
4421             aliases.put("LAOO", LAO);
4422             aliases.put("LATN", LATIN);
4423             aliases.put("LEPC", LEPCHA);
4424             aliases.put("LIMB", LIMBU);
4425             aliases.put("LINB", LINEAR_B);
4426             aliases.put("LISU", LISU);
4427             aliases.put("LYCI", LYCIAN);
4428             aliases.put("LYDI", LYDIAN);
4429             aliases.put("MAND", MANDAIC);
4430             aliases.put("MERC", MEROITIC_CURSIVE);
4431             aliases.put("MERO", MEROITIC_HIEROGLYPHS);
4432             aliases.put("MLYM", MALAYALAM);
4433             aliases.put("MONG", MONGOLIAN);
4434             aliases.put("MTEI", MEETEI_MAYEK);
4435             aliases.put("MYMR", MYANMAR);
4436             aliases.put("NKOO", NKO);
4437             aliases.put("OGAM", OGHAM);
4438             aliases.put("OLCK", OL_CHIKI);
4439             aliases.put("ORKH", OLD_TURKIC);
4440             aliases.put("ORYA", ORIYA);
4441             aliases.put("OSMA", OSMANYA);
4442             aliases.put("PHAG", PHAGS_PA);
4443             aliases.put("PLRD", MIAO);
4444             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4445             aliases.put("PHNX", PHOENICIAN);
4446             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4447             aliases.put("RJNG", REJANG);
4448             aliases.put("RUNR", RUNIC);
4449             aliases.put("SAMR", SAMARITAN);
4450             aliases.put("SARB", OLD_SOUTH_ARABIAN);
4451             aliases.put("SAUR", SAURASHTRA);
4452             aliases.put("SHAW", SHAVIAN);
4453             aliases.put("SHRD", SHARADA);
4454             aliases.put("SINH", SINHALA);
4455             aliases.put("SORA", SORA_SOMPENG);
4456             aliases.put("SUND", SUNDANESE);
4457             aliases.put("SYLO", SYLOTI_NAGRI);
4458             aliases.put("SYRC", SYRIAC);
4459             aliases.put("TAGB", TAGBANWA);
4460             aliases.put("TALE", TAI_LE);
4461             aliases.put("TAKR", TAKRI);
4462             aliases.put("TALU", NEW_TAI_LUE);
4463             aliases.put("TAML", TAMIL);
4464             aliases.put("TAVT", TAI_VIET);
4465             aliases.put("TELU", TELUGU);
4466             aliases.put("TFNG", TIFINAGH);
4467             aliases.put("TGLG", TAGALOG);
4468             aliases.put("THAA", THAANA);
4469             aliases.put("THAI", THAI);
4470             aliases.put("TIBT", TIBETAN);
4471             aliases.put("UGAR", UGARITIC);
4472             aliases.put("VAII", VAI);
4473             aliases.put("XPEO", OLD_PERSIAN);
4474             aliases.put("XSUX", CUNEIFORM);
4475             aliases.put("YIII", YI);
4476             aliases.put("ZINH", INHERITED);
4477             aliases.put("ZYYY", COMMON);
4478             aliases.put("ZZZZ", UNKNOWN);
4479         }
4480 
4481         /**
4482          * Returns the enum constant representing the Unicode script of which
4483          * the given character (Unicode code point) is assigned to.
4484          *
4485          * @param   codePoint the character (Unicode code point) in question.
4486          * @return  The {@code UnicodeScript} constant representing the
4487          *          Unicode script of which this character is assigned to.
4488          *
4489          * @exception IllegalArgumentException if the specified
4490          * {@code codePoint} is an invalid Unicode code point.
4491          * @see Character#isValidCodePoint(int)
4492          *
4493          */
4494         public static UnicodeScript of(int codePoint) {
4495             if (!isValidCodePoint(codePoint))
4496                 throw new IllegalArgumentException();
4497             int type = getType(codePoint);
4498             // leave SURROGATE and PRIVATE_USE for table lookup
4499             if (type == UNASSIGNED)
4500                 return UNKNOWN;
4501             int index = Arrays.binarySearch(scriptStarts, codePoint);
4502             if (index < 0)
4503                 index = -index - 2;
4504             return scripts[index];
4505         }
4506 
4507         /**
4508          * Returns the UnicodeScript constant with the given Unicode script
4509          * name or the script name alias. Script names and their aliases are
4510          * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4511          * and PropertyValueAliases&lt;version&gt;.txt define script names
4512          * and the script name aliases for a particular version of the
4513          * standard. The {@link Character} class specifies the version of
4514          * the standard that it supports.
4515          * <p>
4516          * Character case is ignored for all of the valid script names.
4517          * The en_US locale's case mapping rules are used to provide
4518          * case-insensitive string comparisons for script name validation.
4519          * <p>
4520          *
4521          * @param scriptName A {@code UnicodeScript} name.
4522          * @return The {@code UnicodeScript} constant identified
4523          *         by {@code scriptName}
4524          * @throws IllegalArgumentException if {@code scriptName} is an
4525          *         invalid name
4526          * @throws NullPointerException if {@code scriptName} is null
4527          */
4528         public static final UnicodeScript forName(String scriptName) {
4529             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4530                                  //.replace(' ', '_'));
4531             UnicodeScript sc = aliases.get(scriptName);
4532             if (sc != null)
4533                 return sc;
4534             return valueOf(scriptName);
4535         }
4536     }
4537 
    /**
     * The primitive {@code char} value wrapped by this {@code Character}.
     * It is assigned once, by the constructor.
     *
     * @serial
     */
    private final char value;

    /** Uses the serialVersionUID from JDK 1.0.2 for interoperability. */
    private static final long serialVersionUID = 3786198910865385080L;
4547 
    /**
     * Constructs a newly allocated {@code Character} object that
     * represents the specified {@code char} value.
     *
     * <p>Every call creates a distinct object. When a new instance is not
     * required, {@link #valueOf(char)} is generally the better choice
     * because it caches frequently requested values.
     *
     * @param  value   the value to be represented by the
     *                  {@code Character} object.
     */
    public Character(char value) {
        this.value = value;
    }
4558 
4559     private static class CharacterCache {
4560         private CharacterCache(){}
4561 
4562         static final Character cache[] = new Character[127 + 1];
4563 
4564         static {
4565             for (int i = 0; i < cache.length; i++)
4566                 cache[i] = new Character((char)i);
4567         }
4568     }
4569 
4570     /**
4571      * Returns a <tt>Character</tt> instance representing the specified
4572      * <tt>char</tt> value.
4573      * If a new <tt>Character</tt> instance is not required, this method
4574      * should generally be used in preference to the constructor
4575      * {@link #Character(char)}, as this method is likely to yield
4576      * significantly better space and time performance by caching
4577      * frequently requested values.
4578      *
4579      * This method will always cache values in the range {@code
4580      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
4581      * cache other values outside of this range.
4582      *
4583      * @param  c a char value.
4584      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
4585      * @since  1.5
4586      */
4587     public static Character valueOf(char c) {
4588         if (c <= 127) { // must cache
4589             return CharacterCache.cache[(int)c];
4590         }
4591         return new Character(c);
4592     }
4593 
    /**
     * Returns the value of this {@code Character} object.
     *
     * @return  the primitive {@code char} value represented by
     *          this object.
     */
    public char charValue() {
        return value;
    }
4602 
    /**
     * Returns a hash code for this {@code Character}; equal to the result
     * of invoking {@code charValue()}.
     *
     * <p>The value is computed by delegating to {@link #hashCode(char)}.
     *
     * @return a hash code value for this {@code Character}
     */
    @Override
    public int hashCode() {
        return Character.hashCode(value);
    }
4613 
4614     /**
4615      * Returns a hash code for a {@code char} value; compatible with
4616      * {@code Character.hashCode()}.
4617      *
4618      * @since 1.8
4619      *
4620      * @param value The {@code char} for which to return a hash code.
4621      * @return a hash code value for a {@code char} value.
4622      */
4623     public static int hashCode(char value) {
4624         return (int)value;
4625     }
4626 
4627     /**
4628      * Compares this object against the specified object.
4629      * The result is {@code true} if and only if the argument is not
4630      * {@code null} and is a {@code Character} object that
4631      * represents the same {@code char} value as this object.
4632      *
4633      * @param   obj   the object to compare with.
4634      * @return  {@code true} if the objects are the same;
4635      *          {@code false} otherwise.
4636      */
4637     public boolean equals(Object obj) {
4638         if (obj instanceof Character) {
4639             return value == ((Character)obj).charValue();
4640         }
4641         return false;
4642     }
4643 
4644     /**
4645      * Returns a {@code String} object representing this
4646      * {@code Character}'s value.  The result is a string of
4647      * length 1 whose sole component is the primitive
4648      * {@code char} value represented by this
4649      * {@code Character} object.
4650      *
4651      * @return  a string representation of this object.
4652      */
4653     public String toString() {
4654         char buf[] = {value};
4655         return String.valueOf(buf);
4656     }
4657 
    /**
     * Returns a {@code String} object representing the
     * specified {@code char}.  The result is a string of length
     * 1 consisting solely of the specified {@code char}.
     *
     * <p>This method delegates to {@link String#valueOf(char)}.
     *
     * @param c the {@code char} to be converted
     * @return the string representation of the specified {@code char}
     * @since 1.4
     */
    public static String toString(char c) {
        return String.valueOf(c);
    }
4670 
4671     /**
4672      * Determines whether the specified code point is a valid
4673      * <a href="http://www.unicode.org/glossary/#code_point">
4674      * Unicode code point value</a>.
4675      *
4676      * @param  codePoint the Unicode code point to be tested
4677      * @return {@code true} if the specified code point value is between
4678      *         {@link #MIN_CODE_POINT} and
4679      *         {@link #MAX_CODE_POINT} inclusive;
4680      *         {@code false} otherwise.
4681      * @since  1.5
4682      */
4683     public static boolean isValidCodePoint(int codePoint) {
4684         // Optimized form of:
4685         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4686         int plane = codePoint >>> 16;
4687         return plane < ((MAX_CODE_POINT + 1) >>> 16);
4688     }
4689 
4690     /**
4691      * Determines whether the specified character (Unicode code point)
4692      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4693      * Such code points can be represented using a single {@code char}.
4694      *
4695      * @param  codePoint the character (Unicode code point) to be tested
4696      * @return {@code true} if the specified code point is between
4697      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4698      *         {@code false} otherwise.
4699      * @since  1.7
4700      */
4701     public static boolean isBmpCodePoint(int codePoint) {
4702         return codePoint >>> 16 == 0;
4703         // Optimized form of:
4704         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4705         // We consistently use logical shift (>>>) to facilitate
4706         // additional runtime optimizations.
4707     }
4708 
4709     /**
4710      * Determines whether the specified character (Unicode code point)
4711      * is in the <a href="#supplementary">supplementary character</a> range.
4712      *
4713      * @param  codePoint the character (Unicode code point) to be tested
4714      * @return {@code true} if the specified code point is between
4715      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4716      *         {@link #MAX_CODE_POINT} inclusive;
4717      *         {@code false} otherwise.
4718      * @since  1.5
4719      */
4720     public static boolean isSupplementaryCodePoint(int codePoint) {
4721         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4722             && codePoint <  MAX_CODE_POINT + 1;
4723     }
4724 
4725     /**
4726      * Determines if the given {@code char} value is a
4727      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4728      * Unicode high-surrogate code unit</a>
4729      * (also known as <i>leading-surrogate code unit</i>).
4730      *
4731      * <p>Such values do not represent characters by themselves,
4732      * but are used in the representation of
4733      * <a href="#supplementary">supplementary characters</a>
4734      * in the UTF-16 encoding.
4735      *
4736      * @param  ch the {@code char} value to be tested.
4737      * @return {@code true} if the {@code char} value is between
4738      *         {@link #MIN_HIGH_SURROGATE} and
4739      *         {@link #MAX_HIGH_SURROGATE} inclusive;
4740      *         {@code false} otherwise.
4741      * @see    Character#isLowSurrogate(char)
4742      * @see    Character.UnicodeBlock#of(int)
4743      * @since  1.5
4744      */
4745     public static boolean isHighSurrogate(char ch) {
4746         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4747         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4748     }
4749 
4750     /**
4751      * Determines if the given {@code char} value is a
4752      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4753      * Unicode low-surrogate code unit</a>
4754      * (also known as <i>trailing-surrogate code unit</i>).
4755      *
4756      * <p>Such values do not represent characters by themselves,
4757      * but are used in the representation of
4758      * <a href="#supplementary">supplementary characters</a>
4759      * in the UTF-16 encoding.
4760      *
4761      * @param  ch the {@code char} value to be tested.
4762      * @return {@code true} if the {@code char} value is between
4763      *         {@link #MIN_LOW_SURROGATE} and
4764      *         {@link #MAX_LOW_SURROGATE} inclusive;
4765      *         {@code false} otherwise.
4766      * @see    Character#isHighSurrogate(char)
4767      * @since  1.5
4768      */
4769     public static boolean isLowSurrogate(char ch) {
4770         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4771     }
4772 
4773     /**
4774      * Determines if the given {@code char} value is a Unicode
4775      * <i>surrogate code unit</i>.
4776      *
4777      * <p>Such values do not represent characters by themselves,
4778      * but are used in the representation of
4779      * <a href="#supplementary">supplementary characters</a>
4780      * in the UTF-16 encoding.
4781      *
4782      * <p>A char value is a surrogate code unit if and only if it is either
4783      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4784      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4785      *
4786      * @param  ch the {@code char} value to be tested.
4787      * @return {@code true} if the {@code char} value is between
4788      *         {@link #MIN_SURROGATE} and
4789      *         {@link #MAX_SURROGATE} inclusive;
4790      *         {@code false} otherwise.
4791      * @since  1.7
4792      */
4793     public static boolean isSurrogate(char ch) {
4794         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4795     }
4796 
4797     /**
4798      * Determines whether the specified pair of {@code char}
4799      * values is a valid
4800      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4801      * Unicode surrogate pair</a>.
4802 
4803      * <p>This method is equivalent to the expression:
4804      * <blockquote><pre>{@code
4805      * isHighSurrogate(high) && isLowSurrogate(low)
4806      * }</pre></blockquote>
4807      *
4808      * @param  high the high-surrogate code value to be tested
4809      * @param  low the low-surrogate code value to be tested
4810      * @return {@code true} if the specified high and
4811      * low-surrogate code values represent a valid surrogate pair;
4812      * {@code false} otherwise.
4813      * @since  1.5
4814      */
4815     public static boolean isSurrogatePair(char high, char low) {
4816         return isHighSurrogate(high) && isLowSurrogate(low);
4817     }
4818 
4819     /**
4820      * Determines the number of {@code char} values needed to
4821      * represent the specified character (Unicode code point). If the
4822      * specified character is equal to or greater than 0x10000, then
4823      * the method returns 2. Otherwise, the method returns 1.
4824      *
4825      * <p>This method doesn't validate the specified character to be a
4826      * valid Unicode code point. The caller must validate the
4827      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
4828      * if necessary.
4829      *
4830      * @param   codePoint the character (Unicode code point) to be tested.
4831      * @return  2 if the character is a valid supplementary character; 1 otherwise.
4832      * @see     Character#isSupplementaryCodePoint(int)
4833      * @since   1.5
4834      */
4835     public static int charCount(int codePoint) {
4836         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4837     }
4838 
4839     /**
4840      * Converts the specified surrogate pair to its supplementary code
4841      * point value. This method does not validate the specified
4842      * surrogate pair. The caller must validate it using {@link
4843      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4844      *
4845      * @param  high the high-surrogate code unit
4846      * @param  low the low-surrogate code unit
4847      * @return the supplementary code point composed from the
4848      *         specified surrogate pair.
4849      * @since  1.5
4850      */
4851     public static int toCodePoint(char high, char low) {
4852         // Optimized form of:
4853         // return ((high - MIN_HIGH_SURROGATE) << 10)
4854         //         + (low - MIN_LOW_SURROGATE)
4855         //         + MIN_SUPPLEMENTARY_CODE_POINT;
4856         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4857                                        - (MIN_HIGH_SURROGATE << 10)
4858                                        - MIN_LOW_SURROGATE);
4859     }
4860 
4861     /**
4862      * Returns the code point at the given index of the
4863      * {@code CharSequence}. If the {@code char} value at
4864      * the given index in the {@code CharSequence} is in the
4865      * high-surrogate range, the following index is less than the
4866      * length of the {@code CharSequence}, and the
4867      * {@code char} value at the following index is in the
4868      * low-surrogate range, then the supplementary code point
4869      * corresponding to this surrogate pair is returned. Otherwise,
4870      * the {@code char} value at the given index is returned.
4871      *
4872      * @param seq a sequence of {@code char} values (Unicode code
4873      * units)
4874      * @param index the index to the {@code char} values (Unicode
4875      * code units) in {@code seq} to be converted
4876      * @return the Unicode code point at the given index
4877      * @exception NullPointerException if {@code seq} is null.
4878      * @exception IndexOutOfBoundsException if the value
4879      * {@code index} is negative or not less than
4880      * {@link CharSequence#length() seq.length()}.
4881      * @since  1.5
4882      */
4883     public static int codePointAt(CharSequence seq, int index) {
4884         char c1 = seq.charAt(index);
4885         if (isHighSurrogate(c1) && ++index < seq.length()) {
4886             char c2 = seq.charAt(index);
4887             if (isLowSurrogate(c2)) {
4888                 return toCodePoint(c1, c2);
4889             }
4890         }
4891         return c1;
4892     }
4893 
4894     /**
4895      * Returns the code point at the given index of the
4896      * {@code char} array. If the {@code char} value at
4897      * the given index in the {@code char} array is in the
4898      * high-surrogate range, the following index is less than the
4899      * length of the {@code char} array, and the
4900      * {@code char} value at the following index is in the
4901      * low-surrogate range, then the supplementary code point
4902      * corresponding to this surrogate pair is returned. Otherwise,
4903      * the {@code char} value at the given index is returned.
4904      *
4905      * @param a the {@code char} array
4906      * @param index the index to the {@code char} values (Unicode
4907      * code units) in the {@code char} array to be converted
4908      * @return the Unicode code point at the given index
4909      * @exception NullPointerException if {@code a} is null.
4910      * @exception IndexOutOfBoundsException if the value
4911      * {@code index} is negative or not less than
4912      * the length of the {@code char} array.
4913      * @since  1.5
4914      */
4915     public static int codePointAt(char[] a, int index) {
4916         return codePointAtImpl(a, index, a.length);
4917     }
4918 
4919     /**
4920      * Returns the code point at the given index of the
4921      * {@code char} array, where only array elements with
4922      * {@code index} less than {@code limit} can be used. If
4923      * the {@code char} value at the given index in the
4924      * {@code char} array is in the high-surrogate range, the
4925      * following index is less than the {@code limit}, and the
4926      * {@code char} value at the following index is in the
4927      * low-surrogate range, then the supplementary code point
4928      * corresponding to this surrogate pair is returned. Otherwise,
4929      * the {@code char} value at the given index is returned.
4930      *
4931      * @param a the {@code char} array
4932      * @param index the index to the {@code char} values (Unicode
4933      * code units) in the {@code char} array to be converted
4934      * @param limit the index after the last array element that
4935      * can be used in the {@code char} array
4936      * @return the Unicode code point at the given index
4937      * @exception NullPointerException if {@code a} is null.
4938      * @exception IndexOutOfBoundsException if the {@code index}
4939      * argument is negative or not less than the {@code limit}
4940      * argument, or if the {@code limit} argument is negative or
4941      * greater than the length of the {@code char} array.
4942      * @since  1.5
4943      */
4944     public static int codePointAt(char[] a, int index, int limit) {
4945         if (index >= limit || limit < 0 || limit > a.length) {
4946             throw new IndexOutOfBoundsException();
4947         }
4948         return codePointAtImpl(a, index, limit);
4949     }
4950 
4951     // throws ArrayIndexOutOfBoundsException if index out of bounds
4952     static int codePointAtImpl(char[] a, int index, int limit) {
4953         char c1 = a[index];
4954         if (isHighSurrogate(c1) && ++index < limit) {
4955             char c2 = a[index];
4956             if (isLowSurrogate(c2)) {
4957                 return toCodePoint(c1, c2);
4958             }
4959         }
4960         return c1;
4961     }
4962 
4963     /**
4964      * Returns the code point preceding the given index of the
4965      * {@code CharSequence}. If the {@code char} value at
4966      * {@code (index - 1)} in the {@code CharSequence} is in
4967      * the low-surrogate range, {@code (index - 2)} is not
4968      * negative, and the {@code char} value at {@code (index - 2)}
4969      * in the {@code CharSequence} is in the
4970      * high-surrogate range, then the supplementary code point
4971      * corresponding to this surrogate pair is returned. Otherwise,
4972      * the {@code char} value at {@code (index - 1)} is
4973      * returned.
4974      *
4975      * @param seq the {@code CharSequence} instance
4976      * @param index the index following the code point that should be returned
4977      * @return the Unicode code point value before the given index.
4978      * @exception NullPointerException if {@code seq} is null.
4979      * @exception IndexOutOfBoundsException if the {@code index}
4980      * argument is less than 1 or greater than {@link
4981      * CharSequence#length() seq.length()}.
4982      * @since  1.5
4983      */
4984     public static int codePointBefore(CharSequence seq, int index) {
4985         char c2 = seq.charAt(--index);
4986         if (isLowSurrogate(c2) && index > 0) {
4987             char c1 = seq.charAt(--index);
4988             if (isHighSurrogate(c1)) {
4989                 return toCodePoint(c1, c2);
4990             }
4991         }
4992         return c2;
4993     }
4994 
4995     /**
4996      * Returns the code point preceding the given index of the
4997      * {@code char} array. If the {@code char} value at
4998      * {@code (index - 1)} in the {@code char} array is in
4999      * the low-surrogate range, {@code (index - 2)} is not
5000      * negative, and the {@code char} value at {@code (index - 2)}
5001      * in the {@code char} array is in the
5002      * high-surrogate range, then the supplementary code point
5003      * corresponding to this surrogate pair is returned. Otherwise,
5004      * the {@code char} value at {@code (index - 1)} is
5005      * returned.
5006      *
5007      * @param a the {@code char} array
5008      * @param index the index following the code point that should be returned
5009      * @return the Unicode code point value before the given index.
5010      * @exception NullPointerException if {@code a} is null.
5011      * @exception IndexOutOfBoundsException if the {@code index}
5012      * argument is less than 1 or greater than the length of the
5013      * {@code char} array
5014      * @since  1.5
5015      */
5016     public static int codePointBefore(char[] a, int index) {
5017         return codePointBeforeImpl(a, index, 0);
5018     }
5019 
5020     /**
5021      * Returns the code point preceding the given index of the
5022      * {@code char} array, where only array elements with
5023      * {@code index} greater than or equal to {@code start}
5024      * can be used. If the {@code char} value at {@code (index - 1)}
5025      * in the {@code char} array is in the
5026      * low-surrogate range, {@code (index - 2)} is not less than
5027      * {@code start}, and the {@code char} value at
5028      * {@code (index - 2)} in the {@code char} array is in
5029      * the high-surrogate range, then the supplementary code point
5030      * corresponding to this surrogate pair is returned. Otherwise,
5031      * the {@code char} value at {@code (index - 1)} is
5032      * returned.
5033      *
5034      * @param a the {@code char} array
5035      * @param index the index following the code point that should be returned
5036      * @param start the index of the first array element in the
5037      * {@code char} array
5038      * @return the Unicode code point value before the given index.
5039      * @exception NullPointerException if {@code a} is null.
5040      * @exception IndexOutOfBoundsException if the {@code index}
5041      * argument is not greater than the {@code start} argument or
5042      * is greater than the length of the {@code char} array, or
5043      * if the {@code start} argument is negative or not less than
5044      * the length of the {@code char} array.
5045      * @since  1.5
5046      */
5047     public static int codePointBefore(char[] a, int index, int start) {
5048         if (index <= start || start < 0 || start >= a.length) {
5049             throw new IndexOutOfBoundsException();
5050         }
5051         return codePointBeforeImpl(a, index, start);
5052     }
5053 
5054     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
5055     static int codePointBeforeImpl(char[] a, int index, int start) {
5056         char c2 = a[--index];
5057         if (isLowSurrogate(c2) && index > start) {
5058             char c1 = a[--index];
5059             if (isHighSurrogate(c1)) {
5060                 return toCodePoint(c1, c2);
5061             }
5062         }
5063         return c2;
5064     }
5065 
5066     /**
5067      * Returns the leading surrogate (a
5068      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
5069      * high surrogate code unit</a>) of the
5070      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5071      * surrogate pair</a>
5072      * representing the specified supplementary character (Unicode
5073      * code point) in the UTF-16 encoding.  If the specified character
5074      * is not a
5075      * <a href="Character.html#supplementary">supplementary character</a>,
5076      * an unspecified {@code char} is returned.
5077      *
5078      * <p>If
5079      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5080      * is {@code true}, then
5081      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
5082      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
5083      * are also always {@code true}.
5084      *
5085      * @param   codePoint a supplementary character (Unicode code point)
5086      * @return  the leading surrogate code unit used to represent the
5087      *          character in the UTF-16 encoding
5088      * @since   1.7
5089      */
5090     public static char highSurrogate(int codePoint) {
5091         return (char) ((codePoint >>> 10)
5092             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
5093     }
5094 
5095     /**
5096      * Returns the trailing surrogate (a
5097      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
5098      * low surrogate code unit</a>) of the
5099      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5100      * surrogate pair</a>
5101      * representing the specified supplementary character (Unicode
5102      * code point) in the UTF-16 encoding.  If the specified character
5103      * is not a
5104      * <a href="Character.html#supplementary">supplementary character</a>,
5105      * an unspecified {@code char} is returned.
5106      *
5107      * <p>If
5108      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5109      * is {@code true}, then
5110      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
5111      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
5112      * are also always {@code true}.
5113      *
5114      * @param   codePoint a supplementary character (Unicode code point)
5115      * @return  the trailing surrogate code unit used to represent the
5116      *          character in the UTF-16 encoding
5117      * @since   1.7
5118      */
5119     public static char lowSurrogate(int codePoint) {
5120         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
5121     }
5122 
5123     /**
5124      * Converts the specified character (Unicode code point) to its
5125      * UTF-16 representation. If the specified code point is a BMP
5126      * (Basic Multilingual Plane or Plane 0) value, the same value is
5127      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
5128      * specified code point is a supplementary character, its
5129      * surrogate values are stored in {@code dst[dstIndex]}
5130      * (high-surrogate) and {@code dst[dstIndex+1]}
5131      * (low-surrogate), and 2 is returned.
5132      *
5133      * @param  codePoint the character (Unicode code point) to be converted.
5134      * @param  dst an array of {@code char} in which the
5135      * {@code codePoint}'s UTF-16 value is stored.
5136      * @param dstIndex the start index into the {@code dst}
5137      * array where the converted value is stored.
5138      * @return 1 if the code point is a BMP code point, 2 if the
5139      * code point is a supplementary code point.
5140      * @exception IllegalArgumentException if the specified
5141      * {@code codePoint} is not a valid Unicode code point.
5142      * @exception NullPointerException if the specified {@code dst} is null.
5143      * @exception IndexOutOfBoundsException if {@code dstIndex}
5144      * is negative or not less than {@code dst.length}, or if
5145      * {@code dst} at {@code dstIndex} doesn't have enough
5146      * array element(s) to store the resulting {@code char}
5147      * value(s). (If {@code dstIndex} is equal to
5148      * {@code dst.length-1} and the specified
5149      * {@code codePoint} is a supplementary character, the
5150      * high-surrogate value is not stored in
5151      * {@code dst[dstIndex]}.)
5152      * @since  1.5
5153      */
5154     public static int toChars(int codePoint, char[] dst, int dstIndex) {
5155         if (isBmpCodePoint(codePoint)) {
5156             dst[dstIndex] = (char) codePoint;
5157             return 1;
5158         } else if (isValidCodePoint(codePoint)) {
5159             toSurrogates(codePoint, dst, dstIndex);
5160             return 2;
5161         } else {
5162             throw new IllegalArgumentException();
5163         }
5164     }
5165 
5166     /**
5167      * Converts the specified character (Unicode code point) to its
5168      * UTF-16 representation stored in a {@code char} array. If
5169      * the specified code point is a BMP (Basic Multilingual Plane or
5170      * Plane 0) value, the resulting {@code char} array has
5171      * the same value as {@code codePoint}. If the specified code
5172      * point is a supplementary code point, the resulting
5173      * {@code char} array has the corresponding surrogate pair.
5174      *
5175      * @param  codePoint a Unicode code point
5176      * @return a {@code char} array having
5177      *         {@code codePoint}'s UTF-16 representation.
5178      * @exception IllegalArgumentException if the specified
5179      * {@code codePoint} is not a valid Unicode code point.
5180      * @since  1.5
5181      */
5182     public static char[] toChars(int codePoint) {
5183         if (isBmpCodePoint(codePoint)) {
5184             return new char[] { (char) codePoint };
5185         } else if (isValidCodePoint(codePoint)) {
5186             char[] result = new char[2];
5187             toSurrogates(codePoint, result, 0);
5188             return result;
5189         } else {
5190             throw new IllegalArgumentException();
5191         }
5192     }
5193 
5194     static void toSurrogates(int codePoint, char[] dst, int index) {
5195         // We write elements "backwards" to guarantee all-or-nothing
5196         dst[index+1] = lowSurrogate(codePoint);
5197         dst[index] = highSurrogate(codePoint);
5198     }
5199 
5200     /**
5201      * Returns the number of Unicode code points in the text range of
5202      * the specified char sequence. The text range begins at the
5203      * specified {@code beginIndex} and extends to the
5204      * {@code char} at index {@code endIndex - 1}. Thus the
5205      * length (in {@code char}s) of the text range is
5206      * {@code endIndex-beginIndex}. Unpaired surrogates within
5207      * the text range count as one code point each.
5208      *
5209      * @param seq the char sequence
5210      * @param beginIndex the index to the first {@code char} of
5211      * the text range.
5212      * @param endIndex the index after the last {@code char} of
5213      * the text range.
5214      * @return the number of Unicode code points in the specified text
5215      * range
5216      * @exception NullPointerException if {@code seq} is null.
5217      * @exception IndexOutOfBoundsException if the
5218      * {@code beginIndex} is negative, or {@code endIndex}
5219      * is larger than the length of the given sequence, or
5220      * {@code beginIndex} is larger than {@code endIndex}.
5221      * @since  1.5
5222      */
5223     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5224         int length = seq.length();
5225         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5226             throw new IndexOutOfBoundsException();
5227         }
5228         int n = endIndex - beginIndex;
5229         for (int i = beginIndex; i < endIndex; ) {
5230             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5231                 isLowSurrogate(seq.charAt(i))) {
5232                 n--;
5233                 i++;
5234             }
5235         }
5236         return n;
5237     }
5238 
5239     /**
5240      * Returns the number of Unicode code points in a subarray of the
5241      * {@code char} array argument. The {@code offset}
5242      * argument is the index of the first {@code char} of the
5243      * subarray and the {@code count} argument specifies the
5244      * length of the subarray in {@code char}s. Unpaired
5245      * surrogates within the subarray count as one code point each.
5246      *
5247      * @param a the {@code char} array
5248      * @param offset the index of the first {@code char} in the
5249      * given {@code char} array
5250      * @param count the length of the subarray in {@code char}s
5251      * @return the number of Unicode code points in the specified subarray
5252      * @exception NullPointerException if {@code a} is null.
5253      * @exception IndexOutOfBoundsException if {@code offset} or
5254      * {@code count} is negative, or if {@code offset +
5255      * count} is larger than the length of the given array.
5256      * @since  1.5
5257      */
5258     public static int codePointCount(char[] a, int offset, int count) {
5259         if (count > a.length - offset || offset < 0 || count < 0) {
5260             throw new IndexOutOfBoundsException();
5261         }
5262         return codePointCountImpl(a, offset, count);
5263     }
5264 
5265     static int codePointCountImpl(char[] a, int offset, int count) {
5266         int endIndex = offset + count;
5267         int n = count;
5268         for (int i = offset; i < endIndex; ) {
5269             if (isHighSurrogate(a[i++]) && i < endIndex &&
5270                 isLowSurrogate(a[i])) {
5271                 n--;
5272                 i++;
5273             }
5274         }
5275         return n;
5276     }
5277 
5278     /**
5279      * Returns the index within the given char sequence that is offset
5280      * from the given {@code index} by {@code codePointOffset}
5281      * code points. Unpaired surrogates within the text range given by
5282      * {@code index} and {@code codePointOffset} count as
5283      * one code point each.
5284      *
5285      * @param seq the char sequence
5286      * @param index the index to be offset
5287      * @param codePointOffset the offset in code points
5288      * @return the index within the char sequence
5289      * @exception NullPointerException if {@code seq} is null.
5290      * @exception IndexOutOfBoundsException if {@code index}
5291      *   is negative or larger then the length of the char sequence,
5292      *   or if {@code codePointOffset} is positive and the
5293      *   subsequence starting with {@code index} has fewer than
5294      *   {@code codePointOffset} code points, or if
5295      *   {@code codePointOffset} is negative and the subsequence
5296      *   before {@code index} has fewer than the absolute value
5297      *   of {@code codePointOffset} code points.
5298      * @since 1.5
5299      */
5300     public static int offsetByCodePoints(CharSequence seq, int index,
5301                                          int codePointOffset) {
5302         int length = seq.length();
5303         if (index < 0 || index > length) {
5304             throw new IndexOutOfBoundsException();
5305         }
5306 
5307         int x = index;
5308         if (codePointOffset >= 0) {
5309             int i;
5310             for (i = 0; x < length && i < codePointOffset; i++) {
5311                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5312                     isLowSurrogate(seq.charAt(x))) {
5313                     x++;
5314                 }
5315             }
5316             if (i < codePointOffset) {
5317                 throw new IndexOutOfBoundsException();
5318             }
5319         } else {
5320             int i;
5321             for (i = codePointOffset; x > 0 && i < 0; i++) {
5322                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5323                     isHighSurrogate(seq.charAt(x-1))) {
5324                     x--;
5325                 }
5326             }
5327             if (i < 0) {
5328                 throw new IndexOutOfBoundsException();
5329             }
5330         }
5331         return x;
5332     }
5333 
5334     /**
5335      * Returns the index within the given {@code char} subarray
5336      * that is offset from the given {@code index} by
5337      * {@code codePointOffset} code points. The
5338      * {@code start} and {@code count} arguments specify a
5339      * subarray of the {@code char} array. Unpaired surrogates
5340      * within the text range given by {@code index} and
5341      * {@code codePointOffset} count as one code point each.
5342      *
5343      * @param a the {@code char} array
5344      * @param start the index of the first {@code char} of the
5345      * subarray
5346      * @param count the length of the subarray in {@code char}s
5347      * @param index the index to be offset
5348      * @param codePointOffset the offset in code points
5349      * @return the index within the subarray
5350      * @exception NullPointerException if {@code a} is null.
5351      * @exception IndexOutOfBoundsException
5352      *   if {@code start} or {@code count} is negative,
5353      *   or if {@code start + count} is larger than the length of
5354      *   the given array,
5355      *   or if {@code index} is less than {@code start} or
5356      *   larger then {@code start + count},
5357      *   or if {@code codePointOffset} is positive and the text range
5358      *   starting with {@code index} and ending with {@code start + count - 1}
5359      *   has fewer than {@code codePointOffset} code
5360      *   points,
5361      *   or if {@code codePointOffset} is negative and the text range
5362      *   starting with {@code start} and ending with {@code index - 1}
5363      *   has fewer than the absolute value of
5364      *   {@code codePointOffset} code points.
5365      * @since 1.5
5366      */
5367     public static int offsetByCodePoints(char[] a, int start, int count,
5368                                          int index, int codePointOffset) {
5369         if (count > a.length-start || start < 0 || count < 0
5370             || index < start || index > start+count) {
5371             throw new IndexOutOfBoundsException();
5372         }
5373         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5374     }
5375 
5376     static int offsetByCodePointsImpl(char[]a, int start, int count,
5377                                       int index, int codePointOffset) {
5378         int x = index;
5379         if (codePointOffset >= 0) {
5380             int limit = start + count;
5381             int i;
5382             for (i = 0; x < limit && i < codePointOffset; i++) {
5383                 if (isHighSurrogate(a[x++]) && x < limit &&
5384                     isLowSurrogate(a[x])) {
5385                     x++;
5386                 }
5387             }
5388             if (i < codePointOffset) {
5389                 throw new IndexOutOfBoundsException();
5390             }
5391         } else {
5392             int i;
5393             for (i = codePointOffset; x > start && i < 0; i++) {
5394                 if (isLowSurrogate(a[--x]) && x > start &&
5395                     isHighSurrogate(a[x-1])) {
5396                     x--;
5397                 }
5398             }
5399             if (i < 0) {
5400                 throw new IndexOutOfBoundsException();
5401             }
5402         }
5403         return x;
5404     }
5405 
5406     /**
5407      * Determines if the specified character is a lowercase character.
5408      * <p>
5409      * A character is lowercase if its general category type, provided
5410      * by {@code Character.getType(ch)}, is
5411      * {@code LOWERCASE_LETTER}, or it has contributory property
5412      * Other_Lowercase as defined by the Unicode Standard.
5413      * <p>
5414      * The following are examples of lowercase characters:
5415      * <blockquote><pre>
5416      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5417      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
5418      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
5419      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
5420      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
5421      * </pre></blockquote>
5422      * <p> Many other Unicode characters are lowercase too.
5423      *
5424      * <p><b>Note:</b> This method cannot handle <a
5425      * href="#supplementary"> supplementary characters</a>. To support
5426      * all Unicode characters, including supplementary characters, use
5427      * the {@link #isLowerCase(int)} method.
5428      *
5429      * @param   ch   the character to be tested.
5430      * @return  {@code true} if the character is lowercase;
5431      *          {@code false} otherwise.
5432      * @see     Character#isLowerCase(char)
5433      * @see     Character#isTitleCase(char)
5434      * @see     Character#toLowerCase(char)
5435      * @see     Character#getType(char)
5436      */
5437     public static boolean isLowerCase(char ch) {
5438         return isLowerCase((int)ch);
5439     }
5440 
5441     /**
5442      * Determines if the specified character (Unicode code point) is a
5443      * lowercase character.
5444      * <p>
5445      * A character is lowercase if its general category type, provided
5446      * by {@link Character#getType getType(codePoint)}, is
5447      * {@code LOWERCASE_LETTER}, or it has contributory property
5448      * Other_Lowercase as defined by the Unicode Standard.
5449      * <p>
5450      * The following are examples of lowercase characters:
5451      * <blockquote><pre>
5452      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5453      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
5454      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
5455      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
5456      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
5457      * </pre></blockquote>
5458      * <p> Many other Unicode characters are lowercase too.
5459      *
5460      * @param   codePoint the character (Unicode code point) to be tested.
5461      * @return  {@code true} if the character is lowercase;
5462      *          {@code false} otherwise.
5463      * @see     Character#isLowerCase(int)
5464      * @see     Character#isTitleCase(int)
5465      * @see     Character#toLowerCase(int)
5466      * @see     Character#getType(int)
5467      * @since   1.5
5468      */
5469     public static boolean isLowerCase(int codePoint) {
5470         return getType(codePoint) == Character.LOWERCASE_LETTER ||
5471                CharacterData.of(codePoint).isOtherLowercase(codePoint);
5472     }
5473 
5474     /**
5475      * Determines if the specified character is an uppercase character.
5476      * <p>
5477      * A character is uppercase if its general category type, provided by
5478      * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}.
5479      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5480      * <p>
5481      * The following are examples of uppercase characters:
5482      * <blockquote><pre>
5483      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5484      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
5485      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
5486      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
5487      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
5488      * </pre></blockquote>
5489      * <p> Many other Unicode characters are uppercase too.
5490      *
5491      * <p><b>Note:</b> This method cannot handle <a
5492      * href="#supplementary"> supplementary characters</a>. To support
5493      * all Unicode characters, including supplementary characters, use
5494      * the {@link #isUpperCase(int)} method.
5495      *
5496      * @param   ch   the character to be tested.
5497      * @return  {@code true} if the character is uppercase;
5498      *          {@code false} otherwise.
5499      * @see     Character#isLowerCase(char)
5500      * @see     Character#isTitleCase(char)
5501      * @see     Character#toUpperCase(char)
5502      * @see     Character#getType(char)
5503      * @since   1.0
5504      */
5505     public static boolean isUpperCase(char ch) {
5506         return isUpperCase((int)ch);
5507     }
5508 
5509     /**
5510      * Determines if the specified character (Unicode code point) is an uppercase character.
5511      * <p>
5512      * A character is uppercase if its general category type, provided by
5513      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5514      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5515      * <p>
5516      * The following are examples of uppercase characters:
5517      * <blockquote><pre>
5518      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5519      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
5520      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
5521      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
5522      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
5523      * </pre></blockquote>
5524      * <p> Many other Unicode characters are uppercase too.<p>
5525      *
5526      * @param   codePoint the character (Unicode code point) to be tested.
5527      * @return  {@code true} if the character is uppercase;
5528      *          {@code false} otherwise.
5529      * @see     Character#isLowerCase(int)
5530      * @see     Character#isTitleCase(int)
5531      * @see     Character#toUpperCase(int)
5532      * @see     Character#getType(int)
5533      * @since   1.5
5534      */
5535     public static boolean isUpperCase(int codePoint) {
5536         return getType(codePoint) == Character.UPPERCASE_LETTER ||
5537                CharacterData.of(codePoint).isOtherUppercase(codePoint);
5538     }
5539 
5540     /**
5541      * Determines if the specified character is a titlecase character.
5542      * <p>
5543      * A character is a titlecase character if its general
5544      * category type, provided by {@code Character.getType(ch)},
5545      * is {@code TITLECASE_LETTER}.
5546      * <p>
5547      * Some characters look like pairs of Latin letters. For example, there
5548      * is an uppercase letter that looks like "LJ" and has a corresponding
5549      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5550      * is the appropriate form to use when rendering a word in lowercase
5551      * with initial capitals, as for a book title.
5552      * <p>
5553      * These are some of the Unicode characters for which this method returns
5554      * {@code true}:
5555      * <ul>
5556      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5557      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5558      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5559      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5560      * </ul>
5561      * <p> Many other Unicode characters are titlecase too.
5562      *
5563      * <p><b>Note:</b> This method cannot handle <a
5564      * href="#supplementary"> supplementary characters</a>. To support
5565      * all Unicode characters, including supplementary characters, use
5566      * the {@link #isTitleCase(int)} method.
5567      *
5568      * @param   ch   the character to be tested.
5569      * @return  {@code true} if the character is titlecase;
5570      *          {@code false} otherwise.
5571      * @see     Character#isLowerCase(char)
5572      * @see     Character#isUpperCase(char)
5573      * @see     Character#toTitleCase(char)
5574      * @see     Character#getType(char)
5575      * @since   1.0.2
5576      */
5577     public static boolean isTitleCase(char ch) {
5578         return isTitleCase((int)ch);
5579     }
5580 
5581     /**
5582      * Tests whether the given character (Unicode code point) is titlecase.
5583      * <p>
5584      * A character is titlecase exactly when its general category type, as
5585      * reported by {@link Character#getType(int) getType(codePoint)}, is
5586      * {@code TITLECASE_LETTER}.
5587      * <p>
5588      * Certain characters resemble pairs of Latin letters. One uppercase
5589      * letter, for instance, looks like "LJ", and its lowercase counterpart
5590      * looks like "lj". There is also a third form that looks like "Lj"; it
5591      * is the appropriate form when a word is rendered in lowercase with
5592      * initial capitals, as for a book title.
5593      * <p>
5594      * Characters for which this method returns {@code true} include:
5595      * <ul>
5596      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5597      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5598      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5599      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5600      * </ul>
5601      * <p> Many other Unicode characters are titlecase as well.
5602      *
5603      * @param   codePoint the character (Unicode code point) to be tested.
5604      * @return  {@code true} if the character is titlecase;
5605      *          {@code false} otherwise.
5606      * @see     Character#isLowerCase(int)
5607      * @see     Character#isUpperCase(int)
5608      * @see     Character#toTitleCase(int)
5609      * @see     Character#getType(int)
5610      * @since   1.5
5611      */
5612     public static boolean isTitleCase(int codePoint) {
5613         final int category = getType(codePoint);
5614         return category == Character.TITLECASE_LETTER;
5615     }
5616 
5617     /**
5618      * Tests whether the given character is a digit.
5619      * <p>
5620      * A character is a digit when {@code Character.getType(ch)} reports the
5621      * general category type {@code DECIMAL_DIGIT_NUMBER}.
5622      * <p>
5623      * Unicode character ranges containing digits include:
5624      * <ul>
5625      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5626      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5627      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5628      *     Arabic-Indic digits
5629      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5630      *     Extended Arabic-Indic digits
5631      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5632      *     Devanagari digits
5633      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5634      *     Fullwidth digits
5635      * </ul>
5636      *
5637      * Digits occur in many other character ranges as well.
5638      *
5639      * <p><b>Note:</b> <a href="#supplementary">Supplementary characters</a>
5640      * cannot be handled by this method. To cover every Unicode character,
5641      * supplementary characters included, use the {@link #isDigit(int)}
5642      * method instead.
5643      *
5644      * @param   ch   the character to be tested.
5645      * @return  {@code true} if the character is a digit;
5646      *          {@code false} otherwise.
5647      * @see     Character#digit(char, int)
5648      * @see     Character#forDigit(int, int)
5649      * @see     Character#getType(char)
5650      */
5651     public static boolean isDigit(char ch) {
5652         final int codePoint = ch;
5653         return isDigit(codePoint);
5654     }
5655 
5656     /**
5657      * Tests whether the given character (Unicode code point) is a digit.
5658      * <p>
5659      * A character is a digit when {@link #getType(int) getType(codePoint)}
5660      * reports the general category type {@code DECIMAL_DIGIT_NUMBER}.
5661      * <p>
5662      * Unicode character ranges containing digits include:
5663      * <ul>
5664      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5665      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5666      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5667      *     Arabic-Indic digits
5668      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5669      *     Extended Arabic-Indic digits
5670      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5671      *     Devanagari digits
5672      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5673      *     Fullwidth digits
5674      * </ul>
5675      *
5676      * Digits occur in many other character ranges as well.
5677      *
5678      * @param   codePoint the character (Unicode code point) to be tested.
5679      * @return  {@code true} if the character is a digit;
5680      *          {@code false} otherwise.
5681      * @see     Character#forDigit(int, int)
5682      * @see     Character#getType(int)
5683      * @since   1.5
5684      */
5685     public static boolean isDigit(int codePoint) {
5686         final int category = getType(codePoint);
5687         return category == Character.DECIMAL_DIGIT_NUMBER;
5688     }
5689 
5690     /**
5691      * Tests whether a character is defined in Unicode.
5692      * <p>
5693      * A character counts as defined when at least one of these holds:
5694      * <ul>
5695      * <li>The UnicodeData file has an entry for it.
5696      * <li>Its value lies in a range defined by the UnicodeData file.
5697      * </ul>
5698      *
5699      * <p><b>Note:</b> <a href="#supplementary">Supplementary characters</a>
5700      * cannot be handled by this method; to cover every Unicode character,
5701      * supplementary characters included, use {@link #isDefined(int)}.
5702      *
5703      * @param   ch   the character to be tested
5704      * @return  {@code true} if the character has a defined meaning
5705      *          in Unicode; {@code false} otherwise.
5706      * @see     Character#isDigit(char)
5707      * @see     Character#isLetter(char)
5708      * @see     Character#isLetterOrDigit(char)
5709      * @see     Character#isLowerCase(char)
5710      * @see     Character#isTitleCase(char)
5711      * @see     Character#isUpperCase(char)
5712      * @since   1.0.2
5713      */
5714     public static boolean isDefined(char ch) {
5715         final int codePoint = ch;
5716         return isDefined(codePoint);
5717     }
5718 
5719     /**
5720      * Tests whether a character (Unicode code point) is defined in Unicode.
5721      * <p>
5722      * A character counts as defined when at least one of these holds:
5723      * <ul>
5724      * <li>The UnicodeData file has an entry for it.
5725      * <li>Its value lies in a range defined by the UnicodeData file.
5726      * </ul>
5727      * @param   codePoint the character (Unicode code point) to be tested.
5728      * @return  {@code true} if the character has a defined meaning
5729      *          in Unicode; {@code false} otherwise.
5730      * @see     Character#isDigit(int)
5731      * @see     Character#isLetter(int)
5732      * @see     Character#isLetterOrDigit(int)
5733      * @see     Character#isLowerCase(int)
5734      * @see     Character#isTitleCase(int)
5735      * @see     Character#isUpperCase(int)
5736      * @since   1.5
5737      */
5738     public static boolean isDefined(int codePoint) {
5739         final int category = getType(codePoint);
5740         return category != Character.UNASSIGNED;
5741     }
5742 
5743     /**
5744      * Tests whether the given character is a letter.
5745      * <p>
5746      * A character is regarded as a letter when its general category
5747      * type, as given by {@code Character.getType(ch)}, is one of the
5748      * following:
5749      * <ul>
5750      * <li> {@code UPPERCASE_LETTER}
5751      * <li> {@code LOWERCASE_LETTER}
5752      * <li> {@code TITLECASE_LETTER}
5753      * <li> {@code MODIFIER_LETTER}
5754      * <li> {@code OTHER_LETTER}
5755      * </ul>
5756      *
5757      * Letters need not have case: many characters are letters yet are
5758      * neither uppercase, lowercase nor titlecase.
5759      *
5760      * <p><b>Note:</b> <a href="#supplementary">Supplementary characters</a>
5761      * cannot be handled by this method; to cover every Unicode character,
5762      * supplementary characters included, use {@link #isLetter(int)}.
5763      *
5764      * @param   ch   the character to be tested.
5765      * @return  {@code true} if the character is a letter;
5766      *          {@code false} otherwise.
5767      * @see     Character#isDigit(char)
5768      * @see     Character#isJavaIdentifierStart(char)
5769      * @see     Character#isJavaLetter(char)
5770      * @see     Character#isJavaLetterOrDigit(char)
5771      * @see     Character#isLetterOrDigit(char)
5772      * @see     Character#isLowerCase(char)
5773      * @see     Character#isTitleCase(char)
5774      * @see     Character#isUnicodeIdentifierStart(char)
5775      * @see     Character#isUpperCase(char)
5776      */
5777     public static boolean isLetter(char ch) {
5778         final int codePoint = ch;
5779         return isLetter(codePoint);
5780     }
5781 
5782     /**
5783      * Tests whether the given character (Unicode code point) is a letter.
5784      * <p>
5785      * A character is regarded as a letter when its general category type, as
5786      * given by {@link Character#getType(int) getType(codePoint)}, is one of:
5787      * <ul>
5788      * <li> {@code UPPERCASE_LETTER}
5789      * <li> {@code LOWERCASE_LETTER}
5790      * <li> {@code TITLECASE_LETTER}
5791      * <li> {@code MODIFIER_LETTER}
5792      * <li> {@code OTHER_LETTER}
5793      * </ul>
5794      *
5795      * Letters need not have case: many characters are letters yet are
5796      * neither uppercase, lowercase nor titlecase.
5797      *
5798      * @param   codePoint the character (Unicode code point) to be tested.
5799      * @return  {@code true} if the character is a letter;
5800      *          {@code false} otherwise.
5801      * @see     Character#isDigit(int)
5802      * @see     Character#isJavaIdentifierStart(int)
5803      * @see     Character#isLetterOrDigit(int)
5804      * @see     Character#isLowerCase(int)
5805      * @see     Character#isTitleCase(int)
5806      * @see     Character#isUnicodeIdentifierStart(int)
5807      * @see     Character#isUpperCase(int)
5808      * @since   1.5
5809      */
5810     public static boolean isLetter(int codePoint) {
5811         final int letterMask = (1 << Character.UPPERCASE_LETTER)
5812                 | (1 << Character.LOWERCASE_LETTER)
5813                 | (1 << Character.TITLECASE_LETTER)
5814                 | (1 << Character.MODIFIER_LETTER)
5815                 | (1 << Character.OTHER_LETTER);
5816         // Bit n of the mask is set exactly when category n is a letter category.
5817         return ((letterMask >> getType(codePoint)) & 1) != 0;
5818     }
5819 
5820     /**
5821      * Tests whether the given character is a letter or a digit.
5822      * <p>
5823      * A character is regarded as a letter or digit when either
5824      * {@code Character.isLetter(char ch)} or
5825      * {@code Character.isDigit(char ch)} returns {@code true}
5826      * for it.
5827      *
5828      * <p><b>Note:</b> <a href="#supplementary">Supplementary characters</a>
5829      * cannot be handled by this method; to cover every Unicode character,
5830      * supplementary characters included, use {@link #isLetterOrDigit(int)}.
5831      *
5832      * @param   ch   the character to be tested.
5833      * @return  {@code true} if the character is a letter or digit;
5834      *          {@code false} otherwise.
5835      * @see     Character#isDigit(char)
5836      * @see     Character#isJavaIdentifierPart(char)
5837      * @see     Character#isJavaLetter(char)
5838      * @see     Character#isJavaLetterOrDigit(char)
5839      * @see     Character#isLetter(char)
5840      * @see     Character#isUnicodeIdentifierPart(char)
5841      * @since   1.0.2
5842      */
5843     public static boolean isLetterOrDigit(char ch) {
5844         final int codePoint = ch;
5845         return isLetterOrDigit(codePoint);
5846     }
5847 
5848     /**
5849      * Tests whether the given character (Unicode code point) is a letter or digit.
5850      * <p>
5851      * A character is regarded as a letter or digit when either
5852      * {@link #isLetter(int) isLetter(codePoint)} or
5853      * {@link #isDigit(int) isDigit(codePoint)} returns {@code true} for it.
5854      *
5855      * @param   codePoint the character (Unicode code point) to be tested.
5856      * @return  {@code true} if the character is a letter or digit;
5857      *          {@code false} otherwise.
5858      * @see     Character#isDigit(int)
5859      * @see     Character#isJavaIdentifierPart(int)
5860      * @see     Character#isLetter(int)
5861      * @see     Character#isUnicodeIdentifierPart(int)
5862      * @since   1.5
5863      */
5864     public static boolean isLetterOrDigit(int codePoint) {
5865         final int letterOrDigitMask = (1 << Character.UPPERCASE_LETTER)
5866                 | (1 << Character.LOWERCASE_LETTER)
5867                 | (1 << Character.TITLECASE_LETTER)
5868                 | (1 << Character.MODIFIER_LETTER)
5869                 | (1 << Character.OTHER_LETTER)
5870                 | (1 << Character.DECIMAL_DIGIT_NUMBER);
5871         // Bit n of the mask is set exactly when category n is a letter or digit category.
5872         return ((letterOrDigitMask >> getType(codePoint)) & 1) != 0;
5873     }
5874 
5875     /**
5876      * Tests whether the given character may start a Java identifier.
5877      * <p>
5878      * A character is permitted as the first character of a Java identifier
5879      * if and only if one of these conditions holds:
5880      * <ul>
5881      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5882      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5883      * <li> {@code ch} is a currency symbol (such as {@code '$'})
5884      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5885      * </ul>
5886      *
5887      * The conditions are evaluated against the character information from
5888      * version 6.2 of the Unicode Standard.
5889      *
5890      * @param   ch the character to be tested.
5891      * @return  {@code true} if the character may start a Java
5892      *          identifier; {@code false} otherwise.
5893      * @see     Character#isJavaLetterOrDigit(char)
5894      * @see     Character#isJavaIdentifierStart(char)
5895      * @see     Character#isJavaIdentifierPart(char)
5896      * @see     Character#isLetter(char)
5897      * @see     Character#isLetterOrDigit(char)
5898      * @see     Character#isUnicodeIdentifierStart(char)
5899      * @since   1.0.2
5900      * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}.
5901      */
5902     @Deprecated
5903     public static boolean isJavaLetter(char ch) {
5904         final boolean startsIdentifier = isJavaIdentifierStart(ch);
5905         return startsIdentifier;
5906     }
5907 
5908     /**
5909      * Tests whether the given character may appear in a Java identifier
5910      * at any position after the first.
5911      * <p>
5912      * A character is permitted as part of a Java identifier if and only if
5913      * any of these conditions holds:
5914      * <ul>
5915      * <li>  it is a letter
5916      * <li>  it is a currency symbol (such as {@code '$'})
5917      * <li>  it is a connecting punctuation character (such as {@code '_'})
5918      * <li>  it is a digit
5919      * <li>  it is a numeric letter (such as a Roman numeral character)
5920      * <li>  it is a combining mark
5921      * <li>  it is a non-spacing mark
5922      * <li> {@code isIdentifierIgnorable} returns {@code true} for the character.
5923      * </ul>
5924      *
5925      * The conditions are evaluated against the character information from
5926      * version 6.2 of the Unicode Standard.
5927      *
5928      * @param   ch the character to be tested.
5929      * @return  {@code true} if the character may be part of a
5930      *          Java identifier; {@code false} otherwise.
5931      * @see     Character#isJavaLetter(char)
5932      * @see     Character#isJavaIdentifierStart(char)
5933      * @see     Character#isJavaIdentifierPart(char)
5934      * @see     Character#isLetter(char)
5935      * @see     Character#isLetterOrDigit(char)
5936      * @see     Character#isUnicodeIdentifierPart(char)
5937      * @see     Character#isIdentifierIgnorable(char)
5938      * @since   1.0.2
5939      * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}.
5940      */
5941     @Deprecated
5942     public static boolean isJavaLetterOrDigit(char ch) {
5943         final boolean partOfIdentifier = isJavaIdentifierPart(ch);
5944         return partOfIdentifier;
5945     }
5946 
5947     /**
5948      * Tests whether the given character (Unicode code point) is alphabetic.
5949      * <p>
5950      * A character is alphabetic when its general category type, as given by
5951      * {@link Character#getType(int) getType(codePoint)}, is one of:
5952      * <ul>
5953      * <li> {@code UPPERCASE_LETTER}
5954      * <li> {@code LOWERCASE_LETTER}
5955      * <li> {@code TITLECASE_LETTER}
5956      * <li> {@code MODIFIER_LETTER}
5957      * <li> {@code OTHER_LETTER}
5958      * <li> {@code LETTER_NUMBER}
5959      * </ul>
5960      * or when it has the contributory property Other_Alphabetic as defined
5961      * by the Unicode Standard.
5962      *
5963      * @param   codePoint the character (Unicode code point) to be tested.
5964      * @return  {@code true} if the character is a Unicode alphabetic
5965      *          character; {@code false} otherwise.
5966      * @since   1.7
5967      */
5968     public static boolean isAlphabetic(int codePoint) {
5969         final int alphabeticMask = (1 << Character.UPPERCASE_LETTER)
5970                 | (1 << Character.LOWERCASE_LETTER)
5971                 | (1 << Character.TITLECASE_LETTER)
5972                 | (1 << Character.MODIFIER_LETTER)
5973                 | (1 << Character.OTHER_LETTER)
5974                 | (1 << Character.LETTER_NUMBER);
5975         final boolean alphabeticCategory = ((alphabeticMask >> getType(codePoint)) & 1) != 0;
5976         return alphabeticCategory || CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
5977     }
5978 
5979     /**
5980      * Tests whether the given character (Unicode code point) is a CJKV (Chinese,
5981      * Japanese, Korean and Vietnamese) ideograph, as defined by the Unicode Standard.
5982      *
5983      * @param   codePoint the character (Unicode code point) to be tested.
5984      * @return  {@code true} if the character is a Unicode ideograph
5985      *          character; {@code false} otherwise.
5986      * @since   1.7
5987      */
5988     public static boolean isIdeographic(int codePoint) {
5989         final CharacterData data = CharacterData.of(codePoint);
5990         return data.isIdeographic(codePoint);
5991     }
5992 
5993     /**
5994      * Tests whether the given character may start a Java identifier.
5995      * <p>
5996      * A character is permitted as the first character of a Java identifier
5997      * if and only if one of these conditions holds:
5998      * <ul>
5999      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6000      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
6001      * <li> {@code ch} is a currency symbol (such as {@code '$'})
6002      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
6003      * </ul>
6004      *
6005      * The conditions are evaluated against the character information from
6006      * version 6.2 of the Unicode Standard.
6007      *
6008      * <p><b>Note:</b> <a href="#supplementary">Supplementary characters</a>
6009      * cannot be handled by this method. To cover every Unicode character,
6010      * supplementary characters included, use the
6011      * {@link #isJavaIdentifierStart(int)} method instead.
6012      *
6013      * @param   ch the character to be tested.
6014      * @return  {@code true} if the character may start a Java identifier;
6015      *          {@code false} otherwise.
6016      * @see     Character#isJavaIdentifierPart(char)
6017      * @see     Character#isLetter(char)
6018      * @see     Character#isUnicodeIdentifierStart(char)
6019      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6020      * @since   1.1
6021      */
6022     public static boolean isJavaIdentifierStart(char ch) {
6023         final int codePoint = ch;
6024         return isJavaIdentifierStart(codePoint);
6025     }
6026 
6027     /**
6028      * Tests whether the given character (Unicode code point) may start a
6029      * Java identifier.
6030      * <p>
6031      * A character is permitted as the first character of a Java identifier
6032      * if and only if one of these conditions holds:
6033      * <ul>
6034      * <li> {@link #isLetter(int) isLetter(codePoint)} returns {@code true}
6035      * <li> {@link #getType(int) getType(codePoint)}
6036      *      returns {@code LETTER_NUMBER}
6037      * <li> the referenced character is a currency symbol (such as {@code '$'})
6038      * <li> the referenced character is a connecting punctuation character
6039      *      (such as {@code '_'}).
6040      * </ul>
6041      *
6042      * The conditions are evaluated against the character information from
6043      * version 6.2 of the Unicode Standard.
6044      *
6045      * @param   codePoint the character (Unicode code point) to be tested.
6046      * @return  {@code true} if the character may start a Java identifier;
6047      *          {@code false} otherwise.
6048      * @see     Character#isJavaIdentifierPart(int)
6049      * @see     Character#isLetter(int)
6050      * @see     Character#isUnicodeIdentifierStart(int)
6051      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6052      * @since   1.5
6053      */
6054     public static boolean isJavaIdentifierStart(int codePoint) {
6055         final CharacterData data = CharacterData.of(codePoint);
6056         return data.isJavaIdentifierStart(codePoint);
6057     }
6058 
6059     /**
6060      * Tests whether the given character may appear in a Java identifier
6061      * at any position after the first.
6062      * <p>
6063      * A character is permitted as part of a Java identifier if any of these
6064      * conditions holds:
6065      * <ul>
6066      * <li>  it is a letter
6067      * <li>  it is a currency symbol (such as {@code '$'})
6068      * <li>  it is a connecting punctuation character (such as {@code '_'})
6069      * <li>  it is a digit
6070      * <li>  it is a numeric letter (such as a Roman numeral character)
6071      * <li>  it is a combining mark
6072      * <li>  it is a non-spacing mark
6073      * <li> {@code isIdentifierIgnorable} returns {@code true} for the character
6074      * </ul>
6075      *
6076      * The conditions are evaluated against the character information from
6077      * version 6.2 of the Unicode Standard.
6078      *
6079      * <p><b>Note:</b> <a href="#supplementary">Supplementary characters</a>
6080      * cannot be handled by this method. To cover every Unicode character,
6081      * supplementary characters included, use the
6082      * {@link #isJavaIdentifierPart(int)} method instead.
6083      *
6084      * @param   ch      the character to be tested.
6085      * @return {@code true} if the character may be part of a
6086      *          Java identifier; {@code false} otherwise.
6087      * @see     Character#isIdentifierIgnorable(char)
6088      * @see     Character#isJavaIdentifierStart(char)
6089      * @see     Character#isLetterOrDigit(char)
6090      * @see     Character#isUnicodeIdentifierPart(char)
6091      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6092      * @since   1.1
6093      */
6094     public static boolean isJavaIdentifierPart(char ch) {
6095         final int codePoint = ch;
6096         return isJavaIdentifierPart(codePoint);
6097     }
6098 
6099     /**
6100      * Tests whether the given character (Unicode code point) may appear in
6101      * a Java identifier at any position after the first.
6102      * <p>
6103      * A character is permitted as part of a Java identifier if any of these
6104      * conditions holds:
6105      * <ul>
6106      * <li>  it is a letter
6107      * <li>  it is a currency symbol (such as {@code '$'})
6108      * <li>  it is a connecting punctuation character (such as {@code '_'})
6109      * <li>  it is a digit
6110      * <li>  it is a numeric letter (such as a Roman numeral character)
6111      * <li>  it is a combining mark
6112      * <li>  it is a non-spacing mark
6113      * <li> {@link #isIdentifierIgnorable(int) isIdentifierIgnorable(codePoint)}
6114      *      returns {@code true} for the code point
6115      * </ul>
6116      *
6117      * The conditions are evaluated against the character information from
6118      * version 6.2 of the Unicode Standard.
6119      *
6120      * @param   codePoint the character (Unicode code point) to be tested.
6121      * @return {@code true} if the character may be part of a
6122      *          Java identifier; {@code false} otherwise.
6123      * @see     Character#isIdentifierIgnorable(int)
6124      * @see     Character#isJavaIdentifierStart(int)
6125      * @see     Character#isLetterOrDigit(int)
6126      * @see     Character#isUnicodeIdentifierPart(int)
6127      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6128      * @since   1.5
6129      */
6130     public static boolean isJavaIdentifierPart(int codePoint) {
6131         final CharacterData data = CharacterData.of(codePoint);
6132         return data.isJavaIdentifierPart(codePoint);
6133     }
6134 
6135     /**
6136      * Tests whether the given character is permitted as the first character
6137      * of a Unicode identifier.
6138      * <p>
6139      * A character is permitted to start a Unicode identifier if and only if
6140      * one of these conditions holds:
6141      * <ul>
6142      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6143      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}.
6144      * </ul>
6145      *
6146      * <p><b>Note:</b> <a href="#supplementary">Supplementary characters</a>
6147      * cannot be handled by this method. To cover every Unicode character,
6148      * supplementary characters included, use the
6149      * {@link #isUnicodeIdentifierStart(int)} method instead.
6150      *
6151      * @param   ch      the character to be tested.
6152      * @return  {@code true} if the character may start a Unicode
6153      *          identifier; {@code false} otherwise.
6154      * @see     Character#isJavaIdentifierStart(char)
6155      * @see     Character#isLetter(char)
6156      * @see     Character#isUnicodeIdentifierPart(char)
6157      * @since   1.1
6158      */
6159     public static boolean isUnicodeIdentifierStart(char ch) {
6160         final int codePoint = ch;
6161         return isUnicodeIdentifierStart(codePoint);
6162     }
6163 
6164     /**
6165      * Tests whether the given character (Unicode code point) is permitted
6166      * as the first character of a Unicode identifier.
6167      * <p>
6168      * A character is permitted to start a Unicode identifier if and only if
6169      * one of these conditions holds:
6170      * <ul>
6171      * <li> {@link #isLetter(int) isLetter(codePoint)} returns {@code true}
6172      * <li> {@link #getType(int) getType(codePoint)}
6173      *      returns {@code LETTER_NUMBER}.
6174      * </ul>
6175      * @param   codePoint the character (Unicode code point) to be tested.
6176      * @return  {@code true} if the character may start a Unicode
6177      *          identifier; {@code false} otherwise.
6178      * @see     Character#isJavaIdentifierStart(int)
6179      * @see     Character#isLetter(int)
6180      * @see     Character#isUnicodeIdentifierPart(int)
6181      * @since   1.5
6182      */
6183     public static boolean isUnicodeIdentifierStart(int codePoint) {
6184         final CharacterData data = CharacterData.of(codePoint);
6185         return data.isUnicodeIdentifierStart(codePoint);
6186     }
6187 
6188     /**
6189      * Tests whether the given character may appear in a Unicode identifier
6190      * at any position after the first.
6191      * <p>
6192      * A character is permitted as part of a Unicode identifier if and only if
6193      * one of these statements holds:
6194      * <ul>
6195      * <li>  it is a letter
6196      * <li>  it is a connecting punctuation character (such as {@code '_'})
6197      * <li>  it is a digit
6198      * <li>  it is a numeric letter (such as a Roman numeral character)
6199      * <li>  it is a combining mark
6200      * <li>  it is a non-spacing mark
6201      * <li> {@code isIdentifierIgnorable} returns {@code true} for this character.
6202      * </ul>
6203      *
6204      * <p><b>Note:</b> <a href="#supplementary">Supplementary characters</a>
6205      * cannot be handled by this method. To cover every Unicode character,
6206      * supplementary characters included, use the
6207      * {@link #isUnicodeIdentifierPart(int)} method instead.
6208      *
6209      * @param   ch      the character to be tested.
6210      * @return  {@code true} if the character may be part of a
6211      *          Unicode identifier; {@code false} otherwise.
6212      * @see     Character#isIdentifierIgnorable(char)
6213      * @see     Character#isJavaIdentifierPart(char)
6214      * @see     Character#isLetterOrDigit(char)
6215      * @see     Character#isUnicodeIdentifierStart(char)
6216      * @since   1.1
6217      */
6218     public static boolean isUnicodeIdentifierPart(char ch) {
6219         final int codePoint = ch;
6220         return isUnicodeIdentifierPart(codePoint);
6221     }
6222 
6223     /**
6224      * Tests whether the given character (Unicode code point) may appear in
6225      * a Unicode identifier at any position after the first.
6226      * <p>
6227      * A character is permitted as part of a Unicode identifier if and only if
6228      * one of these statements holds:
6229      * <ul>
6230      * <li>  it is a letter
6231      * <li>  it is a connecting punctuation character (such as {@code '_'})
6232      * <li>  it is a digit
6233      * <li>  it is a numeric letter (such as a Roman numeral character)
6234      * <li>  it is a combining mark
6235      * <li>  it is a non-spacing mark
6236      * <li> {@code isIdentifierIgnorable} returns {@code true} for this character.
6237      * </ul>
6238      * @param   codePoint the character (Unicode code point) to be tested.
6239      * @return  {@code true} if the character may be part of a
6240      *          Unicode identifier; {@code false} otherwise.
6241      * @see     Character#isIdentifierIgnorable(int)
6242      * @see     Character#isJavaIdentifierPart(int)
6243      * @see     Character#isLetterOrDigit(int)
6244      * @see     Character#isUnicodeIdentifierStart(int)
6245      * @since   1.5
6246      */
6247     public static boolean isUnicodeIdentifierPart(int codePoint) {
6248         final CharacterData data = CharacterData.of(codePoint);
6249         return data.isUnicodeIdentifierPart(codePoint);
6250     }
6251 
6252     /**
6253      * Determines if the specified character should be regarded as
6254      * an ignorable character in a Java identifier or a Unicode identifier.
6255      * <p>
6256      * The following Unicode characters are ignorable in a Java identifier
6257      * or a Unicode identifier:
6258      * <ul>
6259      * <li>ISO control characters that are not whitespace
6260      * <ul>
6261      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6262      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6263      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6264      * </ul>
6265      *
6266      * <li>all characters that have the {@code FORMAT} general
6267      * category value
6268      * </ul>
6269      *
6270      * <p><b>Note:</b> This method cannot handle <a
6271      * href="#supplementary"> supplementary characters</a>. To support
6272      * all Unicode characters, including supplementary characters, use
6273      * the {@link #isIdentifierIgnorable(int)} method.
6274      *
6275      * @param   ch      the character to be tested.
6276      * @return  {@code true} if the character is an ignorable control
6277      *          character that may be part of a Java or Unicode identifier;
6278      *           {@code false} otherwise.
6279      * @see     Character#isJavaIdentifierPart(char)
6280      * @see     Character#isUnicodeIdentifierPart(char)
6281      * @since   1.1
6282      */
6283     public static boolean isIdentifierIgnorable(char ch) {
6284         return isIdentifierIgnorable((int)ch); // widen to int and reuse the code point overload
6285     }
6286 
6287     /**
6288      * Determines if the specified character (Unicode code point) should be regarded as
6289      * an ignorable character in a Java identifier or a Unicode identifier.
6290      * <p>
6291      * The following Unicode characters are ignorable in a Java identifier
6292      * or a Unicode identifier:
6293      * <ul>
6294      * <li>ISO control characters that are not whitespace
6295      * <ul>
6296      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6297      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6298      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6299      * </ul>
6300      *
6301      * <li>all characters that have the {@code FORMAT} general
6302      * category value
6303      * </ul>
6304      *
6305      * @param   codePoint the character (Unicode code point) to be tested.
6306      * @return  {@code true} if the character is an ignorable control
6307      *          character that may be part of a Java or Unicode identifier;
6308      *          {@code false} otherwise.
6309      * @see     Character#isJavaIdentifierPart(int)
6310      * @see     Character#isUnicodeIdentifierPart(int)
6311      * @since   1.5
6312      */
6313     public static boolean isIdentifierIgnorable(int codePoint) {
6314         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); // delegate to the CharacterData instance returned for this code point
6315     }
6316 
6317     /**
6318      * Converts the character argument to lowercase using case
6319      * mapping information from the UnicodeData file.
6320      * <p>
6321      * Note that
6322      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
6323      * does not always return {@code true} for some ranges of
6324      * characters, particularly those that are symbols or ideographs.
6325      *
6326      * <p>In general, {@link String#toLowerCase()} should be used to map
6327      * characters to lowercase. {@code String} case mapping methods
6328      * have several benefits over {@code Character} case mapping methods.
6329      * {@code String} case mapping methods can perform locale-sensitive
6330      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6331      * the {@code Character} case mapping methods cannot.
6332      *
6333      * <p><b>Note:</b> This method cannot handle <a
6334      * href="#supplementary"> supplementary characters</a>. To support
6335      * all Unicode characters, including supplementary characters, use
6336      * the {@link #toLowerCase(int)} method.
6337      *
6338      * @param   ch   the character to be converted.
6339      * @return  the lowercase equivalent of the character, if any;
6340      *          otherwise, the character itself.
6341      * @see     Character#isLowerCase(char)
6342      * @see     String#toLowerCase()
6343      */
6344     public static char toLowerCase(char ch) {
6345         return (char)toLowerCase((int)ch); // map via the code point overload, then narrow the int result back to char
6346     }
6347 
6348     /**
6349      * Converts the character (Unicode code point) argument to
6350      * lowercase using case mapping information from the UnicodeData
6351      * file.
6352      *
6353      * <p> Note that
6354      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
6355      * does not always return {@code true} for some ranges of
6356      * characters, particularly those that are symbols or ideographs.
6357      *
6358      * <p>In general, {@link String#toLowerCase()} should be used to map
6359      * characters to lowercase. {@code String} case mapping methods
6360      * have several benefits over {@code Character} case mapping methods.
6361      * {@code String} case mapping methods can perform locale-sensitive
6362      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6363      * the {@code Character} case mapping methods cannot.
6364      *
6365      * @param   codePoint   the character (Unicode code point) to be converted.
6366      * @return  the lowercase equivalent of the character (Unicode code
6367      *          point), if any; otherwise, the character itself.
6368      * @see     Character#isLowerCase(int)
6369      * @see     String#toLowerCase()
6370      *
6371      * @since   1.5
6372      */
6373     public static int toLowerCase(int codePoint) {
6374         return CharacterData.of(codePoint).toLowerCase(codePoint); // delegate to the CharacterData instance returned for this code point
6375     }
6376 
6377     /**
6378      * Converts the character argument to uppercase using case mapping
6379      * information from the UnicodeData file.
6380      * <p>
6381      * Note that
6382      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
6383      * does not always return {@code true} for some ranges of
6384      * characters, particularly those that are symbols or ideographs.
6385      *
6386      * <p>In general, {@link String#toUpperCase()} should be used to map
6387      * characters to uppercase. {@code String} case mapping methods
6388      * have several benefits over {@code Character} case mapping methods.
6389      * {@code String} case mapping methods can perform locale-sensitive
6390      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6391      * the {@code Character} case mapping methods cannot.
6392      *
6393      * <p><b>Note:</b> This method cannot handle <a
6394      * href="#supplementary"> supplementary characters</a>. To support
6395      * all Unicode characters, including supplementary characters, use
6396      * the {@link #toUpperCase(int)} method.
6397      *
6398      * @param   ch   the character to be converted.
6399      * @return  the uppercase equivalent of the character, if any;
6400      *          otherwise, the character itself.
6401      * @see     Character#isUpperCase(char)
6402      * @see     String#toUpperCase()
6403      */
6404     public static char toUpperCase(char ch) {
6405         return (char)toUpperCase((int)ch); // map via the code point overload, then narrow the int result back to char
6406     }
6407 
6408     /**
6409      * Converts the character (Unicode code point) argument to
6410      * uppercase using case mapping information from the UnicodeData
6411      * file.
6412      *
6413      * <p>Note that
6414      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
6415      * does not always return {@code true} for some ranges of
6416      * characters, particularly those that are symbols or ideographs.
6417      *
6418      * <p>In general, {@link String#toUpperCase()} should be used to map
6419      * characters to uppercase. {@code String} case mapping methods
6420      * have several benefits over {@code Character} case mapping methods.
6421      * {@code String} case mapping methods can perform locale-sensitive
6422      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6423      * the {@code Character} case mapping methods cannot.
6424      *
6425      * @param   codePoint   the character (Unicode code point) to be converted.
6426      * @return  the uppercase equivalent of the character, if any;
6427      *          otherwise, the character itself.
6428      * @see     Character#isUpperCase(int)
6429      * @see     String#toUpperCase()
6430      *
6431      * @since   1.5
6432      */
6433     public static int toUpperCase(int codePoint) {
6434         return CharacterData.of(codePoint).toUpperCase(codePoint); // delegate to the CharacterData instance returned for this code point
6435     }
6436 
6437     /**
6438      * Converts the character argument to titlecase using case mapping
6439      * information from the UnicodeData file. If a character has no
6440      * explicit titlecase mapping and is not itself a titlecase char
6441      * according to UnicodeData, then the uppercase mapping is
6442      * returned as an equivalent titlecase mapping. If the
6443      * {@code char} argument is already a titlecase
6444      * {@code char}, the same {@code char} value will be
6445      * returned.
6446      * <p>
6447      * Note that
6448      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
6449      * does not always return {@code true} for some ranges of
6450      * characters.
6451      *
6452      * <p><b>Note:</b> This method cannot handle <a
6453      * href="#supplementary"> supplementary characters</a>. To support
6454      * all Unicode characters, including supplementary characters, use
6455      * the {@link #toTitleCase(int)} method.
6456      *
6457      * @param   ch   the character to be converted.
6458      * @return  the titlecase equivalent of the character, if any;
6459      *          otherwise, the character itself.
6460      * @see     Character#isTitleCase(char)
6461      * @see     Character#toLowerCase(char)
6462      * @see     Character#toUpperCase(char)
6463      * @since   1.0.2
6464      */
6465     public static char toTitleCase(char ch) {
6466         return (char)toTitleCase((int)ch); // map via the code point overload, then narrow the int result back to char
6467     }
6468 
6469     /**
6470      * Converts the character (Unicode code point) argument to titlecase using case mapping
6471      * information from the UnicodeData file. If a character has no
6472      * explicit titlecase mapping and is not itself a titlecase char
6473      * according to UnicodeData, then the uppercase mapping is
6474      * returned as an equivalent titlecase mapping. If the
6475      * character argument is already a titlecase
6476      * character, the same character value will be
6477      * returned.
6478      *
6479      * <p>Note that
6480      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
6481      * does not always return {@code true} for some ranges of
6482      * characters.
6483      *
6484      * @param   codePoint   the character (Unicode code point) to be converted.
6485      * @return  the titlecase equivalent of the character, if any;
6486      *          otherwise, the character itself.
6487      * @see     Character#isTitleCase(int)
6488      * @see     Character#toLowerCase(int)
6489      * @see     Character#toUpperCase(int)
6490      * @since   1.5
6491      */
6492     public static int toTitleCase(int codePoint) {
6493         return CharacterData.of(codePoint).toTitleCase(codePoint); // delegate to the CharacterData instance returned for this code point
6494     }
6495 
6496     /**
6497      * Returns the numeric value of the character {@code ch} in the
6498      * specified radix.
6499      * <p>
6500      * If the radix is not in the range {@code MIN_RADIX} &le;
6501      * {@code radix} &le; {@code MAX_RADIX} or if the
6502      * value of {@code ch} is not a valid digit in the specified
6503      * radix, {@code -1} is returned. A character is a valid digit
6504      * if at least one of the following is true:
6505      * <ul>
6506      * <li>The method {@code isDigit} is {@code true} of the character
6507      *     and the Unicode decimal digit value of the character (or its
6508      *     single-character decomposition) is less than the specified radix.
6509      *     In this case the decimal digit value is returned.
6510      * <li>The character is one of the uppercase Latin letters
6511      *     {@code 'A'} through {@code 'Z'} and its code is less than
6512      *     {@code radix + 'A' - 10}.
6513      *     In this case, {@code ch - 'A' + 10}
6514      *     is returned.
6515      * <li>The character is one of the lowercase Latin letters
6516      *     {@code 'a'} through {@code 'z'} and its code is less than
6517      *     {@code radix + 'a' - 10}.
6518      *     In this case, {@code ch - 'a' + 10}
6519      *     is returned.
6520      * <li>The character is one of the fullwidth uppercase Latin letters A
6521      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6522      *     and its code is less than
6523      *     {@code radix + '\u005CuFF21' - 10}.
6524      *     In this case, {@code ch - '\u005CuFF21' + 10}
6525      *     is returned.
6526      * <li>The character is one of the fullwidth lowercase Latin letters a
6527      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6528      *     and its code is less than
6529      *     {@code radix + '\u005CuFF41' - 10}.
6530      *     In this case, {@code ch - '\u005CuFF41' + 10}
6531      *     is returned.
6532      * </ul>
6533      *
6534      * <p><b>Note:</b> This method cannot handle <a
6535      * href="#supplementary"> supplementary characters</a>. To support
6536      * all Unicode characters, including supplementary characters, use
6537      * the {@link #digit(int, int)} method.
6538      *
6539      * @param   ch      the character to be converted.
6540      * @param   radix   the radix.
6541      * @return  the numeric value represented by the character in the
6542      *          specified radix.
6543      * @see     Character#forDigit(int, int)
6544      * @see     Character#isDigit(char)
6545      */
6546     public static int digit(char ch, int radix) {
6547         return digit((int)ch, radix); // widen ch to int for the code point overload; radix is passed through unchanged
6548     }
6549 
6550     /**
6551      * Returns the numeric value of the specified character (Unicode
6552      * code point) in the specified radix.
6553      *
6554      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
6555      * {@code radix} &le; {@code MAX_RADIX} or if the
6556      * character is not a valid digit in the specified
6557      * radix, {@code -1} is returned. A character is a valid digit
6558      * if at least one of the following is true:
6559      * <ul>
6560      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
6561      *     and the Unicode decimal digit value of the character (or its
6562      *     single-character decomposition) is less than the specified radix.
6563      *     In this case the decimal digit value is returned.
6564      * <li>The character is one of the uppercase Latin letters
6565      *     {@code 'A'} through {@code 'Z'} and its code is less than
6566      *     {@code radix + 'A' - 10}.
6567      *     In this case, {@code codePoint - 'A' + 10}
6568      *     is returned.
6569      * <li>The character is one of the lowercase Latin letters
6570      *     {@code 'a'} through {@code 'z'} and its code is less than
6571      *     {@code radix + 'a' - 10}.
6572      *     In this case, {@code codePoint - 'a' + 10}
6573      *     is returned.
6574      * <li>The character is one of the fullwidth uppercase Latin letters A
6575      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6576      *     and its code is less than
6577      *     {@code radix + '\u005CuFF21' - 10}.
6578      *     In this case,
6579      *     {@code codePoint - '\u005CuFF21' + 10}
6580      *     is returned.
6581      * <li>The character is one of the fullwidth lowercase Latin letters a
6582      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6583      *     and its code is less than
6584      *     {@code radix + '\u005CuFF41' - 10}.
6585      *     In this case,
6586      *     {@code codePoint - '\u005CuFF41' + 10}
6587      *     is returned.
6588      * </ul>
6589      *
6590      * @param   codePoint the character (Unicode code point) to be converted.
6591      * @param   radix   the radix.
6592      * @return  the numeric value represented by the character in the
6593      *          specified radix.
6594      * @see     Character#forDigit(int, int)
6595      * @see     Character#isDigit(int)
6596      * @since   1.5
6597      */
6598     public static int digit(int codePoint, int radix) {
6599         return CharacterData.of(codePoint).digit(codePoint, radix); // delegate to the CharacterData instance returned for this code point
6600     }
6601 
6602     /**
6603      * Returns the {@code int} value that the specified Unicode
6604      * character represents. For example, the character
6605      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6606      * an {@code int} with a value of 50.
6607      * <p>
6608      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6609      * {@code '\u005Cu005A'}), lowercase
6610      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6611      * full width variant ({@code '\u005CuFF21'} through
6612      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6613      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6614      * through 35. This is independent of the Unicode specification,
6615      * which does not assign numeric values to these {@code char}
6616      * values.
6617      * <p>
6618      * If the character does not have a numeric value, then -1 is returned.
6619      * If the character has a numeric value that cannot be represented as a
6620      * nonnegative integer (for example, a fractional value), then -2
6621      * is returned.
6622      *
6623      * <p><b>Note:</b> This method cannot handle <a
6624      * href="#supplementary"> supplementary characters</a>. To support
6625      * all Unicode characters, including supplementary characters, use
6626      * the {@link #getNumericValue(int)} method.
6627      *
6628      * @param   ch      the character to be converted.
6629      * @return  the numeric value of the character, as a nonnegative {@code int}
6630      *           value; -2 if the character has a numeric value that is not a
6631      *          nonnegative integer; -1 if the character has no numeric value.
6632      * @see     Character#forDigit(int, int)
6633      * @see     Character#isDigit(char)
6634      * @since   1.1
6635      */
6636     public static int getNumericValue(char ch) {
6637         return getNumericValue((int)ch); // widen to int and reuse the code point overload
6638     }
6639 
6640     /**
6641      * Returns the {@code int} value that the specified
6642      * character (Unicode code point) represents. For example, the character
6643      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6644      * an {@code int} with a value of 50.
6645      * <p>
6646      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6647      * {@code '\u005Cu005A'}), lowercase
6648      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6649      * full width variant ({@code '\u005CuFF21'} through
6650      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6651      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6652      * through 35. This is independent of the Unicode specification,
6653      * which does not assign numeric values to these {@code char}
6654      * values.
6655      * <p>
6656      * If the character does not have a numeric value, then -1 is returned.
6657      * If the character has a numeric value that cannot be represented as a
6658      * nonnegative integer (for example, a fractional value), then -2
6659      * is returned.
6660      *
6661      * @param   codePoint the character (Unicode code point) to be converted.
6662      * @return  the numeric value of the character, as a nonnegative {@code int}
6663      *          value; -2 if the character has a numeric value that is not a
6664      *          nonnegative integer; -1 if the character has no numeric value.
6665      * @see     Character#forDigit(int, int)
6666      * @see     Character#isDigit(int)
6667      * @since   1.5
6668      */
6669     public static int getNumericValue(int codePoint) {
6670         return CharacterData.of(codePoint).getNumericValue(codePoint); // delegate to the CharacterData instance returned for this code point
6671     }
6672 
6673     /**
6674      * Determines if the specified character is ISO-LATIN-1 white space.
6675      * This method returns {@code true} for the following five
6676      * characters only:
6677      * <table summary="truechars">
6678      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
6679      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
6680      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
6681      *     <td>{@code NEW LINE}</td></tr>
6682      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
6683      *     <td>{@code FORM FEED}</td></tr>
6684      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
6685      *     <td>{@code CARRIAGE RETURN}</td></tr>
6686      * <tr><td>{@code ' '}</td>             <td>{@code U+0020}</td>
6687      *     <td>{@code SPACE}</td></tr>
6688      * </table>
6689      *
6690      * @param      ch   the character to be tested.
6691      * @return     {@code true} if the character is ISO-LATIN-1 white
6692      *             space; {@code false} otherwise.
6693      * @see        Character#isSpaceChar(char)
6694      * @see        Character#isWhitespace(char)
6695      * @deprecated Replaced by {@link #isWhitespace(char)}.
6696      */
6697     @Deprecated
6698     public static boolean isSpace(char ch) {
6699         return (ch <= 0x0020) && // guard first: a long shift uses only the low 6 bits of its count, so a larger ch would alias onto these bits
6700             (((((1L << 0x0009) | // bit 9: U+0009 HORIZONTAL TABULATION
6701             (1L << 0x000A) | // bit 10: U+000A NEW LINE
6702             (1L << 0x000C) | // bit 12: U+000C FORM FEED
6703             (1L << 0x000D) | // bit 13: U+000D CARRIAGE RETURN
6704             (1L << 0x0020)) >> ch) & 1L) != 0); // bit 32: U+0020 SPACE; then move bit number ch down to bit 0 and test it
6705     }
6706 
6707 
6708     /**
6709      * Determines if the specified character is a Unicode space character.
6710      * A character is considered to be a space character if and only if
6711      * it is specified to be a space character by the Unicode Standard. This
6712      * method returns true if the character's general category type is any of
6713      * the following:
6714      * <ul>
6715      * <li> {@code SPACE_SEPARATOR}
6716      * <li> {@code LINE_SEPARATOR}
6717      * <li> {@code PARAGRAPH_SEPARATOR}
6718      * </ul>
6719      *
6720      * <p><b>Note:</b> This method cannot handle <a
6721      * href="#supplementary"> supplementary characters</a>. To support
6722      * all Unicode characters, including supplementary characters, use
6723      * the {@link #isSpaceChar(int)} method.
6724      *
6725      * @param   ch      the character to be tested.
6726      * @return  {@code true} if the character is a space character;
6727      *          {@code false} otherwise.
6728      * @see     Character#isWhitespace(char)
6729      * @since   1.1
6730      */
6731     public static boolean isSpaceChar(char ch) {
6732         return isSpaceChar((int)ch); // widen to int and reuse the code point overload
6733     }
6734 
6735     /**
6736      * Determines if the specified character (Unicode code point) is a
6737      * Unicode space character.  A character is considered to be a
6738      * space character if and only if it is specified to be a space
6739      * character by the Unicode Standard. This method returns true if
6740      * the character's general category type is any of the following:
6741      *
6742      * <ul>
6743      * <li> {@link #SPACE_SEPARATOR}
6744      * <li> {@link #LINE_SEPARATOR}
6745      * <li> {@link #PARAGRAPH_SEPARATOR}
6746      * </ul>
6747      *
6748      * @param   codePoint the character (Unicode code point) to be tested.
6749      * @return  {@code true} if the character is a space character;
6750      *          {@code false} otherwise.
6751      * @see     Character#isWhitespace(int)
6752      * @since   1.5
6753      */
6754     public static boolean isSpaceChar(int codePoint) {
6755         return ((((1 << Character.SPACE_SEPARATOR) | // mask with one bit set per accepted general category
6756                   (1 << Character.LINE_SEPARATOR) |
6757                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) // shift this code point's category bit down to bit 0
6758             != 0; // true when that category bit was set in the mask
6759     }
6760 
6761     /**
6762      * Determines if the specified character is white space according to Java.
6763      * A character is a Java whitespace character if and only if it satisfies
6764      * one of the following criteria:
6765      * <ul>
6766      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
6767      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
6768      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6769      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6770      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6771      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6772      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6773      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6774      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6775      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6776      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6777      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6778      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6779      * </ul>
6780      *
6781      * <p><b>Note:</b> This method cannot handle <a
6782      * href="#supplementary"> supplementary characters</a>. To support
6783      * all Unicode characters, including supplementary characters, use
6784      * the {@link #isWhitespace(int)} method.
6785      *
6786      * @param   ch the character to be tested.
6787      * @return  {@code true} if the character is a Java whitespace
6788      *          character; {@code false} otherwise.
6789      * @see     Character#isSpaceChar(char)
6790      * @since   1.1
6791      */
6792     public static boolean isWhitespace(char ch) {
6793         return isWhitespace((int)ch); // widen to int and reuse the code point overload
6794     }
6795 
6796     /**
6797      * Determines if the specified character (Unicode code point) is
6798      * white space according to Java.  A character is a Java
6799      * whitespace character if and only if it satisfies one of the
6800      * following criteria:
6801      * <ul>
6802      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6803      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6804      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6805      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6806      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6807      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6808      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6809      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6810      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6811      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6812      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6813      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6814      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6815      * </ul>
6816      *
6817      *
6818      * @param   codePoint the character (Unicode code point) to be tested.
6819      * @return  {@code true} if the character is a Java whitespace
6820      *          character; {@code false} otherwise.
6821      * @see     Character#isSpaceChar(int)
6822      * @since   1.5
6823      */
6824     public static boolean isWhitespace(int codePoint) {
6825         return CharacterData.of(codePoint).isWhitespace(codePoint); // delegate to the CharacterData instance returned for this code point
6826     }
6827 
6828     /**
6829      * Determines if the specified character is an ISO control
6830      * character.  A character is considered to be an ISO control
6831      * character if its code is in the range {@code '\u005Cu0000'}
6832      * through {@code '\u005Cu001F'} or in the range
6833      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6834      *
6835      * <p><b>Note:</b> This method cannot handle <a
6836      * href="#supplementary"> supplementary characters</a>. To support
6837      * all Unicode characters, including supplementary characters, use
6838      * the {@link #isISOControl(int)} method.
6839      *
6840      * @param   ch      the character to be tested.
6841      * @return  {@code true} if the character is an ISO control character;
6842      *          {@code false} otherwise.
6843      *
6844      * @see     Character#isSpaceChar(char)
6845      * @see     Character#isWhitespace(char)
6846      * @since   1.1
6847      */
6848     public static boolean isISOControl(char ch) {
6849         return isISOControl((int)ch); // widen to int and reuse the code point overload
6850     }
6851 
6852     /**
6853      * Determines if the specified character (Unicode code point) is an ISO control
6854      * character.  A character is considered to be an ISO control
6855      * character if its code is in the range {@code '\u005Cu0000'}
6856      * through {@code '\u005Cu001F'} or in the range
6857      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6858      *
6859      * @param   codePoint the character (Unicode code point) to be tested.
6860      * @return  {@code true} if the character is an ISO control character;
6861      *          {@code false} otherwise.
6862      * @see     Character#isSpaceChar(int)
6863      * @see     Character#isWhitespace(int)
6864      * @since   1.5
6865      */
6866     public static boolean isISOControl(int codePoint) {
6867         // Optimized form of:
6868         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
6869         //     (codePoint >= 0x7F && codePoint <= 0x9F);
6870         return codePoint <= 0x9F && // upper bound shared by both ranges
6871             (codePoint >= 0x7F || (codePoint >>> 5 == 0)); // unsigned shift by 5 is zero only for 0x00..0x1F, so negative inputs are rejected
6872     }
6873 
6874     /**
6875      * Returns a value indicating a character's general category.
6876      *
6877      * <p><b>Note:</b> This method cannot handle <a
6878      * href="#supplementary"> supplementary characters</a>. To support
6879      * all Unicode characters, including supplementary characters, use
6880      * the {@link #getType(int)} method.
6881      *
6882      * @param   ch      the character to be tested.
6883      * @return  a value of type {@code int} representing the
6884      *          character's general category.
6885      * @see     Character#COMBINING_SPACING_MARK
6886      * @see     Character#CONNECTOR_PUNCTUATION
6887      * @see     Character#CONTROL
6888      * @see     Character#CURRENCY_SYMBOL
6889      * @see     Character#DASH_PUNCTUATION
6890      * @see     Character#DECIMAL_DIGIT_NUMBER
6891      * @see     Character#ENCLOSING_MARK
6892      * @see     Character#END_PUNCTUATION
6893      * @see     Character#FINAL_QUOTE_PUNCTUATION
6894      * @see     Character#FORMAT
6895      * @see     Character#INITIAL_QUOTE_PUNCTUATION
6896      * @see     Character#LETTER_NUMBER
6897      * @see     Character#LINE_SEPARATOR
6898      * @see     Character#LOWERCASE_LETTER
6899      * @see     Character#MATH_SYMBOL
6900      * @see     Character#MODIFIER_LETTER
6901      * @see     Character#MODIFIER_SYMBOL
6902      * @see     Character#NON_SPACING_MARK
6903      * @see     Character#OTHER_LETTER
6904      * @see     Character#OTHER_NUMBER
6905      * @see     Character#OTHER_PUNCTUATION
6906      * @see     Character#OTHER_SYMBOL
6907      * @see     Character#PARAGRAPH_SEPARATOR
6908      * @see     Character#PRIVATE_USE
6909      * @see     Character#SPACE_SEPARATOR
6910      * @see     Character#START_PUNCTUATION
6911      * @see     Character#SURROGATE
6912      * @see     Character#TITLECASE_LETTER
6913      * @see     Character#UNASSIGNED
6914      * @see     Character#UPPERCASE_LETTER
6915      * @since   1.1
6916      */
6917     public static int getType(char ch) {
6918         return getType((int)ch); // widen to int and reuse the code point overload
6919     }
6920 
6921     /**
6922      * Returns a value indicating a character's general category.
6923      *
6924      * @param   codePoint the character (Unicode code point) to be tested.
6925      * @return  a value of type {@code int} representing the
6926      *          character's general category.
6927      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
6928      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
6929      * @see     Character#CONTROL CONTROL
6930      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
6931      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
6932      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
6933      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
6934      * @see     Character#END_PUNCTUATION END_PUNCTUATION
6935      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
6936      * @see     Character#FORMAT FORMAT
6937      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
6938      * @see     Character#LETTER_NUMBER LETTER_NUMBER
6939      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
6940      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
6941      * @see     Character#MATH_SYMBOL MATH_SYMBOL
6942      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
6943      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
6944      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
6945      * @see     Character#OTHER_LETTER OTHER_LETTER
6946      * @see     Character#OTHER_NUMBER OTHER_NUMBER
6947      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
6948      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
6949      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
6950      * @see     Character#PRIVATE_USE PRIVATE_USE
6951      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
6952      * @see     Character#START_PUNCTUATION START_PUNCTUATION
6953      * @see     Character#SURROGATE SURROGATE
6954      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
6955      * @see     Character#UNASSIGNED UNASSIGNED
6956      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
6957      * @since   1.5
6958      */
6959     public static int getType(int codePoint) {
6960         return CharacterData.of(codePoint).getType(codePoint);
6961     }
6962 
6963     /**
6964      * Determines the character representation for a specific digit in
6965      * the specified radix. If the value of {@code radix} is not a
6966      * valid radix, or the value of {@code digit} is not a valid
6967      * digit in the specified radix, the null character
6968      * ({@code '\u005Cu0000'}) is returned.
6969      * <p>
6970      * The {@code radix} argument is valid if it is greater than or
6971      * equal to {@code MIN_RADIX} and less than or equal to
6972      * {@code MAX_RADIX}. The {@code digit} argument is valid if
6973      * {@code 0 <= digit < radix}.
6974      * <p>
6975      * If the digit is less than 10, then
6976      * {@code '0' + digit} is returned. Otherwise, the value
6977      * {@code 'a' + digit - 10} is returned.
6978      *
6979      * @param   digit   the number to convert to a character.
6980      * @param   radix   the radix.
6981      * @return  the {@code char} representation of the specified digit
6982      *          in the specified radix.
6983      * @see     Character#MIN_RADIX
6984      * @see     Character#MAX_RADIX
6985      * @see     Character#digit(char, int)
6986      */
6987     public static char forDigit(int digit, int radix) {
6988         if ((digit >= radix) || (digit < 0)) {
6989             return '\0';
6990         }
6991         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
6992             return '\0';
6993         }
6994         if (digit < 10) {
6995             return (char)('0' + digit);
6996         }
6997         return (char)('a' - 10 + digit);
6998     }
6999 
7000     /**
7001      * Returns the Unicode directionality property for the given
7002      * character.  Character directionality is used to calculate the
7003      * visual ordering of text. The directionality value of undefined
7004      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
7005      *
7006      * <p><b>Note:</b> This method cannot handle <a
7007      * href="#supplementary"> supplementary characters</a>. To support
7008      * all Unicode characters, including supplementary characters, use
7009      * the {@link #getDirectionality(int)} method.
7010      *
7011      * @param  ch {@code char} for which the directionality property
7012      *            is requested.
7013      * @return the directionality property of the {@code char} value.
7014      *
7015      * @see Character#DIRECTIONALITY_UNDEFINED
7016      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
7017      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
7018      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7019      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
7020      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7021      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7022      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
7023      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7024      * @see Character#DIRECTIONALITY_NONSPACING_MARK
7025      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
7026      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
7027      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
7028      * @see Character#DIRECTIONALITY_WHITESPACE
7029      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
7030      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7031      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7032      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7033      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7034      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7035      * @since 1.4
7036      */
7037     public static byte getDirectionality(char ch) {
7038         return getDirectionality((int)ch);
7039     }
7040 
7041     /**
7042      * Returns the Unicode directionality property for the given
7043      * character (Unicode code point).  Character directionality is
7044      * used to calculate the visual ordering of text. The
7045      * directionality value of undefined character is {@link
7046      * #DIRECTIONALITY_UNDEFINED}.
7047      *
7048      * @param   codePoint the character (Unicode code point) for which
7049      *          the directionality property is requested.
7050      * @return the directionality property of the character.
7051      *
7052      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
7053      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
7054      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
7055      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7056      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
7057      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7058      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7059      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
7060      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7061      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
7062      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
7063      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
7064      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
7065      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
7066      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
7067      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7068      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7069      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7070      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7071      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7072      * @since    1.5
7073      */
7074     public static byte getDirectionality(int codePoint) {
7075         return CharacterData.of(codePoint).getDirectionality(codePoint);
7076     }
7077 
7078     /**
7079      * Determines whether the character is mirrored according to the
7080      * Unicode specification.  Mirrored characters should have their
7081      * glyphs horizontally mirrored when displayed in text that is
7082      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
7083      * PARENTHESIS is semantically defined to be an <i>opening
7084      * parenthesis</i>.  This will appear as a "(" in text that is
7085      * left-to-right but as a ")" in text that is right-to-left.
7086      *
7087      * <p><b>Note:</b> This method cannot handle <a
7088      * href="#supplementary"> supplementary characters</a>. To support
7089      * all Unicode characters, including supplementary characters, use
7090      * the {@link #isMirrored(int)} method.
7091      *
7092      * @param  ch {@code char} for which the mirrored property is requested
7093      * @return {@code true} if the char is mirrored, {@code false}
7094      *         if the {@code char} is not mirrored or is not defined.
7095      * @since 1.4
7096      */
7097     public static boolean isMirrored(char ch) {
7098         return isMirrored((int)ch);
7099     }
7100 
7101     /**
7102      * Determines whether the specified character (Unicode code point)
7103      * is mirrored according to the Unicode specification.  Mirrored
7104      * characters should have their glyphs horizontally mirrored when
7105      * displayed in text that is right-to-left.  For example,
7106      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
7107      * defined to be an <i>opening parenthesis</i>.  This will appear
7108      * as a "(" in text that is left-to-right but as a ")" in text
7109      * that is right-to-left.
7110      *
7111      * @param   codePoint the character (Unicode code point) to be tested.
7112      * @return  {@code true} if the character is mirrored, {@code false}
7113      *          if the character is not mirrored or is not defined.
7114      * @since   1.5
7115      */
7116     public static boolean isMirrored(int codePoint) {
7117         return CharacterData.of(codePoint).isMirrored(codePoint);
7118     }
7119 
7120     /**
7121      * Compares two {@code Character} objects numerically.
7122      *
7123      * @param   anotherCharacter   the {@code Character} to be compared.
7124 
7125      * @return  the value {@code 0} if the argument {@code Character}
7126      *          is equal to this {@code Character}; a value less than
7127      *          {@code 0} if this {@code Character} is numerically less
7128      *          than the {@code Character} argument; and a value greater than
7129      *          {@code 0} if this {@code Character} is numerically greater
7130      *          than the {@code Character} argument (unsigned comparison).
7131      *          Note that this is strictly a numerical comparison; it is not
7132      *          locale-dependent.
7133      * @since   1.2
7134      */
7135     public int compareTo(Character anotherCharacter) {
7136         return compare(this.value, anotherCharacter.value);
7137     }
7138 
7139     /**
7140      * Compares two {@code char} values numerically.
7141      * The value returned is identical to what would be returned by:
7142      * <pre>
7143      *    Character.valueOf(x).compareTo(Character.valueOf(y))
7144      * </pre>
7145      *
7146      * @param  x the first {@code char} to compare
7147      * @param  y the second {@code char} to compare
7148      * @return the value {@code 0} if {@code x == y};
7149      *         a value less than {@code 0} if {@code x < y}; and
7150      *         a value greater than {@code 0} if {@code x > y}
7151      * @since 1.7
7152      */
7153     public static int compare(char x, char y) {
7154         return x - y;
7155     }
7156 
7157     /**
7158      * Converts the character (Unicode code point) argument to uppercase using
7159      * information from the UnicodeData file.
7160      * <p>
7161      *
7162      * @param   codePoint   the character (Unicode code point) to be converted.
7163      * @return  either the uppercase equivalent of the character, if
7164      *          any, or an error flag ({@code Character.ERROR})
7165      *          that indicates that a 1:M {@code char} mapping exists.
7166      * @see     Character#isLowerCase(char)
7167      * @see     Character#isUpperCase(char)
7168      * @see     Character#toLowerCase(char)
7169      * @see     Character#toTitleCase(char)
7170      * @since 1.4
7171      */
7172     static int toUpperCaseEx(int codePoint) {
7173         assert isValidCodePoint(codePoint);
7174         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
7175     }
7176 
7177     /**
7178      * Converts the character (Unicode code point) argument to uppercase using case
7179      * mapping information from the SpecialCasing file in the Unicode
7180      * specification. If a character has no explicit uppercase
7181      * mapping, then the {@code char} itself is returned in the
7182      * {@code char[]}.
7183      *
7184      * @param   codePoint   the character (Unicode code point) to be converted.
7185      * @return a {@code char[]} with the uppercased character.
7186      * @since 1.4
7187      */
7188     static char[] toUpperCaseCharArray(int codePoint) {
7189         // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
7190         assert isBmpCodePoint(codePoint);
7191         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
7192     }
7193 
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
7201 
    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form. Computed as {@link #SIZE} divided by {@link Byte#SIZE}.
     *
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
7209 
7210     /**
7211      * Returns the value obtained by reversing the order of the bytes in the
7212      * specified <tt>char</tt> value.
7213      *
7214      * @param ch The {@code char} of which to reverse the byte order.
7215      * @return the value obtained by reversing (or, equivalently, swapping)
7216      *     the bytes in the specified <tt>char</tt> value.
7217      * @since 1.5
7218      */
7219     public static char reverseBytes(char ch) {
7220         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
7221     }
7222 
7223     /**
7224      * Returns the Unicode name of the specified character
7225      * {@code codePoint}, or null if the code point is
7226      * {@link #UNASSIGNED unassigned}.
7227      * <p>
7228      * Note: if the specified character is not assigned a name by
7229      * the <i>UnicodeData</i> file (part of the Unicode Character
7230      * Database maintained by the Unicode Consortium), the returned
7231      * name is the same as the result of expression.
7232      *
7233      * <blockquote>{@code
7234      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
7235      *     + " "
7236      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7237      *
7238      * }</blockquote>
7239      *
7240      * @param  codePoint the character (Unicode code point)
7241      *
7242      * @return the Unicode name of the specified character, or null if
7243      *         the code point is unassigned.
7244      *
7245      * @exception IllegalArgumentException if the specified
7246      *            {@code codePoint} is not a valid Unicode
7247      *            code point.
7248      *
7249      * @since 1.7
7250      */
7251     public static String getName(int codePoint) {
7252         if (!isValidCodePoint(codePoint)) {
7253             throw new IllegalArgumentException();
7254         }
7255         String name = CharacterName.get(codePoint);
7256         if (name != null)
7257             return name;
7258         if (getType(codePoint) == UNASSIGNED)
7259             return null;
7260         UnicodeBlock block = UnicodeBlock.of(codePoint);
7261         if (block != null)
7262             return block.toString().replace('_', ' ') + " "
7263                    + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7264         // should never come here
7265         return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7266     }
7267 }