1 /*
   2  * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Map;
  30 import java.util.HashMap;
  31 import java.util.Locale;
  32 
  33 import jdk.internal.HotSpotIntrinsicCandidate;
  34 
  35 /**
  36  * The {@code Character} class wraps a value of the primitive
  37  * type {@code char} in an object. An object of type
  38  * {@code Character} contains a single field whose type is
  39  * {@code char}.
  40  * <p>
  41  * In addition, this class provides several methods for determining
  42  * a character's category (lowercase letter, digit, etc.) and for converting
  43  * characters from uppercase to lowercase and vice versa.
  44  * <p>
  45  * Character information is based on the Unicode Standard, version 6.2.0.
  46  * <p>
  47  * The methods and data of class {@code Character} are defined by
  48  * the information in the <i>UnicodeData</i> file that is part of the
  49  * Unicode Character Database maintained by the Unicode
  50  * Consortium. This file specifies various properties including name
  51  * and general category for every defined Unicode code point or
  52  * character range.
  53  * <p>
  54  * The file and its description are available from the Unicode Consortium at:
  55  * <ul>
  56  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
  57  * </ul>
  58  *
  59  * <h3><a name="unicode">Unicode Character Representations</a></h3>
  60  *
  61  * <p>The {@code char} data type (and therefore the value that a
   62  * {@code Character} object encapsulates) is based on the
  63  * original Unicode specification, which defined characters as
  64  * fixed-width 16-bit entities. The Unicode Standard has since been
  65  * changed to allow for characters whose representation requires more
  66  * than 16 bits.  The range of legal <em>code point</em>s is now
  67  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
  68  * (Refer to the <a
  69  * href="http://www.unicode.org/reports/tr27/#notation"><i>
  70  * definition</i></a> of the U+<i>n</i> notation in the Unicode
  71  * Standard.)
  72  *
  73  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
  74  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
  75  * <a name="supplementary">Characters</a> whose code points are greater
  76  * than U+FFFF are called <em>supplementary character</em>s.  The Java
  77  * platform uses the UTF-16 representation in {@code char} arrays and
  78  * in the {@code String} and {@code StringBuffer} classes. In
  79  * this representation, supplementary characters are represented as a pair
  80  * of {@code char} values, the first from the <em>high-surrogates</em>
  81  * range, (&#92;uD800-&#92;uDBFF), the second from the
  82  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
  83  *
  84  * <p>A {@code char} value, therefore, represents Basic
  85  * Multilingual Plane (BMP) code points, including the surrogate
  86  * code points, or code units of the UTF-16 encoding. An
  87  * {@code int} value represents all Unicode code points,
  88  * including supplementary code points. The lower (least significant)
  89  * 21 bits of {@code int} are used to represent Unicode code
  90  * points and the upper (most significant) 11 bits must be zero.
  91  * Unless otherwise specified, the behavior with respect to
  92  * supplementary characters and surrogate {@code char} values is
  93  * as follows:
  94  *
  95  * <ul>
  96  * <li>The methods that only accept a {@code char} value cannot support
  97  * supplementary characters. They treat {@code char} values from the
  98  * surrogate ranges as undefined characters. For example,
  99  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
 100  * this specific value if followed by any low-surrogate value in a string
 101  * would represent a letter.
 102  *
 103  * <li>The methods that accept an {@code int} value support all
 104  * Unicode characters, including supplementary characters. For
 105  * example, {@code Character.isLetter(0x2F81A)} returns
 106  * {@code true} because the code point value represents a letter
 107  * (a CJK ideograph).
 108  * </ul>
 109  *
 110  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
 111  * used for character values in the range between U+0000 and U+10FFFF,
 112  * and <em>Unicode code unit</em> is used for 16-bit
 113  * {@code char} values that are code units of the <em>UTF-16</em>
 114  * encoding. For more information on Unicode terminology, refer to the
 115  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
 116  *
 117  * @author  Lee Boynton
 118  * @author  Guy Steele
 119  * @author  Akira Tanaka
 120  * @author  Martin Buchholz
 121  * @author  Ulf Zibis
 122  * @since   1.0
 123  */
 124 public final
 125 class Character implements java.io.Serializable, Comparable<Character> {
 126     /**
 127      * The minimum radix available for conversion to and from strings.
 128      * The constant value of this field is the smallest value permitted
 129      * for the radix argument in radix-conversion methods such as the
 130      * {@code digit} method, the {@code forDigit} method, and the
 131      * {@code toString} method of class {@code Integer}.
 132      *
 133      * @see     Character#digit(char, int)
 134      * @see     Character#forDigit(int, int)
 135      * @see     Integer#toString(int, int)
 136      * @see     Integer#valueOf(String)
 137      */
 138     public static final int MIN_RADIX = 2;
 139 
 140     /**
 141      * The maximum radix available for conversion to and from strings.
 142      * The constant value of this field is the largest value permitted
 143      * for the radix argument in radix-conversion methods such as the
 144      * {@code digit} method, the {@code forDigit} method, and the
 145      * {@code toString} method of class {@code Integer}.
 146      *
 147      * @see     Character#digit(char, int)
 148      * @see     Character#forDigit(int, int)
 149      * @see     Integer#toString(int, int)
 150      * @see     Integer#valueOf(String)
 151      */
 152     public static final int MAX_RADIX = 36;
 153 
 154     /**
 155      * The constant value of this field is the smallest value of type
 156      * {@code char}, {@code '\u005Cu0000'}.
 157      *
 158      * @since   1.0.2
 159      */
 160     public static final char MIN_VALUE = '\u0000';
 161 
 162     /**
 163      * The constant value of this field is the largest value of type
 164      * {@code char}, {@code '\u005CuFFFF'}.
 165      *
 166      * @since   1.0.2
 167      */
 168     public static final char MAX_VALUE = '\uFFFF';
 169 
 170     /**
 171      * The {@code Class} instance representing the primitive type
 172      * {@code char}.
 173      *
 174      * @since   1.1
 175      */
 176     @SuppressWarnings("unchecked")
 177     public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
 178 
 179     /*
 180      * Normative general types
 181      */
 182 
 183     /*
 184      * General character types
 185      */
 186 
 187     /**
 188      * General category "Cn" in the Unicode specification.
 189      * @since   1.1
 190      */
 191     public static final byte UNASSIGNED = 0;
 192 
 193     /**
 194      * General category "Lu" in the Unicode specification.
 195      * @since   1.1
 196      */
 197     public static final byte UPPERCASE_LETTER = 1;
 198 
 199     /**
 200      * General category "Ll" in the Unicode specification.
 201      * @since   1.1
 202      */
 203     public static final byte LOWERCASE_LETTER = 2;
 204 
 205     /**
 206      * General category "Lt" in the Unicode specification.
 207      * @since   1.1
 208      */
 209     public static final byte TITLECASE_LETTER = 3;
 210 
 211     /**
 212      * General category "Lm" in the Unicode specification.
 213      * @since   1.1
 214      */
 215     public static final byte MODIFIER_LETTER = 4;
 216 
 217     /**
 218      * General category "Lo" in the Unicode specification.
 219      * @since   1.1
 220      */
 221     public static final byte OTHER_LETTER = 5;
 222 
 223     /**
 224      * General category "Mn" in the Unicode specification.
 225      * @since   1.1
 226      */
 227     public static final byte NON_SPACING_MARK = 6;
 228 
 229     /**
 230      * General category "Me" in the Unicode specification.
 231      * @since   1.1
 232      */
 233     public static final byte ENCLOSING_MARK = 7;
 234 
 235     /**
 236      * General category "Mc" in the Unicode specification.
 237      * @since   1.1
 238      */
 239     public static final byte COMBINING_SPACING_MARK = 8;
 240 
 241     /**
 242      * General category "Nd" in the Unicode specification.
 243      * @since   1.1
 244      */
 245     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
 246 
 247     /**
 248      * General category "Nl" in the Unicode specification.
 249      * @since   1.1
 250      */
 251     public static final byte LETTER_NUMBER = 10;
 252 
 253     /**
 254      * General category "No" in the Unicode specification.
 255      * @since   1.1
 256      */
 257     public static final byte OTHER_NUMBER = 11;
 258 
 259     /**
 260      * General category "Zs" in the Unicode specification.
 261      * @since   1.1
 262      */
 263     public static final byte SPACE_SEPARATOR = 12;
 264 
 265     /**
 266      * General category "Zl" in the Unicode specification.
 267      * @since   1.1
 268      */
 269     public static final byte LINE_SEPARATOR = 13;
 270 
 271     /**
 272      * General category "Zp" in the Unicode specification.
 273      * @since   1.1
 274      */
 275     public static final byte PARAGRAPH_SEPARATOR = 14;
 276 
 277     /**
 278      * General category "Cc" in the Unicode specification.
 279      * @since   1.1
 280      */
 281     public static final byte CONTROL = 15;
 282 
 283     /**
 284      * General category "Cf" in the Unicode specification.
 285      * @since   1.1
 286      */
 287     public static final byte FORMAT = 16;
 288 
 289     /**
 290      * General category "Co" in the Unicode specification.
 291      * @since   1.1
 292      */
 293     public static final byte PRIVATE_USE = 18;
 294 
 295     /**
 296      * General category "Cs" in the Unicode specification.
 297      * @since   1.1
 298      */
 299     public static final byte SURROGATE = 19;
 300 
 301     /**
 302      * General category "Pd" in the Unicode specification.
 303      * @since   1.1
 304      */
 305     public static final byte DASH_PUNCTUATION = 20;
 306 
 307     /**
 308      * General category "Ps" in the Unicode specification.
 309      * @since   1.1
 310      */
 311     public static final byte START_PUNCTUATION = 21;
 312 
 313     /**
 314      * General category "Pe" in the Unicode specification.
 315      * @since   1.1
 316      */
 317     public static final byte END_PUNCTUATION = 22;
 318 
 319     /**
 320      * General category "Pc" in the Unicode specification.
 321      * @since   1.1
 322      */
 323     public static final byte CONNECTOR_PUNCTUATION = 23;
 324 
 325     /**
 326      * General category "Po" in the Unicode specification.
 327      * @since   1.1
 328      */
 329     public static final byte OTHER_PUNCTUATION = 24;
 330 
 331     /**
 332      * General category "Sm" in the Unicode specification.
 333      * @since   1.1
 334      */
 335     public static final byte MATH_SYMBOL = 25;
 336 
 337     /**
 338      * General category "Sc" in the Unicode specification.
 339      * @since   1.1
 340      */
 341     public static final byte CURRENCY_SYMBOL = 26;
 342 
 343     /**
 344      * General category "Sk" in the Unicode specification.
 345      * @since   1.1
 346      */
 347     public static final byte MODIFIER_SYMBOL = 27;
 348 
 349     /**
 350      * General category "So" in the Unicode specification.
 351      * @since   1.1
 352      */
 353     public static final byte OTHER_SYMBOL = 28;
 354 
 355     /**
 356      * General category "Pi" in the Unicode specification.
 357      * @since   1.4
 358      */
 359     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
 360 
 361     /**
 362      * General category "Pf" in the Unicode specification.
 363      * @since   1.4
 364      */
 365     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
 366 
 367     /**
 368      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
 369      */
 370     static final int ERROR = 0xFFFFFFFF;
 371 
 372 
 373     /**
 374      * Undefined bidirectional character type. Undefined {@code char}
 375      * values have undefined directionality in the Unicode specification.
 376      * @since 1.4
 377      */
 378     public static final byte DIRECTIONALITY_UNDEFINED = -1;
 379 
 380     /**
 381      * Strong bidirectional character type "L" in the Unicode specification.
 382      * @since 1.4
 383      */
 384     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
 385 
 386     /**
 387      * Strong bidirectional character type "R" in the Unicode specification.
 388      * @since 1.4
 389      */
 390     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
 391 
  392     /**
  393      * Strong bidirectional character type "AL" in the Unicode specification.
  394      * @since 1.4
  395      */
  396     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
 397 
 398     /**
 399      * Weak bidirectional character type "EN" in the Unicode specification.
 400      * @since 1.4
 401      */
 402     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
 403 
 404     /**
 405      * Weak bidirectional character type "ES" in the Unicode specification.
 406      * @since 1.4
 407      */
 408     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
 409 
 410     /**
 411      * Weak bidirectional character type "ET" in the Unicode specification.
 412      * @since 1.4
 413      */
 414     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
 415 
 416     /**
 417      * Weak bidirectional character type "AN" in the Unicode specification.
 418      * @since 1.4
 419      */
 420     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
 421 
 422     /**
 423      * Weak bidirectional character type "CS" in the Unicode specification.
 424      * @since 1.4
 425      */
 426     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
 427 
 428     /**
 429      * Weak bidirectional character type "NSM" in the Unicode specification.
 430      * @since 1.4
 431      */
 432     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
 433 
 434     /**
 435      * Weak bidirectional character type "BN" in the Unicode specification.
 436      * @since 1.4
 437      */
 438     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
 439 
 440     /**
 441      * Neutral bidirectional character type "B" in the Unicode specification.
 442      * @since 1.4
 443      */
 444     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
 445 
 446     /**
 447      * Neutral bidirectional character type "S" in the Unicode specification.
 448      * @since 1.4
 449      */
 450     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
 451 
 452     /**
 453      * Neutral bidirectional character type "WS" in the Unicode specification.
 454      * @since 1.4
 455      */
 456     public static final byte DIRECTIONALITY_WHITESPACE = 12;
 457 
 458     /**
 459      * Neutral bidirectional character type "ON" in the Unicode specification.
 460      * @since 1.4
 461      */
 462     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
 463 
 464     /**
 465      * Strong bidirectional character type "LRE" in the Unicode specification.
 466      * @since 1.4
 467      */
 468     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
 469 
 470     /**
 471      * Strong bidirectional character type "LRO" in the Unicode specification.
 472      * @since 1.4
 473      */
 474     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
 475 
 476     /**
 477      * Strong bidirectional character type "RLE" in the Unicode specification.
 478      * @since 1.4
 479      */
 480     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
 481 
 482     /**
 483      * Strong bidirectional character type "RLO" in the Unicode specification.
 484      * @since 1.4
 485      */
 486     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
 487 
 488     /**
 489      * Weak bidirectional character type "PDF" in the Unicode specification.
 490      * @since 1.4
 491      */
 492     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 493 
 494     /**
 495      * The minimum value of a
 496      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 497      * Unicode high-surrogate code unit</a>
 498      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
 499      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 500      *
 501      * @since 1.5
 502      */
 503     public static final char MIN_HIGH_SURROGATE = '\uD800';
 504 
 505     /**
 506      * The maximum value of a
 507      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 508      * Unicode high-surrogate code unit</a>
 509      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
 510      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 511      *
 512      * @since 1.5
 513      */
 514     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
 515 
 516     /**
 517      * The minimum value of a
 518      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 519      * Unicode low-surrogate code unit</a>
 520      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
 521      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 522      *
 523      * @since 1.5
 524      */
 525     public static final char MIN_LOW_SURROGATE  = '\uDC00';
 526 
 527     /**
 528      * The maximum value of a
 529      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 530      * Unicode low-surrogate code unit</a>
 531      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 532      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 533      *
 534      * @since 1.5
 535      */
 536     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
 537 
 538     /**
 539      * The minimum value of a Unicode surrogate code unit in the
 540      * UTF-16 encoding, constant {@code '\u005CuD800'}.
 541      *
 542      * @since 1.5
 543      */
 544     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
 545 
 546     /**
 547      * The maximum value of a Unicode surrogate code unit in the
 548      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 549      *
 550      * @since 1.5
 551      */
 552     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
 553 
 554     /**
 555      * The minimum value of a
 556      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
 557      * Unicode supplementary code point</a>, constant {@code U+10000}.
 558      *
 559      * @since 1.5
 560      */
 561     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
 562 
 563     /**
 564      * The minimum value of a
 565      * <a href="http://www.unicode.org/glossary/#code_point">
 566      * Unicode code point</a>, constant {@code U+0000}.
 567      *
 568      * @since 1.5
 569      */
 570     public static final int MIN_CODE_POINT = 0x000000;
 571 
 572     /**
 573      * The maximum value of a
 574      * <a href="http://www.unicode.org/glossary/#code_point">
 575      * Unicode code point</a>, constant {@code U+10FFFF}.
 576      *
 577      * @since 1.5
 578      */
 579     public static final int MAX_CODE_POINT = 0X10FFFF;
 580 
 581 
 582     /**
 583      * Instances of this class represent particular subsets of the Unicode
 584      * character set.  The only family of subsets defined in the
 585      * {@code Character} class is {@link Character.UnicodeBlock}.
 586      * Other portions of the Java API may define other subsets for their
 587      * own purposes.
 588      *
 589      * @since 1.2
 590      */
 591     public static class Subset  {
 592 
 593         private String name;
 594 
 595         /**
 596          * Constructs a new {@code Subset} instance.
 597          *
 598          * @param  name  The name of this subset
 599          * @exception NullPointerException if name is {@code null}
 600          */
 601         protected Subset(String name) {
 602             if (name == null) {
 603                 throw new NullPointerException("name");
 604             }
 605             this.name = name;
 606         }
 607 
 608         /**
 609          * Compares two {@code Subset} objects for equality.
 610          * This method returns {@code true} if and only if
 611          * {@code this} and the argument refer to the same
 612          * object; since this method is {@code final}, this
 613          * guarantee holds for all subclasses.
 614          */
 615         public final boolean equals(Object obj) {
 616             return (this == obj);
 617         }
 618 
 619         /**
 620          * Returns the standard hash code as defined by the
 621          * {@link Object#hashCode} method.  This method
 622          * is {@code final} in order to ensure that the
 623          * {@code equals} and {@code hashCode} methods will
 624          * be consistent in all subclasses.
 625          */
 626         public final int hashCode() {
 627             return super.hashCode();
 628         }
 629 
 630         /**
 631          * Returns the name of this subset.
 632          */
 633         public final String toString() {
 634             return name;
 635         }
 636     }
 637 
 638     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
 639     // for the latest specification of Unicode Blocks.
 640 
 641     /**
 642      * A family of character subsets representing the character blocks in the
 643      * Unicode specification. Character blocks generally define characters
 644      * used for a specific script or purpose. A character is contained by
 645      * at most one Unicode block.
 646      *
 647      * @since 1.2
 648      */
 649     public static final class UnicodeBlock extends Subset {
  650         /**
               * Maps block names to their {@code UnicodeBlock}. The constructors
               * of this class add an entry for the identifier name and for every
               * alias of each block they create.
               * <p>
               * The initial capacity is computed from:
  651          * 510  - the expected number of entities
  652          * 0.75 - the default load factor of HashMap
               * Dividing the expected entry count by the load factor (plus one)
               * requests a capacity above entries / load factor, which per the
               * HashMap documentation means no rehash occurs while filling it.
  653          */
  654         private static Map<String, UnicodeBlock> map =
  655                 new HashMap<>((int)(510 / 0.75f + 1.0f));
 656 
  657         /**
  658          * Creates a UnicodeBlock with the given identifier name.
  659          * This name must be the same as the block identifier.
               * <p>
               * The new instance registers itself in {@code map} under
               * {@code idName}.
               *
               * @param idName the identifier name of the block; it is passed to
               *        the {@code Subset} constructor, which throws
               *        {@code NullPointerException} for {@code null}
  660          */
  661         private UnicodeBlock(String idName) {
  662             super(idName);
  663             map.put(idName, this);
  664         }
 665 
  666         /**
  667          * Creates a UnicodeBlock with the given identifier name and
  668          * alias name.
               * <p>
               * Delegates to the single-name constructor, then registers the
               * same instance in {@code map} under {@code alias} as well.
               *
               * @param idName the identifier name of the block
               * @param alias  an additional name that maps to this block
  669          */
  670         private UnicodeBlock(String idName, String alias) {
  671             this(idName);
  672             map.put(alias, this);
  673         }
 674 
 675         /**
 676          * Creates a UnicodeBlock with the given identifier name and
 677          * alias names.
 678          */
 679         private UnicodeBlock(String idName, String... aliases) {
 680             this(idName);
 681             for (String alias : aliases)
 682                 map.put(alias, this);
 683         }
 684 
 685         /**
 686          * Constant for the "Basic Latin" Unicode character block.
 687          * @since 1.2
 688          */
 689         public static final UnicodeBlock  BASIC_LATIN =
 690             new UnicodeBlock("BASIC_LATIN",
 691                              "BASIC LATIN",
 692                              "BASICLATIN");
 693 
 694         /**
 695          * Constant for the "Latin-1 Supplement" Unicode character block.
 696          * @since 1.2
 697          */
 698         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
 699             new UnicodeBlock("LATIN_1_SUPPLEMENT",
 700                              "LATIN-1 SUPPLEMENT",
 701                              "LATIN-1SUPPLEMENT");
 702 
 703         /**
 704          * Constant for the "Latin Extended-A" Unicode character block.
 705          * @since 1.2
 706          */
 707         public static final UnicodeBlock LATIN_EXTENDED_A =
 708             new UnicodeBlock("LATIN_EXTENDED_A",
 709                              "LATIN EXTENDED-A",
 710                              "LATINEXTENDED-A");
 711 
 712         /**
 713          * Constant for the "Latin Extended-B" Unicode character block.
 714          * @since 1.2
 715          */
 716         public static final UnicodeBlock LATIN_EXTENDED_B =
 717             new UnicodeBlock("LATIN_EXTENDED_B",
 718                              "LATIN EXTENDED-B",
 719                              "LATINEXTENDED-B");
 720 
 721         /**
 722          * Constant for the "IPA Extensions" Unicode character block.
 723          * @since 1.2
 724          */
 725         public static final UnicodeBlock IPA_EXTENSIONS =
 726             new UnicodeBlock("IPA_EXTENSIONS",
 727                              "IPA EXTENSIONS",
 728                              "IPAEXTENSIONS");
 729 
 730         /**
 731          * Constant for the "Spacing Modifier Letters" Unicode character block.
 732          * @since 1.2
 733          */
 734         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
 735             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
 736                              "SPACING MODIFIER LETTERS",
 737                              "SPACINGMODIFIERLETTERS");
 738 
 739         /**
 740          * Constant for the "Combining Diacritical Marks" Unicode character block.
 741          * @since 1.2
 742          */
 743         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
 744             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
 745                              "COMBINING DIACRITICAL MARKS",
 746                              "COMBININGDIACRITICALMARKS");
 747 
 748         /**
 749          * Constant for the "Greek and Coptic" Unicode character block.
 750          * <p>
 751          * This block was previously known as the "Greek" block.
 752          *
 753          * @since 1.2
 754          */
 755         public static final UnicodeBlock GREEK =
 756             new UnicodeBlock("GREEK",
 757                              "GREEK AND COPTIC",
 758                              "GREEKANDCOPTIC");
 759 
 760         /**
 761          * Constant for the "Cyrillic" Unicode character block.
 762          * @since 1.2
 763          */
 764         public static final UnicodeBlock CYRILLIC =
 765             new UnicodeBlock("CYRILLIC");
 766 
 767         /**
 768          * Constant for the "Armenian" Unicode character block.
 769          * @since 1.2
 770          */
 771         public static final UnicodeBlock ARMENIAN =
 772             new UnicodeBlock("ARMENIAN");
 773 
 774         /**
 775          * Constant for the "Hebrew" Unicode character block.
 776          * @since 1.2
 777          */
 778         public static final UnicodeBlock HEBREW =
 779             new UnicodeBlock("HEBREW");
 780 
 781         /**
 782          * Constant for the "Arabic" Unicode character block.
 783          * @since 1.2
 784          */
 785         public static final UnicodeBlock ARABIC =
 786             new UnicodeBlock("ARABIC");
 787 
 788         /**
 789          * Constant for the "Devanagari" Unicode character block.
 790          * @since 1.2
 791          */
 792         public static final UnicodeBlock DEVANAGARI =
 793             new UnicodeBlock("DEVANAGARI");
 794 
 795         /**
 796          * Constant for the "Bengali" Unicode character block.
 797          * @since 1.2
 798          */
 799         public static final UnicodeBlock BENGALI =
 800             new UnicodeBlock("BENGALI");
 801 
 802         /**
 803          * Constant for the "Gurmukhi" Unicode character block.
 804          * @since 1.2
 805          */
 806         public static final UnicodeBlock GURMUKHI =
 807             new UnicodeBlock("GURMUKHI");
 808 
 809         /**
 810          * Constant for the "Gujarati" Unicode character block.
 811          * @since 1.2
 812          */
 813         public static final UnicodeBlock GUJARATI =
 814             new UnicodeBlock("GUJARATI");
 815 
 816         /**
 817          * Constant for the "Oriya" Unicode character block.
 818          * @since 1.2
 819          */
 820         public static final UnicodeBlock ORIYA =
 821             new UnicodeBlock("ORIYA");
 822 
 823         /**
 824          * Constant for the "Tamil" Unicode character block.
 825          * @since 1.2
 826          */
 827         public static final UnicodeBlock TAMIL =
 828             new UnicodeBlock("TAMIL");
 829 
 830         /**
 831          * Constant for the "Telugu" Unicode character block.
 832          * @since 1.2
 833          */
 834         public static final UnicodeBlock TELUGU =
 835             new UnicodeBlock("TELUGU");
 836 
 837         /**
 838          * Constant for the "Kannada" Unicode character block.
 839          * @since 1.2
 840          */
 841         public static final UnicodeBlock KANNADA =
 842             new UnicodeBlock("KANNADA");
 843 
 844         /**
 845          * Constant for the "Malayalam" Unicode character block.
 846          * @since 1.2
 847          */
 848         public static final UnicodeBlock MALAYALAM =
 849             new UnicodeBlock("MALAYALAM");
 850 
 851         /**
 852          * Constant for the "Thai" Unicode character block.
 853          * @since 1.2
 854          */
 855         public static final UnicodeBlock THAI =
 856             new UnicodeBlock("THAI");
 857 
 858         /**
 859          * Constant for the "Lao" Unicode character block.
 860          * @since 1.2
 861          */
 862         public static final UnicodeBlock LAO =
 863             new UnicodeBlock("LAO");
 864 
 865         /**
 866          * Constant for the "Tibetan" Unicode character block.
 867          * @since 1.2
 868          */
 869         public static final UnicodeBlock TIBETAN =
 870             new UnicodeBlock("TIBETAN");
 871 
 872         /**
 873          * Constant for the "Georgian" Unicode character block.
 874          * @since 1.2
 875          */
 876         public static final UnicodeBlock GEORGIAN =
 877             new UnicodeBlock("GEORGIAN");
 878 
 879         /**
 880          * Constant for the "Hangul Jamo" Unicode character block.
 881          * @since 1.2
 882          */
 883         public static final UnicodeBlock HANGUL_JAMO =
 884             new UnicodeBlock("HANGUL_JAMO",
 885                              "HANGUL JAMO",
 886                              "HANGULJAMO");
 887 
 888         /**
 889          * Constant for the "Latin Extended Additional" Unicode character block.
 890          * @since 1.2
 891          */
 892         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
 893             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
 894                              "LATIN EXTENDED ADDITIONAL",
 895                              "LATINEXTENDEDADDITIONAL");
 896 
 897         /**
 898          * Constant for the "Greek Extended" Unicode character block.
 899          * @since 1.2
 900          */
 901         public static final UnicodeBlock GREEK_EXTENDED =
 902             new UnicodeBlock("GREEK_EXTENDED",
 903                              "GREEK EXTENDED",
 904                              "GREEKEXTENDED");
 905 
 906         /**
 907          * Constant for the "General Punctuation" Unicode character block.
 908          * @since 1.2
 909          */
 910         public static final UnicodeBlock GENERAL_PUNCTUATION =
 911             new UnicodeBlock("GENERAL_PUNCTUATION",
 912                              "GENERAL PUNCTUATION",
 913                              "GENERALPUNCTUATION");
 914 
 915         /**
 916          * Constant for the "Superscripts and Subscripts" Unicode character
 917          * block.
 918          * @since 1.2
 919          */
 920         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
 921             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
 922                              "SUPERSCRIPTS AND SUBSCRIPTS",
 923                              "SUPERSCRIPTSANDSUBSCRIPTS");
 924 
 925         /**
 926          * Constant for the "Currency Symbols" Unicode character block.
 927          * @since 1.2
 928          */
 929         public static final UnicodeBlock CURRENCY_SYMBOLS =
 930             new UnicodeBlock("CURRENCY_SYMBOLS",
 931                              "CURRENCY SYMBOLS",
 932                              "CURRENCYSYMBOLS");
 933 
 934         /**
 935          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
 936          * character block.
 937          * <p>
 938          * This block was previously known as "Combining Marks for Symbols".
 939          * @since 1.2
 940          */
 941         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
 942             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
 943                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
 944                              "COMBININGDIACRITICALMARKSFORSYMBOLS",
 945                              "COMBINING MARKS FOR SYMBOLS",
 946                              "COMBININGMARKSFORSYMBOLS");
 947 
 948         /**
 949          * Constant for the "Letterlike Symbols" Unicode character block.
 950          * @since 1.2
 951          */
 952         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
 953             new UnicodeBlock("LETTERLIKE_SYMBOLS",
 954                              "LETTERLIKE SYMBOLS",
 955                              "LETTERLIKESYMBOLS");
 956 
 957         /**
 958          * Constant for the "Number Forms" Unicode character block.
 959          * @since 1.2
 960          */
 961         public static final UnicodeBlock NUMBER_FORMS =
 962             new UnicodeBlock("NUMBER_FORMS",
 963                              "NUMBER FORMS",
 964                              "NUMBERFORMS");
 965 
 966         /**
 967          * Constant for the "Arrows" Unicode character block.
 968          * @since 1.2
 969          */
 970         public static final UnicodeBlock ARROWS =
 971             new UnicodeBlock("ARROWS");
 972 
 973         /**
 974          * Constant for the "Mathematical Operators" Unicode character block.
 975          * @since 1.2
 976          */
 977         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
 978             new UnicodeBlock("MATHEMATICAL_OPERATORS",
 979                              "MATHEMATICAL OPERATORS",
 980                              "MATHEMATICALOPERATORS");
 981 
 982         /**
 983          * Constant for the "Miscellaneous Technical" Unicode character block.
 984          * @since 1.2
 985          */
 986         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
 987             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
 988                              "MISCELLANEOUS TECHNICAL",
 989                              "MISCELLANEOUSTECHNICAL");
 990 
 991         /**
 992          * Constant for the "Control Pictures" Unicode character block.
 993          * @since 1.2
 994          */
 995         public static final UnicodeBlock CONTROL_PICTURES =
 996             new UnicodeBlock("CONTROL_PICTURES",
 997                              "CONTROL PICTURES",
 998                              "CONTROLPICTURES");
 999 
1000         /**
1001          * Constant for the "Optical Character Recognition" Unicode character
              * block.
1002          * @since 1.2
1003          */
1004         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1005             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1006                              "OPTICAL CHARACTER RECOGNITION",
1007                              "OPTICALCHARACTERRECOGNITION");
1008 
1009         /**
1010          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1011          * @since 1.2
1012          */
1013         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1014             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1015                              "ENCLOSED ALPHANUMERICS",
1016                              "ENCLOSEDALPHANUMERICS");
1017 
1018         /**
1019          * Constant for the "Box Drawing" Unicode character block.
1020          * @since 1.2
1021          */
1022         public static final UnicodeBlock BOX_DRAWING =
1023             new UnicodeBlock("BOX_DRAWING",
1024                              "BOX DRAWING",
1025                              "BOXDRAWING");
1026 
1027         /**
1028          * Constant for the "Block Elements" Unicode character block.
1029          * @since 1.2
1030          */
1031         public static final UnicodeBlock BLOCK_ELEMENTS =
1032             new UnicodeBlock("BLOCK_ELEMENTS",
1033                              "BLOCK ELEMENTS",
1034                              "BLOCKELEMENTS");
1035 
1036         /**
1037          * Constant for the "Geometric Shapes" Unicode character block.
1038          * @since 1.2
1039          */
1040         public static final UnicodeBlock GEOMETRIC_SHAPES =
1041             new UnicodeBlock("GEOMETRIC_SHAPES",
1042                              "GEOMETRIC SHAPES",
1043                              "GEOMETRICSHAPES");
1044 
1045         /**
1046          * Constant for the "Miscellaneous Symbols" Unicode character block.
1047          * @since 1.2
1048          */
1049         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1050             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1051                              "MISCELLANEOUS SYMBOLS",
1052                              "MISCELLANEOUSSYMBOLS");
1053 
1054         /**
1055          * Constant for the "Dingbats" Unicode character block.
1056          * @since 1.2
1057          */
1058         public static final UnicodeBlock DINGBATS =
1059             new UnicodeBlock("DINGBATS");
1060 
1061         /**
1062          * Constant for the "CJK Symbols and Punctuation" Unicode character
              * block.
1063          * @since 1.2
1064          */
1065         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1066             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1067                              "CJK SYMBOLS AND PUNCTUATION",
1068                              "CJKSYMBOLSANDPUNCTUATION");
1069 
1070         /**
1071          * Constant for the "Hiragana" Unicode character block.
1072          * @since 1.2
1073          */
1074         public static final UnicodeBlock HIRAGANA =
1075             new UnicodeBlock("HIRAGANA");
1076 
1077         /**
1078          * Constant for the "Katakana" Unicode character block.
1079          * @since 1.2
1080          */
1081         public static final UnicodeBlock KATAKANA =
1082             new UnicodeBlock("KATAKANA");
1083 
1084         /**
1085          * Constant for the "Bopomofo" Unicode character block.
1086          * @since 1.2
1087          */
1088         public static final UnicodeBlock BOPOMOFO =
1089             new UnicodeBlock("BOPOMOFO");
1090 
1091         /**
1092          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1093          * @since 1.2
1094          */
1095         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1096             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1097                              "HANGUL COMPATIBILITY JAMO",
1098                              "HANGULCOMPATIBILITYJAMO");
1099 
1100         /**
1101          * Constant for the "Kanbun" Unicode character block.
1102          * @since 1.2
1103          */
1104         public static final UnicodeBlock KANBUN =
1105             new UnicodeBlock("KANBUN");
1106 
1107         /**
1108          * Constant for the "Enclosed CJK Letters and Months" Unicode character
              * block.
1109          * @since 1.2
1110          */
1111         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1112             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1113                              "ENCLOSED CJK LETTERS AND MONTHS",
1114                              "ENCLOSEDCJKLETTERSANDMONTHS");
1115 
1116         /**
1117          * Constant for the "CJK Compatibility" Unicode character block.
1118          * @since 1.2
1119          */
1120         public static final UnicodeBlock CJK_COMPATIBILITY =
1121             new UnicodeBlock("CJK_COMPATIBILITY",
1122                              "CJK COMPATIBILITY",
1123                              "CJKCOMPATIBILITY");
1124 
1125         /**
1126          * Constant for the "CJK Unified Ideographs" Unicode character block.
1127          * @since 1.2
1128          */
1129         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1130             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1131                              "CJK UNIFIED IDEOGRAPHS",
1132                              "CJKUNIFIEDIDEOGRAPHS");
1133 
1134         /**
1135          * Constant for the "Hangul Syllables" Unicode character block.
1136          * @since 1.2
1137          */
1138         public static final UnicodeBlock HANGUL_SYLLABLES =
1139             new UnicodeBlock("HANGUL_SYLLABLES",
1140                              "HANGUL SYLLABLES",
1141                              "HANGULSYLLABLES");
1142 
1143         /**
1144          * Constant for the "Private Use Area" Unicode character block.
1145          * @since 1.2
1146          */
1147         public static final UnicodeBlock PRIVATE_USE_AREA =
1148             new UnicodeBlock("PRIVATE_USE_AREA",
1149                              "PRIVATE USE AREA",
1150                              "PRIVATEUSEAREA");
1151 
1152         /**
1153          * Constant for the "CJK Compatibility Ideographs" Unicode character
1154          * block.
1155          * @since 1.2
1156          */
1157         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1158             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1159                              "CJK COMPATIBILITY IDEOGRAPHS",
1160                              "CJKCOMPATIBILITYIDEOGRAPHS");
1161 
1162         /**
1163          * Constant for the "Alphabetic Presentation Forms" Unicode character
              * block.
1164          * @since 1.2
1165          */
1166         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1167             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1168                              "ALPHABETIC PRESENTATION FORMS",
1169                              "ALPHABETICPRESENTATIONFORMS");
1170 
1171         /**
1172          * Constant for the "Arabic Presentation Forms-A" Unicode character
1173          * block.
1174          * @since 1.2
1175          */
1176         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1177             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1178                              "ARABIC PRESENTATION FORMS-A",
1179                              "ARABICPRESENTATIONFORMS-A");
1180 
1181         /**
1182          * Constant for the "Combining Half Marks" Unicode character block.
1183          * @since 1.2
1184          */
1185         public static final UnicodeBlock COMBINING_HALF_MARKS =
1186             new UnicodeBlock("COMBINING_HALF_MARKS",
1187                              "COMBINING HALF MARKS",
1188                              "COMBININGHALFMARKS");
1189 
1190         /**
1191          * Constant for the "CJK Compatibility Forms" Unicode character block.
1192          * @since 1.2
1193          */
1194         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1195             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1196                              "CJK COMPATIBILITY FORMS",
1197                              "CJKCOMPATIBILITYFORMS");
1198 
1199         /**
1200          * Constant for the "Small Form Variants" Unicode character block.
1201          * @since 1.2
1202          */
1203         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1204             new UnicodeBlock("SMALL_FORM_VARIANTS",
1205                              "SMALL FORM VARIANTS",
1206                              "SMALLFORMVARIANTS");
1207 
1208         /**
1209          * Constant for the "Arabic Presentation Forms-B" Unicode character
              * block.
1210          * @since 1.2
1211          */
1212         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1213             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1214                              "ARABIC PRESENTATION FORMS-B",
1215                              "ARABICPRESENTATIONFORMS-B");
1216 
1217         /**
1218          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1219          * block.
1220          * @since 1.2
1221          */
1222         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1223             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1224                              "HALFWIDTH AND FULLWIDTH FORMS",
1225                              "HALFWIDTHANDFULLWIDTHFORMS");
1226 
1227         /**
1228          * Constant for the "Specials" Unicode character block.
1229          * @since 1.2
1230          */
1231         public static final UnicodeBlock SPECIALS =
1232             new UnicodeBlock("SPECIALS");
1233 
1234         /**
              * Constant for the legacy "Surrogates Area" block, superseded by the
              * three surrogate constants named below.
1235          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1236          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1237          *             {@link #LOW_SURROGATES}. These new constants match
1238          *             the block definitions of the Unicode Standard.
1239          *             The {@link #of(char)} and {@link #of(int)} methods
1240          *             return the new constants, not SURROGATES_AREA.
1241          */
1242         @Deprecated
1243         public static final UnicodeBlock SURROGATES_AREA =
1244             new UnicodeBlock("SURROGATES_AREA");
1245 
1246         /**
1247          * Constant for the "Syriac" Unicode character block.
1248          * @since 1.4
1249          */
1250         public static final UnicodeBlock SYRIAC =
1251             new UnicodeBlock("SYRIAC");
1252 
1253         /**
1254          * Constant for the "Thaana" Unicode character block.
1255          * @since 1.4
1256          */
1257         public static final UnicodeBlock THAANA =
1258             new UnicodeBlock("THAANA");
1259 
1260         /**
1261          * Constant for the "Sinhala" Unicode character block.
1262          * @since 1.4
1263          */
1264         public static final UnicodeBlock SINHALA =
1265             new UnicodeBlock("SINHALA");
1266 
1267         /**
1268          * Constant for the "Myanmar" Unicode character block.
1269          * @since 1.4
1270          */
1271         public static final UnicodeBlock MYANMAR =
1272             new UnicodeBlock("MYANMAR");
1273 
1274         /**
1275          * Constant for the "Ethiopic" Unicode character block.
1276          * @since 1.4
1277          */
1278         public static final UnicodeBlock ETHIOPIC =
1279             new UnicodeBlock("ETHIOPIC");
1280 
1281         /**
1282          * Constant for the "Cherokee" Unicode character block.
1283          * @since 1.4
1284          */
1285         public static final UnicodeBlock CHEROKEE =
1286             new UnicodeBlock("CHEROKEE");
1287 
1288         /**
1289          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode
              * character block.
1290          * @since 1.4
1291          */
1292         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1293             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1294                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1295                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
1296 
1297         /**
1298          * Constant for the "Ogham" Unicode character block.
1299          * @since 1.4
1300          */
1301         public static final UnicodeBlock OGHAM =
1302             new UnicodeBlock("OGHAM");
1303 
1304         /**
1305          * Constant for the "Runic" Unicode character block.
1306          * @since 1.4
1307          */
1308         public static final UnicodeBlock RUNIC =
1309             new UnicodeBlock("RUNIC");
1310 
1311         /**
1312          * Constant for the "Khmer" Unicode character block.
1313          * @since 1.4
1314          */
1315         public static final UnicodeBlock KHMER =
1316             new UnicodeBlock("KHMER");
1317 
1318         /**
1319          * Constant for the "Mongolian" Unicode character block.
1320          * @since 1.4
1321          */
1322         public static final UnicodeBlock MONGOLIAN =
1323             new UnicodeBlock("MONGOLIAN");
1324 
1325         /**
1326          * Constant for the "Braille Patterns" Unicode character block.
1327          * @since 1.4
1328          */
1329         public static final UnicodeBlock BRAILLE_PATTERNS =
1330             new UnicodeBlock("BRAILLE_PATTERNS",
1331                              "BRAILLE PATTERNS",
1332                              "BRAILLEPATTERNS");
1333 
1334         /**
1335          * Constant for the "CJK Radicals Supplement" Unicode character block.
1336          * @since 1.4
1337          */
1338         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1339             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1340                              "CJK RADICALS SUPPLEMENT",
1341                              "CJKRADICALSSUPPLEMENT");
1342 
1343         /**
1344          * Constant for the "Kangxi Radicals" Unicode character block.
1345          * @since 1.4
1346          */
1347         public static final UnicodeBlock KANGXI_RADICALS =
1348             new UnicodeBlock("KANGXI_RADICALS",
1349                              "KANGXI RADICALS",
1350                              "KANGXIRADICALS");
1351 
1352         /**
1353          * Constant for the "Ideographic Description Characters" Unicode
              * character block.
1354          * @since 1.4
1355          */
1356         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1357             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1358                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1359                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1360 
1361         /**
1362          * Constant for the "Bopomofo Extended" Unicode character block.
1363          * @since 1.4
1364          */
1365         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1366             new UnicodeBlock("BOPOMOFO_EXTENDED",
1367                              "BOPOMOFO EXTENDED",
1368                              "BOPOMOFOEXTENDED");
1369 
1370         /**
1371          * Constant for the "CJK Unified Ideographs Extension A" Unicode
              * character block.
1372          * @since 1.4
1373          */
1374         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1375             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1376                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1377                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1378 
1379         /**
1380          * Constant for the "Yi Syllables" Unicode character block.
1381          * @since 1.4
1382          */
1383         public static final UnicodeBlock YI_SYLLABLES =
1384             new UnicodeBlock("YI_SYLLABLES",
1385                              "YI SYLLABLES",
1386                              "YISYLLABLES");
1387 
1388         /**
1389          * Constant for the "Yi Radicals" Unicode character block.
1390          * @since 1.4
1391          */
1392         public static final UnicodeBlock YI_RADICALS =
1393             new UnicodeBlock("YI_RADICALS",
1394                              "YI RADICALS",
1395                              "YIRADICALS");
1396 
1397         /**
1398          * Constant for the "Cyrillic Supplementary" Unicode character block.
              * <p>
              * This block is also given the name "Cyrillic Supplement".
1399          * @since 1.5
1400          */
1401         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1402             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1403                              "CYRILLIC SUPPLEMENTARY",
1404                              "CYRILLICSUPPLEMENTARY",
1405                              "CYRILLIC SUPPLEMENT",
1406                              "CYRILLICSUPPLEMENT");
1407 
1408         /**
1409          * Constant for the "Tagalog" Unicode character block.
1410          * @since 1.5
1411          */
1412         public static final UnicodeBlock TAGALOG =
1413             new UnicodeBlock("TAGALOG");
1414 
1415         /**
1416          * Constant for the "Hanunoo" Unicode character block.
1417          * @since 1.5
1418          */
1419         public static final UnicodeBlock HANUNOO =
1420             new UnicodeBlock("HANUNOO");
1421 
1422         /**
1423          * Constant for the "Buhid" Unicode character block.
1424          * @since 1.5
1425          */
1426         public static final UnicodeBlock BUHID =
1427             new UnicodeBlock("BUHID");
1428 
1429         /**
1430          * Constant for the "Tagbanwa" Unicode character block.
1431          * @since 1.5
1432          */
1433         public static final UnicodeBlock TAGBANWA =
1434             new UnicodeBlock("TAGBANWA");
1435 
1436         /**
1437          * Constant for the "Limbu" Unicode character block.
1438          * @since 1.5
1439          */
1440         public static final UnicodeBlock LIMBU =
1441             new UnicodeBlock("LIMBU");
1442 
1443         /**
1444          * Constant for the "Tai Le" Unicode character block.
1445          * @since 1.5
1446          */
1447         public static final UnicodeBlock TAI_LE =
1448             new UnicodeBlock("TAI_LE",
1449                              "TAI LE",
1450                              "TAILE");
1451 
1452         /**
1453          * Constant for the "Khmer Symbols" Unicode character block.
1454          * @since 1.5
1455          */
1456         public static final UnicodeBlock KHMER_SYMBOLS =
1457             new UnicodeBlock("KHMER_SYMBOLS",
1458                              "KHMER SYMBOLS",
1459                              "KHMERSYMBOLS");
1460 
1461         /**
1462          * Constant for the "Phonetic Extensions" Unicode character block.
1463          * @since 1.5
1464          */
1465         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1466             new UnicodeBlock("PHONETIC_EXTENSIONS",
1467                              "PHONETIC EXTENSIONS",
1468                              "PHONETICEXTENSIONS");
1469 
1470         /**
1471          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode
              * character block.
1472          * @since 1.5
1473          */
1474         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1475             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1476                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1477                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1478 
1479         /**
1480          * Constant for the "Supplemental Arrows-A" Unicode character block.
1481          * @since 1.5
1482          */
1483         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1484             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1485                              "SUPPLEMENTAL ARROWS-A",
1486                              "SUPPLEMENTALARROWS-A");
1487 
1488         /**
1489          * Constant for the "Supplemental Arrows-B" Unicode character block.
1490          * @since 1.5
1491          */
1492         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1493             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1494                              "SUPPLEMENTAL ARROWS-B",
1495                              "SUPPLEMENTALARROWS-B");
1496 
1497         /**
1498          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1499          * character block.
1500          * @since 1.5
1501          */
1502         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1503             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1504                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1505                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1506 
1507         /**
1508          * Constant for the "Supplemental Mathematical Operators" Unicode
1509          * character block.
1510          * @since 1.5
1511          */
1512         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1513             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1514                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1515                              "SUPPLEMENTALMATHEMATICALOPERATORS");
1516 
1517         /**
1518          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1519          * block.
1520          * @since 1.5
1521          */
1522         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1523             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1524                              "MISCELLANEOUS SYMBOLS AND ARROWS",
1525                              "MISCELLANEOUSSYMBOLSANDARROWS");
1526 
1527         /**
1528          * Constant for the "Katakana Phonetic Extensions" Unicode character
1529          * block.
1530          * @since 1.5
1531          */
1532         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1533             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1534                              "KATAKANA PHONETIC EXTENSIONS",
1535                              "KATAKANAPHONETICEXTENSIONS");
1536 
1537         /**
1538          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1539          * @since 1.5
1540          */
1541         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1542             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1543                              "YIJING HEXAGRAM SYMBOLS",
1544                              "YIJINGHEXAGRAMSYMBOLS");
1545 
1546         /**
1547          * Constant for the "Variation Selectors" Unicode character block.
1548          * @since 1.5
1549          */
1550         public static final UnicodeBlock VARIATION_SELECTORS =
1551             new UnicodeBlock("VARIATION_SELECTORS",
1552                              "VARIATION SELECTORS",
1553                              "VARIATIONSELECTORS");
1554 
1555         /**
1556          * Constant for the "Linear B Syllabary" Unicode character block.
1557          * @since 1.5
1558          */
1559         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1560             new UnicodeBlock("LINEAR_B_SYLLABARY",
1561                              "LINEAR B SYLLABARY",
1562                              "LINEARBSYLLABARY");
1563 
1564         /**
1565          * Constant for the "Linear B Ideograms" Unicode character block.
1566          * @since 1.5
1567          */
1568         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1569             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1570                              "LINEAR B IDEOGRAMS",
1571                              "LINEARBIDEOGRAMS");
1572 
1573         /**
1574          * Constant for the "Aegean Numbers" Unicode character block.
1575          * @since 1.5
1576          */
1577         public static final UnicodeBlock AEGEAN_NUMBERS =
1578             new UnicodeBlock("AEGEAN_NUMBERS",
1579                              "AEGEAN NUMBERS",
1580                              "AEGEANNUMBERS");
1581 
1582         /**
1583          * Constant for the "Old Italic" Unicode character block.
1584          * @since 1.5
1585          */
1586         public static final UnicodeBlock OLD_ITALIC =
1587             new UnicodeBlock("OLD_ITALIC",
1588                              "OLD ITALIC",
1589                              "OLDITALIC");
1590 
1591         /**
1592          * Constant for the "Gothic" Unicode character block.
1593          * @since 1.5
1594          */
1595         public static final UnicodeBlock GOTHIC =
1596             new UnicodeBlock("GOTHIC");
1597 
1598         /**
1599          * Constant for the "Ugaritic" Unicode character block.
1600          * @since 1.5
1601          */
1602         public static final UnicodeBlock UGARITIC =
1603             new UnicodeBlock("UGARITIC");
1604 
1605         /**
1606          * Constant for the "Deseret" Unicode character block.
1607          * @since 1.5
1608          */
1609         public static final UnicodeBlock DESERET =
1610             new UnicodeBlock("DESERET");
1611 
1612         /**
1613          * Constant for the "Shavian" Unicode character block.
1614          * @since 1.5
1615          */
1616         public static final UnicodeBlock SHAVIAN =
1617             new UnicodeBlock("SHAVIAN");
1618 
1619         /**
1620          * Constant for the "Osmanya" Unicode character block.
1621          * @since 1.5
1622          */
1623         public static final UnicodeBlock OSMANYA =
1624             new UnicodeBlock("OSMANYA");
1625 
1626         /**
1627          * Constant for the "Cypriot Syllabary" Unicode character block.
1628          * @since 1.5
1629          */
1630         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1631             new UnicodeBlock("CYPRIOT_SYLLABARY",
1632                              "CYPRIOT SYLLABARY",
1633                              "CYPRIOTSYLLABARY");
1634 
1635         /**
1636          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1637          * @since 1.5
1638          */
1639         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1640             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1641                              "BYZANTINE MUSICAL SYMBOLS",
1642                              "BYZANTINEMUSICALSYMBOLS");
1643 
1644         /**
1645          * Constant for the "Musical Symbols" Unicode character block.
1646          * @since 1.5
1647          */
1648         public static final UnicodeBlock MUSICAL_SYMBOLS =
1649             new UnicodeBlock("MUSICAL_SYMBOLS",
1650                              "MUSICAL SYMBOLS",
1651                              "MUSICALSYMBOLS");
1652 
1653         /**
1654          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1655          * @since 1.5
1656          */
1657         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1658             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1659                              "TAI XUAN JING SYMBOLS",
1660                              "TAIXUANJINGSYMBOLS");
1661 
1662         /**
1663          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1664          * character block.
1665          * @since 1.5
1666          */
1667         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1668             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1669                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1670                              "MATHEMATICALALPHANUMERICSYMBOLS");
1671 
1672         /**
1673          * Constant for the "CJK Unified Ideographs Extension B" Unicode
1674          * character block.
1675          * @since 1.5
1676          */
1677         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1678             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1679                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1680                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1681 
1682         /**
1683          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1684          * @since 1.5
1685          */
1686         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1687             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1688                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1689                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1690 
1691         /**
1692          * Constant for the "Tags" Unicode character block.
1693          * @since 1.5
1694          */
1695         public static final UnicodeBlock TAGS =
1696             new UnicodeBlock("TAGS");
1697 
1698         /**
1699          * Constant for the "Variation Selectors Supplement" Unicode character
1700          * block.
1701          * @since 1.5
1702          */
1703         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1704             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1705                              "VARIATION SELECTORS SUPPLEMENT",
1706                              "VARIATIONSELECTORSSUPPLEMENT");
1707 
1708         /**
1709          * Constant for the "Supplementary Private Use Area-A" Unicode character
1710          * block.
1711          * @since 1.5
1712          */
1713         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1714             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1715                              "SUPPLEMENTARY PRIVATE USE AREA-A",
1716                              "SUPPLEMENTARYPRIVATEUSEAREA-A"); // hyphen kept
1717 
1718         /**
1719          * Constant for the "Supplementary Private Use Area-B" Unicode character
1720          * block.
1721          * @since 1.5
1722          */
1723         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1724             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1725                              "SUPPLEMENTARY PRIVATE USE AREA-B",
1726                              "SUPPLEMENTARYPRIVATEUSEAREA-B"); // hyphen kept
1727 
1728         /**
1729          * Constant for the "High Surrogates" Unicode character block.
1730          * This block represents code point values in the high surrogate
1731          * range: U+D800 through U+DB7F.
1732          *
1733          * @since 1.5
1734          */
1735         public static final UnicodeBlock HIGH_SURROGATES =
1736             new UnicodeBlock("HIGH_SURROGATES",
1737                              "HIGH SURROGATES",
1738                              "HIGHSURROGATES");
1739 
1740         /**
1741          * Constant for the "High Private Use Surrogates" Unicode character
1742          * block.
1743          * This block represents code point values in the private use high
1744          * surrogate range: U+DB80 through U+DBFF.
1745          *
1746          * @since 1.5
1747          */
1748         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1749             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1750                              "HIGH PRIVATE USE SURROGATES",
1751                              "HIGHPRIVATEUSESURROGATES");
1752 
1753         /**
1754          * Constant for the "Low Surrogates" Unicode character block.
1755          * This block represents code point values in the low surrogate
1756          * range: U+DC00 through U+DFFF.
1757          *
1758          * @since 1.5
1759          */
1760         public static final UnicodeBlock LOW_SURROGATES =
1761             new UnicodeBlock("LOW_SURROGATES",
1762                              "LOW SURROGATES",
1763                              "LOWSURROGATES");
1764 
1765         /**
1766          * Constant for the "Arabic Supplement" Unicode character block.
1767          * @since 1.7
1768          */
1769         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1770             new UnicodeBlock("ARABIC_SUPPLEMENT",
1771                              "ARABIC SUPPLEMENT",
1772                              "ARABICSUPPLEMENT");
1773 
1774         /**
1775          * Constant for the "NKo" Unicode character block.
1776          * @since 1.7
1777          */
1778         public static final UnicodeBlock NKO =
1779             new UnicodeBlock("NKO");
1780 
1781         /**
1782          * Constant for the "Samaritan" Unicode character block.
1783          * @since 1.7
1784          */
1785         public static final UnicodeBlock SAMARITAN =
1786             new UnicodeBlock("SAMARITAN");
1787 
1788         /**
1789          * Constant for the "Mandaic" Unicode character block.
1790          * @since 1.7
1791          */
1792         public static final UnicodeBlock MANDAIC =
1793             new UnicodeBlock("MANDAIC");
1794 
1795         /**
1796          * Constant for the "Ethiopic Supplement" Unicode character block.
1797          * @since 1.7
1798          */
1799         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1800             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1801                              "ETHIOPIC SUPPLEMENT",
1802                              "ETHIOPICSUPPLEMENT");
1803 
1804         /**
1805          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1806          * Unicode character block.
1807          * @since 1.7
1808          */
1809         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1810             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1811                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1812                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1813 
1814         /**
1815          * Constant for the "New Tai Lue" Unicode character block.
1816          * @since 1.7
1817          */
1818         public static final UnicodeBlock NEW_TAI_LUE =
1819             new UnicodeBlock("NEW_TAI_LUE",
1820                              "NEW TAI LUE",
1821                              "NEWTAILUE");
1822 
1823         /**
1824          * Constant for the "Buginese" Unicode character block.
1825          * @since 1.7
1826          */
1827         public static final UnicodeBlock BUGINESE =
1828             new UnicodeBlock("BUGINESE");
1829 
1830         /**
1831          * Constant for the "Tai Tham" Unicode character block.
1832          * @since 1.7
1833          */
1834         public static final UnicodeBlock TAI_THAM =
1835             new UnicodeBlock("TAI_THAM",
1836                              "TAI THAM",
1837                              "TAITHAM");
1838 
1839         /**
1840          * Constant for the "Balinese" Unicode character block.
1841          * @since 1.7
1842          */
1843         public static final UnicodeBlock BALINESE =
1844             new UnicodeBlock("BALINESE");
1845 
1846         /**
1847          * Constant for the "Sundanese" Unicode character block.
1848          * @since 1.7
1849          */
1850         public static final UnicodeBlock SUNDANESE =
1851             new UnicodeBlock("SUNDANESE");
1852 
1853         /**
1854          * Constant for the "Batak" Unicode character block.
1855          * @since 1.7
1856          */
1857         public static final UnicodeBlock BATAK =
1858             new UnicodeBlock("BATAK");
1859 
1860         /**
1861          * Constant for the "Lepcha" Unicode character block.
1862          * @since 1.7
1863          */
1864         public static final UnicodeBlock LEPCHA =
1865             new UnicodeBlock("LEPCHA");
1866 
1867         /**
1868          * Constant for the "Ol Chiki" Unicode character block.
1869          * @since 1.7
1870          */
1871         public static final UnicodeBlock OL_CHIKI =
1872             new UnicodeBlock("OL_CHIKI",
1873                              "OL CHIKI",
1874                              "OLCHIKI");
1875 
1876         /**
1877          * Constant for the "Vedic Extensions" Unicode character block.
1878          * @since 1.7
1879          */
1880         public static final UnicodeBlock VEDIC_EXTENSIONS =
1881             new UnicodeBlock("VEDIC_EXTENSIONS",
1882                              "VEDIC EXTENSIONS",
1883                              "VEDICEXTENSIONS");
1884 
1885         /**
1886          * Constant for the "Phonetic Extensions Supplement" Unicode character
1887          * block.
1888          * @since 1.7
1889          */
1890         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1891             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1892                              "PHONETIC EXTENSIONS SUPPLEMENT",
1893                              "PHONETICEXTENSIONSSUPPLEMENT");
1894 
1895         /**
1896          * Constant for the "Combining Diacritical Marks Supplement" Unicode
1897          * character block.
1898          * @since 1.7
1899          */
1900         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1901             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1902                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1903                              "COMBININGDIACRITICALMARKSSUPPLEMENT");
1904 
1905         /**
1906          * Constant for the "Glagolitic" Unicode character block.
1907          * @since 1.7
1908          */
1909         public static final UnicodeBlock GLAGOLITIC =
1910             new UnicodeBlock("GLAGOLITIC");
1911 
1912         /**
1913          * Constant for the "Latin Extended-C" Unicode character block.
1914          * @since 1.7
1915          */
1916         public static final UnicodeBlock LATIN_EXTENDED_C =
1917             new UnicodeBlock("LATIN_EXTENDED_C",
1918                              "LATIN EXTENDED-C",
1919                              "LATINEXTENDED-C"); // hyphen kept
1920 
1921         /**
1922          * Constant for the "Coptic" Unicode character block.
1923          * @since 1.7
1924          */
1925         public static final UnicodeBlock COPTIC =
1926             new UnicodeBlock("COPTIC");
1927 
1928         /**
1929          * Constant for the "Georgian Supplement" Unicode character block.
1930          * @since 1.7
1931          */
1932         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1933             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1934                              "GEORGIAN SUPPLEMENT",
1935                              "GEORGIANSUPPLEMENT");
1936 
1937         /**
1938          * Constant for the "Tifinagh" Unicode character block.
1939          * @since 1.7
1940          */
1941         public static final UnicodeBlock TIFINAGH =
1942             new UnicodeBlock("TIFINAGH");
1943 
1944         /**
1945          * Constant for the "Ethiopic Extended" Unicode character block.
1946          * @since 1.7
1947          */
1948         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1949             new UnicodeBlock("ETHIOPIC_EXTENDED",
1950                              "ETHIOPIC EXTENDED",
1951                              "ETHIOPICEXTENDED");
1952 
1953         /**
1954          * Constant for the "Cyrillic Extended-A" Unicode character block.
1955          * @since 1.7
1956          */
1957         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1958             new UnicodeBlock("CYRILLIC_EXTENDED_A",
1959                              "CYRILLIC EXTENDED-A",
1960                              "CYRILLICEXTENDED-A"); // hyphen kept
1961 
1962         /**
1963          * Constant for the "Supplemental Punctuation" Unicode character block.
1964          * @since 1.7
1965          */
1966         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1967             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1968                              "SUPPLEMENTAL PUNCTUATION",
1969                              "SUPPLEMENTALPUNCTUATION");
1970 
1971         /**
1972          * Constant for the "CJK Strokes" Unicode character block.
1973          * @since 1.7
1974          */
1975         public static final UnicodeBlock CJK_STROKES =
1976             new UnicodeBlock("CJK_STROKES",
1977                              "CJK STROKES",
1978                              "CJKSTROKES");
1979 
1980         /**
1981          * Constant for the "Lisu" Unicode character block.
1982          * @since 1.7
1983          */
1984         public static final UnicodeBlock LISU =
1985             new UnicodeBlock("LISU");
1986 
1987         /**
1988          * Constant for the "Vai" Unicode character block.
1989          * @since 1.7
1990          */
1991         public static final UnicodeBlock VAI =
1992             new UnicodeBlock("VAI");
1993 
1994         /**
1995          * Constant for the "Cyrillic Extended-B" Unicode character block.
1996          * @since 1.7
1997          */
1998         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
1999             new UnicodeBlock("CYRILLIC_EXTENDED_B",
2000                              "CYRILLIC EXTENDED-B",
2001                              "CYRILLICEXTENDED-B"); // hyphen kept
2002 
2003         /**
2004          * Constant for the "Bamum" Unicode character block.
2005          * @since 1.7
2006          */
2007         public static final UnicodeBlock BAMUM =
2008             new UnicodeBlock("BAMUM");
2009 
2010         /**
2011          * Constant for the "Modifier Tone Letters" Unicode character block.
2012          * @since 1.7
2013          */
2014         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2015             new UnicodeBlock("MODIFIER_TONE_LETTERS",
2016                              "MODIFIER TONE LETTERS",
2017                              "MODIFIERTONELETTERS");
2018 
2019         /**
2020          * Constant for the "Latin Extended-D" Unicode character block.
2021          * @since 1.7
2022          */
2023         public static final UnicodeBlock LATIN_EXTENDED_D =
2024             new UnicodeBlock("LATIN_EXTENDED_D",
2025                              "LATIN EXTENDED-D",
2026                              "LATINEXTENDED-D"); // hyphen kept
2027 
2028         /**
2029          * Constant for the "Syloti Nagri" Unicode character block.
2030          * @since 1.7
2031          */
2032         public static final UnicodeBlock SYLOTI_NAGRI =
2033             new UnicodeBlock("SYLOTI_NAGRI",
2034                              "SYLOTI NAGRI",
2035                              "SYLOTINAGRI");
2036 
2037         /**
2038          * Constant for the "Common Indic Number Forms" Unicode character block.
2039          * @since 1.7
2040          */
2041         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2042             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2043                              "COMMON INDIC NUMBER FORMS",
2044                              "COMMONINDICNUMBERFORMS");
2045 
2046         /**
2047          * Constant for the "Phags-pa" Unicode character block.
2048          * @since 1.7
2049          */
2050         public static final UnicodeBlock PHAGS_PA =
2051             new UnicodeBlock("PHAGS_PA",
2052                              "PHAGS-PA"); // no spaces: space-free form would be the same
2053 
2054         /**
2055          * Constant for the "Saurashtra" Unicode character block.
2056          * @since 1.7
2057          */
2058         public static final UnicodeBlock SAURASHTRA =
2059             new UnicodeBlock("SAURASHTRA");
2060 
2061         /**
2062          * Constant for the "Devanagari Extended" Unicode character block.
2063          * @since 1.7
2064          */
2065         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2066             new UnicodeBlock("DEVANAGARI_EXTENDED",
2067                              "DEVANAGARI EXTENDED",
2068                              "DEVANAGARIEXTENDED");
2069 
2070         /**
2071          * Constant for the "Kayah Li" Unicode character block.
2072          * @since 1.7
2073          */
2074         public static final UnicodeBlock KAYAH_LI =
2075             new UnicodeBlock("KAYAH_LI",
2076                              "KAYAH LI",
2077                              "KAYAHLI");
2078 
2079         /**
2080          * Constant for the "Rejang" Unicode character block.
2081          * @since 1.7
2082          */
2083         public static final UnicodeBlock REJANG =
2084             new UnicodeBlock("REJANG");
2085 
2086         /**
2087          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2088          * @since 1.7
2089          */
2090         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2091             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2092                              "HANGUL JAMO EXTENDED-A",
2093                              "HANGULJAMOEXTENDED-A"); // hyphen kept
2094 
2095         /**
2096          * Constant for the "Javanese" Unicode character block.
2097          * @since 1.7
2098          */
2099         public static final UnicodeBlock JAVANESE =
2100             new UnicodeBlock("JAVANESE");
2101 
2102         /**
2103          * Constant for the "Cham" Unicode character block.
2104          * @since 1.7
2105          */
2106         public static final UnicodeBlock CHAM =
2107             new UnicodeBlock("CHAM");
2108 
2109         /**
2110          * Constant for the "Myanmar Extended-A" Unicode character block.
2111          * @since 1.7
2112          */
2113         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2114             new UnicodeBlock("MYANMAR_EXTENDED_A",
2115                              "MYANMAR EXTENDED-A",
2116                              "MYANMAREXTENDED-A"); // hyphen kept
2117 
2118         /**
2119          * Constant for the "Tai Viet" Unicode character block.
2120          * @since 1.7
2121          */
2122         public static final UnicodeBlock TAI_VIET =
2123             new UnicodeBlock("TAI_VIET",
2124                              "TAI VIET",
2125                              "TAIVIET");
2126 
2127         /**
2128          * Constant for the "Ethiopic Extended-A" Unicode character block.
2129          * @since 1.7
2130          */
2131         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2132             new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2133                              "ETHIOPIC EXTENDED-A",
2134                              "ETHIOPICEXTENDED-A"); // hyphen kept
2135 
2136         /**
2137          * Constant for the "Meetei Mayek" Unicode character block.
2138          * @since 1.7
2139          */
2140         public static final UnicodeBlock MEETEI_MAYEK =
2141             new UnicodeBlock("MEETEI_MAYEK",
2142                              "MEETEI MAYEK",
2143                              "MEETEIMAYEK");
2144 
2145         /**
2146          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2147          * @since 1.7
2148          */
2149         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2150             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2151                              "HANGUL JAMO EXTENDED-B",
2152                              "HANGULJAMOEXTENDED-B"); // hyphen kept
2153 
2154         /**
2155          * Constant for the "Vertical Forms" Unicode character block.
2156          * @since 1.7
2157          */
2158         public static final UnicodeBlock VERTICAL_FORMS =
2159             new UnicodeBlock("VERTICAL_FORMS",
2160                              "VERTICAL FORMS",
2161                              "VERTICALFORMS");
2162 
2163         /**
2164          * Constant for the "Ancient Greek Numbers" Unicode character block.
2165          * @since 1.7
2166          */
2167         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2168             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2169                              "ANCIENT GREEK NUMBERS",
2170                              "ANCIENTGREEKNUMBERS");
2171 
2172         /**
2173          * Constant for the "Ancient Symbols" Unicode character block.
2174          * @since 1.7
2175          */
2176         public static final UnicodeBlock ANCIENT_SYMBOLS =
2177             new UnicodeBlock("ANCIENT_SYMBOLS",
2178                              "ANCIENT SYMBOLS",
2179                              "ANCIENTSYMBOLS");
2180 
2181         /**
2182          * Constant for the "Phaistos Disc" Unicode character block.
2183          * @since 1.7
2184          */
2185         public static final UnicodeBlock PHAISTOS_DISC =
2186             new UnicodeBlock("PHAISTOS_DISC",
2187                              "PHAISTOS DISC",
2188                              "PHAISTOSDISC");
2189 
2190         /**
2191          * Constant for the "Lycian" Unicode character block.
2192          * @since 1.7
2193          */
2194         public static final UnicodeBlock LYCIAN =
2195             new UnicodeBlock("LYCIAN");
2196 
2197         /**
2198          * Constant for the "Carian" Unicode character block.
2199          * @since 1.7
2200          */
2201         public static final UnicodeBlock CARIAN =
2202             new UnicodeBlock("CARIAN");
2203 
2204         /**
2205          * Constant for the "Old Persian" Unicode character block.
2206          * @since 1.7
2207          */
2208         public static final UnicodeBlock OLD_PERSIAN =
2209             new UnicodeBlock("OLD_PERSIAN",
2210                              "OLD PERSIAN",
2211                              "OLDPERSIAN");
2212 
2213         /**
2214          * Constant for the "Imperial Aramaic" Unicode character block.
2215          * @since 1.7
2216          */
2217         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2218             new UnicodeBlock("IMPERIAL_ARAMAIC",
2219                              "IMPERIAL ARAMAIC",
2220                              "IMPERIALARAMAIC");
2221 
2222         /**
2223          * Constant for the "Phoenician" Unicode character block.
2224          * @since 1.7
2225          */
2226         public static final UnicodeBlock PHOENICIAN =
2227             new UnicodeBlock("PHOENICIAN");
2228 
2229         /**
2230          * Constant for the "Lydian" Unicode character block.
2231          * @since 1.7
2232          */
2233         public static final UnicodeBlock LYDIAN =
2234             new UnicodeBlock("LYDIAN");
2235 
2236         /**
2237          * Constant for the "Kharoshthi" Unicode character block.
2238          * @since 1.7
2239          */
2240         public static final UnicodeBlock KHAROSHTHI =
2241             new UnicodeBlock("KHAROSHTHI");
2242 
2243         /**
2244          * Constant for the "Old South Arabian" Unicode character block.
2245          * @since 1.7
2246          */
2247         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2248             new UnicodeBlock("OLD_SOUTH_ARABIAN",
2249                              "OLD SOUTH ARABIAN",
2250                              "OLDSOUTHARABIAN");
2251 
2252         /**
2253          * Constant for the "Avestan" Unicode character block.
2254          * @since 1.7
2255          */
2256         public static final UnicodeBlock AVESTAN =
2257             new UnicodeBlock("AVESTAN");
2258 
2259         /**
2260          * Constant for the "Inscriptional Parthian" Unicode character block.
2261          * @since 1.7
2262          */
2263         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2264             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2265                              "INSCRIPTIONAL PARTHIAN",
2266                              "INSCRIPTIONALPARTHIAN");
2267 
2268         /**
2269          * Constant for the "Inscriptional Pahlavi" Unicode character block.
2270          * @since 1.7
2271          */
2272         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2273             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2274                              "INSCRIPTIONAL PAHLAVI",
2275                              "INSCRIPTIONALPAHLAVI");
2276 
2277         /**
2278          * Constant for the "Old Turkic" Unicode character block.
2279          * @since 1.7
2280          */
2281         public static final UnicodeBlock OLD_TURKIC =
2282             new UnicodeBlock("OLD_TURKIC",
2283                              "OLD TURKIC",
2284                              "OLDTURKIC");
2285 
2286         /**
2287          * Constant for the "Rumi Numeral Symbols" Unicode character block.
2288          * @since 1.7
2289          */
2290         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2291             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2292                              "RUMI NUMERAL SYMBOLS",
2293                              "RUMINUMERALSYMBOLS");
2294 
2295         /**
2296          * Constant for the "Brahmi" Unicode character block.
2297          * @since 1.7
2298          */
2299         public static final UnicodeBlock BRAHMI =
2300             new UnicodeBlock("BRAHMI");
2301 
2302         /**
2303          * Constant for the "Kaithi" Unicode character block.
2304          * @since 1.7
2305          */
2306         public static final UnicodeBlock KAITHI =
2307             new UnicodeBlock("KAITHI");
2308 
2309         /**
2310          * Constant for the "Cuneiform" Unicode character block.
2311          * @since 1.7
2312          */
2313         public static final UnicodeBlock CUNEIFORM =
2314             new UnicodeBlock("CUNEIFORM");
2315 
2316         /**
2317          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2318          * character block.
2319          * @since 1.7
2320          */
2321         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2322             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2323                              "CUNEIFORM NUMBERS AND PUNCTUATION",
2324                              "CUNEIFORMNUMBERSANDPUNCTUATION");
2325 
2326         /**
2327          * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2328          * @since 1.7
2329          */
2330         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2331             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2332                              "EGYPTIAN HIEROGLYPHS",
2333                              "EGYPTIANHIEROGLYPHS");
2334 
2335         /**
2336          * Constant for the "Bamum Supplement" Unicode character block.
2337          * @since 1.7
2338          */
2339         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2340             new UnicodeBlock("BAMUM_SUPPLEMENT",
2341                              "BAMUM SUPPLEMENT",
2342                              "BAMUMSUPPLEMENT");
2343 
2344         /**
2345          * Constant for the "Kana Supplement" Unicode character block.
2346          * @since 1.7
2347          */
2348         public static final UnicodeBlock KANA_SUPPLEMENT =
2349             new UnicodeBlock("KANA_SUPPLEMENT",
2350                              "KANA SUPPLEMENT",
2351                              "KANASUPPLEMENT");
2352 
2353         /**
2354          * Constant for the "Ancient Greek Musical Notation" Unicode character
2355          * block.
2356          * @since 1.7
2357          */
2358         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2359             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2360                              "ANCIENT GREEK MUSICAL NOTATION",
2361                              "ANCIENTGREEKMUSICALNOTATION");
2362 
2363         /**
2364          * Constant for the "Counting Rod Numerals" Unicode character block.
2365          * @since 1.7
2366          */
2367         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2368             new UnicodeBlock("COUNTING_ROD_NUMERALS",
2369                              "COUNTING ROD NUMERALS",
2370                              "COUNTINGRODNUMERALS");
2371 
2372         /**
2373          * Constant for the "Mahjong Tiles" Unicode character block.
2374          * @since 1.7
2375          */
2376         public static final UnicodeBlock MAHJONG_TILES =
2377             new UnicodeBlock("MAHJONG_TILES",
2378                              "MAHJONG TILES",
2379                              "MAHJONGTILES");
2380 
2381         /**
2382          * Constant for the "Domino Tiles" Unicode character block.
2383          * @since 1.7
2384          */
2385         public static final UnicodeBlock DOMINO_TILES =
2386             new UnicodeBlock("DOMINO_TILES",
2387                              "DOMINO TILES",
2388                              "DOMINOTILES");
2389 
2390         /**
2391          * Constant for the "Playing Cards" Unicode character block.
2392          * @since 1.7
2393          */
2394         public static final UnicodeBlock PLAYING_CARDS =
2395             new UnicodeBlock("PLAYING_CARDS",
2396                              "PLAYING CARDS",
2397                              "PLAYINGCARDS");
2398 
2399         /**
2400          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2401          * block.
2402          * @since 1.7
2403          */
2404         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2405             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2406                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2407                              "ENCLOSEDALPHANUMERICSUPPLEMENT");
2408 
2409         /**
2410          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2411          * block.
2412          * @since 1.7
2413          */
2414         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2415             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2416                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2417                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2418 
2419         /**
2420          * Constant for the "Miscellaneous Symbols and Pictographs" Unicode
2421          * character block.
2422          * @since 1.7
2423          */
2424         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2425             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2426                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2427                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2428 
2429         /**
2430          * Constant for the "Emoticons" Unicode character block.
2431          * @since 1.7
2432          */
2433         public static final UnicodeBlock EMOTICONS =
2434             new UnicodeBlock("EMOTICONS");
2435 
2436         /**
2437          * Constant for the "Transport and Map Symbols" Unicode character block.
2438          * @since 1.7
2439          */
2440         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2441             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2442                              "TRANSPORT AND MAP SYMBOLS",
2443                              "TRANSPORTANDMAPSYMBOLS");
2444 
2445         /**
2446          * Constant for the "Alchemical Symbols" Unicode character block.
2447          * @since 1.7
2448          */
2449         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2450             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2451                              "ALCHEMICAL SYMBOLS",
2452                              "ALCHEMICALSYMBOLS");
2453 
2454         /**
2455          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2456          * character block.
2457          * @since 1.7
2458          */
2459         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2460             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2461                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2462                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2463 
2464         /**
2465          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2466          * character block.
2467          * @since 1.7
2468          */
2469         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2470             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2471                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2472                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2473 
2474         /**
2475          * Constant for the "Arabic Extended-A" Unicode character block.
2476          * @since 1.8
2477          */
2478         public static final UnicodeBlock ARABIC_EXTENDED_A =
2479             new UnicodeBlock("ARABIC_EXTENDED_A",
2480                              "ARABIC EXTENDED-A",
2481                              "ARABICEXTENDED-A");
2482 
2483         /**
2484          * Constant for the "Sundanese Supplement" Unicode character block.
2485          * @since 1.8
2486          */
2487         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2488             new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2489                              "SUNDANESE SUPPLEMENT",
2490                              "SUNDANESESUPPLEMENT");
2491 
2492         /**
2493          * Constant for the "Meetei Mayek Extensions" Unicode character block.
2494          * @since 1.8
2495          */
2496         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2497             new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2498                              "MEETEI MAYEK EXTENSIONS",
2499                              "MEETEIMAYEKEXTENSIONS");
2500 
2501         /**
2502          * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2503          * @since 1.8
2504          */
2505         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2506             new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2507                              "MEROITIC HIEROGLYPHS",
2508                              "MEROITICHIEROGLYPHS");
2509 
2510         /**
2511          * Constant for the "Meroitic Cursive" Unicode character block.
2512          * @since 1.8
2513          */
2514         public static final UnicodeBlock MEROITIC_CURSIVE =
2515             new UnicodeBlock("MEROITIC_CURSIVE",
2516                              "MEROITIC CURSIVE",
2517                              "MEROITICCURSIVE");
2518 
2519         /**
2520          * Constant for the "Sora Sompeng" Unicode character block.
2521          * @since 1.8
2522          */
2523         public static final UnicodeBlock SORA_SOMPENG =
2524             new UnicodeBlock("SORA_SOMPENG",
2525                              "SORA SOMPENG",
2526                              "SORASOMPENG");
2527 
2528         /**
2529          * Constant for the "Chakma" Unicode character block.
2530          * @since 1.8
2531          */
2532         public static final UnicodeBlock CHAKMA =
2533             new UnicodeBlock("CHAKMA");
2534 
2535         /**
2536          * Constant for the "Sharada" Unicode character block.
2537          * @since 1.8
2538          */
2539         public static final UnicodeBlock SHARADA =
2540             new UnicodeBlock("SHARADA");
2541 
2542         /**
2543          * Constant for the "Takri" Unicode character block.
2544          * @since 1.8
2545          */
2546         public static final UnicodeBlock TAKRI =
2547             new UnicodeBlock("TAKRI");
2548 
2549         /**
2550          * Constant for the "Miao" Unicode character block.
2551          * @since 1.8
2552          */
2553         public static final UnicodeBlock MIAO =
2554             new UnicodeBlock("MIAO");
2555 
2556         /**
2557          * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2558          * character block.
2559          * @since 1.8
2560          */
2561         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2562             new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2563                              "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2564                              "ARABICMATHEMATICALALPHABETICSYMBOLS");
2565 
2566         private static final int blockStarts[] = {
2567             0x0000,   // 0000..007F; Basic Latin
2568             0x0080,   // 0080..00FF; Latin-1 Supplement
2569             0x0100,   // 0100..017F; Latin Extended-A
2570             0x0180,   // 0180..024F; Latin Extended-B
2571             0x0250,   // 0250..02AF; IPA Extensions
2572             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2573             0x0300,   // 0300..036F; Combining Diacritical Marks
2574             0x0370,   // 0370..03FF; Greek and Coptic
2575             0x0400,   // 0400..04FF; Cyrillic
2576             0x0500,   // 0500..052F; Cyrillic Supplement
2577             0x0530,   // 0530..058F; Armenian
2578             0x0590,   // 0590..05FF; Hebrew
2579             0x0600,   // 0600..06FF; Arabic
2580             0x0700,   // 0700..074F; Syriac
2581             0x0750,   // 0750..077F; Arabic Supplement
2582             0x0780,   // 0780..07BF; Thaana
2583             0x07C0,   // 07C0..07FF; NKo
2584             0x0800,   // 0800..083F; Samaritan
2585             0x0840,   // 0840..085F; Mandaic
2586             0x0860,   //             unassigned
2587             0x08A0,   // 08A0..08FF; Arabic Extended-A
2588             0x0900,   // 0900..097F; Devanagari
2589             0x0980,   // 0980..09FF; Bengali
2590             0x0A00,   // 0A00..0A7F; Gurmukhi
2591             0x0A80,   // 0A80..0AFF; Gujarati
2592             0x0B00,   // 0B00..0B7F; Oriya
2593             0x0B80,   // 0B80..0BFF; Tamil
2594             0x0C00,   // 0C00..0C7F; Telugu
2595             0x0C80,   // 0C80..0CFF; Kannada
2596             0x0D00,   // 0D00..0D7F; Malayalam
2597             0x0D80,   // 0D80..0DFF; Sinhala
2598             0x0E00,   // 0E00..0E7F; Thai
2599             0x0E80,   // 0E80..0EFF; Lao
2600             0x0F00,   // 0F00..0FFF; Tibetan
2601             0x1000,   // 1000..109F; Myanmar
2602             0x10A0,   // 10A0..10FF; Georgian
2603             0x1100,   // 1100..11FF; Hangul Jamo
2604             0x1200,   // 1200..137F; Ethiopic
2605             0x1380,   // 1380..139F; Ethiopic Supplement
2606             0x13A0,   // 13A0..13FF; Cherokee
2607             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2608             0x1680,   // 1680..169F; Ogham
2609             0x16A0,   // 16A0..16FF; Runic
2610             0x1700,   // 1700..171F; Tagalog
2611             0x1720,   // 1720..173F; Hanunoo
2612             0x1740,   // 1740..175F; Buhid
2613             0x1760,   // 1760..177F; Tagbanwa
2614             0x1780,   // 1780..17FF; Khmer
2615             0x1800,   // 1800..18AF; Mongolian
2616             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2617             0x1900,   // 1900..194F; Limbu
2618             0x1950,   // 1950..197F; Tai Le
2619             0x1980,   // 1980..19DF; New Tai Lue
2620             0x19E0,   // 19E0..19FF; Khmer Symbols
2621             0x1A00,   // 1A00..1A1F; Buginese
2622             0x1A20,   // 1A20..1AAF; Tai Tham
2623             0x1AB0,   //             unassigned
2624             0x1B00,   // 1B00..1B7F; Balinese
2625             0x1B80,   // 1B80..1BBF; Sundanese
2626             0x1BC0,   // 1BC0..1BFF; Batak
2627             0x1C00,   // 1C00..1C4F; Lepcha
2628             0x1C50,   // 1C50..1C7F; Ol Chiki
2629             0x1C80,   //             unassigned
2630             0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
2631             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
2632             0x1D00,   // 1D00..1D7F; Phonetic Extensions
2633             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2634             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2635             0x1E00,   // 1E00..1EFF; Latin Extended Additional
2636             0x1F00,   // 1F00..1FFF; Greek Extended
2637             0x2000,   // 2000..206F; General Punctuation
2638             0x2070,   // 2070..209F; Superscripts and Subscripts
2639             0x20A0,   // 20A0..20CF; Currency Symbols
2640             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2641             0x2100,   // 2100..214F; Letterlike Symbols
2642             0x2150,   // 2150..218F; Number Forms
2643             0x2190,   // 2190..21FF; Arrows
2644             0x2200,   // 2200..22FF; Mathematical Operators
2645             0x2300,   // 2300..23FF; Miscellaneous Technical
2646             0x2400,   // 2400..243F; Control Pictures
2647             0x2440,   // 2440..245F; Optical Character Recognition
2648             0x2460,   // 2460..24FF; Enclosed Alphanumerics
2649             0x2500,   // 2500..257F; Box Drawing
2650             0x2580,   // 2580..259F; Block Elements
2651             0x25A0,   // 25A0..25FF; Geometric Shapes
2652             0x2600,   // 2600..26FF; Miscellaneous Symbols
2653             0x2700,   // 2700..27BF; Dingbats
2654             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2655             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2656             0x2800,   // 2800..28FF; Braille Patterns
2657             0x2900,   // 2900..297F; Supplemental Arrows-B
2658             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2659             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2660             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2661             0x2C00,   // 2C00..2C5F; Glagolitic
2662             0x2C60,   // 2C60..2C7F; Latin Extended-C
2663             0x2C80,   // 2C80..2CFF; Coptic
2664             0x2D00,   // 2D00..2D2F; Georgian Supplement
2665             0x2D30,   // 2D30..2D7F; Tifinagh
2666             0x2D80,   // 2D80..2DDF; Ethiopic Extended
2667             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2668             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2669             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2670             0x2F00,   // 2F00..2FDF; Kangxi Radicals
2671             0x2FE0,   //             unassigned
2672             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2673             0x3000,   // 3000..303F; CJK Symbols and Punctuation
2674             0x3040,   // 3040..309F; Hiragana
2675             0x30A0,   // 30A0..30FF; Katakana
2676             0x3100,   // 3100..312F; Bopomofo
2677             0x3130,   // 3130..318F; Hangul Compatibility Jamo
2678             0x3190,   // 3190..319F; Kanbun
2679             0x31A0,   // 31A0..31BF; Bopomofo Extended
2680             0x31C0,   // 31C0..31EF; CJK Strokes
2681             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2682             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2683             0x3300,   // 3300..33FF; CJK Compatibility
2684             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2685             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2686             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2687             0xA000,   // A000..A48F; Yi Syllables
2688             0xA490,   // A490..A4CF; Yi Radicals
2689             0xA4D0,   // A4D0..A4FF; Lisu
2690             0xA500,   // A500..A63F; Vai
2691             0xA640,   // A640..A69F; Cyrillic Extended-B
2692             0xA6A0,   // A6A0..A6FF; Bamum
2693             0xA700,   // A700..A71F; Modifier Tone Letters
2694             0xA720,   // A720..A7FF; Latin Extended-D
2695             0xA800,   // A800..A82F; Syloti Nagri
2696             0xA830,   // A830..A83F; Common Indic Number Forms
2697             0xA840,   // A840..A87F; Phags-pa
2698             0xA880,   // A880..A8DF; Saurashtra
2699             0xA8E0,   // A8E0..A8FF; Devanagari Extended
2700             0xA900,   // A900..A92F; Kayah Li
2701             0xA930,   // A930..A95F; Rejang
2702             0xA960,   // A960..A97F; Hangul Jamo Extended-A
2703             0xA980,   // A980..A9DF; Javanese
2704             0xA9E0,   //             unassigned
2705             0xAA00,   // AA00..AA5F; Cham
2706             0xAA60,   // AA60..AA7F; Myanmar Extended-A
2707             0xAA80,   // AA80..AADF; Tai Viet
2708             0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
2709             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
2710             0xAB30,   //             unassigned
2711             0xABC0,   // ABC0..ABFF; Meetei Mayek
2712             0xAC00,   // AC00..D7AF; Hangul Syllables
2713             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
2714             0xD800,   // D800..DB7F; High Surrogates
2715             0xDB80,   // DB80..DBFF; High Private Use Surrogates
2716             0xDC00,   // DC00..DFFF; Low Surrogates
2717             0xE000,   // E000..F8FF; Private Use Area
2718             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2719             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2720             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2721             0xFE00,   // FE00..FE0F; Variation Selectors
2722             0xFE10,   // FE10..FE1F; Vertical Forms
2723             0xFE20,   // FE20..FE2F; Combining Half Marks
2724             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2725             0xFE50,   // FE50..FE6F; Small Form Variants
2726             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2727             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2728             0xFFF0,   // FFF0..FFFF; Specials
2729             0x10000,  // 10000..1007F; Linear B Syllabary
2730             0x10080,  // 10080..100FF; Linear B Ideograms
2731             0x10100,  // 10100..1013F; Aegean Numbers
2732             0x10140,  // 10140..1018F; Ancient Greek Numbers
2733             0x10190,  // 10190..101CF; Ancient Symbols
2734             0x101D0,  // 101D0..101FF; Phaistos Disc
2735             0x10200,  //               unassigned
2736             0x10280,  // 10280..1029F; Lycian
2737             0x102A0,  // 102A0..102DF; Carian
2738             0x102E0,  //               unassigned
2739             0x10300,  // 10300..1032F; Old Italic
2740             0x10330,  // 10330..1034F; Gothic
2741             0x10350,  //               unassigned
2742             0x10380,  // 10380..1039F; Ugaritic
2743             0x103A0,  // 103A0..103DF; Old Persian
2744             0x103E0,  //               unassigned
2745             0x10400,  // 10400..1044F; Deseret
2746             0x10450,  // 10450..1047F; Shavian
2747             0x10480,  // 10480..104AF; Osmanya
2748             0x104B0,  //               unassigned
2749             0x10800,  // 10800..1083F; Cypriot Syllabary
2750             0x10840,  // 10840..1085F; Imperial Aramaic
2751             0x10860,  //               unassigned
2752             0x10900,  // 10900..1091F; Phoenician
2753             0x10920,  // 10920..1093F; Lydian
2754             0x10940,  //               unassigned
2755             0x10980,  // 10980..1099F; Meroitic Hieroglyphs
2756             0x109A0,  // 109A0..109FF; Meroitic Cursive
2757             0x10A00,  // 10A00..10A5F; Kharoshthi
2758             0x10A60,  // 10A60..10A7F; Old South Arabian
2759             0x10A80,  //               unassigned
2760             0x10B00,  // 10B00..10B3F; Avestan
2761             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
2762             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
2763             0x10B80,  //               unassigned
2764             0x10C00,  // 10C00..10C4F; Old Turkic
2765             0x10C50,  //               unassigned
2766             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
2767             0x10E80,  //               unassigned
2768             0x11000,  // 11000..1107F; Brahmi
2769             0x11080,  // 11080..110CF; Kaithi
2770             0x110D0,  // 110D0..110FF; Sora Sompeng
2771             0x11100,  // 11100..1114F; Chakma
2772             0x11150,  //               unassigned
2773             0x11180,  // 11180..111DF; Sharada
2774             0x111E0,  //               unassigned
2775             0x11680,  // 11680..116CF; Takri
2776             0x116D0,  //               unassigned
2777             0x12000,  // 12000..123FF; Cuneiform
2778             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2779             0x12480,  //               unassigned
2780             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
2781             0x13430,  //               unassigned
2782             0x16800,  // 16800..16A3F; Bamum Supplement
2783             0x16A40,  //               unassigned
2784             0x16F00,  // 16F00..16F9F; Miao
2785             0x16FA0,  //               unassigned
2786             0x1B000,  // 1B000..1B0FF; Kana Supplement
2787             0x1B100,  //               unassigned
2788             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2789             0x1D100,  // 1D100..1D1FF; Musical Symbols
2790             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2791             0x1D250,  //               unassigned
2792             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2793             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2794             0x1D380,  //               unassigned
2795             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2796             0x1D800,  //               unassigned
2797             0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
2798             0x1EF00,  //               unassigned
2799             0x1F000,  // 1F000..1F02F; Mahjong Tiles
2800             0x1F030,  // 1F030..1F09F; Domino Tiles
2801             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
2802             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2803             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
2804             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2805             0x1F600,  // 1F600..1F64F; Emoticons
2806             0x1F650,  //               unassigned
2807             0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
2808             0x1F700,  // 1F700..1F77F; Alchemical Symbols
2809             0x1F780,  //               unassigned
2810             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2811             0x2A6E0,  //               unassigned
2812             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
2813             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
2814             0x2B820,  //               unassigned
2815             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2816             0x2FA20,  //               unassigned
2817             0xE0000,  // E0000..E007F; Tags
2818             0xE0080,  //               unassigned
2819             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2820             0xE01F0,  //               unassigned
2821             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2822             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
2823         };
2824 
        // Block lookup table used by of(int), parallel to blockStarts:
        // blocks[i] is the UnicodeBlock for the range that begins at
        // blockStarts[i]. A null entry marks a range of code points that is
        // not assigned to any block, so of(int) returns null for it. Entries
        // must stay in the same order as blockStarts.
        private static final UnicodeBlock[] blocks = {
            BASIC_LATIN,
            LATIN_1_SUPPLEMENT,
            LATIN_EXTENDED_A,
            LATIN_EXTENDED_B,
            IPA_EXTENSIONS,
            SPACING_MODIFIER_LETTERS,
            COMBINING_DIACRITICAL_MARKS,
            GREEK,
            CYRILLIC,
            CYRILLIC_SUPPLEMENTARY,
            ARMENIAN,
            HEBREW,
            ARABIC,
            SYRIAC,
            ARABIC_SUPPLEMENT,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            null,
            ARABIC_EXTENDED_A,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            LAO,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            HANGUL_JAMO,
            ETHIOPIC,
            ETHIOPIC_SUPPLEMENT,
            CHEROKEE,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
            OGHAM,
            RUNIC,
            TAGALOG,
            HANUNOO,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER_SYMBOLS,
            BUGINESE,
            TAI_THAM,
            null,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            null,
            SUNDANESE_SUPPLEMENT,
            VEDIC_EXTENSIONS,
            PHONETIC_EXTENSIONS,
            PHONETIC_EXTENSIONS_SUPPLEMENT,
            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
            LATIN_EXTENDED_ADDITIONAL,
            GREEK_EXTENDED,
            GENERAL_PUNCTUATION,
            SUPERSCRIPTS_AND_SUBSCRIPTS,
            CURRENCY_SYMBOLS,
            COMBINING_MARKS_FOR_SYMBOLS,
            LETTERLIKE_SYMBOLS,
            NUMBER_FORMS,
            ARROWS,
            MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_TECHNICAL,
            CONTROL_PICTURES,
            OPTICAL_CHARACTER_RECOGNITION,
            ENCLOSED_ALPHANUMERICS,
            BOX_DRAWING,
            BLOCK_ELEMENTS,
            GEOMETRIC_SHAPES,
            MISCELLANEOUS_SYMBOLS,
            DINGBATS,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
            SUPPLEMENTAL_ARROWS_A,
            BRAILLE_PATTERNS,
            SUPPLEMENTAL_ARROWS_B,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
            GLAGOLITIC,
            LATIN_EXTENDED_C,
            COPTIC,
            GEORGIAN_SUPPLEMENT,
            TIFINAGH,
            ETHIOPIC_EXTENDED,
            CYRILLIC_EXTENDED_A,
            SUPPLEMENTAL_PUNCTUATION,
            CJK_RADICALS_SUPPLEMENT,
            KANGXI_RADICALS,
            null,
            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
            CJK_SYMBOLS_AND_PUNCTUATION,
            HIRAGANA,
            KATAKANA,
            BOPOMOFO,
            HANGUL_COMPATIBILITY_JAMO,
            KANBUN,
            BOPOMOFO_EXTENDED,
            CJK_STROKES,
            KATAKANA_PHONETIC_EXTENSIONS,
            ENCLOSED_CJK_LETTERS_AND_MONTHS,
            CJK_COMPATIBILITY,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
            YIJING_HEXAGRAM_SYMBOLS,
            CJK_UNIFIED_IDEOGRAPHS,
            YI_SYLLABLES,
            YI_RADICALS,
            LISU,
            VAI,
            CYRILLIC_EXTENDED_B,
            BAMUM,
            MODIFIER_TONE_LETTERS,
            LATIN_EXTENDED_D,
            SYLOTI_NAGRI,
            COMMON_INDIC_NUMBER_FORMS,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI_EXTENDED,
            KAYAH_LI,
            REJANG,
            HANGUL_JAMO_EXTENDED_A,
            JAVANESE,
            null,
            CHAM,
            MYANMAR_EXTENDED_A,
            TAI_VIET,
            MEETEI_MAYEK_EXTENSIONS,
            ETHIOPIC_EXTENDED_A,
            null,
            MEETEI_MAYEK,
            HANGUL_SYLLABLES,
            HANGUL_JAMO_EXTENDED_B,
            HIGH_SURROGATES,
            HIGH_PRIVATE_USE_SURROGATES,
            LOW_SURROGATES,
            PRIVATE_USE_AREA,
            CJK_COMPATIBILITY_IDEOGRAPHS,
            ALPHABETIC_PRESENTATION_FORMS,
            ARABIC_PRESENTATION_FORMS_A,
            VARIATION_SELECTORS,
            VERTICAL_FORMS,
            COMBINING_HALF_MARKS,
            CJK_COMPATIBILITY_FORMS,
            SMALL_FORM_VARIANTS,
            ARABIC_PRESENTATION_FORMS_B,
            HALFWIDTH_AND_FULLWIDTH_FORMS,
            SPECIALS,
            LINEAR_B_SYLLABARY,
            LINEAR_B_IDEOGRAMS,
            AEGEAN_NUMBERS,
            ANCIENT_GREEK_NUMBERS,
            ANCIENT_SYMBOLS,
            PHAISTOS_DISC,
            null,
            LYCIAN,
            CARIAN,
            null,
            OLD_ITALIC,
            GOTHIC,
            null,
            UGARITIC,
            OLD_PERSIAN,
            null,
            DESERET,
            SHAVIAN,
            OSMANYA,
            null,
            CYPRIOT_SYLLABARY,
            IMPERIAL_ARAMAIC,
            null,
            PHOENICIAN,
            LYDIAN,
            null,
            MEROITIC_HIEROGLYPHS,
            MEROITIC_CURSIVE,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            null,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            null,
            OLD_TURKIC,
            null,
            RUMI_NUMERAL_SYMBOLS,
            null,
            BRAHMI,
            KAITHI,
            SORA_SOMPENG,
            CHAKMA,
            null,
            SHARADA,
            null,
            TAKRI,
            null,
            CUNEIFORM,
            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
            null,
            EGYPTIAN_HIEROGLYPHS,
            null,
            BAMUM_SUPPLEMENT,
            null,
            MIAO,
            null,
            KANA_SUPPLEMENT,
            null,
            BYZANTINE_MUSICAL_SYMBOLS,
            MUSICAL_SYMBOLS,
            ANCIENT_GREEK_MUSICAL_NOTATION,
            null,
            TAI_XUAN_JING_SYMBOLS,
            COUNTING_ROD_NUMERALS,
            null,
            MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
            null,
            ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
            null,
            MAHJONG_TILES,
            DOMINO_TILES,
            PLAYING_CARDS,
            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
            MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
            EMOTICONS,
            null,
            TRANSPORT_AND_MAP_SYMBOLS,
            ALCHEMICAL_SYMBOLS,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
            null,
            CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
            null,
            TAGS,
            null,
            VARIATION_SELECTORS_SUPPLEMENT,
            null,
            SUPPLEMENTARY_PRIVATE_USE_AREA_A,
            SUPPLEMENTARY_PRIVATE_USE_AREA_B
        };
3083 
3084 
3085         /**
3086          * Returns the object representing the Unicode block containing the
3087          * given character, or {@code null} if the character is not a
3088          * member of a defined block.
3089          *
3090          * <p><b>Note:</b> This method cannot handle
3091          * <a href="Character.html#supplementary"> supplementary
3092          * characters</a>.  To support all Unicode characters, including
3093          * supplementary characters, use the {@link #of(int)} method.
3094          *
3095          * @param   c  The character in question
3096          * @return  The {@code UnicodeBlock} instance representing the
3097          *          Unicode block of which this character is a member, or
3098          *          {@code null} if the character is not a member of any
3099          *          Unicode block
3100          */
3101         public static UnicodeBlock of(char c) {
3102             return of((int)c);
3103         }
3104 
3105         /**
3106          * Returns the object representing the Unicode block
3107          * containing the given character (Unicode code point), or
3108          * {@code null} if the character is not a member of a
3109          * defined block.
3110          *
3111          * @param   codePoint the character (Unicode code point) in question.
3112          * @return  The {@code UnicodeBlock} instance representing the
3113          *          Unicode block of which this character is a member, or
3114          *          {@code null} if the character is not a member of any
3115          *          Unicode block
3116          * @exception IllegalArgumentException if the specified
3117          * {@code codePoint} is an invalid Unicode code point.
3118          * @see Character#isValidCodePoint(int)
3119          * @since   1.5
3120          */
3121         public static UnicodeBlock of(int codePoint) {
3122             if (!isValidCodePoint(codePoint)) {
3123                 throw new IllegalArgumentException();
3124             }
3125 
3126             int top, bottom, current;
3127             bottom = 0;
3128             top = blockStarts.length;
3129             current = top/2;
3130 
3131             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3132             while (top - bottom > 1) {
3133                 if (codePoint >= blockStarts[current]) {
3134                     bottom = current;
3135                 } else {
3136                     top = current;
3137                 }
3138                 current = (top + bottom) / 2;
3139             }
3140             return blocks[current];
3141         }
3142 
3143         /**
3144          * Returns the UnicodeBlock with the given name. Block
3145          * names are determined by The Unicode Standard. The file
3146          * Blocks-&lt;version&gt;.txt defines blocks for a particular
3147          * version of the standard. The {@link Character} class specifies
3148          * the version of the standard that it supports.
3149          * <p>
3150          * This method accepts block names in the following forms:
3151          * <ol>
3152          * <li> Canonical block names as defined by the Unicode Standard.
3153          * For example, the standard defines a "Basic Latin" block. Therefore, this
3154          * method accepts "Basic Latin" as a valid block name. The documentation of
3155          * each UnicodeBlock provides the canonical name.
3156          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3157          * is a valid block name for the "Basic Latin" block.
3158          * <li>The text representation of each constant UnicodeBlock identifier.
3159          * For example, this method will return the {@link #BASIC_LATIN} block if
3160          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3161          * hyphens in the canonical name with underscores.
3162          * </ol>
3163          * Finally, character case is ignored for all of the valid block name forms.
3164          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3165          * The en_US locale's case mapping rules are used to provide case-insensitive
3166          * string comparisons for block name validation.
3167          * <p>
3168          * If the Unicode Standard changes block names, both the previous and
3169          * current names will be accepted.
3170          *
3171          * @param blockName A {@code UnicodeBlock} name.
3172          * @return The {@code UnicodeBlock} instance identified
3173          *         by {@code blockName}
3174          * @throws IllegalArgumentException if {@code blockName} is an
3175          *         invalid name
3176          * @throws NullPointerException if {@code blockName} is null
3177          * @since 1.5
3178          */
3179         public static final UnicodeBlock forName(String blockName) {
3180             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3181             if (block == null) {
3182                 throw new IllegalArgumentException();
3183             }
3184             return block;
3185         }
3186     }
3187 
3188 
3189     /**
3190      * A family of character subsets representing the character scripts
3191      * defined in the <a href="http://www.unicode.org/reports/tr24/">
3192      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3193      * character is assigned to a single Unicode script, either a specific
3194      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3195      * one of the following three special values,
3196      * {@link Character.UnicodeScript#INHERITED Inherited},
3197      * {@link Character.UnicodeScript#COMMON Common} or
3198      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3199      *
3200      * @since 1.7
3201      */
3202     public static enum UnicodeScript {
3203         /**
3204          * Unicode script "Common".
3205          */
3206         COMMON,
3207 
3208         /**
3209          * Unicode script "Latin".
3210          */
3211         LATIN,
3212 
3213         /**
3214          * Unicode script "Greek".
3215          */
3216         GREEK,
3217 
3218         /**
3219          * Unicode script "Cyrillic".
3220          */
3221         CYRILLIC,
3222 
3223         /**
3224          * Unicode script "Armenian".
3225          */
3226         ARMENIAN,
3227 
3228         /**
3229          * Unicode script "Hebrew".
3230          */
3231         HEBREW,
3232 
3233         /**
3234          * Unicode script "Arabic".
3235          */
3236         ARABIC,
3237 
3238         /**
3239          * Unicode script "Syriac".
3240          */
3241         SYRIAC,
3242 
3243         /**
3244          * Unicode script "Thaana".
3245          */
3246         THAANA,
3247 
3248         /**
3249          * Unicode script "Devanagari".
3250          */
3251         DEVANAGARI,
3252 
3253         /**
3254          * Unicode script "Bengali".
3255          */
3256         BENGALI,
3257 
3258         /**
3259          * Unicode script "Gurmukhi".
3260          */
3261         GURMUKHI,
3262 
3263         /**
3264          * Unicode script "Gujarati".
3265          */
3266         GUJARATI,
3267 
3268         /**
3269          * Unicode script "Oriya".
3270          */
3271         ORIYA,
3272 
3273         /**
3274          * Unicode script "Tamil".
3275          */
3276         TAMIL,
3277 
3278         /**
3279          * Unicode script "Telugu".
3280          */
3281         TELUGU,
3282 
3283         /**
3284          * Unicode script "Kannada".
3285          */
3286         KANNADA,
3287 
3288         /**
3289          * Unicode script "Malayalam".
3290          */
3291         MALAYALAM,
3292 
3293         /**
3294          * Unicode script "Sinhala".
3295          */
3296         SINHALA,
3297 
3298         /**
3299          * Unicode script "Thai".
3300          */
3301         THAI,
3302 
3303         /**
3304          * Unicode script "Lao".
3305          */
3306         LAO,
3307 
3308         /**
3309          * Unicode script "Tibetan".
3310          */
3311         TIBETAN,
3312 
3313         /**
3314          * Unicode script "Myanmar".
3315          */
3316         MYANMAR,
3317 
3318         /**
3319          * Unicode script "Georgian".
3320          */
3321         GEORGIAN,
3322 
3323         /**
3324          * Unicode script "Hangul".
3325          */
3326         HANGUL,
3327 
3328         /**
3329          * Unicode script "Ethiopic".
3330          */
3331         ETHIOPIC,
3332 
3333         /**
3334          * Unicode script "Cherokee".
3335          */
3336         CHEROKEE,
3337 
3338         /**
3339          * Unicode script "Canadian_Aboriginal".
3340          */
3341         CANADIAN_ABORIGINAL,
3342 
3343         /**
3344          * Unicode script "Ogham".
3345          */
3346         OGHAM,
3347 
3348         /**
3349          * Unicode script "Runic".
3350          */
3351         RUNIC,
3352 
3353         /**
3354          * Unicode script "Khmer".
3355          */
3356         KHMER,
3357 
3358         /**
3359          * Unicode script "Mongolian".
3360          */
3361         MONGOLIAN,
3362 
3363         /**
3364          * Unicode script "Hiragana".
3365          */
3366         HIRAGANA,
3367 
3368         /**
3369          * Unicode script "Katakana".
3370          */
3371         KATAKANA,
3372 
3373         /**
3374          * Unicode script "Bopomofo".
3375          */
3376         BOPOMOFO,
3377 
3378         /**
3379          * Unicode script "Han".
3380          */
3381         HAN,
3382 
3383         /**
3384          * Unicode script "Yi".
3385          */
3386         YI,
3387 
3388         /**
3389          * Unicode script "Old_Italic".
3390          */
3391         OLD_ITALIC,
3392 
3393         /**
3394          * Unicode script "Gothic".
3395          */
3396         GOTHIC,
3397 
3398         /**
3399          * Unicode script "Deseret".
3400          */
3401         DESERET,
3402 
3403         /**
3404          * Unicode script "Inherited".
3405          */
3406         INHERITED,
3407 
3408         /**
3409          * Unicode script "Tagalog".
3410          */
3411         TAGALOG,
3412 
3413         /**
3414          * Unicode script "Hanunoo".
3415          */
3416         HANUNOO,
3417 
3418         /**
3419          * Unicode script "Buhid".
3420          */
3421         BUHID,
3422 
3423         /**
3424          * Unicode script "Tagbanwa".
3425          */
3426         TAGBANWA,
3427 
3428         /**
3429          * Unicode script "Limbu".
3430          */
3431         LIMBU,
3432 
3433         /**
3434          * Unicode script "Tai_Le".
3435          */
3436         TAI_LE,
3437 
3438         /**
3439          * Unicode script "Linear_B".
3440          */
3441         LINEAR_B,
3442 
3443         /**
3444          * Unicode script "Ugaritic".
3445          */
3446         UGARITIC,
3447 
3448         /**
3449          * Unicode script "Shavian".
3450          */
3451         SHAVIAN,
3452 
3453         /**
3454          * Unicode script "Osmanya".
3455          */
3456         OSMANYA,
3457 
3458         /**
3459          * Unicode script "Cypriot".
3460          */
3461         CYPRIOT,
3462 
3463         /**
3464          * Unicode script "Braille".
3465          */
3466         BRAILLE,
3467 
3468         /**
3469          * Unicode script "Buginese".
3470          */
3471         BUGINESE,
3472 
3473         /**
3474          * Unicode script "Coptic".
3475          */
3476         COPTIC,
3477 
3478         /**
3479          * Unicode script "New_Tai_Lue".
3480          */
3481         NEW_TAI_LUE,
3482 
3483         /**
3484          * Unicode script "Glagolitic".
3485          */
3486         GLAGOLITIC,
3487 
3488         /**
3489          * Unicode script "Tifinagh".
3490          */
3491         TIFINAGH,
3492 
3493         /**
3494          * Unicode script "Syloti_Nagri".
3495          */
3496         SYLOTI_NAGRI,
3497 
3498         /**
3499          * Unicode script "Old_Persian".
3500          */
3501         OLD_PERSIAN,
3502 
3503         /**
3504          * Unicode script "Kharoshthi".
3505          */
3506         KHAROSHTHI,
3507 
3508         /**
3509          * Unicode script "Balinese".
3510          */
3511         BALINESE,
3512 
3513         /**
3514          * Unicode script "Cuneiform".
3515          */
3516         CUNEIFORM,
3517 
3518         /**
3519          * Unicode script "Phoenician".
3520          */
3521         PHOENICIAN,
3522 
3523         /**
3524          * Unicode script "Phags_Pa".
3525          */
3526         PHAGS_PA,
3527 
3528         /**
3529          * Unicode script "Nko".
3530          */
3531         NKO,
3532 
3533         /**
3534          * Unicode script "Sundanese".
3535          */
3536         SUNDANESE,
3537 
3538         /**
3539          * Unicode script "Batak".
3540          */
3541         BATAK,
3542 
3543         /**
3544          * Unicode script "Lepcha".
3545          */
3546         LEPCHA,
3547 
3548         /**
3549          * Unicode script "Ol_Chiki".
3550          */
3551         OL_CHIKI,
3552 
3553         /**
3554          * Unicode script "Vai".
3555          */
3556         VAI,
3557 
3558         /**
3559          * Unicode script "Saurashtra".
3560          */
3561         SAURASHTRA,
3562 
3563         /**
3564          * Unicode script "Kayah_Li".
3565          */
3566         KAYAH_LI,
3567 
3568         /**
3569          * Unicode script "Rejang".
3570          */
3571         REJANG,
3572 
3573         /**
3574          * Unicode script "Lycian".
3575          */
3576         LYCIAN,
3577 
3578         /**
3579          * Unicode script "Carian".
3580          */
3581         CARIAN,
3582 
3583         /**
3584          * Unicode script "Lydian".
3585          */
3586         LYDIAN,
3587 
3588         /**
3589          * Unicode script "Cham".
3590          */
3591         CHAM,
3592 
3593         /**
3594          * Unicode script "Tai_Tham".
3595          */
3596         TAI_THAM,
3597 
3598         /**
3599          * Unicode script "Tai_Viet".
3600          */
3601         TAI_VIET,
3602 
3603         /**
3604          * Unicode script "Avestan".
3605          */
3606         AVESTAN,
3607 
3608         /**
3609          * Unicode script "Egyptian_Hieroglyphs".
3610          */
3611         EGYPTIAN_HIEROGLYPHS,
3612 
3613         /**
3614          * Unicode script "Samaritan".
3615          */
3616         SAMARITAN,
3617 
3618         /**
3619          * Unicode script "Mandaic".
3620          */
3621         MANDAIC,
3622 
3623         /**
3624          * Unicode script "Lisu".
3625          */
3626         LISU,
3627 
3628         /**
3629          * Unicode script "Bamum".
3630          */
3631         BAMUM,
3632 
3633         /**
3634          * Unicode script "Javanese".
3635          */
3636         JAVANESE,
3637 
3638         /**
3639          * Unicode script "Meetei_Mayek".
3640          */
3641         MEETEI_MAYEK,
3642 
3643         /**
3644          * Unicode script "Imperial_Aramaic".
3645          */
3646         IMPERIAL_ARAMAIC,
3647 
3648         /**
3649          * Unicode script "Old_South_Arabian".
3650          */
3651         OLD_SOUTH_ARABIAN,
3652 
3653         /**
3654          * Unicode script "Inscriptional_Parthian".
3655          */
3656         INSCRIPTIONAL_PARTHIAN,
3657 
3658         /**
3659          * Unicode script "Inscriptional_Pahlavi".
3660          */
3661         INSCRIPTIONAL_PAHLAVI,
3662 
3663         /**
3664          * Unicode script "Old_Turkic".
3665          */
3666         OLD_TURKIC,
3667 
3668         /**
3669          * Unicode script "Brahmi".
3670          */
3671         BRAHMI,
3672 
3673         /**
3674          * Unicode script "Kaithi".
3675          */
3676         KAITHI,
3677 
3678         /**
3679          * Unicode script "Meroitic_Hieroglyphs".
3680          */
3681         MEROITIC_HIEROGLYPHS,
3682 
3683         /**
3684          * Unicode script "Meroitic_Cursive".
3685          */
3686         MEROITIC_CURSIVE,
3687 
3688         /**
3689          * Unicode script "Sora_Sompeng".
3690          */
3691         SORA_SOMPENG,
3692 
3693         /**
3694          * Unicode script "Chakma".
3695          */
3696         CHAKMA,
3697 
3698         /**
3699          * Unicode script "Sharada".
3700          */
3701         SHARADA,
3702 
3703         /**
3704          * Unicode script "Takri".
3705          */
3706         TAKRI,
3707 
3708         /**
3709          * Unicode script "Miao".
3710          */
3711         MIAO,
3712 
3713         /**
3714          * Unicode script "Unknown".
3715          */
3716         UNKNOWN;
3717 
3718         private static final int[] scriptStarts = {
                 // Table of range starts, listed in ascending code point order.
                 // Each entry's trailing comment records the run it opens, in the
                 // form "first..last; SCRIPT". The scripts named in these comments
                 // follow the same order as the entries of the scripts array
                 // declared below, so the two tables are presumably indexed in
                 // parallel (the lookup code is not visible here -- TODO confirm).
                 // NOTE(review): a few comments stop short of the next entry
                 // (e.g. 0C82..0CF0 is followed by 0D02, and 11680..116CF by
                 // 12000); how code points in such gaps are classified depends
                 // on the lookup code -- verify before relying on the commented
                 // range ends.
3719             0x0000,   // 0000..0040; COMMON
3720             0x0041,   // 0041..005A; LATIN
3721             0x005B,   // 005B..0060; COMMON
3722             0x0061,   // 0061..007A; LATIN
3723             0x007B,   // 007B..00A9; COMMON
3724             0x00AA,   // 00AA..00AA; LATIN
3725             0x00AB,   // 00AB..00B9; COMMON
3726             0x00BA,   // 00BA..00BA; LATIN
3727             0x00BB,   // 00BB..00BF; COMMON
3728             0x00C0,   // 00C0..00D6; LATIN
3729             0x00D7,   // 00D7..00D7; COMMON
3730             0x00D8,   // 00D8..00F6; LATIN
3731             0x00F7,   // 00F7..00F7; COMMON
3732             0x00F8,   // 00F8..02B8; LATIN
3733             0x02B9,   // 02B9..02DF; COMMON
3734             0x02E0,   // 02E0..02E4; LATIN
3735             0x02E5,   // 02E5..02E9; COMMON
3736             0x02EA,   // 02EA..02EB; BOPOMOFO
3737             0x02EC,   // 02EC..02FF; COMMON
3738             0x0300,   // 0300..036F; INHERITED
3739             0x0370,   // 0370..0373; GREEK
3740             0x0374,   // 0374..0374; COMMON
3741             0x0375,   // 0375..037D; GREEK
3742             0x037E,   // 037E..0383; COMMON
3743             0x0384,   // 0384..0384; GREEK
3744             0x0385,   // 0385..0385; COMMON
3745             0x0386,   // 0386..0386; GREEK
3746             0x0387,   // 0387..0387; COMMON
3747             0x0388,   // 0388..03E1; GREEK
3748             0x03E2,   // 03E2..03EF; COPTIC
3749             0x03F0,   // 03F0..03FF; GREEK
3750             0x0400,   // 0400..0484; CYRILLIC
3751             0x0485,   // 0485..0486; INHERITED
3752             0x0487,   // 0487..0530; CYRILLIC
3753             0x0531,   // 0531..0588; ARMENIAN
3754             0x0589,   // 0589..0589; COMMON
3755             0x058A,   // 058A..0590; ARMENIAN
3756             0x0591,   // 0591..05FF; HEBREW
3757             0x0600,   // 0600..060B; ARABIC
3758             0x060C,   // 060C..060C; COMMON
3759             0x060D,   // 060D..061A; ARABIC
3760             0x061B,   // 061B..061D; COMMON
3761             0x061E,   // 061E..061E; ARABIC
3762             0x061F,   // 061F..061F; COMMON
3763             0x0620,   // 0620..063F; ARABIC
3764             0x0640,   // 0640..0640; COMMON
3765             0x0641,   // 0641..064A; ARABIC
3766             0x064B,   // 064B..0655; INHERITED
3767             0x0656,   // 0656..065F; ARABIC
3768             0x0660,   // 0660..0669; COMMON
3769             0x066A,   // 066A..066F; ARABIC
3770             0x0670,   // 0670..0670; INHERITED
3771             0x0671,   // 0671..06DC; ARABIC
3772             0x06DD,   // 06DD..06DD; COMMON
3773             0x06DE,   // 06DE..06FF; ARABIC
3774             0x0700,   // 0700..074F; SYRIAC
3775             0x0750,   // 0750..077F; ARABIC
3776             0x0780,   // 0780..07BF; THAANA
3777             0x07C0,   // 07C0..07FF; NKO
3778             0x0800,   // 0800..083F; SAMARITAN
3779             0x0840,   // 0840..089F; MANDAIC
3780             0x08A0,   // 08A0..08FF; ARABIC
3781             0x0900,   // 0900..0950; DEVANAGARI
3782             0x0951,   // 0951..0952; INHERITED
3783             0x0953,   // 0953..0963; DEVANAGARI
3784             0x0964,   // 0964..0965; COMMON
3785             0x0966,   // 0966..0980; DEVANAGARI
3786             0x0981,   // 0981..0A00; BENGALI
3787             0x0A01,   // 0A01..0A80; GURMUKHI
3788             0x0A81,   // 0A81..0B00; GUJARATI
3789             0x0B01,   // 0B01..0B81; ORIYA
3790             0x0B82,   // 0B82..0C00; TAMIL
3791             0x0C01,   // 0C01..0C81; TELUGU
3792             0x0C82,   // 0C82..0CF0; KANNADA
3793             0x0D02,   // 0D02..0D81; MALAYALAM
3794             0x0D82,   // 0D82..0E00; SINHALA
3795             0x0E01,   // 0E01..0E3E; THAI
3796             0x0E3F,   // 0E3F..0E3F; COMMON
3797             0x0E40,   // 0E40..0E80; THAI
3798             0x0E81,   // 0E81..0EFF; LAO
3799             0x0F00,   // 0F00..0FD4; TIBETAN
3800             0x0FD5,   // 0FD5..0FD8; COMMON
3801             0x0FD9,   // 0FD9..0FFF; TIBETAN
3802             0x1000,   // 1000..109F; MYANMAR
3803             0x10A0,   // 10A0..10FA; GEORGIAN
3804             0x10FB,   // 10FB..10FB; COMMON
3805             0x10FC,   // 10FC..10FF; GEORGIAN
3806             0x1100,   // 1100..11FF; HANGUL
3807             0x1200,   // 1200..139F; ETHIOPIC
3808             0x13A0,   // 13A0..13FF; CHEROKEE
3809             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
3810             0x1680,   // 1680..169F; OGHAM
3811             0x16A0,   // 16A0..16EA; RUNIC
3812             0x16EB,   // 16EB..16ED; COMMON
3813             0x16EE,   // 16EE..16FF; RUNIC
3814             0x1700,   // 1700..171F; TAGALOG
3815             0x1720,   // 1720..1734; HANUNOO
3816             0x1735,   // 1735..173F; COMMON
3817             0x1740,   // 1740..175F; BUHID
3818             0x1760,   // 1760..177F; TAGBANWA
3819             0x1780,   // 1780..17FF; KHMER
3820             0x1800,   // 1800..1801; MONGOLIAN
3821             0x1802,   // 1802..1803; COMMON
3822             0x1804,   // 1804..1804; MONGOLIAN
3823             0x1805,   // 1805..1805; COMMON
3824             0x1806,   // 1806..18AF; MONGOLIAN
3825             0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
3826             0x1900,   // 1900..194F; LIMBU
3827             0x1950,   // 1950..197F; TAI_LE
3828             0x1980,   // 1980..19DF; NEW_TAI_LUE
3829             0x19E0,   // 19E0..19FF; KHMER
3830             0x1A00,   // 1A00..1A1F; BUGINESE
3831             0x1A20,   // 1A20..1AFF; TAI_THAM
3832             0x1B00,   // 1B00..1B7F; BALINESE
3833             0x1B80,   // 1B80..1BBF; SUNDANESE
3834             0x1BC0,   // 1BC0..1BFF; BATAK
3835             0x1C00,   // 1C00..1C4F; LEPCHA
3836             0x1C50,   // 1C50..1CBF; OL_CHIKI
3837             0x1CC0,   // 1CC0..1CCF; SUNDANESE
3838             0x1CD0,   // 1CD0..1CD2; INHERITED
3839             0x1CD3,   // 1CD3..1CD3; COMMON
3840             0x1CD4,   // 1CD4..1CE0; INHERITED
3841             0x1CE1,   // 1CE1..1CE1; COMMON
3842             0x1CE2,   // 1CE2..1CE8; INHERITED
3843             0x1CE9,   // 1CE9..1CEC; COMMON
3844             0x1CED,   // 1CED..1CED; INHERITED
3845             0x1CEE,   // 1CEE..1CF3; COMMON
3846             0x1CF4,   // 1CF4..1CF4; INHERITED
3847             0x1CF5,   // 1CF5..1CFF; COMMON
3848             0x1D00,   // 1D00..1D25; LATIN
3849             0x1D26,   // 1D26..1D2A; GREEK
3850             0x1D2B,   // 1D2B..1D2B; CYRILLIC
3851             0x1D2C,   // 1D2C..1D5C; LATIN
3852             0x1D5D,   // 1D5D..1D61; GREEK
3853             0x1D62,   // 1D62..1D65; LATIN
3854             0x1D66,   // 1D66..1D6A; GREEK
3855             0x1D6B,   // 1D6B..1D77; LATIN
3856             0x1D78,   // 1D78..1D78; CYRILLIC
3857             0x1D79,   // 1D79..1DBE; LATIN
3858             0x1DBF,   // 1DBF..1DBF; GREEK
3859             0x1DC0,   // 1DC0..1DFF; INHERITED
3860             0x1E00,   // 1E00..1EFF; LATIN
3861             0x1F00,   // 1F00..1FFF; GREEK
3862             0x2000,   // 2000..200B; COMMON
3863             0x200C,   // 200C..200D; INHERITED
3864             0x200E,   // 200E..2070; COMMON
3865             0x2071,   // 2071..2073; LATIN
3866             0x2074,   // 2074..207E; COMMON
3867             0x207F,   // 207F..207F; LATIN
3868             0x2080,   // 2080..208F; COMMON
3869             0x2090,   // 2090..209F; LATIN
3870             0x20A0,   // 20A0..20CF; COMMON
3871             0x20D0,   // 20D0..20FF; INHERITED
3872             0x2100,   // 2100..2125; COMMON
3873             0x2126,   // 2126..2126; GREEK
3874             0x2127,   // 2127..2129; COMMON
3875             0x212A,   // 212A..212B; LATIN
3876             0x212C,   // 212C..2131; COMMON
3877             0x2132,   // 2132..2132; LATIN
3878             0x2133,   // 2133..214D; COMMON
3879             0x214E,   // 214E..214E; LATIN
3880             0x214F,   // 214F..215F; COMMON
3881             0x2160,   // 2160..2188; LATIN
3882             0x2189,   // 2189..27FF; COMMON
3883             0x2800,   // 2800..28FF; BRAILLE
3884             0x2900,   // 2900..2BFF; COMMON
3885             0x2C00,   // 2C00..2C5F; GLAGOLITIC
3886             0x2C60,   // 2C60..2C7F; LATIN
3887             0x2C80,   // 2C80..2CFF; COPTIC
3888             0x2D00,   // 2D00..2D2F; GEORGIAN
3889             0x2D30,   // 2D30..2D7F; TIFINAGH
3890             0x2D80,   // 2D80..2DDF; ETHIOPIC
3891             0x2DE0,   // 2DE0..2DFF; CYRILLIC
3892             0x2E00,   // 2E00..2E7F; COMMON
3893             0x2E80,   // 2E80..2FEF; HAN
3894             0x2FF0,   // 2FF0..3004; COMMON
3895             0x3005,   // 3005..3005; HAN
3896             0x3006,   // 3006..3006; COMMON
3897             0x3007,   // 3007..3007; HAN
3898             0x3008,   // 3008..3020; COMMON
3899             0x3021,   // 3021..3029; HAN
3900             0x302A,   // 302A..302D; INHERITED
3901             0x302E,   // 302E..302F; HANGUL
3902             0x3030,   // 3030..3037; COMMON
3903             0x3038,   // 3038..303B; HAN
3904             0x303C,   // 303C..3040; COMMON
3905             0x3041,   // 3041..3098; HIRAGANA
3906             0x3099,   // 3099..309A; INHERITED
3907             0x309B,   // 309B..309C; COMMON
3908             0x309D,   // 309D..309F; HIRAGANA
3909             0x30A0,   // 30A0..30A0; COMMON
3910             0x30A1,   // 30A1..30FA; KATAKANA
3911             0x30FB,   // 30FB..30FC; COMMON
3912             0x30FD,   // 30FD..3104; KATAKANA
3913             0x3105,   // 3105..3130; BOPOMOFO
3914             0x3131,   // 3131..318F; HANGUL
3915             0x3190,   // 3190..319F; COMMON
3916             0x31A0,   // 31A0..31BF; BOPOMOFO
3917             0x31C0,   // 31C0..31EF; COMMON
3918             0x31F0,   // 31F0..31FF; KATAKANA
3919             0x3200,   // 3200..321F; HANGUL
3920             0x3220,   // 3220..325F; COMMON
3921             0x3260,   // 3260..327E; HANGUL
3922             0x327F,   // 327F..32CF; COMMON
3923             0x32D0,   // 32D0..3357; KATAKANA
3924             0x3358,   // 3358..33FF; COMMON
3925             0x3400,   // 3400..4DBF; HAN
3926             0x4DC0,   // 4DC0..4DFF; COMMON
3927             0x4E00,   // 4E00..9FFF; HAN
3928             0xA000,   // A000..A4CF; YI
3929             0xA4D0,   // A4D0..A4FF; LISU
3930             0xA500,   // A500..A63F; VAI
3931             0xA640,   // A640..A69F; CYRILLIC
3932             0xA6A0,   // A6A0..A6FF; BAMUM
3933             0xA700,   // A700..A721; COMMON
3934             0xA722,   // A722..A787; LATIN
3935             0xA788,   // A788..A78A; COMMON
3936             0xA78B,   // A78B..A7FF; LATIN
3937             0xA800,   // A800..A82F; SYLOTI_NAGRI
3938             0xA830,   // A830..A83F; COMMON
3939             0xA840,   // A840..A87F; PHAGS_PA
3940             0xA880,   // A880..A8DF; SAURASHTRA
3941             0xA8E0,   // A8E0..A8FF; DEVANAGARI
3942             0xA900,   // A900..A92F; KAYAH_LI
3943             0xA930,   // A930..A95F; REJANG
3944             0xA960,   // A960..A97F; HANGUL
3945             0xA980,   // A980..A9FF; JAVANESE
3946             0xAA00,   // AA00..AA5F; CHAM
3947             0xAA60,   // AA60..AA7F; MYANMAR
3948             0xAA80,   // AA80..AADF; TAI_VIET
3949             0xAAE0,   // AAE0..AB00; MEETEI_MAYEK
3950             0xAB01,   // AB01..ABBF; ETHIOPIC
3951             0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
3952             0xAC00,   // AC00..D7FB; HANGUL
3953             0xD7FC,   // D7FC..F8FF; UNKNOWN
3954             0xF900,   // F900..FAFF; HAN
3955             0xFB00,   // FB00..FB12; LATIN
3956             0xFB13,   // FB13..FB1C; ARMENIAN
3957             0xFB1D,   // FB1D..FB4F; HEBREW
3958             0xFB50,   // FB50..FD3D; ARABIC
3959             0xFD3E,   // FD3E..FD4F; COMMON
3960             0xFD50,   // FD50..FDFC; ARABIC
3961             0xFDFD,   // FDFD..FDFF; COMMON
3962             0xFE00,   // FE00..FE0F; INHERITED
3963             0xFE10,   // FE10..FE1F; COMMON
3964             0xFE20,   // FE20..FE2F; INHERITED
3965             0xFE30,   // FE30..FE6F; COMMON
3966             0xFE70,   // FE70..FEFE; ARABIC
3967             0xFEFF,   // FEFF..FF20; COMMON
3968             0xFF21,   // FF21..FF3A; LATIN
3969             0xFF3B,   // FF3B..FF40; COMMON
3970             0xFF41,   // FF41..FF5A; LATIN
3971             0xFF5B,   // FF5B..FF65; COMMON
3972             0xFF66,   // FF66..FF6F; KATAKANA
3973             0xFF70,   // FF70..FF70; COMMON
3974             0xFF71,   // FF71..FF9D; KATAKANA
3975             0xFF9E,   // FF9E..FF9F; COMMON
3976             0xFFA0,   // FFA0..FFDF; HANGUL
3977             0xFFE0,   // FFE0..FFFF; COMMON
3978             0x10000,  // 10000..100FF; LINEAR_B
3979             0x10100,  // 10100..1013F; COMMON
3980             0x10140,  // 10140..1018F; GREEK
3981             0x10190,  // 10190..101FC; COMMON
3982             0x101FD,  // 101FD..1027F; INHERITED
3983             0x10280,  // 10280..1029F; LYCIAN
3984             0x102A0,  // 102A0..102FF; CARIAN
3985             0x10300,  // 10300..1032F; OLD_ITALIC
3986             0x10330,  // 10330..1037F; GOTHIC
3987             0x10380,  // 10380..1039F; UGARITIC
3988             0x103A0,  // 103A0..103FF; OLD_PERSIAN
3989             0x10400,  // 10400..1044F; DESERET
3990             0x10450,  // 10450..1047F; SHAVIAN
3991             0x10480,  // 10480..107FF; OSMANYA
3992             0x10800,  // 10800..1083F; CYPRIOT
3993             0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
3994             0x10900,  // 10900..1091F; PHOENICIAN
3995             0x10920,  // 10920..1097F; LYDIAN
3996             0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
3997             0x109A0,  // 109A0..109FF; MEROITIC_CURSIVE
3998             0x10A00,  // 10A00..10A5F; KHAROSHTHI
3999             0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
4000             0x10B00,  // 10B00..10B3F; AVESTAN
4001             0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
4002             0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
4003             0x10C00,  // 10C00..10E5F; OLD_TURKIC
4004             0x10E60,  // 10E60..10FFF; ARABIC
4005             0x11000,  // 11000..1107F; BRAHMI
4006             0x11080,  // 11080..110CF; KAITHI
4007             0x110D0,  // 110D0..110FF; SORA_SOMPENG
4008             0x11100,  // 11100..1117F; CHAKMA
4009             0x11180,  // 11180..1167F; SHARADA
4010             0x11680,  // 11680..116CF; TAKRI
4011             0x12000,  // 12000..12FFF; CUNEIFORM
4012             0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
4013             0x16800,  // 16800..16A38; BAMUM
4014             0x16F00,  // 16F00..16F9F; MIAO
4015             0x1B000,  // 1B000..1B000; KATAKANA
4016             0x1B001,  // 1B001..1CFFF; HIRAGANA
4017             0x1D000,  // 1D000..1D166; COMMON
4018             0x1D167,  // 1D167..1D169; INHERITED
4019             0x1D16A,  // 1D16A..1D17A; COMMON
4020             0x1D17B,  // 1D17B..1D182; INHERITED
4021             0x1D183,  // 1D183..1D184; COMMON
4022             0x1D185,  // 1D185..1D18B; INHERITED
4023             0x1D18C,  // 1D18C..1D1A9; COMMON
4024             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
4025             0x1D1AE,  // 1D1AE..1D1FF; COMMON
4026             0x1D200,  // 1D200..1D2FF; GREEK
4027             0x1D300,  // 1D300..1EDFF; COMMON
4028             0x1EE00,  // 1EE00..1EFFF; ARABIC
4029             0x1F000,  // 1F000..1F1FF; COMMON
4030             0x1F200,  // 1F200..1F200; HIRAGANA
4031             0x1F201,  // 1F201..1FFFF; COMMON
4032             0x20000,  // 20000..E0000; HAN
4033             0xE0001,  // E0001..E00FF; COMMON
4034             0xE0100,  // E0100..E01EF; INHERITED
4035             0xE01F0   // E01F0..10FFFF; UNKNOWN
4036 
4037         };
4038 
        // Script assigned to each range of code points. This table is read
        // in parallel with scriptStarts: of(int) binary-searches scriptStarts
        // for the range containing a code point and uses the resulting index
        // here, so the two arrays must stay aligned entry for entry.
        private static final UnicodeScript[] scripts = {
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            BOPOMOFO,
            COMMON,
            INHERITED,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COPTIC,
            GREEK,
            CYRILLIC,
            INHERITED,
            CYRILLIC,
            ARMENIAN,
            COMMON,
            ARMENIAN,
            HEBREW,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            INHERITED,
            ARABIC,
            COMMON,
            ARABIC,
            INHERITED,
            ARABIC,
            COMMON,
            ARABIC,
            SYRIAC,
            ARABIC,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            ARABIC,
            DEVANAGARI,
            INHERITED,
            DEVANAGARI,
            COMMON,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            COMMON,
            THAI,
            LAO,
            TIBETAN,
            COMMON,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            COMMON,
            GEORGIAN,
            HANGUL,
            ETHIOPIC,
            CHEROKEE,
            CANADIAN_ABORIGINAL,
            OGHAM,
            RUNIC,
            COMMON,
            RUNIC,
            TAGALOG,
            HANUNOO,
            COMMON,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            COMMON,
            MONGOLIAN,
            COMMON,
            MONGOLIAN,
            CANADIAN_ABORIGINAL,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER,
            BUGINESE,
            TAI_THAM,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            SUNDANESE,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            LATIN,
            GREEK,
            CYRILLIC,
            LATIN,
            GREEK,
            LATIN,
            GREEK,
            LATIN,
            CYRILLIC,
            LATIN,
            GREEK,
            INHERITED,
            LATIN,
            GREEK,
            COMMON,
            INHERITED,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            INHERITED,
            COMMON,
            GREEK,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            BRAILLE,
            COMMON,
            GLAGOLITIC,
            LATIN,
            COPTIC,
            GEORGIAN,
            TIFINAGH,
            ETHIOPIC,
            CYRILLIC,
            COMMON,
            HAN,
            COMMON,
            HAN,
            COMMON,
            HAN,
            COMMON,
            HAN,
            INHERITED,
            HANGUL,
            COMMON,
            HAN,
            COMMON,
            HIRAGANA,
            INHERITED,
            COMMON,
            HIRAGANA,
            COMMON,
            KATAKANA,
            COMMON,
            KATAKANA,
            BOPOMOFO,
            HANGUL,
            COMMON,
            BOPOMOFO,
            COMMON,
            KATAKANA,
            HANGUL,
            COMMON,
            HANGUL,
            COMMON,
            KATAKANA,
            COMMON,
            HAN,
            COMMON,
            HAN,
            YI,
            LISU,
            VAI,
            CYRILLIC,
            BAMUM,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            SYLOTI_NAGRI,
            COMMON,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI,
            KAYAH_LI,
            REJANG,
            HANGUL,
            JAVANESE,
            CHAM,
            MYANMAR,
            TAI_VIET,
            MEETEI_MAYEK,
            ETHIOPIC,
            MEETEI_MAYEK,
            HANGUL,
            UNKNOWN     ,
            HAN,
            LATIN,
            ARMENIAN,
            HEBREW,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            ARABIC,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            KATAKANA,
            COMMON,
            KATAKANA,
            COMMON,
            HANGUL,
            COMMON,
            LINEAR_B,
            COMMON,
            GREEK,
            COMMON,
            INHERITED,
            LYCIAN,
            CARIAN,
            OLD_ITALIC,
            GOTHIC,
            UGARITIC,
            OLD_PERSIAN,
            DESERET,
            SHAVIAN,
            OSMANYA,
            CYPRIOT,
            IMPERIAL_ARAMAIC,
            PHOENICIAN,
            LYDIAN,
            MEROITIC_HIEROGLYPHS,
            MEROITIC_CURSIVE,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            OLD_TURKIC,
            ARABIC,
            BRAHMI,
            KAITHI,
            SORA_SOMPENG,
            CHAKMA,
            SHARADA,
            TAKRI,
            CUNEIFORM,
            EGYPTIAN_HIEROGLYPHS,
            BAMUM,
            MIAO,
            KATAKANA,
            HIRAGANA,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            GREEK,
            COMMON,
            ARABIC,
            COMMON,
            HIRAGANA,
            COMMON,
            HAN,
            COMMON,
            INHERITED,
            UNKNOWN
        };
4358 
4359         private static HashMap<String, Character.UnicodeScript> aliases;
4360         static {
4361             aliases = new HashMap<>(128);
4362             aliases.put("ARAB", ARABIC);
4363             aliases.put("ARMI", IMPERIAL_ARAMAIC);
4364             aliases.put("ARMN", ARMENIAN);
4365             aliases.put("AVST", AVESTAN);
4366             aliases.put("BALI", BALINESE);
4367             aliases.put("BAMU", BAMUM);
4368             aliases.put("BATK", BATAK);
4369             aliases.put("BENG", BENGALI);
4370             aliases.put("BOPO", BOPOMOFO);
4371             aliases.put("BRAI", BRAILLE);
4372             aliases.put("BRAH", BRAHMI);
4373             aliases.put("BUGI", BUGINESE);
4374             aliases.put("BUHD", BUHID);
4375             aliases.put("CAKM", CHAKMA);
4376             aliases.put("CANS", CANADIAN_ABORIGINAL);
4377             aliases.put("CARI", CARIAN);
4378             aliases.put("CHAM", CHAM);
4379             aliases.put("CHER", CHEROKEE);
4380             aliases.put("COPT", COPTIC);
4381             aliases.put("CPRT", CYPRIOT);
4382             aliases.put("CYRL", CYRILLIC);
4383             aliases.put("DEVA", DEVANAGARI);
4384             aliases.put("DSRT", DESERET);
4385             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4386             aliases.put("ETHI", ETHIOPIC);
4387             aliases.put("GEOR", GEORGIAN);
4388             aliases.put("GLAG", GLAGOLITIC);
4389             aliases.put("GOTH", GOTHIC);
4390             aliases.put("GREK", GREEK);
4391             aliases.put("GUJR", GUJARATI);
4392             aliases.put("GURU", GURMUKHI);
4393             aliases.put("HANG", HANGUL);
4394             aliases.put("HANI", HAN);
4395             aliases.put("HANO", HANUNOO);
4396             aliases.put("HEBR", HEBREW);
4397             aliases.put("HIRA", HIRAGANA);
4398             // it appears we don't have the KATAKANA_OR_HIRAGANA
4399             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4400             aliases.put("ITAL", OLD_ITALIC);
4401             aliases.put("JAVA", JAVANESE);
4402             aliases.put("KALI", KAYAH_LI);
4403             aliases.put("KANA", KATAKANA);
4404             aliases.put("KHAR", KHAROSHTHI);
4405             aliases.put("KHMR", KHMER);
4406             aliases.put("KNDA", KANNADA);
4407             aliases.put("KTHI", KAITHI);
4408             aliases.put("LANA", TAI_THAM);
4409             aliases.put("LAOO", LAO);
4410             aliases.put("LATN", LATIN);
4411             aliases.put("LEPC", LEPCHA);
4412             aliases.put("LIMB", LIMBU);
4413             aliases.put("LINB", LINEAR_B);
4414             aliases.put("LISU", LISU);
4415             aliases.put("LYCI", LYCIAN);
4416             aliases.put("LYDI", LYDIAN);
4417             aliases.put("MAND", MANDAIC);
4418             aliases.put("MERC", MEROITIC_CURSIVE);
4419             aliases.put("MERO", MEROITIC_HIEROGLYPHS);
4420             aliases.put("MLYM", MALAYALAM);
4421             aliases.put("MONG", MONGOLIAN);
4422             aliases.put("MTEI", MEETEI_MAYEK);
4423             aliases.put("MYMR", MYANMAR);
4424             aliases.put("NKOO", NKO);
4425             aliases.put("OGAM", OGHAM);
4426             aliases.put("OLCK", OL_CHIKI);
4427             aliases.put("ORKH", OLD_TURKIC);
4428             aliases.put("ORYA", ORIYA);
4429             aliases.put("OSMA", OSMANYA);
4430             aliases.put("PHAG", PHAGS_PA);
4431             aliases.put("PLRD", MIAO);
4432             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4433             aliases.put("PHNX", PHOENICIAN);
4434             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4435             aliases.put("RJNG", REJANG);
4436             aliases.put("RUNR", RUNIC);
4437             aliases.put("SAMR", SAMARITAN);
4438             aliases.put("SARB", OLD_SOUTH_ARABIAN);
4439             aliases.put("SAUR", SAURASHTRA);
4440             aliases.put("SHAW", SHAVIAN);
4441             aliases.put("SHRD", SHARADA);
4442             aliases.put("SINH", SINHALA);
4443             aliases.put("SORA", SORA_SOMPENG);
4444             aliases.put("SUND", SUNDANESE);
4445             aliases.put("SYLO", SYLOTI_NAGRI);
4446             aliases.put("SYRC", SYRIAC);
4447             aliases.put("TAGB", TAGBANWA);
4448             aliases.put("TALE", TAI_LE);
4449             aliases.put("TAKR", TAKRI);
4450             aliases.put("TALU", NEW_TAI_LUE);
4451             aliases.put("TAML", TAMIL);
4452             aliases.put("TAVT", TAI_VIET);
4453             aliases.put("TELU", TELUGU);
4454             aliases.put("TFNG", TIFINAGH);
4455             aliases.put("TGLG", TAGALOG);
4456             aliases.put("THAA", THAANA);
4457             aliases.put("THAI", THAI);
4458             aliases.put("TIBT", TIBETAN);
4459             aliases.put("UGAR", UGARITIC);
4460             aliases.put("VAII", VAI);
4461             aliases.put("XPEO", OLD_PERSIAN);
4462             aliases.put("XSUX", CUNEIFORM);
4463             aliases.put("YIII", YI);
4464             aliases.put("ZINH", INHERITED);
4465             aliases.put("ZYYY", COMMON);
4466             aliases.put("ZZZZ", UNKNOWN);
4467         }
4468 
4469         /**
4470          * Returns the enum constant representing the Unicode script of which
4471          * the given character (Unicode code point) is assigned to.
4472          *
4473          * @param   codePoint the character (Unicode code point) in question.
4474          * @return  The {@code UnicodeScript} constant representing the
4475          *          Unicode script of which this character is assigned to.
4476          *
4477          * @exception IllegalArgumentException if the specified
4478          * {@code codePoint} is an invalid Unicode code point.
4479          * @see Character#isValidCodePoint(int)
4480          *
4481          */
4482         public static UnicodeScript of(int codePoint) {
4483             if (!isValidCodePoint(codePoint))
4484                 throw new IllegalArgumentException();
4485             int type = getType(codePoint);
4486             // leave SURROGATE and PRIVATE_USE for table lookup
4487             if (type == UNASSIGNED)
4488                 return UNKNOWN;
4489             int index = Arrays.binarySearch(scriptStarts, codePoint);
4490             if (index < 0)
4491                 index = -index - 2;
4492             return scripts[index];
4493         }
4494 
4495         /**
4496          * Returns the UnicodeScript constant with the given Unicode script
4497          * name or the script name alias. Script names and their aliases are
4498          * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4499          * and PropertyValueAliases&lt;version&gt;.txt define script names
4500          * and the script name aliases for a particular version of the
4501          * standard. The {@link Character} class specifies the version of
4502          * the standard that it supports.
4503          * <p>
4504          * Character case is ignored for all of the valid script names.
4505          * The en_US locale's case mapping rules are used to provide
4506          * case-insensitive string comparisons for script name validation.
4507          *
4508          * @param scriptName A {@code UnicodeScript} name.
4509          * @return The {@code UnicodeScript} constant identified
4510          *         by {@code scriptName}
4511          * @throws IllegalArgumentException if {@code scriptName} is an
4512          *         invalid name
4513          * @throws NullPointerException if {@code scriptName} is null
4514          */
4515         public static final UnicodeScript forName(String scriptName) {
4516             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4517                                  //.replace(' ', '_'));
4518             UnicodeScript sc = aliases.get(scriptName);
4519             if (sc != null)
4520                 return sc;
4521             return valueOf(scriptName);
4522         }
4523     }
4524 
    /**
     * The {@code char} value wrapped by this {@code Character}.
     * Assigned once by the constructor and never changed afterwards.
     *
     * @serial
     */
    private final char value;

    /** Use the serialVersionUID from JDK 1.0.2 for interoperability. */
    private static final long serialVersionUID = 3786198910865385080L;

    /**
     * Constructs a newly allocated {@code Character} object that
     * represents the specified {@code char} value.
     * <p>
     * When a new instance is not required, {@link #valueOf(char)} is
     * generally the better choice because it can return cached instances.
     *
     * @param  value   the value to be represented by the
     *                  {@code Character} object.
     */
    public Character(char value) {
        this.value = value;
    }
4545 
4546     private static class CharacterCache {
4547         private CharacterCache(){}
4548 
4549         static final Character cache[] = new Character[127 + 1];
4550 
4551         static {
4552             for (int i = 0; i < cache.length; i++)
4553                 cache[i] = new Character((char)i);
4554         }
4555     }
4556 
4557     /**
4558      * Returns a <tt>Character</tt> instance representing the specified
4559      * <tt>char</tt> value.
4560      * If a new <tt>Character</tt> instance is not required, this method
4561      * should generally be used in preference to the constructor
4562      * {@link #Character(char)}, as this method is likely to yield
4563      * significantly better space and time performance by caching
4564      * frequently requested values.
4565      *
4566      * This method will always cache values in the range {@code
4567      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
4568      * cache other values outside of this range.
4569      *
4570      * @param  c a char value.
4571      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
4572      * @since  1.5
4573      */
4574     @HotSpotIntrinsicCandidate
4575     public static Character valueOf(char c) {
4576         if (c <= 127) { // must cache
4577             return CharacterCache.cache[(int)c];
4578         }
4579         return new Character(c);
4580     }
4581 
4582     /**
4583      * Returns the value of this {@code Character} object.
4584      * @return  the primitive {@code char} value represented by
4585      *          this object.
4586      */
4587     @HotSpotIntrinsicCandidate
4588     public char charValue() {
4589         return value;
4590     }
4591 
4592     /**
4593      * Returns a hash code for this {@code Character}; equal to the result
4594      * of invoking {@code charValue()}.
4595      *
4596      * @return a hash code value for this {@code Character}
4597      */
4598     @Override
4599     public int hashCode() {
4600         return Character.hashCode(value);
4601     }
4602 
4603     /**
4604      * Returns a hash code for a {@code char} value; compatible with
4605      * {@code Character.hashCode()}.
4606      *
4607      * @since 1.8
4608      *
4609      * @param value The {@code char} for which to return a hash code.
4610      * @return a hash code value for a {@code char} value.
4611      */
4612     public static int hashCode(char value) {
4613         return (int)value;
4614     }
4615 
4616     /**
4617      * Compares this object against the specified object.
4618      * The result is {@code true} if and only if the argument is not
4619      * {@code null} and is a {@code Character} object that
4620      * represents the same {@code char} value as this object.
4621      *
4622      * @param   obj   the object to compare with.
4623      * @return  {@code true} if the objects are the same;
4624      *          {@code false} otherwise.
4625      */
4626     public boolean equals(Object obj) {
4627         if (obj instanceof Character) {
4628             return value == ((Character)obj).charValue();
4629         }
4630         return false;
4631     }
4632 
4633     /**
4634      * Returns a {@code String} object representing this
4635      * {@code Character}'s value.  The result is a string of
4636      * length 1 whose sole component is the primitive
4637      * {@code char} value represented by this
4638      * {@code Character} object.
4639      *
4640      * @return  a string representation of this object.
4641      */
4642     public String toString() {
4643         char buf[] = {value};
4644         return String.valueOf(buf);
4645     }
4646 
4647     /**
4648      * Returns a {@code String} object representing the
4649      * specified {@code char}.  The result is a string of length
4650      * 1 consisting solely of the specified {@code char}.
4651      *
4652      * @param c the {@code char} to be converted
4653      * @return the string representation of the specified {@code char}
4654      * @since 1.4
4655      */
4656     public static String toString(char c) {
4657         return String.valueOf(c);
4658     }
4659 
4660     /**
4661      * Determines whether the specified code point is a valid
4662      * <a href="http://www.unicode.org/glossary/#code_point">
4663      * Unicode code point value</a>.
4664      *
4665      * @param  codePoint the Unicode code point to be tested
4666      * @return {@code true} if the specified code point value is between
4667      *         {@link #MIN_CODE_POINT} and
4668      *         {@link #MAX_CODE_POINT} inclusive;
4669      *         {@code false} otherwise.
4670      * @since  1.5
4671      */
4672     public static boolean isValidCodePoint(int codePoint) {
4673         // Optimized form of:
4674         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4675         int plane = codePoint >>> 16;
4676         return plane < ((MAX_CODE_POINT + 1) >>> 16);
4677     }
4678 
4679     /**
4680      * Determines whether the specified character (Unicode code point)
4681      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4682      * Such code points can be represented using a single {@code char}.
4683      *
4684      * @param  codePoint the character (Unicode code point) to be tested
4685      * @return {@code true} if the specified code point is between
4686      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4687      *         {@code false} otherwise.
4688      * @since  1.7
4689      */
4690     public static boolean isBmpCodePoint(int codePoint) {
4691         return codePoint >>> 16 == 0;
4692         // Optimized form of:
4693         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4694         // We consistently use logical shift (>>>) to facilitate
4695         // additional runtime optimizations.
4696     }
4697 
4698     /**
4699      * Determines whether the specified character (Unicode code point)
4700      * is in the <a href="#supplementary">supplementary character</a> range.
4701      *
4702      * @param  codePoint the character (Unicode code point) to be tested
4703      * @return {@code true} if the specified code point is between
4704      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4705      *         {@link #MAX_CODE_POINT} inclusive;
4706      *         {@code false} otherwise.
4707      * @since  1.5
4708      */
4709     public static boolean isSupplementaryCodePoint(int codePoint) {
4710         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4711             && codePoint <  MAX_CODE_POINT + 1;
4712     }
4713 
4714     /**
4715      * Determines if the given {@code char} value is a
4716      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4717      * Unicode high-surrogate code unit</a>
4718      * (also known as <i>leading-surrogate code unit</i>).
4719      *
4720      * <p>Such values do not represent characters by themselves,
4721      * but are used in the representation of
4722      * <a href="#supplementary">supplementary characters</a>
4723      * in the UTF-16 encoding.
4724      *
4725      * @param  ch the {@code char} value to be tested.
4726      * @return {@code true} if the {@code char} value is between
4727      *         {@link #MIN_HIGH_SURROGATE} and
4728      *         {@link #MAX_HIGH_SURROGATE} inclusive;
4729      *         {@code false} otherwise.
4730      * @see    Character#isLowSurrogate(char)
4731      * @see    Character.UnicodeBlock#of(int)
4732      * @since  1.5
4733      */
4734     public static boolean isHighSurrogate(char ch) {
4735         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4736         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4737     }
4738 
4739     /**
4740      * Determines if the given {@code char} value is a
4741      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4742      * Unicode low-surrogate code unit</a>
4743      * (also known as <i>trailing-surrogate code unit</i>).
4744      *
4745      * <p>Such values do not represent characters by themselves,
4746      * but are used in the representation of
4747      * <a href="#supplementary">supplementary characters</a>
4748      * in the UTF-16 encoding.
4749      *
4750      * @param  ch the {@code char} value to be tested.
4751      * @return {@code true} if the {@code char} value is between
4752      *         {@link #MIN_LOW_SURROGATE} and
4753      *         {@link #MAX_LOW_SURROGATE} inclusive;
4754      *         {@code false} otherwise.
4755      * @see    Character#isHighSurrogate(char)
4756      * @since  1.5
4757      */
4758     public static boolean isLowSurrogate(char ch) {
4759         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4760     }
4761 
4762     /**
4763      * Determines if the given {@code char} value is a Unicode
4764      * <i>surrogate code unit</i>.
4765      *
4766      * <p>Such values do not represent characters by themselves,
4767      * but are used in the representation of
4768      * <a href="#supplementary">supplementary characters</a>
4769      * in the UTF-16 encoding.
4770      *
4771      * <p>A char value is a surrogate code unit if and only if it is either
4772      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4773      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4774      *
4775      * @param  ch the {@code char} value to be tested.
4776      * @return {@code true} if the {@code char} value is between
4777      *         {@link #MIN_SURROGATE} and
4778      *         {@link #MAX_SURROGATE} inclusive;
4779      *         {@code false} otherwise.
4780      * @since  1.7
4781      */
4782     public static boolean isSurrogate(char ch) {
4783         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4784     }
4785 
4786     /**
4787      * Determines whether the specified pair of {@code char}
4788      * values is a valid
4789      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4790      * Unicode surrogate pair</a>.
4791 
4792      * <p>This method is equivalent to the expression:
4793      * <blockquote><pre>{@code
4794      * isHighSurrogate(high) && isLowSurrogate(low)
4795      * }</pre></blockquote>
4796      *
4797      * @param  high the high-surrogate code value to be tested
4798      * @param  low the low-surrogate code value to be tested
4799      * @return {@code true} if the specified high and
4800      * low-surrogate code values represent a valid surrogate pair;
4801      * {@code false} otherwise.
4802      * @since  1.5
4803      */
4804     public static boolean isSurrogatePair(char high, char low) {
4805         return isHighSurrogate(high) && isLowSurrogate(low);
4806     }
4807 
4808     /**
4809      * Determines the number of {@code char} values needed to
4810      * represent the specified character (Unicode code point). If the
4811      * specified character is equal to or greater than 0x10000, then
4812      * the method returns 2. Otherwise, the method returns 1.
4813      *
4814      * <p>This method doesn't validate the specified character to be a
4815      * valid Unicode code point. The caller must validate the
4816      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
4817      * if necessary.
4818      *
4819      * @param   codePoint the character (Unicode code point) to be tested.
4820      * @return  2 if the character is a valid supplementary character; 1 otherwise.
4821      * @see     Character#isSupplementaryCodePoint(int)
4822      * @since   1.5
4823      */
4824     public static int charCount(int codePoint) {
4825         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4826     }
4827 
4828     /**
4829      * Converts the specified surrogate pair to its supplementary code
4830      * point value. This method does not validate the specified
4831      * surrogate pair. The caller must validate it using {@link
4832      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4833      *
4834      * @param  high the high-surrogate code unit
4835      * @param  low the low-surrogate code unit
4836      * @return the supplementary code point composed from the
4837      *         specified surrogate pair.
4838      * @since  1.5
4839      */
4840     public static int toCodePoint(char high, char low) {
4841         // Optimized form of:
4842         // return ((high - MIN_HIGH_SURROGATE) << 10)
4843         //         + (low - MIN_LOW_SURROGATE)
4844         //         + MIN_SUPPLEMENTARY_CODE_POINT;
4845         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4846                                        - (MIN_HIGH_SURROGATE << 10)
4847                                        - MIN_LOW_SURROGATE);
4848     }
4849 
4850     /**
4851      * Returns the code point at the given index of the
4852      * {@code CharSequence}. If the {@code char} value at
4853      * the given index in the {@code CharSequence} is in the
4854      * high-surrogate range, the following index is less than the
4855      * length of the {@code CharSequence}, and the
4856      * {@code char} value at the following index is in the
4857      * low-surrogate range, then the supplementary code point
4858      * corresponding to this surrogate pair is returned. Otherwise,
4859      * the {@code char} value at the given index is returned.
4860      *
4861      * @param seq a sequence of {@code char} values (Unicode code
4862      * units)
4863      * @param index the index to the {@code char} values (Unicode
4864      * code units) in {@code seq} to be converted
4865      * @return the Unicode code point at the given index
4866      * @exception NullPointerException if {@code seq} is null.
4867      * @exception IndexOutOfBoundsException if the value
4868      * {@code index} is negative or not less than
4869      * {@link CharSequence#length() seq.length()}.
4870      * @since  1.5
4871      */
4872     public static int codePointAt(CharSequence seq, int index) {
4873         char c1 = seq.charAt(index);
4874         if (isHighSurrogate(c1) && ++index < seq.length()) {
4875             char c2 = seq.charAt(index);
4876             if (isLowSurrogate(c2)) {
4877                 return toCodePoint(c1, c2);
4878             }
4879         }
4880         return c1;
4881     }
4882 
4883     /**
4884      * Returns the code point at the given index of the
4885      * {@code char} array. If the {@code char} value at
4886      * the given index in the {@code char} array is in the
4887      * high-surrogate range, the following index is less than the
4888      * length of the {@code char} array, and the
4889      * {@code char} value at the following index is in the
4890      * low-surrogate range, then the supplementary code point
4891      * corresponding to this surrogate pair is returned. Otherwise,
4892      * the {@code char} value at the given index is returned.
4893      *
4894      * @param a the {@code char} array
4895      * @param index the index to the {@code char} values (Unicode
4896      * code units) in the {@code char} array to be converted
4897      * @return the Unicode code point at the given index
4898      * @exception NullPointerException if {@code a} is null.
4899      * @exception IndexOutOfBoundsException if the value
4900      * {@code index} is negative or not less than
4901      * the length of the {@code char} array.
4902      * @since  1.5
4903      */
4904     public static int codePointAt(char[] a, int index) {
4905         return codePointAtImpl(a, index, a.length);
4906     }
4907 
4908     /**
4909      * Returns the code point at the given index of the
4910      * {@code char} array, where only array elements with
4911      * {@code index} less than {@code limit} can be used. If
4912      * the {@code char} value at the given index in the
4913      * {@code char} array is in the high-surrogate range, the
4914      * following index is less than the {@code limit}, and the
4915      * {@code char} value at the following index is in the
4916      * low-surrogate range, then the supplementary code point
4917      * corresponding to this surrogate pair is returned. Otherwise,
4918      * the {@code char} value at the given index is returned.
4919      *
4920      * @param a the {@code char} array
4921      * @param index the index to the {@code char} values (Unicode
4922      * code units) in the {@code char} array to be converted
4923      * @param limit the index after the last array element that
4924      * can be used in the {@code char} array
4925      * @return the Unicode code point at the given index
4926      * @exception NullPointerException if {@code a} is null.
4927      * @exception IndexOutOfBoundsException if the {@code index}
4928      * argument is negative or not less than the {@code limit}
4929      * argument, or if the {@code limit} argument is negative or
4930      * greater than the length of the {@code char} array.
4931      * @since  1.5
4932      */
4933     public static int codePointAt(char[] a, int index, int limit) {
4934         if (index >= limit || limit < 0 || limit > a.length) {
4935             throw new IndexOutOfBoundsException();
4936         }
4937         return codePointAtImpl(a, index, limit);
4938     }
4939 
4940     // throws ArrayIndexOutOfBoundsException if index out of bounds
4941     static int codePointAtImpl(char[] a, int index, int limit) {
4942         char c1 = a[index];
4943         if (isHighSurrogate(c1) && ++index < limit) {
4944             char c2 = a[index];
4945             if (isLowSurrogate(c2)) {
4946                 return toCodePoint(c1, c2);
4947             }
4948         }
4949         return c1;
4950     }
4951 
4952     /**
4953      * Returns the code point preceding the given index of the
4954      * {@code CharSequence}. If the {@code char} value at
4955      * {@code (index - 1)} in the {@code CharSequence} is in
4956      * the low-surrogate range, {@code (index - 2)} is not
4957      * negative, and the {@code char} value at {@code (index - 2)}
4958      * in the {@code CharSequence} is in the
4959      * high-surrogate range, then the supplementary code point
4960      * corresponding to this surrogate pair is returned. Otherwise,
4961      * the {@code char} value at {@code (index - 1)} is
4962      * returned.
4963      *
4964      * @param seq the {@code CharSequence} instance
4965      * @param index the index following the code point that should be returned
4966      * @return the Unicode code point value before the given index.
4967      * @exception NullPointerException if {@code seq} is null.
4968      * @exception IndexOutOfBoundsException if the {@code index}
4969      * argument is less than 1 or greater than {@link
4970      * CharSequence#length() seq.length()}.
4971      * @since  1.5
4972      */
4973     public static int codePointBefore(CharSequence seq, int index) {
4974         char c2 = seq.charAt(--index);
4975         if (isLowSurrogate(c2) && index > 0) {
4976             char c1 = seq.charAt(--index);
4977             if (isHighSurrogate(c1)) {
4978                 return toCodePoint(c1, c2);
4979             }
4980         }
4981         return c2;
4982     }
4983 
4984     /**
4985      * Returns the code point preceding the given index of the
4986      * {@code char} array. If the {@code char} value at
4987      * {@code (index - 1)} in the {@code char} array is in
4988      * the low-surrogate range, {@code (index - 2)} is not
4989      * negative, and the {@code char} value at {@code (index - 2)}
4990      * in the {@code char} array is in the
4991      * high-surrogate range, then the supplementary code point
4992      * corresponding to this surrogate pair is returned. Otherwise,
4993      * the {@code char} value at {@code (index - 1)} is
4994      * returned.
4995      *
4996      * @param a the {@code char} array
4997      * @param index the index following the code point that should be returned
4998      * @return the Unicode code point value before the given index.
4999      * @exception NullPointerException if {@code a} is null.
5000      * @exception IndexOutOfBoundsException if the {@code index}
5001      * argument is less than 1 or greater than the length of the
5002      * {@code char} array.
5003      * @since  1.5
5004      */
5005     public static int codePointBefore(char[] a, int index) {
5006         return codePointBeforeImpl(a, index, 0);
5007     }
5008 
5009     /**
5010      * Returns the code point preceding the given index of the
5011      * {@code char} array, where only array elements with
5012      * {@code index} greater than or equal to {@code start}
5013      * can be used. If the {@code char} value at {@code (index - 1)}
5014      * in the {@code char} array is in the
5015      * low-surrogate range, {@code (index - 2)} is not less than
5016      * {@code start}, and the {@code char} value at
5017      * {@code (index - 2)} in the {@code char} array is in
5018      * the high-surrogate range, then the supplementary code point
5019      * corresponding to this surrogate pair is returned. Otherwise,
5020      * the {@code char} value at {@code (index - 1)} is
5021      * returned.
5022      *
5023      * @param a the {@code char} array
5024      * @param index the index following the code point that should be returned
5025      * @param start the index of the first array element in the
5026      * {@code char} array
5027      * @return the Unicode code point value before the given index.
5028      * @exception NullPointerException if {@code a} is null.
5029      * @exception IndexOutOfBoundsException if the {@code index}
5030      * argument is not greater than the {@code start} argument or
5031      * is greater than the length of the {@code char} array, or
5032      * if the {@code start} argument is negative or not less than
5033      * the length of the {@code char} array.
5034      * @since  1.5
5035      */
5036     public static int codePointBefore(char[] a, int index, int start) {
5037         if (index <= start || start < 0 || start >= a.length) {
5038             throw new IndexOutOfBoundsException();
5039         }
5040         return codePointBeforeImpl(a, index, start);
5041     }
5042 
5043     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
5044     static int codePointBeforeImpl(char[] a, int index, int start) {
5045         char c2 = a[--index];
5046         if (isLowSurrogate(c2) && index > start) {
5047             char c1 = a[--index];
5048             if (isHighSurrogate(c1)) {
5049                 return toCodePoint(c1, c2);
5050             }
5051         }
5052         return c2;
5053     }
5054 
5055     /**
5056      * Returns the leading surrogate (a
5057      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
5058      * high surrogate code unit</a>) of the
5059      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5060      * surrogate pair</a>
5061      * representing the specified supplementary character (Unicode
5062      * code point) in the UTF-16 encoding.  If the specified character
5063      * is not a
5064      * <a href="Character.html#supplementary">supplementary character</a>,
5065      * an unspecified {@code char} is returned.
5066      *
5067      * <p>If
5068      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5069      * is {@code true}, then
5070      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
5071      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
5072      * are also always {@code true}.
5073      *
5074      * @param   codePoint a supplementary character (Unicode code point)
5075      * @return  the leading surrogate code unit used to represent the
5076      *          character in the UTF-16 encoding
5077      * @since   1.7
5078      */
5079     public static char highSurrogate(int codePoint) {
5080         return (char) ((codePoint >>> 10)
5081             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
5082     }
5083 
5084     /**
5085      * Returns the trailing surrogate (a
5086      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
5087      * low surrogate code unit</a>) of the
5088      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5089      * surrogate pair</a>
5090      * representing the specified supplementary character (Unicode
5091      * code point) in the UTF-16 encoding.  If the specified character
5092      * is not a
5093      * <a href="Character.html#supplementary">supplementary character</a>,
5094      * an unspecified {@code char} is returned.
5095      *
5096      * <p>If
5097      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5098      * is {@code true}, then
5099      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
5100      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
5101      * are also always {@code true}.
5102      *
5103      * @param   codePoint a supplementary character (Unicode code point)
5104      * @return  the trailing surrogate code unit used to represent the
5105      *          character in the UTF-16 encoding
5106      * @since   1.7
5107      */
5108     public static char lowSurrogate(int codePoint) {
5109         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
5110     }
5111 
5112     /**
5113      * Converts the specified character (Unicode code point) to its
5114      * UTF-16 representation. If the specified code point is a BMP
5115      * (Basic Multilingual Plane or Plane 0) value, the same value is
5116      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
5117      * specified code point is a supplementary character, its
5118      * surrogate values are stored in {@code dst[dstIndex]}
5119      * (high-surrogate) and {@code dst[dstIndex+1]}
5120      * (low-surrogate), and 2 is returned.
5121      *
5122      * @param  codePoint the character (Unicode code point) to be converted.
5123      * @param  dst an array of {@code char} in which the
5124      * {@code codePoint}'s UTF-16 value is stored.
5125      * @param dstIndex the start index into the {@code dst}
5126      * array where the converted value is stored.
5127      * @return 1 if the code point is a BMP code point, 2 if the
5128      * code point is a supplementary code point.
5129      * @exception IllegalArgumentException if the specified
5130      * {@code codePoint} is not a valid Unicode code point.
5131      * @exception NullPointerException if the specified {@code dst} is null.
5132      * @exception IndexOutOfBoundsException if {@code dstIndex}
5133      * is negative or not less than {@code dst.length}, or if
5134      * {@code dst} at {@code dstIndex} doesn't have enough
5135      * array element(s) to store the resulting {@code char}
5136      * value(s). (If {@code dstIndex} is equal to
5137      * {@code dst.length-1} and the specified
5138      * {@code codePoint} is a supplementary character, the
5139      * high-surrogate value is not stored in
5140      * {@code dst[dstIndex]}.)
5141      * @since  1.5
5142      */
5143     public static int toChars(int codePoint, char[] dst, int dstIndex) {
5144         if (isBmpCodePoint(codePoint)) {
5145             dst[dstIndex] = (char) codePoint;
5146             return 1;
5147         } else if (isValidCodePoint(codePoint)) {
5148             toSurrogates(codePoint, dst, dstIndex);
5149             return 2;
5150         } else {
5151             throw new IllegalArgumentException();
5152         }
5153     }
5154 
5155     /**
5156      * Converts the specified character (Unicode code point) to its
5157      * UTF-16 representation stored in a {@code char} array. If
5158      * the specified code point is a BMP (Basic Multilingual Plane or
5159      * Plane 0) value, the resulting {@code char} array has
5160      * the same value as {@code codePoint}. If the specified code
5161      * point is a supplementary code point, the resulting
5162      * {@code char} array has the corresponding surrogate pair.
5163      *
5164      * @param  codePoint a Unicode code point
5165      * @return a {@code char} array having
5166      *         {@code codePoint}'s UTF-16 representation.
5167      * @exception IllegalArgumentException if the specified
5168      * {@code codePoint} is not a valid Unicode code point.
5169      * @since  1.5
5170      */
5171     public static char[] toChars(int codePoint) {
5172         if (isBmpCodePoint(codePoint)) {
5173             return new char[] { (char) codePoint };
5174         } else if (isValidCodePoint(codePoint)) {
5175             char[] result = new char[2];
5176             toSurrogates(codePoint, result, 0);
5177             return result;
5178         } else {
5179             throw new IllegalArgumentException();
5180         }
5181     }
5182 
5183     static void toSurrogates(int codePoint, char[] dst, int index) {
5184         // We write elements "backwards" to guarantee all-or-nothing
5185         dst[index+1] = lowSurrogate(codePoint);
5186         dst[index] = highSurrogate(codePoint);
5187     }
5188 
5189     /**
5190      * Returns the number of Unicode code points in the text range of
5191      * the specified char sequence. The text range begins at the
5192      * specified {@code beginIndex} and extends to the
5193      * {@code char} at index {@code endIndex - 1}. Thus the
5194      * length (in {@code char}s) of the text range is
5195      * {@code endIndex-beginIndex}. Unpaired surrogates within
5196      * the text range count as one code point each.
5197      *
5198      * @param seq the char sequence
5199      * @param beginIndex the index to the first {@code char} of
5200      * the text range.
5201      * @param endIndex the index after the last {@code char} of
5202      * the text range.
5203      * @return the number of Unicode code points in the specified text
5204      * range
5205      * @exception NullPointerException if {@code seq} is null.
5206      * @exception IndexOutOfBoundsException if the
5207      * {@code beginIndex} is negative, or {@code endIndex}
5208      * is larger than the length of the given sequence, or
5209      * {@code beginIndex} is larger than {@code endIndex}.
5210      * @since  1.5
5211      */
5212     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5213         int length = seq.length();
5214         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5215             throw new IndexOutOfBoundsException();
5216         }
5217         int n = endIndex - beginIndex;
5218         for (int i = beginIndex; i < endIndex; ) {
5219             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5220                 isLowSurrogate(seq.charAt(i))) {
5221                 n--;
5222                 i++;
5223             }
5224         }
5225         return n;
5226     }
5227 
5228     /**
5229      * Returns the number of Unicode code points in a subarray of the
5230      * {@code char} array argument. The {@code offset}
5231      * argument is the index of the first {@code char} of the
5232      * subarray and the {@code count} argument specifies the
5233      * length of the subarray in {@code char}s. Unpaired
5234      * surrogates within the subarray count as one code point each.
5235      *
5236      * @param a the {@code char} array
5237      * @param offset the index of the first {@code char} in the
5238      * given {@code char} array
5239      * @param count the length of the subarray in {@code char}s
5240      * @return the number of Unicode code points in the specified subarray
5241      * @exception NullPointerException if {@code a} is null.
5242      * @exception IndexOutOfBoundsException if {@code offset} or
5243      * {@code count} is negative, or if {@code offset +
5244      * count} is larger than the length of the given array.
5245      * @since  1.5
5246      */
5247     public static int codePointCount(char[] a, int offset, int count) {
5248         if (count > a.length - offset || offset < 0 || count < 0) {
5249             throw new IndexOutOfBoundsException();
5250         }
5251         return codePointCountImpl(a, offset, count);
5252     }
5253 
5254     static int codePointCountImpl(char[] a, int offset, int count) {
5255         int endIndex = offset + count;
5256         int n = count;
5257         for (int i = offset; i < endIndex; ) {
5258             if (isHighSurrogate(a[i++]) && i < endIndex &&
5259                 isLowSurrogate(a[i])) {
5260                 n--;
5261                 i++;
5262             }
5263         }
5264         return n;
5265     }
5266 
5267     /**
5268      * Returns the index within the given char sequence that is offset
5269      * from the given {@code index} by {@code codePointOffset}
5270      * code points. Unpaired surrogates within the text range given by
5271      * {@code index} and {@code codePointOffset} count as
5272      * one code point each.
5273      *
5274      * @param seq the char sequence
5275      * @param index the index to be offset
5276      * @param codePointOffset the offset in code points
5277      * @return the index within the char sequence
5278      * @exception NullPointerException if {@code seq} is null.
5279      * @exception IndexOutOfBoundsException if {@code index}
5280      *   is negative or larger than the length of the char sequence,
5281      *   or if {@code codePointOffset} is positive and the
5282      *   subsequence starting with {@code index} has fewer than
5283      *   {@code codePointOffset} code points, or if
5284      *   {@code codePointOffset} is negative and the subsequence
5285      *   before {@code index} has fewer than the absolute value
5286      *   of {@code codePointOffset} code points.
5287      * @since 1.5
5288      */
5289     public static int offsetByCodePoints(CharSequence seq, int index,
5290                                          int codePointOffset) {
5291         int length = seq.length();
5292         if (index < 0 || index > length) {
5293             throw new IndexOutOfBoundsException();
5294         }
5295 
5296         int x = index;
5297         if (codePointOffset >= 0) {
5298             int i;
5299             for (i = 0; x < length && i < codePointOffset; i++) {
5300                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5301                     isLowSurrogate(seq.charAt(x))) {
5302                     x++;
5303                 }
5304             }
5305             if (i < codePointOffset) {
5306                 throw new IndexOutOfBoundsException();
5307             }
5308         } else {
5309             int i;
5310             for (i = codePointOffset; x > 0 && i < 0; i++) {
5311                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5312                     isHighSurrogate(seq.charAt(x-1))) {
5313                     x--;
5314                 }
5315             }
5316             if (i < 0) {
5317                 throw new IndexOutOfBoundsException();
5318             }
5319         }
5320         return x;
5321     }
5322 
5323     /**
5324      * Returns the index within the given {@code char} subarray
5325      * that is offset from the given {@code index} by
5326      * {@code codePointOffset} code points. The
5327      * {@code start} and {@code count} arguments specify a
5328      * subarray of the {@code char} array. Unpaired surrogates
5329      * within the text range given by {@code index} and
5330      * {@code codePointOffset} count as one code point each.
5331      *
5332      * @param a the {@code char} array
5333      * @param start the index of the first {@code char} of the
5334      * subarray
5335      * @param count the length of the subarray in {@code char}s
5336      * @param index the index to be offset
5337      * @param codePointOffset the offset in code points
5338      * @return the index within the subarray
5339      * @exception NullPointerException if {@code a} is null.
5340      * @exception IndexOutOfBoundsException
5341      *   if {@code start} or {@code count} is negative,
5342      *   or if {@code start + count} is larger than the length of
5343      *   the given array,
5344      *   or if {@code index} is less than {@code start} or
5345      *   larger than {@code start + count},
5346      *   or if {@code codePointOffset} is positive and the text range
5347      *   starting with {@code index} and ending with {@code start + count - 1}
5348      *   has fewer than {@code codePointOffset} code
5349      *   points,
5350      *   or if {@code codePointOffset} is negative and the text range
5351      *   starting with {@code start} and ending with {@code index - 1}
5352      *   has fewer than the absolute value of
5353      *   {@code codePointOffset} code points.
5354      * @since 1.5
5355      */
5356     public static int offsetByCodePoints(char[] a, int start, int count,
5357                                          int index, int codePointOffset) {
5358         if (count > a.length-start || start < 0 || count < 0
5359             || index < start || index > start+count) {
5360             throw new IndexOutOfBoundsException();
5361         }
5362         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5363     }
5364 
5365     static int offsetByCodePointsImpl(char[]a, int start, int count,
5366                                       int index, int codePointOffset) {
5367         int x = index;
5368         if (codePointOffset >= 0) {
5369             int limit = start + count;
5370             int i;
5371             for (i = 0; x < limit && i < codePointOffset; i++) {
5372                 if (isHighSurrogate(a[x++]) && x < limit &&
5373                     isLowSurrogate(a[x])) {
5374                     x++;
5375                 }
5376             }
5377             if (i < codePointOffset) {
5378                 throw new IndexOutOfBoundsException();
5379             }
5380         } else {
5381             int i;
5382             for (i = codePointOffset; x > start && i < 0; i++) {
5383                 if (isLowSurrogate(a[--x]) && x > start &&
5384                     isHighSurrogate(a[x-1])) {
5385                     x--;
5386                 }
5387             }
5388             if (i < 0) {
5389                 throw new IndexOutOfBoundsException();
5390             }
5391         }
5392         return x;
5393     }
5394 
5395     /**
5396      * Determines if the specified character is a lowercase character.
5397      * <p>
5398      * A character is lowercase if its general category type, provided
5399      * by {@code Character.getType(ch)}, is
5400      * {@code LOWERCASE_LETTER}, or it has contributory property
5401      * Other_Lowercase as defined by the Unicode Standard.
5402      * <p>
5403      * The following are examples of lowercase characters:
5404      * <blockquote><pre>
5405      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5406      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5407      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5408      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5409      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5410      * </pre></blockquote>
5411      * <p> Many other Unicode characters are lowercase too.
5412      *
5413      * <p><b>Note:</b> This method cannot handle <a
5414      * href="#supplementary"> supplementary characters</a>. To support
5415      * all Unicode characters, including supplementary characters, use
5416      * the {@link #isLowerCase(int)} method.
5417      *
5418      * @param   ch   the character to be tested.
5419      * @return  {@code true} if the character is lowercase;
5420      *          {@code false} otherwise.
5421      * @see     Character#isUpperCase(char)
5422      * @see     Character#isTitleCase(char)
5423      * @see     Character#toLowerCase(char)
5424      * @see     Character#getType(char)
5425      */
5426     public static boolean isLowerCase(char ch) {
5427         return isLowerCase((int)ch);
5428     }
5429 
5430     /**
5431      * Determines if the specified character (Unicode code point) is a
5432      * lowercase character.
5433      * <p>
5434      * A character is lowercase if its general category type, provided
5435      * by {@link Character#getType getType(codePoint)}, is
5436      * {@code LOWERCASE_LETTER}, or it has contributory property
5437      * Other_Lowercase as defined by the Unicode Standard.
5438      * <p>
5439      * The following are examples of lowercase characters:
5440      * <blockquote><pre>
5441      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5442      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5443      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5444      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5445      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5446      * </pre></blockquote>
5447      * <p> Many other Unicode characters are lowercase too.
5448      *
5449      * @param   codePoint the character (Unicode code point) to be tested.
5450      * @return  {@code true} if the character is lowercase;
5451      *          {@code false} otherwise.
5452      * @see     Character#isUpperCase(int)
5453      * @see     Character#isTitleCase(int)
5454      * @see     Character#toLowerCase(int)
5455      * @see     Character#getType(int)
5456      * @since   1.5
5457      */
5458     public static boolean isLowerCase(int codePoint) {
5459         return getType(codePoint) == Character.LOWERCASE_LETTER ||
5460                CharacterData.of(codePoint).isOtherLowercase(codePoint);
5461     }
5462 
5463     /**
5464      * Determines if the specified character is an uppercase character.
5465      * <p>
5466      * A character is uppercase if its general category type, provided by
5467      * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
5468      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5469      * <p>
5470      * The following are examples of uppercase characters:
5471      * <blockquote><pre>
5472      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5473      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5474      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5475      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5476      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5477      * </pre></blockquote>
5478      * <p> Many other Unicode characters are uppercase too.
5479      *
5480      * <p><b>Note:</b> This method cannot handle <a
5481      * href="#supplementary"> supplementary characters</a>. To support
5482      * all Unicode characters, including supplementary characters, use
5483      * the {@link #isUpperCase(int)} method.
5484      *
5485      * @param   ch   the character to be tested.
5486      * @return  {@code true} if the character is uppercase;
5487      *          {@code false} otherwise.
5488      * @see     Character#isLowerCase(char)
5489      * @see     Character#isTitleCase(char)
5490      * @see     Character#toUpperCase(char)
5491      * @see     Character#getType(char)
5492      * @since   1.0
5493      */
5494     public static boolean isUpperCase(char ch) {
5495         return isUpperCase((int)ch);
5496     }
5497 
5498     /**
5499      * Determines if the specified character (Unicode code point) is an uppercase character.
5500      * <p>
5501      * A character is uppercase if its general category type, provided by
5502      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5503      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5504      * <p>
5505      * The following are examples of uppercase characters:
5506      * <blockquote><pre>
5507      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5508      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5509      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5510      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5511      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5512      * </pre></blockquote>
5513      * <p> Many other Unicode characters are uppercase too.
5514      *
5515      * @param   codePoint the character (Unicode code point) to be tested.
5516      * @return  {@code true} if the character is uppercase;
5517      *          {@code false} otherwise.
5518      * @see     Character#isLowerCase(int)
5519      * @see     Character#isTitleCase(int)
5520      * @see     Character#toUpperCase(int)
5521      * @see     Character#getType(int)
5522      * @since   1.5
5523      */
5524     public static boolean isUpperCase(int codePoint) {
5525         return getType(codePoint) == Character.UPPERCASE_LETTER ||
5526                CharacterData.of(codePoint).isOtherUppercase(codePoint);
5527     }
5528 
5529     /**
5530      * Determines if the specified character is a titlecase character.
5531      * <p>
5532      * A character is a titlecase character if its general
5533      * category type, provided by {@code Character.getType(ch)},
5534      * is {@code TITLECASE_LETTER}.
5535      * <p>
5536      * Some characters look like pairs of Latin letters. For example, there
5537      * is an uppercase letter that looks like "LJ" and has a corresponding
5538      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5539      * is the appropriate form to use when rendering a word in lowercase
5540      * with initial capitals, as for a book title.
5541      * <p>
5542      * These are some of the Unicode characters for which this method returns
5543      * {@code true}:
5544      * <ul>
5545      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5546      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5547      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5548      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5549      * </ul>
5550      * <p> Many other Unicode characters are titlecase too.
5551      *
5552      * <p><b>Note:</b> This method cannot handle <a
5553      * href="#supplementary"> supplementary characters</a>. To support
5554      * all Unicode characters, including supplementary characters, use
5555      * the {@link #isTitleCase(int)} method.
5556      *
5557      * @param   ch   the character to be tested.
5558      * @return  {@code true} if the character is titlecase;
5559      *          {@code false} otherwise.
5560      * @see     Character#isLowerCase(char)
5561      * @see     Character#isUpperCase(char)
5562      * @see     Character#toTitleCase(char)
5563      * @see     Character#getType(char)
5564      * @since   1.0.2
5565      */
5566     public static boolean isTitleCase(char ch) {
5567         return isTitleCase((int)ch);
5568     }
5569 
5570     /**
5571      * Determines if the specified character (Unicode code point) is a titlecase character.
5572      * <p>
5573      * A character is a titlecase character if its general
5574      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5575      * is {@code TITLECASE_LETTER}.
5576      * <p>
5577      * Some characters look like pairs of Latin letters. For example, there
5578      * is an uppercase letter that looks like "LJ" and has a corresponding
5579      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5580      * is the appropriate form to use when rendering a word in lowercase
5581      * with initial capitals, as for a book title.
5582      * <p>
5583      * These are some of the Unicode characters for which this method returns
5584      * {@code true}:
5585      * <ul>
5586      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5587      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5588      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5589      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5590      * </ul>
5591      * <p> Many other Unicode characters are titlecase too.
5592      *
5593      * @param   codePoint the character (Unicode code point) to be tested.
5594      * @return  {@code true} if the character is titlecase;
5595      *          {@code false} otherwise.
5596      * @see     Character#isLowerCase(int)
5597      * @see     Character#isUpperCase(int)
5598      * @see     Character#toTitleCase(int)
5599      * @see     Character#getType(int)
5600      * @since   1.5
5601      */
5602     public static boolean isTitleCase(int codePoint) {
5603         return getType(codePoint) == Character.TITLECASE_LETTER;
5604     }
5605 
5606     /**
5607      * Determines if the specified character is a digit.
5608      * <p>
5609      * A character is a digit if its general category type, provided
5610      * by {@code Character.getType(ch)}, is
5611      * {@code DECIMAL_DIGIT_NUMBER}.
5612      * <p>
5613      * Some Unicode character ranges that contain digits:
5614      * <ul>
5615      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5616      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5617      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5618      *     Arabic-Indic digits
5619      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5620      *     Extended Arabic-Indic digits
5621      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5622      *     Devanagari digits
5623      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5624      *     Fullwidth digits
5625      * </ul>
5626      *
5627      * Many other character ranges contain digits as well.
5628      *
5629      * <p><b>Note:</b> This method cannot handle <a
5630      * href="#supplementary"> supplementary characters</a>. To support
5631      * all Unicode characters, including supplementary characters, use
5632      * the {@link #isDigit(int)} method.
5633      *
5634      * @param   ch   the character to be tested.
5635      * @return  {@code true} if the character is a digit;
5636      *          {@code false} otherwise.
5637      * @see     Character#digit(char, int)
5638      * @see     Character#forDigit(int, int)
5639      * @see     Character#getType(char)
5640      */
5641     public static boolean isDigit(char ch) {
5642         return isDigit((int)ch);
5643     }
5644 
5645     /**
5646      * Determines if the specified character (Unicode code point) is a digit.
5647      * <p>
5648      * A character is a digit if its general category type, provided
5649      * by {@link Character#getType(int) getType(codePoint)}, is
5650      * {@code DECIMAL_DIGIT_NUMBER}.
5651      * <p>
5652      * Some Unicode character ranges that contain digits:
5653      * <ul>
5654      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5655      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5656      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5657      *     Arabic-Indic digits
5658      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5659      *     Extended Arabic-Indic digits
5660      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5661      *     Devanagari digits
5662      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5663      *     Fullwidth digits
5664      * </ul>
5665      *
5666      * Many other character ranges contain digits as well.
5667      *
5668      * @param   codePoint the character (Unicode code point) to be tested.
5669      * @return  {@code true} if the character is a digit;
5670      *          {@code false} otherwise.
5671      * @see     Character#forDigit(int, int)
5672      * @see     Character#getType(int)
5673      * @since   1.5
5674      */
5675     public static boolean isDigit(int codePoint) {
5676         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
5677     }
5678 
5679     /**
5680      * Determines if a character is defined in Unicode.
5681      * <p>
5682      * A character is defined if at least one of the following is true:
5683      * <ul>
5684      * <li>It has an entry in the UnicodeData file.
5685      * <li>It has a value in a range defined by the UnicodeData file.
5686      * </ul>
5687      *
5688      * <p><b>Note:</b> This method cannot handle <a
5689      * href="#supplementary"> supplementary characters</a>. To support
5690      * all Unicode characters, including supplementary characters, use
5691      * the {@link #isDefined(int)} method.
5692      *
5693      * @param   ch   the character to be tested
5694      * @return  {@code true} if the character has a defined meaning
5695      *          in Unicode; {@code false} otherwise.
5696      * @see     Character#isDigit(char)
5697      * @see     Character#isLetter(char)
5698      * @see     Character#isLetterOrDigit(char)
5699      * @see     Character#isLowerCase(char)
5700      * @see     Character#isTitleCase(char)
5701      * @see     Character#isUpperCase(char)
5702      * @since   1.0.2
5703      */
5704     public static boolean isDefined(char ch) {
5705         return isDefined((int)ch);
5706     }
5707 
5708     /**
5709      * Determines if a character (Unicode code point) is defined in Unicode.
5710      * <p>
5711      * A character is defined if at least one of the following is true:
5712      * <ul>
5713      * <li>It has an entry in the UnicodeData file.
5714      * <li>It has a value in a range defined by the UnicodeData file.
5715      * </ul>
5716      *
5717      * @param   codePoint the character (Unicode code point) to be tested.
5718      * @return  {@code true} if the character has a defined meaning
5719      *          in Unicode; {@code false} otherwise.
5720      * @see     Character#isDigit(int)
5721      * @see     Character#isLetter(int)
5722      * @see     Character#isLetterOrDigit(int)
5723      * @see     Character#isLowerCase(int)
5724      * @see     Character#isTitleCase(int)
5725      * @see     Character#isUpperCase(int)
5726      * @since   1.5
5727      */
5728     public static boolean isDefined(int codePoint) {
5729         return getType(codePoint) != Character.UNASSIGNED;
5730     }
5731 
5732     /**
5733      * Determines if the specified character is a letter.
5734      * <p>
5735      * A character is considered to be a letter if its general
5736      * category type, provided by {@code Character.getType(ch)},
5737      * is any of the following:
5738      * <ul>
5739      * <li> {@code UPPERCASE_LETTER}
5740      * <li> {@code LOWERCASE_LETTER}
5741      * <li> {@code TITLECASE_LETTER}
5742      * <li> {@code MODIFIER_LETTER}
5743      * <li> {@code OTHER_LETTER}
5744      * </ul>
5745      *
5746      * Not all letters have case. Many characters are
5747      * letters but are neither uppercase nor lowercase nor titlecase.
5748      *
5749      * <p><b>Note:</b> This method cannot handle <a
5750      * href="#supplementary"> supplementary characters</a>. To support
5751      * all Unicode characters, including supplementary characters, use
5752      * the {@link #isLetter(int)} method.
5753      *
5754      * @param   ch   the character to be tested.
5755      * @return  {@code true} if the character is a letter;
5756      *          {@code false} otherwise.
5757      * @see     Character#isDigit(char)
5758      * @see     Character#isJavaIdentifierStart(char)
5759      * @see     Character#isJavaLetter(char)
5760      * @see     Character#isJavaLetterOrDigit(char)
5761      * @see     Character#isLetterOrDigit(char)
5762      * @see     Character#isLowerCase(char)
5763      * @see     Character#isTitleCase(char)
5764      * @see     Character#isUnicodeIdentifierStart(char)
5765      * @see     Character#isUpperCase(char)
5766      */
5767     public static boolean isLetter(char ch) {
5768         return isLetter((int)ch);
5769     }
5770 
5771     /**
5772      * Determines if the specified character (Unicode code point) is a letter.
5773      * <p>
5774      * A character is considered to be a letter if its general
5775      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5776      * is any of the following:
5777      * <ul>
5778      * <li> {@code UPPERCASE_LETTER}
5779      * <li> {@code LOWERCASE_LETTER}
5780      * <li> {@code TITLECASE_LETTER}
5781      * <li> {@code MODIFIER_LETTER}
5782      * <li> {@code OTHER_LETTER}
5783      * </ul>
5784      *
5785      * Not all letters have case. Many characters are
5786      * letters but are neither uppercase nor lowercase nor titlecase.
5787      *
5788      * @param   codePoint the character (Unicode code point) to be tested.
5789      * @return  {@code true} if the character is a letter;
5790      *          {@code false} otherwise.
5791      * @see     Character#isDigit(int)
5792      * @see     Character#isJavaIdentifierStart(int)
5793      * @see     Character#isLetterOrDigit(int)
5794      * @see     Character#isLowerCase(int)
5795      * @see     Character#isTitleCase(int)
5796      * @see     Character#isUnicodeIdentifierStart(int)
5797      * @see     Character#isUpperCase(int)
5798      * @since   1.5
5799      */
5800     public static boolean isLetter(int codePoint) {
5801         return ((((1 << Character.UPPERCASE_LETTER) |
5802             (1 << Character.LOWERCASE_LETTER) |
5803             (1 << Character.TITLECASE_LETTER) |
5804             (1 << Character.MODIFIER_LETTER) |
5805             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
5806             != 0;
5807     }
5808 
5809     /**
5810      * Determines if the specified character is a letter or digit.
5811      * <p>
5812      * A character is considered to be a letter or digit if either
5813      * {@code Character.isLetter(char ch)} or
5814      * {@code Character.isDigit(char ch)} returns
5815      * {@code true} for the character.
5816      *
5817      * <p><b>Note:</b> This method cannot handle <a
5818      * href="#supplementary"> supplementary characters</a>. To support
5819      * all Unicode characters, including supplementary characters, use
5820      * the {@link #isLetterOrDigit(int)} method.
5821      *
5822      * @param   ch   the character to be tested.
5823      * @return  {@code true} if the character is a letter or digit;
5824      *          {@code false} otherwise.
5825      * @see     Character#isDigit(char)
5826      * @see     Character#isJavaIdentifierPart(char)
5827      * @see     Character#isJavaLetter(char)
5828      * @see     Character#isJavaLetterOrDigit(char)
5829      * @see     Character#isLetter(char)
5830      * @see     Character#isUnicodeIdentifierPart(char)
5831      * @since   1.0.2
5832      */
5833     public static boolean isLetterOrDigit(char ch) {
5834         return isLetterOrDigit((int)ch);
5835     }
5836 
5837     /**
5838      * Determines if the specified character (Unicode code point) is a letter or digit.
5839      * <p>
5840      * A character is considered to be a letter or digit if either
5841      * {@link #isLetter(int) isLetter(codePoint)} or
5842      * {@link #isDigit(int) isDigit(codePoint)} returns
5843      * {@code true} for the character.
5844      *
5845      * @param   codePoint the character (Unicode code point) to be tested.
5846      * @return  {@code true} if the character is a letter or digit;
5847      *          {@code false} otherwise.
5848      * @see     Character#isDigit(int)
5849      * @see     Character#isJavaIdentifierPart(int)
5850      * @see     Character#isLetter(int)
5851      * @see     Character#isUnicodeIdentifierPart(int)
5852      * @since   1.5
5853      */
5854     public static boolean isLetterOrDigit(int codePoint) {
5855         return ((((1 << Character.UPPERCASE_LETTER) |
5856             (1 << Character.LOWERCASE_LETTER) |
5857             (1 << Character.TITLECASE_LETTER) |
5858             (1 << Character.MODIFIER_LETTER) |
5859             (1 << Character.OTHER_LETTER) |
5860             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
5861             != 0;
5862     }
5863 
5864     /**
5865      * Determines if the specified character is permissible as the first
5866      * character in a Java identifier.
5867      * <p>
5868      * A character may start a Java identifier if and only if
5869      * one of the following is true:
5870      * <ul>
5871      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5872      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5873      * <li> {@code ch} is a currency symbol (such as {@code '$'})
5874      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5875      * </ul>
5876      *
5877      * @param   ch the character to be tested.
5878      * @return  {@code true} if the character may start a Java
5879      *          identifier; {@code false} otherwise.
5880      * @see     Character#isJavaLetterOrDigit(char)
5881      * @see     Character#isJavaIdentifierStart(char)
5882      * @see     Character#isJavaIdentifierPart(char)
5883      * @see     Character#isLetter(char)
5884      * @see     Character#isLetterOrDigit(char)
5885      * @see     Character#isUnicodeIdentifierStart(char)
5886      * @since   1.0.2
5887      * @deprecated Replaced by isJavaIdentifierStart(char).
5888      */
5889     @Deprecated
5890     public static boolean isJavaLetter(char ch) {
5891         return isJavaIdentifierStart(ch);
5892     }
5893 
5894     /**
5895      * Determines if the specified character may be part of a Java
5896      * identifier as other than the first character.
5897      * <p>
5898      * A character may be part of a Java identifier if and only if any
5899      * of the following are true:
5900      * <ul>
5901      * <li>  it is a letter
5902      * <li>  it is a currency symbol (such as {@code '$'})
5903      * <li>  it is a connecting punctuation character (such as {@code '_'})
5904      * <li>  it is a digit
5905      * <li>  it is a numeric letter (such as a Roman numeral character)
5906      * <li>  it is a combining mark
5907      * <li>  it is a non-spacing mark
5908      * <li> {@code isIdentifierIgnorable} returns
5909      * {@code true} for the character.
5910      * </ul>
5911      *
5912      * @param   ch the character to be tested.
5913      * @return  {@code true} if the character may be part of a
5914      *          Java identifier; {@code false} otherwise.
5915      * @see     Character#isJavaLetter(char)
5916      * @see     Character#isJavaIdentifierStart(char)
5917      * @see     Character#isJavaIdentifierPart(char)
5918      * @see     Character#isLetter(char)
5919      * @see     Character#isLetterOrDigit(char)
5920      * @see     Character#isUnicodeIdentifierPart(char)
5921      * @see     Character#isIdentifierIgnorable(char)
5922      * @since   1.0.2
5923      * @deprecated Replaced by isJavaIdentifierPart(char).
5924      */
5925     @Deprecated
5926     public static boolean isJavaLetterOrDigit(char ch) {
5927         return isJavaIdentifierPart(ch);
5928     }
5929 
5930     /**
5931      * Determines if the specified character (Unicode code point) is alphabetic.
5932      * <p>
5933      * A character is considered to be alphabetic if its general category type,
5934      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
5935      * the following:
5936      * <ul>
5937      * <li> <code>UPPERCASE_LETTER</code>
5938      * <li> <code>LOWERCASE_LETTER</code>
5939      * <li> <code>TITLECASE_LETTER</code>
5940      * <li> <code>MODIFIER_LETTER</code>
5941      * <li> <code>OTHER_LETTER</code>
5942      * <li> <code>LETTER_NUMBER</code>
5943      * </ul>
5944      * or it has contributory property Other_Alphabetic as defined by the
5945      * Unicode Standard.
5946      *
5947      * @param   codePoint the character (Unicode code point) to be tested.
5948      * @return  <code>true</code> if the character is a Unicode alphabet
5949      *          character, <code>false</code> otherwise.
5950      * @since   1.7
5951      */
5952     public static boolean isAlphabetic(int codePoint) {
5953         return (((((1 << Character.UPPERCASE_LETTER) |
5954             (1 << Character.LOWERCASE_LETTER) |
5955             (1 << Character.TITLECASE_LETTER) |
5956             (1 << Character.MODIFIER_LETTER) |
5957             (1 << Character.OTHER_LETTER) |
5958             (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
5959             CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
5960     }
5961 
5962     /**
5963      * Determines if the specified character (Unicode code point) is a CJKV
5964      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
5965      * the Unicode Standard.
5966      *
5967      * @param   codePoint the character (Unicode code point) to be tested.
5968      * @return  <code>true</code> if the character is a Unicode ideograph
5969      *          character, <code>false</code> otherwise.
5970      * @since   1.7
5971      */
5972     public static boolean isIdeographic(int codePoint) {
5973         return CharacterData.of(codePoint).isIdeographic(codePoint);
5974     }
5975 
5976     /**
5977      * Determines if the specified character is
5978      * permissible as the first character in a Java identifier.
5979      * <p>
5980      * A character may start a Java identifier if and only if
5981      * one of the following conditions is true:
5982      * <ul>
5983      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5984      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5985      * <li> {@code ch} is a currency symbol (such as {@code '$'})
5986      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5987      * </ul>
5988      *
5989      * <p><b>Note:</b> This method cannot handle <a
5990      * href="#supplementary"> supplementary characters</a>. To support
5991      * all Unicode characters, including supplementary characters, use
5992      * the {@link #isJavaIdentifierStart(int)} method.
5993      *
5994      * @param   ch the character to be tested.
5995      * @return  {@code true} if the character may start a Java identifier;
5996      *          {@code false} otherwise.
5997      * @see     Character#isJavaIdentifierPart(char)
5998      * @see     Character#isLetter(char)
5999      * @see     Character#isUnicodeIdentifierStart(char)
6000      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6001      * @since   1.1
6002      */
6003     public static boolean isJavaIdentifierStart(char ch) {
6004         return isJavaIdentifierStart((int)ch);
6005     }
6006 
6007     /**
6008      * Determines if the character (Unicode code point) is
6009      * permissible as the first character in a Java identifier.
6010      * <p>
6011      * A character may start a Java identifier if and only if
6012      * one of the following conditions is true:
6013      * <ul>
6014      * <li> {@link #isLetter(int) isLetter(codePoint)}
6015      *      returns {@code true}
6016      * <li> {@link #getType(int) getType(codePoint)}
6017      *      returns {@code LETTER_NUMBER}
6018      * <li> the referenced character is a currency symbol (such as {@code '$'})
6019      * <li> the referenced character is a connecting punctuation character
6020      *      (such as {@code '_'}).
6021      * </ul>
6022      *
6023      * @param   codePoint the character (Unicode code point) to be tested.
6024      * @return  {@code true} if the character may start a Java identifier;
6025      *          {@code false} otherwise.
6026      * @see     Character#isJavaIdentifierPart(int)
6027      * @see     Character#isLetter(int)
6028      * @see     Character#isUnicodeIdentifierStart(int)
6029      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6030      * @since   1.5
6031      */
6032     public static boolean isJavaIdentifierStart(int codePoint) {
6033         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
6034     }
6035 
6036     /**
6037      * Determines if the specified character may be part of a Java
6038      * identifier as other than the first character.
6039      * <p>
6040      * A character may be part of a Java identifier if any of the following
6041      * are true:
6042      * <ul>
6043      * <li>  it is a letter
6044      * <li>  it is a currency symbol (such as {@code '$'})
6045      * <li>  it is a connecting punctuation character (such as {@code '_'})
6046      * <li>  it is a digit
6047      * <li>  it is a numeric letter (such as a Roman numeral character)
6048      * <li>  it is a combining mark
6049      * <li>  it is a non-spacing mark
6050      * <li> {@code isIdentifierIgnorable} returns
6051      * {@code true} for the character
6052      * </ul>
6053      *
6054      * <p><b>Note:</b> This method cannot handle <a
6055      * href="#supplementary"> supplementary characters</a>. To support
6056      * all Unicode characters, including supplementary characters, use
6057      * the {@link #isJavaIdentifierPart(int)} method.
6058      *
6059      * @param   ch      the character to be tested.
6060      * @return {@code true} if the character may be part of a
6061      *          Java identifier; {@code false} otherwise.
6062      * @see     Character#isIdentifierIgnorable(char)
6063      * @see     Character#isJavaIdentifierStart(char)
6064      * @see     Character#isLetterOrDigit(char)
6065      * @see     Character#isUnicodeIdentifierPart(char)
6066      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6067      * @since   1.1
6068      */
6069     public static boolean isJavaIdentifierPart(char ch) {
6070         return isJavaIdentifierPart((int)ch);
6071     }
6072 
6073     /**
6074      * Determines if the character (Unicode code point) may be part of a Java
6075      * identifier as other than the first character.
6076      * <p>
6077      * A character may be part of a Java identifier if any of the following
6078      * are true:
6079      * <ul>
6080      * <li>  it is a letter
6081      * <li>  it is a currency symbol (such as {@code '$'})
6082      * <li>  it is a connecting punctuation character (such as {@code '_'})
6083      * <li>  it is a digit
6084      * <li>  it is a numeric letter (such as a Roman numeral character)
6085      * <li>  it is a combining mark
6086      * <li>  it is a non-spacing mark
6087      * <li> {@link #isIdentifierIgnorable(int)
6088      * isIdentifierIgnorable(codePoint)} returns {@code true} for
6089      * the character
6090      * </ul>
6091      *
6092      * @param   codePoint the character (Unicode code point) to be tested.
6093      * @return {@code true} if the character may be part of a
6094      *          Java identifier; {@code false} otherwise.
6095      * @see     Character#isIdentifierIgnorable(int)
6096      * @see     Character#isJavaIdentifierStart(int)
6097      * @see     Character#isLetterOrDigit(int)
6098      * @see     Character#isUnicodeIdentifierPart(int)
6099      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6100      * @since   1.5
6101      */
6102     public static boolean isJavaIdentifierPart(int codePoint) {
6103         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
6104     }
6105 
6106     /**
6107      * Determines if the specified character is permissible as the
6108      * first character in a Unicode identifier.
6109      * <p>
6110      * A character may start a Unicode identifier if and only if
6111      * one of the following conditions is true:
6112      * <ul>
6113      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6114      * <li> {@link #getType(char) getType(ch)} returns
6115      *      {@code LETTER_NUMBER}.
6116      * </ul>
6117      *
6118      * <p><b>Note:</b> This method cannot handle <a
6119      * href="#supplementary"> supplementary characters</a>. To support
6120      * all Unicode characters, including supplementary characters, use
6121      * the {@link #isUnicodeIdentifierStart(int)} method.
6122      *
6123      * @param   ch      the character to be tested.
6124      * @return  {@code true} if the character may start a Unicode
6125      *          identifier; {@code false} otherwise.
6126      * @see     Character#isJavaIdentifierStart(char)
6127      * @see     Character#isLetter(char)
6128      * @see     Character#isUnicodeIdentifierPart(char)
6129      * @since   1.1
6130      */
6131     public static boolean isUnicodeIdentifierStart(char ch) {
6132         return isUnicodeIdentifierStart((int)ch);
6133     }
6134 
6135     /**
6136      * Determines if the specified character (Unicode code point) is permissible as the
6137      * first character in a Unicode identifier.
6138      * <p>
6139      * A character may start a Unicode identifier if and only if
6140      * one of the following conditions is true:
6141      * <ul>
6142      * <li> {@link #isLetter(int) isLetter(codePoint)}
6143      *      returns {@code true}
6144      * <li> {@link #getType(int) getType(codePoint)}
6145      *      returns {@code LETTER_NUMBER}.
6146      * </ul>
6147      * @param   codePoint the character (Unicode code point) to be tested.
6148      * @return  {@code true} if the character may start a Unicode
6149      *          identifier; {@code false} otherwise.
6150      * @see     Character#isJavaIdentifierStart(int)
6151      * @see     Character#isLetter(int)
6152      * @see     Character#isUnicodeIdentifierPart(int)
6153      * @since   1.5
6154      */
6155     public static boolean isUnicodeIdentifierStart(int codePoint) {
6156         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
6157     }
6158 
6159     /**
6160      * Determines if the specified character may be part of a Unicode
6161      * identifier as other than the first character.
6162      * <p>
6163      * A character may be part of a Unicode identifier if and only if
6164      * one of the following statements is true:
6165      * <ul>
6166      * <li>  it is a letter
6167      * <li>  it is a connecting punctuation character (such as {@code '_'})
6168      * <li>  it is a digit
6169      * <li>  it is a numeric letter (such as a Roman numeral character)
6170      * <li>  it is a combining mark
6171      * <li>  it is a non-spacing mark
6172      * <li> {@code isIdentifierIgnorable} returns
6173      * {@code true} for this character.
6174      * </ul>
6175      *
6176      * <p><b>Note:</b> This method cannot handle <a
6177      * href="#supplementary"> supplementary characters</a>. To support
6178      * all Unicode characters, including supplementary characters, use
6179      * the {@link #isUnicodeIdentifierPart(int)} method.
6180      *
6181      * @param   ch      the character to be tested.
6182      * @return  {@code true} if the character may be part of a
6183      *          Unicode identifier; {@code false} otherwise.
6184      * @see     Character#isIdentifierIgnorable(char)
6185      * @see     Character#isJavaIdentifierPart(char)
6186      * @see     Character#isLetterOrDigit(char)
6187      * @see     Character#isUnicodeIdentifierStart(char)
6188      * @since   1.1
6189      */
6190     public static boolean isUnicodeIdentifierPart(char ch) {
6191         return isUnicodeIdentifierPart((int)ch);
6192     }
6193 
6194     /**
6195      * Determines if the specified character (Unicode code point) may be part of a Unicode
6196      * identifier as other than the first character.
6197      * <p>
6198      * A character may be part of a Unicode identifier if and only if
6199      * one of the following statements is true:
6200      * <ul>
6201      * <li>  it is a letter
6202      * <li>  it is a connecting punctuation character (such as {@code '_'})
6203      * <li>  it is a digit
6204      * <li>  it is a numeric letter (such as a Roman numeral character)
6205      * <li>  it is a combining mark
6206      * <li>  it is a non-spacing mark
6207      * <li> {@code isIdentifierIgnorable} returns
6208      * {@code true} for this character.
6209      * </ul>
6210      * @param   codePoint the character (Unicode code point) to be tested.
6211      * @return  {@code true} if the character may be part of a
6212      *          Unicode identifier; {@code false} otherwise.
6213      * @see     Character#isIdentifierIgnorable(int)
6214      * @see     Character#isJavaIdentifierPart(int)
6215      * @see     Character#isLetterOrDigit(int)
6216      * @see     Character#isUnicodeIdentifierStart(int)
6217      * @since   1.5
6218      */
6219     public static boolean isUnicodeIdentifierPart(int codePoint) {
6220         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
6221     }
6222 
6223     /**
6224      * Determines if the specified character should be regarded as
6225      * an ignorable character in a Java identifier or a Unicode identifier.
6226      * <p>
6227      * The following Unicode characters are ignorable in a Java identifier
6228      * or a Unicode identifier:
6229      * <ul>
6230      * <li>ISO control characters that are not whitespace
6231      * <ul>
6232      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6233      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6234      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6235      * </ul>
6236      *
6237      * <li>all characters that have the {@code FORMAT} general
6238      * category value
6239      * </ul>
6240      *
6241      * <p><b>Note:</b> This method cannot handle <a
6242      * href="#supplementary"> supplementary characters</a>. To support
6243      * all Unicode characters, including supplementary characters, use
6244      * the {@link #isIdentifierIgnorable(int)} method.
6245      *
6246      * @param   ch      the character to be tested.
6247      * @return  {@code true} if the character is an ignorable control
6248      *          character that may be part of a Java or Unicode identifier;
6249      *           {@code false} otherwise.
6250      * @see     Character#isJavaIdentifierPart(char)
6251      * @see     Character#isUnicodeIdentifierPart(char)
6252      * @since   1.1
6253      */
6254     public static boolean isIdentifierIgnorable(char ch) {
6255         return isIdentifierIgnorable((int)ch);
6256     }
6257 
6258     /**
6259      * Determines if the specified character (Unicode code point) should be regarded as
6260      * an ignorable character in a Java identifier or a Unicode identifier.
6261      * <p>
6262      * The following Unicode characters are ignorable in a Java identifier
6263      * or a Unicode identifier:
6264      * <ul>
6265      * <li>ISO control characters that are not whitespace
6266      * <ul>
6267      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6268      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6269      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6270      * </ul>
6271      *
6272      * <li>all characters that have the {@code FORMAT} general
6273      * category value
6274      * </ul>
6275      *
6276      * @param   codePoint the character (Unicode code point) to be tested.
6277      * @return  {@code true} if the character is an ignorable control
6278      *          character that may be part of a Java or Unicode identifier;
6279      *          {@code false} otherwise.
6280      * @see     Character#isJavaIdentifierPart(int)
6281      * @see     Character#isUnicodeIdentifierPart(int)
6282      * @since   1.5
6283      */
6284     public static boolean isIdentifierIgnorable(int codePoint) {
6285         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
6286     }
6287 
6288     /**
6289      * Converts the character argument to lowercase using case
6290      * mapping information from the UnicodeData file.
6291      * <p>
6292      * Note that
6293      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
6294      * does not always return {@code true} for some ranges of
6295      * characters, particularly those that are symbols or ideographs.
6296      *
6297      * <p>In general, {@link String#toLowerCase()} should be used to map
6298      * characters to lowercase. {@code String} case mapping methods
6299      * have several benefits over {@code Character} case mapping methods.
6300      * {@code String} case mapping methods can perform locale-sensitive
6301      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6302      * the {@code Character} case mapping methods cannot.
6303      *
6304      * <p><b>Note:</b> This method cannot handle <a
6305      * href="#supplementary"> supplementary characters</a>. To support
6306      * all Unicode characters, including supplementary characters, use
6307      * the {@link #toLowerCase(int)} method.
6308      *
6309      * @param   ch   the character to be converted.
6310      * @return  the lowercase equivalent of the character, if any;
6311      *          otherwise, the character itself.
6312      * @see     Character#isLowerCase(char)
6313      * @see     String#toLowerCase()
6314      */
6315     public static char toLowerCase(char ch) {
6316         return (char)toLowerCase((int)ch);
6317     }
6318 
6319     /**
6320      * Converts the character (Unicode code point) argument to
6321      * lowercase using case mapping information from the UnicodeData
6322      * file.
6323      *
6324      * <p> Note that
6325      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
6326      * does not always return {@code true} for some ranges of
6327      * characters, particularly those that are symbols or ideographs.
6328      *
6329      * <p>In general, {@link String#toLowerCase()} should be used to map
6330      * characters to lowercase. {@code String} case mapping methods
6331      * have several benefits over {@code Character} case mapping methods.
6332      * {@code String} case mapping methods can perform locale-sensitive
6333      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6334      * the {@code Character} case mapping methods cannot.
6335      *
6336      * @param   codePoint   the character (Unicode code point) to be converted.
6337      * @return  the lowercase equivalent of the character (Unicode code
6338      *          point), if any; otherwise, the character itself.
6339      * @see     Character#isLowerCase(int)
6340      * @see     String#toLowerCase()
6341      *
6342      * @since   1.5
6343      */
6344     public static int toLowerCase(int codePoint) {
6345         return CharacterData.of(codePoint).toLowerCase(codePoint);
6346     }
6347 
6348     /**
6349      * Converts the character argument to uppercase using case mapping
6350      * information from the UnicodeData file.
6351      * <p>
6352      * Note that
6353      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
6354      * does not always return {@code true} for some ranges of
6355      * characters, particularly those that are symbols or ideographs.
6356      *
6357      * <p>In general, {@link String#toUpperCase()} should be used to map
6358      * characters to uppercase. {@code String} case mapping methods
6359      * have several benefits over {@code Character} case mapping methods.
6360      * {@code String} case mapping methods can perform locale-sensitive
6361      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6362      * the {@code Character} case mapping methods cannot.
6363      *
6364      * <p><b>Note:</b> This method cannot handle <a
6365      * href="#supplementary"> supplementary characters</a>. To support
6366      * all Unicode characters, including supplementary characters, use
6367      * the {@link #toUpperCase(int)} method.
6368      *
6369      * @param   ch   the character to be converted.
6370      * @return  the uppercase equivalent of the character, if any;
6371      *          otherwise, the character itself.
6372      * @see     Character#isUpperCase(char)
6373      * @see     String#toUpperCase()
6374      */
6375     public static char toUpperCase(char ch) {
6376         return (char)toUpperCase((int)ch);
6377     }
6378 
6379     /**
6380      * Converts the character (Unicode code point) argument to
6381      * uppercase using case mapping information from the UnicodeData
6382      * file.
6383      *
6384      * <p>Note that
6385      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
6386      * does not always return {@code true} for some ranges of
6387      * characters, particularly those that are symbols or ideographs.
6388      *
6389      * <p>In general, {@link String#toUpperCase()} should be used to map
6390      * characters to uppercase. {@code String} case mapping methods
6391      * have several benefits over {@code Character} case mapping methods.
6392      * {@code String} case mapping methods can perform locale-sensitive
6393      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6394      * the {@code Character} case mapping methods cannot.
6395      *
6396      * @param   codePoint   the character (Unicode code point) to be converted.
6397      * @return  the uppercase equivalent of the character, if any;
6398      *          otherwise, the character itself.
6399      * @see     Character#isUpperCase(int)
6400      * @see     String#toUpperCase()
6401      *
6402      * @since   1.5
6403      */
6404     public static int toUpperCase(int codePoint) {
6405         return CharacterData.of(codePoint).toUpperCase(codePoint);
6406     }
6407 
6408     /**
6409      * Converts the character argument to titlecase using case mapping
6410      * information from the UnicodeData file. If a character has no
6411      * explicit titlecase mapping and is not itself a titlecase char
6412      * according to UnicodeData, then the uppercase mapping is
6413      * returned as an equivalent titlecase mapping. If the
6414      * {@code char} argument is already a titlecase
6415      * {@code char}, the same {@code char} value will be
6416      * returned.
6417      * <p>
6418      * Note that
6419      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
6420      * does not always return {@code true} for some ranges of
6421      * characters.
6422      *
6423      * <p><b>Note:</b> This method cannot handle <a
6424      * href="#supplementary"> supplementary characters</a>. To support
6425      * all Unicode characters, including supplementary characters, use
6426      * the {@link #toTitleCase(int)} method.
6427      *
6428      * @param   ch   the character to be converted.
6429      * @return  the titlecase equivalent of the character, if any;
6430      *          otherwise, the character itself.
6431      * @see     Character#isTitleCase(char)
6432      * @see     Character#toLowerCase(char)
6433      * @see     Character#toUpperCase(char)
6434      * @since   1.0.2
6435      */
6436     public static char toTitleCase(char ch) {
6437         return (char)toTitleCase((int)ch);
6438     }
6439 
6440     /**
6441      * Converts the character (Unicode code point) argument to titlecase using case mapping
6442      * information from the UnicodeData file. If a character has no
6443      * explicit titlecase mapping and is not itself a titlecase char
6444      * according to UnicodeData, then the uppercase mapping is
6445      * returned as an equivalent titlecase mapping. If the
6446      * character argument is already a titlecase
6447      * character, the same character value will be
6448      * returned.
6449      *
6450      * <p>Note that
6451      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
6452      * does not always return {@code true} for some ranges of
6453      * characters.
6454      *
6455      * @param   codePoint   the character (Unicode code point) to be converted.
6456      * @return  the titlecase equivalent of the character, if any;
6457      *          otherwise, the character itself.
6458      * @see     Character#isTitleCase(int)
6459      * @see     Character#toLowerCase(int)
6460      * @see     Character#toUpperCase(int)
6461      * @since   1.5
6462      */
6463     public static int toTitleCase(int codePoint) {
6464         return CharacterData.of(codePoint).toTitleCase(codePoint);
6465     }
6466 
6467     /**
6468      * Returns the numeric value of the character {@code ch} in the
6469      * specified radix.
6470      * <p>
6471      * If the radix is not in the range {@code MIN_RADIX} &le;
6472      * {@code radix} &le; {@code MAX_RADIX} or if the
6473      * value of {@code ch} is not a valid digit in the specified
6474      * radix, {@code -1} is returned. A character is a valid digit
6475      * if at least one of the following is true:
6476      * <ul>
6477      * <li>The method {@code isDigit} is {@code true} of the character
6478      *     and the Unicode decimal digit value of the character (or its
6479      *     single-character decomposition) is less than the specified radix.
6480      *     In this case the decimal digit value is returned.
6481      * <li>The character is one of the uppercase Latin letters
6482      *     {@code 'A'} through {@code 'Z'} and its code is less than
6483      *     {@code radix + 'A' - 10}.
6484      *     In this case, {@code ch - 'A' + 10}
6485      *     is returned.
6486      * <li>The character is one of the lowercase Latin letters
6487      *     {@code 'a'} through {@code 'z'} and its code is less than
6488      *     {@code radix + 'a' - 10}.
6489      *     In this case, {@code ch - 'a' + 10}
6490      *     is returned.
6491      * <li>The character is one of the fullwidth uppercase Latin letters A
6492      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6493      *     and its code is less than
6494      *     {@code radix + '\u005CuFF21' - 10}.
6495      *     In this case, {@code ch - '\u005CuFF21' + 10}
6496      *     is returned.
6497      * <li>The character is one of the fullwidth lowercase Latin letters a
6498      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6499      *     and its code is less than
6500      *     {@code radix + '\u005CuFF41' - 10}.
6501      *     In this case, {@code ch - '\u005CuFF41' + 10}
6502      *     is returned.
6503      * </ul>
6504      *
6505      * <p><b>Note:</b> This method cannot handle <a
6506      * href="#supplementary"> supplementary characters</a>. To support
6507      * all Unicode characters, including supplementary characters, use
6508      * the {@link #digit(int, int)} method.
6509      *
6510      * @param   ch      the character to be converted.
6511      * @param   radix   the radix.
6512      * @return  the numeric value represented by the character in the
6513      *          specified radix.
6514      * @see     Character#forDigit(int, int)
6515      * @see     Character#isDigit(char)
6516      */
6517     public static int digit(char ch, int radix) {
6518         return digit((int)ch, radix);
6519     }
6520 
6521     /**
6522      * Returns the numeric value of the specified character (Unicode
6523      * code point) in the specified radix.
6524      *
6525      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
6526      * {@code radix} &le; {@code MAX_RADIX} or if the
6527      * character is not a valid digit in the specified
6528      * radix, {@code -1} is returned. A character is a valid digit
6529      * if at least one of the following is true:
6530      * <ul>
6531      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
6532      *     and the Unicode decimal digit value of the character (or its
6533      *     single-character decomposition) is less than the specified radix.
6534      *     In this case the decimal digit value is returned.
6535      * <li>The character is one of the uppercase Latin letters
6536      *     {@code 'A'} through {@code 'Z'} and its code is less than
6537      *     {@code radix + 'A' - 10}.
6538      *     In this case, {@code codePoint - 'A' + 10}
6539      *     is returned.
6540      * <li>The character is one of the lowercase Latin letters
6541      *     {@code 'a'} through {@code 'z'} and its code is less than
6542      *     {@code radix + 'a' - 10}.
6543      *     In this case, {@code codePoint - 'a' + 10}
6544      *     is returned.
6545      * <li>The character is one of the fullwidth uppercase Latin letters A
6546      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6547      *     and its code is less than
6548      *     {@code radix + '\u005CuFF21' - 10}.
6549      *     In this case,
6550      *     {@code codePoint - '\u005CuFF21' + 10}
6551      *     is returned.
6552      * <li>The character is one of the fullwidth lowercase Latin letters a
6553      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6554      *     and its code is less than
     *     {@code radix + '\u005CuFF41' - 10}.
6556      *     In this case,
6557      *     {@code codePoint - '\u005CuFF41' + 10}
6558      *     is returned.
6559      * </ul>
6560      *
6561      * @param   codePoint the character (Unicode code point) to be converted.
6562      * @param   radix   the radix.
6563      * @return  the numeric value represented by the character in the
6564      *          specified radix.
6565      * @see     Character#forDigit(int, int)
6566      * @see     Character#isDigit(int)
6567      * @since   1.5
6568      */
6569     public static int digit(int codePoint, int radix) {
6570         return CharacterData.of(codePoint).digit(codePoint, radix);
6571     }
6572 
6573     /**
6574      * Returns the {@code int} value that the specified Unicode
6575      * character represents. For example, the character
     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
     * an {@code int} with a value of 50.
6578      * <p>
6579      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6580      * {@code '\u005Cu005A'}), lowercase
6581      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6582      * full width variant ({@code '\u005CuFF21'} through
6583      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6584      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6585      * through 35. This is independent of the Unicode specification,
6586      * which does not assign numeric values to these {@code char}
6587      * values.
6588      * <p>
6589      * If the character does not have a numeric value, then -1 is returned.
6590      * If the character has a numeric value that cannot be represented as a
6591      * nonnegative integer (for example, a fractional value), then -2
6592      * is returned.
6593      *
6594      * <p><b>Note:</b> This method cannot handle <a
6595      * href="#supplementary"> supplementary characters</a>. To support
6596      * all Unicode characters, including supplementary characters, use
6597      * the {@link #getNumericValue(int)} method.
6598      *
6599      * @param   ch      the character to be converted.
6600      * @return  the numeric value of the character, as a nonnegative {@code int}
6601      *           value; -2 if the character has a numeric value that is not a
6602      *          nonnegative integer; -1 if the character has no numeric value.
6603      * @see     Character#forDigit(int, int)
6604      * @see     Character#isDigit(char)
6605      * @since   1.1
6606      */
6607     public static int getNumericValue(char ch) {
6608         return getNumericValue((int)ch);
6609     }
6610 
6611     /**
6612      * Returns the {@code int} value that the specified
6613      * character (Unicode code point) represents. For example, the character
6614      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6615      * an {@code int} with a value of 50.
6616      * <p>
6617      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6618      * {@code '\u005Cu005A'}), lowercase
6619      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6620      * full width variant ({@code '\u005CuFF21'} through
6621      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6622      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6623      * through 35. This is independent of the Unicode specification,
6624      * which does not assign numeric values to these {@code char}
6625      * values.
6626      * <p>
6627      * If the character does not have a numeric value, then -1 is returned.
6628      * If the character has a numeric value that cannot be represented as a
6629      * nonnegative integer (for example, a fractional value), then -2
6630      * is returned.
6631      *
6632      * @param   codePoint the character (Unicode code point) to be converted.
6633      * @return  the numeric value of the character, as a nonnegative {@code int}
6634      *          value; -2 if the character has a numeric value that is not a
6635      *          nonnegative integer; -1 if the character has no numeric value.
6636      * @see     Character#forDigit(int, int)
6637      * @see     Character#isDigit(int)
6638      * @since   1.5
6639      */
6640     public static int getNumericValue(int codePoint) {
6641         return CharacterData.of(codePoint).getNumericValue(codePoint);
6642     }
6643 
6644     /**
6645      * Determines if the specified character is ISO-LATIN-1 white space.
6646      * This method returns {@code true} for the following five
6647      * characters only:
6648      * <table summary="truechars">
6649      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
6650      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
6651      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
6652      *     <td>{@code NEW LINE}</td></tr>
6653      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
6654      *     <td>{@code FORM FEED}</td></tr>
6655      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
6656      *     <td>{@code CARRIAGE RETURN}</td></tr>
     * <tr><td>{@code ' '}</td>  <td>{@code U+0020}</td>
6658      *     <td>{@code SPACE}</td></tr>
6659      * </table>
6660      *
6661      * @param      ch   the character to be tested.
6662      * @return     {@code true} if the character is ISO-LATIN-1 white
6663      *             space; {@code false} otherwise.
6664      * @see        Character#isSpaceChar(char)
6665      * @see        Character#isWhitespace(char)
     * @deprecated Replaced by {@link #isWhitespace(char)}.
6667      */
6668     @Deprecated
6669     public static boolean isSpace(char ch) {
6670         return (ch <= 0x0020) &&
6671             (((((1L << 0x0009) |
6672             (1L << 0x000A) |
6673             (1L << 0x000C) |
6674             (1L << 0x000D) |
6675             (1L << 0x0020)) >> ch) & 1L) != 0);
6676     }
6677 
6678 
6679     /**
6680      * Determines if the specified character is a Unicode space character.
6681      * A character is considered to be a space character if and only if
6682      * it is specified to be a space character by the Unicode Standard. This
6683      * method returns true if the character's general category type is any of
6684      * the following:
6685      * <ul>
6686      * <li> {@code SPACE_SEPARATOR}
6687      * <li> {@code LINE_SEPARATOR}
6688      * <li> {@code PARAGRAPH_SEPARATOR}
6689      * </ul>
6690      *
6691      * <p><b>Note:</b> This method cannot handle <a
6692      * href="#supplementary"> supplementary characters</a>. To support
6693      * all Unicode characters, including supplementary characters, use
6694      * the {@link #isSpaceChar(int)} method.
6695      *
6696      * @param   ch      the character to be tested.
6697      * @return  {@code true} if the character is a space character;
6698      *          {@code false} otherwise.
6699      * @see     Character#isWhitespace(char)
6700      * @since   1.1
6701      */
6702     public static boolean isSpaceChar(char ch) {
6703         return isSpaceChar((int)ch);
6704     }
6705 
6706     /**
6707      * Determines if the specified character (Unicode code point) is a
6708      * Unicode space character.  A character is considered to be a
6709      * space character if and only if it is specified to be a space
6710      * character by the Unicode Standard. This method returns true if
6711      * the character's general category type is any of the following:
6712      *
6713      * <ul>
6714      * <li> {@link #SPACE_SEPARATOR}
6715      * <li> {@link #LINE_SEPARATOR}
6716      * <li> {@link #PARAGRAPH_SEPARATOR}
6717      * </ul>
6718      *
6719      * @param   codePoint the character (Unicode code point) to be tested.
6720      * @return  {@code true} if the character is a space character;
6721      *          {@code false} otherwise.
6722      * @see     Character#isWhitespace(int)
6723      * @since   1.5
6724      */
6725     public static boolean isSpaceChar(int codePoint) {
6726         return ((((1 << Character.SPACE_SEPARATOR) |
6727                   (1 << Character.LINE_SEPARATOR) |
6728                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
6729             != 0;
6730     }
6731 
6732     /**
6733      * Determines if the specified character is white space according to Java.
6734      * A character is a Java whitespace character if and only if it satisfies
6735      * one of the following criteria:
6736      * <ul>
6737      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
6738      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
6739      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6740      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6741      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6742      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6743      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6744      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6745      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6746      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6747      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6748      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6749      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6750      * </ul>
6751      *
6752      * <p><b>Note:</b> This method cannot handle <a
6753      * href="#supplementary"> supplementary characters</a>. To support
6754      * all Unicode characters, including supplementary characters, use
6755      * the {@link #isWhitespace(int)} method.
6756      *
6757      * @param   ch the character to be tested.
6758      * @return  {@code true} if the character is a Java whitespace
6759      *          character; {@code false} otherwise.
6760      * @see     Character#isSpaceChar(char)
6761      * @since   1.1
6762      */
6763     public static boolean isWhitespace(char ch) {
6764         return isWhitespace((int)ch);
6765     }
6766 
6767     /**
6768      * Determines if the specified character (Unicode code point) is
6769      * white space according to Java.  A character is a Java
6770      * whitespace character if and only if it satisfies one of the
6771      * following criteria:
6772      * <ul>
6773      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6774      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6775      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6776      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6777      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6778      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6779      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6780      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6781      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6782      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6783      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6784      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6785      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6786      * </ul>
6787      *
6788      * @param   codePoint the character (Unicode code point) to be tested.
6789      * @return  {@code true} if the character is a Java whitespace
6790      *          character; {@code false} otherwise.
6791      * @see     Character#isSpaceChar(int)
6792      * @since   1.5
6793      */
6794     public static boolean isWhitespace(int codePoint) {
6795         return CharacterData.of(codePoint).isWhitespace(codePoint);
6796     }
6797 
6798     /**
6799      * Determines if the specified character is an ISO control
6800      * character.  A character is considered to be an ISO control
6801      * character if its code is in the range {@code '\u005Cu0000'}
6802      * through {@code '\u005Cu001F'} or in the range
6803      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6804      *
6805      * <p><b>Note:</b> This method cannot handle <a
6806      * href="#supplementary"> supplementary characters</a>. To support
6807      * all Unicode characters, including supplementary characters, use
6808      * the {@link #isISOControl(int)} method.
6809      *
6810      * @param   ch      the character to be tested.
6811      * @return  {@code true} if the character is an ISO control character;
6812      *          {@code false} otherwise.
6813      *
6814      * @see     Character#isSpaceChar(char)
6815      * @see     Character#isWhitespace(char)
6816      * @since   1.1
6817      */
6818     public static boolean isISOControl(char ch) {
6819         return isISOControl((int)ch);
6820     }
6821 
6822     /**
6823      * Determines if the referenced character (Unicode code point) is an ISO control
6824      * character.  A character is considered to be an ISO control
6825      * character if its code is in the range {@code '\u005Cu0000'}
6826      * through {@code '\u005Cu001F'} or in the range
6827      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6828      *
6829      * @param   codePoint the character (Unicode code point) to be tested.
6830      * @return  {@code true} if the character is an ISO control character;
6831      *          {@code false} otherwise.
6832      * @see     Character#isSpaceChar(int)
6833      * @see     Character#isWhitespace(int)
6834      * @since   1.5
6835      */
6836     public static boolean isISOControl(int codePoint) {
6837         // Optimized form of:
6838         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
6839         //     (codePoint >= 0x7F && codePoint <= 0x9F);
6840         return codePoint <= 0x9F &&
6841             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
6842     }
6843 
6844     /**
6845      * Returns a value indicating a character's general category.
6846      *
6847      * <p><b>Note:</b> This method cannot handle <a
6848      * href="#supplementary"> supplementary characters</a>. To support
6849      * all Unicode characters, including supplementary characters, use
6850      * the {@link #getType(int)} method.
6851      *
6852      * @param   ch      the character to be tested.
6853      * @return  a value of type {@code int} representing the
6854      *          character's general category.
6855      * @see     Character#COMBINING_SPACING_MARK
6856      * @see     Character#CONNECTOR_PUNCTUATION
6857      * @see     Character#CONTROL
6858      * @see     Character#CURRENCY_SYMBOL
6859      * @see     Character#DASH_PUNCTUATION
6860      * @see     Character#DECIMAL_DIGIT_NUMBER
6861      * @see     Character#ENCLOSING_MARK
6862      * @see     Character#END_PUNCTUATION
6863      * @see     Character#FINAL_QUOTE_PUNCTUATION
6864      * @see     Character#FORMAT
6865      * @see     Character#INITIAL_QUOTE_PUNCTUATION
6866      * @see     Character#LETTER_NUMBER
6867      * @see     Character#LINE_SEPARATOR
6868      * @see     Character#LOWERCASE_LETTER
6869      * @see     Character#MATH_SYMBOL
6870      * @see     Character#MODIFIER_LETTER
6871      * @see     Character#MODIFIER_SYMBOL
6872      * @see     Character#NON_SPACING_MARK
6873      * @see     Character#OTHER_LETTER
6874      * @see     Character#OTHER_NUMBER
6875      * @see     Character#OTHER_PUNCTUATION
6876      * @see     Character#OTHER_SYMBOL
6877      * @see     Character#PARAGRAPH_SEPARATOR
6878      * @see     Character#PRIVATE_USE
6879      * @see     Character#SPACE_SEPARATOR
6880      * @see     Character#START_PUNCTUATION
6881      * @see     Character#SURROGATE
6882      * @see     Character#TITLECASE_LETTER
6883      * @see     Character#UNASSIGNED
6884      * @see     Character#UPPERCASE_LETTER
6885      * @since   1.1
6886      */
6887     public static int getType(char ch) {
6888         return getType((int)ch);
6889     }
6890 
6891     /**
6892      * Returns a value indicating a character's general category.
6893      *
6894      * @param   codePoint the character (Unicode code point) to be tested.
6895      * @return  a value of type {@code int} representing the
6896      *          character's general category.
6897      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
6898      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
6899      * @see     Character#CONTROL CONTROL
6900      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
6901      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
6902      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
6903      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
6904      * @see     Character#END_PUNCTUATION END_PUNCTUATION
6905      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
6906      * @see     Character#FORMAT FORMAT
6907      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
6908      * @see     Character#LETTER_NUMBER LETTER_NUMBER
6909      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
6910      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
6911      * @see     Character#MATH_SYMBOL MATH_SYMBOL
6912      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
6913      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
6914      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
6915      * @see     Character#OTHER_LETTER OTHER_LETTER
6916      * @see     Character#OTHER_NUMBER OTHER_NUMBER
6917      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
6918      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
6919      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
6920      * @see     Character#PRIVATE_USE PRIVATE_USE
6921      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
6922      * @see     Character#START_PUNCTUATION START_PUNCTUATION
6923      * @see     Character#SURROGATE SURROGATE
6924      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
6925      * @see     Character#UNASSIGNED UNASSIGNED
6926      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
6927      * @since   1.5
6928      */
6929     public static int getType(int codePoint) {
6930         return CharacterData.of(codePoint).getType(codePoint);
6931     }
6932 
6933     /**
6934      * Determines the character representation for a specific digit in
6935      * the specified radix. If the value of {@code radix} is not a
6936      * valid radix, or the value of {@code digit} is not a valid
6937      * digit in the specified radix, the null character
6938      * ({@code '\u005Cu0000'}) is returned.
6939      * <p>
6940      * The {@code radix} argument is valid if it is greater than or
6941      * equal to {@code MIN_RADIX} and less than or equal to
6942      * {@code MAX_RADIX}. The {@code digit} argument is valid if
6943      * {@code 0 <= digit < radix}.
6944      * <p>
6945      * If the digit is less than 10, then
6946      * {@code '0' + digit} is returned. Otherwise, the value
6947      * {@code 'a' + digit - 10} is returned.
6948      *
6949      * @param   digit   the number to convert to a character.
6950      * @param   radix   the radix.
6951      * @return  the {@code char} representation of the specified digit
6952      *          in the specified radix.
6953      * @see     Character#MIN_RADIX
6954      * @see     Character#MAX_RADIX
6955      * @see     Character#digit(char, int)
6956      */
6957     public static char forDigit(int digit, int radix) {
6958         if ((digit >= radix) || (digit < 0)) {
6959             return '\0';
6960         }
6961         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
6962             return '\0';
6963         }
6964         if (digit < 10) {
6965             return (char)('0' + digit);
6966         }
6967         return (char)('a' - 10 + digit);
6968     }
6969 
6970     /**
6971      * Returns the Unicode directionality property for the given
6972      * character.  Character directionality is used to calculate the
6973      * visual ordering of text. The directionality value of undefined
6974      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
6975      *
6976      * <p><b>Note:</b> This method cannot handle <a
6977      * href="#supplementary"> supplementary characters</a>. To support
6978      * all Unicode characters, including supplementary characters, use
6979      * the {@link #getDirectionality(int)} method.
6980      *
6981      * @param  ch {@code char} for which the directionality property
6982      *            is requested.
6983      * @return the directionality property of the {@code char} value.
6984      *
6985      * @see Character#DIRECTIONALITY_UNDEFINED
6986      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
6987      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
6988      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
6989      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
6990      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
6991      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
6992      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
6993      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
6994      * @see Character#DIRECTIONALITY_NONSPACING_MARK
6995      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
6996      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
6997      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
6998      * @see Character#DIRECTIONALITY_WHITESPACE
6999      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
7000      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7001      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7002      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7003      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7004      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7005      * @since 1.4
7006      */
7007     public static byte getDirectionality(char ch) {
7008         return getDirectionality((int)ch);
7009     }
7010 
7011     /**
7012      * Returns the Unicode directionality property for the given
7013      * character (Unicode code point).  Character directionality is
7014      * used to calculate the visual ordering of text. The
     * directionality value of an undefined character is {@link
7016      * #DIRECTIONALITY_UNDEFINED}.
7017      *
7018      * @param   codePoint the character (Unicode code point) for which
7019      *          the directionality property is requested.
7020      * @return the directionality property of the character.
7021      *
7022      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
7023      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
7024      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
7025      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7026      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
7027      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7028      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7029      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
7030      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7031      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
7032      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
7033      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
7034      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
7035      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
7036      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
7037      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7038      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7039      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7040      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7041      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7042      * @since    1.5
7043      */
7044     public static byte getDirectionality(int codePoint) {
7045         return CharacterData.of(codePoint).getDirectionality(codePoint);
7046     }
7047 
7048     /**
7049      * Determines whether the character is mirrored according to the
7050      * Unicode specification.  Mirrored characters should have their
7051      * glyphs horizontally mirrored when displayed in text that is
7052      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
7053      * PARENTHESIS is semantically defined to be an <i>opening
7054      * parenthesis</i>.  This will appear as a "(" in text that is
7055      * left-to-right but as a ")" in text that is right-to-left.
7056      *
7057      * <p><b>Note:</b> This method cannot handle <a
7058      * href="#supplementary"> supplementary characters</a>. To support
7059      * all Unicode characters, including supplementary characters, use
7060      * the {@link #isMirrored(int)} method.
7061      *
7062      * @param  ch {@code char} for which the mirrored property is requested
7063      * @return {@code true} if the char is mirrored, {@code false}
7064      *         if the {@code char} is not mirrored or is not defined.
7065      * @since 1.4
7066      */
7067     public static boolean isMirrored(char ch) {
7068         return isMirrored((int)ch);
7069     }
7070 
7071     /**
7072      * Determines whether the specified character (Unicode code point)
7073      * is mirrored according to the Unicode specification.  Mirrored
7074      * characters should have their glyphs horizontally mirrored when
7075      * displayed in text that is right-to-left.  For example,
7076      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
7077      * defined to be an <i>opening parenthesis</i>.  This will appear
7078      * as a "(" in text that is left-to-right but as a ")" in text
7079      * that is right-to-left.
7080      *
7081      * @param   codePoint the character (Unicode code point) to be tested.
7082      * @return  {@code true} if the character is mirrored, {@code false}
7083      *          if the character is not mirrored or is not defined.
7084      * @since   1.5
7085      */
7086     public static boolean isMirrored(int codePoint) {
7087         return CharacterData.of(codePoint).isMirrored(codePoint);
7088     }
7089 
7090     /**
7091      * Compares two {@code Character} objects numerically.
7092      *
7093      * @param   anotherCharacter   the {@code Character} to be compared.
     *
7095      * @return  the value {@code 0} if the argument {@code Character}
7096      *          is equal to this {@code Character}; a value less than
7097      *          {@code 0} if this {@code Character} is numerically less
7098      *          than the {@code Character} argument; and a value greater than
7099      *          {@code 0} if this {@code Character} is numerically greater
7100      *          than the {@code Character} argument (unsigned comparison).
7101      *          Note that this is strictly a numerical comparison; it is not
7102      *          locale-dependent.
7103      * @since   1.2
7104      */
7105     public int compareTo(Character anotherCharacter) {
7106         return compare(this.value, anotherCharacter.value);
7107     }
7108 
7109     /**
7110      * Compares two {@code char} values numerically.
7111      * The value returned is identical to what would be returned by:
7112      * <pre>
7113      *    Character.valueOf(x).compareTo(Character.valueOf(y))
7114      * </pre>
7115      *
7116      * @param  x the first {@code char} to compare
7117      * @param  y the second {@code char} to compare
7118      * @return the value {@code 0} if {@code x == y};
7119      *         a value less than {@code 0} if {@code x < y}; and
7120      *         a value greater than {@code 0} if {@code x > y}
7121      * @since 1.7
7122      */
7123     public static int compare(char x, char y) {
7124         return x - y;
7125     }
7126 
7127     /**
7128      * Converts the character (Unicode code point) argument to uppercase using
7129      * information from the UnicodeData file.
7130      *
7131      * @param   codePoint   the character (Unicode code point) to be converted.
7132      * @return  either the uppercase equivalent of the character, if
7133      *          any, or an error flag ({@code Character.ERROR})
7134      *          that indicates that a 1:M {@code char} mapping exists.
7135      * @see     Character#isLowerCase(char)
7136      * @see     Character#isUpperCase(char)
7137      * @see     Character#toLowerCase(char)
7138      * @see     Character#toTitleCase(char)
7139      * @since 1.4
7140      */
7141     static int toUpperCaseEx(int codePoint) {
7142         assert isValidCodePoint(codePoint);
7143         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
7144     }
7145 
7146     /**
7147      * Converts the character (Unicode code point) argument to uppercase using case
7148      * mapping information from the SpecialCasing file in the Unicode
7149      * specification. If a character has no explicit uppercase
7150      * mapping, then the {@code char} itself is returned in the
7151      * {@code char[]}.
7152      *
7153      * @param   codePoint   the character (Unicode code point) to be converted.
7154      * @return a {@code char[]} with the uppercased character.
7155      * @since 1.4
7156      */
7157     static char[] toUpperCaseCharArray(int codePoint) {
7158         // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
7159         assert isBmpCodePoint(codePoint);
7160         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
7161     }
7162 
7163     /**
     * The number of bits used to represent a {@code char} value in unsigned
7165      * binary form, constant {@code 16}.
7166      *
7167      * @since 1.5
7168      */
    public static final int SIZE = 16;

    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form. The value is computed as {@link #SIZE} divided by
     * {@link Byte#SIZE}.
     *
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
7178 
7179     /**
7180      * Returns the value obtained by reversing the order of the bytes in the
     * specified {@code char} value.
7182      *
7183      * @param ch The {@code char} of which to reverse the byte order.
7184      * @return the value obtained by reversing (or, equivalently, swapping)
     *     the bytes in the specified {@code char} value.
7186      * @since 1.5
7187      */
7188     @HotSpotIntrinsicCandidate
7189     public static char reverseBytes(char ch) {
7190         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
7191     }
7192 
7193     /**
7194      * Returns the Unicode name of the specified character
7195      * {@code codePoint}, or null if the code point is
7196      * {@link #UNASSIGNED unassigned}.
7197      * <p>
7198      * Note: if the specified character is not assigned a name by
7199      * the <i>UnicodeData</i> file (part of the Unicode Character
7200      * Database maintained by the Unicode Consortium), the returned
     * name is the same as the result of the expression:
7202      *
7203      * <blockquote>{@code
7204      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
7205      *     + " "
7206      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7207      *
7208      * }</blockquote>
7209      *
7210      * @param  codePoint the character (Unicode code point)
7211      *
7212      * @return the Unicode name of the specified character, or null if
7213      *         the code point is unassigned.
7214      *
7215      * @exception IllegalArgumentException if the specified
7216      *            {@code codePoint} is not a valid Unicode
7217      *            code point.
7218      *
7219      * @since 1.7
7220      */
7221     public static String getName(int codePoint) {
7222         if (!isValidCodePoint(codePoint)) {
7223             throw new IllegalArgumentException();
7224         }
7225         String name = CharacterName.get(codePoint);
7226         if (name != null)
7227             return name;
7228         if (getType(codePoint) == UNASSIGNED)
7229             return null;
7230         UnicodeBlock block = UnicodeBlock.of(codePoint);
7231         if (block != null)
7232             return block.toString().replace('_', ' ') + " "
7233                    + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7234         // should never come here
7235         return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7236     }
7237 }