New src/java.base/share/classes/java/lang/Character.java

   1 /*
   2  * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Map;
  30 import java.util.HashMap;
  31 import java.util.Locale;
  32 
  33 /**
  34  * The {@code Character} class wraps a value of the primitive
  35  * type {@code char} in an object. An object of type
  36  * {@code Character} contains a single field whose type is
  37  * {@code char}.
  38  * <p>
  39  * In addition, this class provides several methods for determining
  40  * a character's category (lowercase letter, digit, etc.) and for converting
  41  * characters from uppercase to lowercase and vice versa.
  42  * <p>
  43  * Character information is based on the Unicode Standard, version 6.2.0.
  44  * <p>
  45  * The methods and data of class {@code Character} are defined by
  46  * the information in the <i>UnicodeData</i> file that is part of the
  47  * Unicode Character Database maintained by the Unicode
  48  * Consortium. This file specifies various properties including name
  49  * and general category for every defined Unicode code point or
  50  * character range.
  51  * <p>
  52  * The file and its description are available from the Unicode Consortium at:
  53  * <ul>
  54  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
  55  * </ul>
  56  *
  57  * <h3><a name="unicode">Unicode Character Representations</a></h3>
  58  *
  59  * <p>The {@code char} data type (and therefore the value that a
  60  * {@code Character} object encapsulates) is based on the
  61  * original Unicode specification, which defined characters as
  62  * fixed-width 16-bit entities. The Unicode Standard has since been
  63  * changed to allow for characters whose representation requires more
  64  * than 16 bits.  The range of legal <em>code point</em>s is now
  65  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
  66  * (Refer to the <a
  67  * href="http://www.unicode.org/reports/tr27/#notation"><i>
  68  * definition</i></a> of the U+<i>n</i> notation in the Unicode
  69  * Standard.)
  70  *
  71  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
  72  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
  73  * <a name="supplementary">Characters</a> whose code points are greater
  74  * than U+FFFF are called <em>supplementary character</em>s.  The Java
  75  * platform uses the UTF-16 representation in {@code char} arrays and
  76  * in the {@code String} and {@code StringBuffer} classes. In
  77  * this representation, supplementary characters are represented as a pair
  78  * of {@code char} values, the first from the <em>high-surrogates</em>
  79  * range (\uD800-\uDBFF), the second from the
  80  * <em>low-surrogates</em> range (\uDC00-\uDFFF).
  81  *
  82  * <p>A {@code char} value, therefore, represents Basic
  83  * Multilingual Plane (BMP) code points, including the surrogate
  84  * code points, or code units of the UTF-16 encoding. An
  85  * {@code int} value represents all Unicode code points,
  86  * including supplementary code points. The lower (least significant)
  87  * 21 bits of {@code int} are used to represent Unicode code
  88  * points and the upper (most significant) 11 bits must be zero.
  89  * Unless otherwise specified, the behavior with respect to
  90  * supplementary characters and surrogate {@code char} values is
  91  * as follows:
  92  *
  93  * <ul>
  94  * <li>The methods that only accept a {@code char} value cannot support
  95  * supplementary characters. They treat {@code char} values from the
  96  * surrogate ranges as undefined characters. For example,
  97  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
  98  * this specific value, if followed by any low-surrogate value in a string,
  99  * would represent a letter.
 100  *
 101  * <li>The methods that accept an {@code int} value support all
 102  * Unicode characters, including supplementary characters. For
 103  * example, {@code Character.isLetter(0x2F81A)} returns
 104  * {@code true} because the code point value represents a letter
 105  * (a CJK ideograph).
 106  * </ul>
 107  *
 108  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
 109  * used for character values in the range between U+0000 and U+10FFFF,
 110  * and <em>Unicode code unit</em> is used for 16-bit
 111  * {@code char} values that are code units of the <em>UTF-16</em>
 112  * encoding. For more information on Unicode terminology, refer to the
 113  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
 114  *
 115  * @author  Lee Boynton
 116  * @author  Guy Steele
 117  * @author  Akira Tanaka
 118  * @author  Martin Buchholz
 119  * @author  Ulf Zibis
 120  * @since   1.0
 121  */
 122 public final
 123 class Character implements java.io.Serializable, Comparable<Character> {
 124     /**
 125      * The minimum radix available for conversion to and from strings.
 126      * The constant value of this field is the smallest value permitted
 127      * for the radix argument in radix-conversion methods such as the
 128      * {@code digit} method, the {@code forDigit} method, and the
 129      * {@code toString} method of class {@code Integer}.
 130      *
 131      * @see     Character#digit(char, int)
 132      * @see     Character#forDigit(int, int)
 133      * @see     Integer#toString(int, int)
 134      * @see     Integer#valueOf(String)
 135      */
         // Radix 2 is binary notation.
 136     public static final int MIN_RADIX = 2;
 137
 138     /**
 139      * The maximum radix available for conversion to and from strings.
 140      * The constant value of this field is the largest value permitted
 141      * for the radix argument in radix-conversion methods such as the
 142      * {@code digit} method, the {@code forDigit} method, and the
 143      * {@code toString} method of class {@code Integer}.
 144      *
 145      * @see     Character#digit(char, int)
 146      * @see     Character#forDigit(int, int)
 147      * @see     Integer#toString(int, int)
 148      * @see     Integer#valueOf(String)
 149      */
         // NOTE(review): 36 presumably corresponds to the ten decimal digits plus
         // the 26 Latin letters; confirm against digit() and forDigit().
 150     public static final int MAX_RADIX = 36;
 151 
 152     /**
 153      * The constant value of this field is the smallest value of type
 154      * {@code char}, {@code '\u005Cu0000'}.
 155      *
 156      * @since   1.0.2
 157      */
         // In the Javadoc above the backslash is written as its own Unicode escape
         // (U+005C). The character produced by an escape does not start another
         // escape, so the documentation shows the escape sequence as text instead
         // of the compiler translating it into the character itself.
 158     public static final char MIN_VALUE = '\u0000';
 159
 160     /**
 161      * The constant value of this field is the largest value of type
 162      * {@code char}, {@code '\u005CuFFFF'}.
 163      *
 164      * @since   1.0.2
 165      */
         // Same escaped-backslash spelling in the Javadoc as for MIN_VALUE.
 166     public static final char MAX_VALUE = '\uFFFF';
 167 
 168     /**
 169      * The {@code Class} instance representing the primitive type
 170      * {@code char}.
 171      *
 172      * @since   1.1
 173      */
         // The cast to Class<Character> is unchecked, hence the suppression on
         // this single declaration.
         // NOTE(review): Class.getPrimitiveClass is declared outside this file;
         // presumably it returns a wildcard or raw Class -- confirm.
 174     @SuppressWarnings("unchecked")
 175     public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
 176 
 177     /*
 178      * Normative general types
 179      */
 180
 181     /*
 182      * General character types
          *
          * The byte constants that follow take the values 0 through 30, except
          * that no constant in this run is assigned the value 17 (FORMAT is 16
          * and PRIVATE_USE is 18). Each constant names the two-letter Unicode
          * general category quoted in its Javadoc.
 183      */
 184
 185     /**
 186      * General category "Cn" in the Unicode specification.
 187      * @since   1.1
 188      */
 189     public static final byte UNASSIGNED = 0;
 190
 191     /**
 192      * General category "Lu" in the Unicode specification.
 193      * @since   1.1
 194      */
 195     public static final byte UPPERCASE_LETTER = 1;
 196
 197     /**
 198      * General category "Ll" in the Unicode specification.
 199      * @since   1.1
 200      */
 201     public static final byte LOWERCASE_LETTER = 2;
 202
 203     /**
 204      * General category "Lt" in the Unicode specification.
 205      * @since   1.1
 206      */
 207     public static final byte TITLECASE_LETTER = 3;
 208
 209     /**
 210      * General category "Lm" in the Unicode specification.
 211      * @since   1.1
 212      */
 213     public static final byte MODIFIER_LETTER = 4;
 214
 215     /**
 216      * General category "Lo" in the Unicode specification.
 217      * @since   1.1
 218      */
 219     public static final byte OTHER_LETTER = 5;
 220
 221     /**
 222      * General category "Mn" in the Unicode specification.
 223      * @since   1.1
 224      */
 225     public static final byte NON_SPACING_MARK = 6;
 226
 227     /**
 228      * General category "Me" in the Unicode specification.
 229      * @since   1.1
 230      */
 231     public static final byte ENCLOSING_MARK = 7;
 232
 233     /**
 234      * General category "Mc" in the Unicode specification.
 235      * @since   1.1
 236      */
 237     public static final byte COMBINING_SPACING_MARK = 8;
 238
 239     /**
 240      * General category "Nd" in the Unicode specification.
 241      * @since   1.1
 242      */
 243     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
 244
 245     /**
 246      * General category "Nl" in the Unicode specification.
 247      * @since   1.1
 248      */
 249     public static final byte LETTER_NUMBER = 10;
 250
 251     /**
 252      * General category "No" in the Unicode specification.
 253      * @since   1.1
 254      */
 255     public static final byte OTHER_NUMBER = 11;
 256
 257     /**
 258      * General category "Zs" in the Unicode specification.
 259      * @since   1.1
 260      */
 261     public static final byte SPACE_SEPARATOR = 12;
 262
 263     /**
 264      * General category "Zl" in the Unicode specification.
 265      * @since   1.1
 266      */
 267     public static final byte LINE_SEPARATOR = 13;
 268
 269     /**
 270      * General category "Zp" in the Unicode specification.
 271      * @since   1.1
 272      */
 273     public static final byte PARAGRAPH_SEPARATOR = 14;
 274
 275     /**
 276      * General category "Cc" in the Unicode specification.
 277      * @since   1.1
 278      */
 279     public static final byte CONTROL = 15;
 280
 281     /**
 282      * General category "Cf" in the Unicode specification.
 283      * @since   1.1
 284      */
 285     public static final byte FORMAT = 16;
 286
 287     /**
 288      * General category "Co" in the Unicode specification.
 289      * @since   1.1
 290      */
         // The value jumps from 16 to 18 here; 17 is not used by any constant
         // in this run. Do not renumber: the values are public constants.
 291     public static final byte PRIVATE_USE = 18;
 292
 293     /**
 294      * General category "Cs" in the Unicode specification.
 295      * @since   1.1
 296      */
 297     public static final byte SURROGATE = 19;
 298
 299     /**
 300      * General category "Pd" in the Unicode specification.
 301      * @since   1.1
 302      */
 303     public static final byte DASH_PUNCTUATION = 20;
 304
 305     /**
 306      * General category "Ps" in the Unicode specification.
 307      * @since   1.1
 308      */
 309     public static final byte START_PUNCTUATION = 21;
 310
 311     /**
 312      * General category "Pe" in the Unicode specification.
 313      * @since   1.1
 314      */
 315     public static final byte END_PUNCTUATION = 22;
 316
 317     /**
 318      * General category "Pc" in the Unicode specification.
 319      * @since   1.1
 320      */
 321     public static final byte CONNECTOR_PUNCTUATION = 23;
 322
 323     /**
 324      * General category "Po" in the Unicode specification.
 325      * @since   1.1
 326      */
 327     public static final byte OTHER_PUNCTUATION = 24;
 328
 329     /**
 330      * General category "Sm" in the Unicode specification.
 331      * @since   1.1
 332      */
 333     public static final byte MATH_SYMBOL = 25;
 334
 335     /**
 336      * General category "Sc" in the Unicode specification.
 337      * @since   1.1
 338      */
 339     public static final byte CURRENCY_SYMBOL = 26;
 340
 341     /**
 342      * General category "Sk" in the Unicode specification.
 343      * @since   1.1
 344      */
 345     public static final byte MODIFIER_SYMBOL = 27;
 346
 347     /**
 348      * General category "So" in the Unicode specification.
 349      * @since   1.1
 350      */
 351     public static final byte OTHER_SYMBOL = 28;
 352
 353     /**
 354      * General category "Pi" in the Unicode specification.
 355      * @since   1.4
 356      */
 357     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
 358
 359     /**
 360      * General category "Pf" in the Unicode specification.
 361      * @since   1.4
 362      */
 363     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
 364 
 365     /**
 366      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
          * As an {@code int}, {@code 0xFFFFFFFF} is {@code -1}, which lies outside
          * the range bounded by {@link #MIN_CODE_POINT} and
          * {@link #MAX_CODE_POINT} and therefore cannot collide with any code point.
 367      */
 368     static final int ERROR = 0xFFFFFFFF;
 369 
 370 
 371     /**
 372      * Undefined bidirectional character type. Undefined {@code char}
 373      * values have undefined directionality in the Unicode specification.
 374      * @since 1.4
 375      */
         // The DIRECTIONALITY_* constants in this run are numbered consecutively
         // from -1 (undefined) through 18; each Javadoc quotes the bidirectional
         // type abbreviation the constant stands for.
 376     public static final byte DIRECTIONALITY_UNDEFINED = -1;
 377
 378     /**
 379      * Strong bidirectional character type "L" in the Unicode specification.
 380      * @since 1.4
 381      */
 382     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
 383
 384     /**
 385      * Strong bidirectional character type "R" in the Unicode specification.
 386      * @since 1.4
 387      */
 388     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
 389
 390     /**
 391      * Strong bidirectional character type "AL" in the Unicode specification.
 392      * @since 1.4
 393      */
 394     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
 395
 396     /**
 397      * Weak bidirectional character type "EN" in the Unicode specification.
 398      * @since 1.4
 399      */
 400     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
 401
 402     /**
 403      * Weak bidirectional character type "ES" in the Unicode specification.
 404      * @since 1.4
 405      */
 406     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
 407
 408     /**
 409      * Weak bidirectional character type "ET" in the Unicode specification.
 410      * @since 1.4
 411      */
 412     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
 413
 414     /**
 415      * Weak bidirectional character type "AN" in the Unicode specification.
 416      * @since 1.4
 417      */
 418     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
 419
 420     /**
 421      * Weak bidirectional character type "CS" in the Unicode specification.
 422      * @since 1.4
 423      */
 424     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
 425
 426     /**
 427      * Weak bidirectional character type "NSM" in the Unicode specification.
 428      * @since 1.4
 429      */
 430     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
 431
 432     /**
 433      * Weak bidirectional character type "BN" in the Unicode specification.
 434      * @since 1.4
 435      */
 436     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
 437
 438     /**
 439      * Neutral bidirectional character type "B" in the Unicode specification.
 440      * @since 1.4
 441      */
 442     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
 443
 444     /**
 445      * Neutral bidirectional character type "S" in the Unicode specification.
 446      * @since 1.4
 447      */
 448     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
 449
 450     /**
 451      * Neutral bidirectional character type "WS" in the Unicode specification.
 452      * @since 1.4
 453      */
 454     public static final byte DIRECTIONALITY_WHITESPACE = 12;
 455
 456     /**
 457      * Neutral bidirectional character type "ON" in the Unicode specification.
 458      * @since 1.4
 459      */
 460     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
 461
 462     /**
 463      * Strong bidirectional character type "LRE" in the Unicode specification.
 464      * @since 1.4
 465      */
 466     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
 467
 468     /**
 469      * Strong bidirectional character type "LRO" in the Unicode specification.
 470      * @since 1.4
 471      */
 472     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
 473
 474     /**
 475      * Strong bidirectional character type "RLE" in the Unicode specification.
 476      * @since 1.4
 477      */
 478     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
 479
 480     /**
 481      * Strong bidirectional character type "RLO" in the Unicode specification.
 482      * @since 1.4
 483      */
 484     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
 485
 486     /**
 487      * Weak bidirectional character type "PDF" in the Unicode specification.
 488      * @since 1.4
 489      */
 490     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 491 
 492     /**
 493      * The minimum value of a
 494      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 495      * Unicode high-surrogate code unit</a>
 496      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
 497      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 498      *
 499      * @since 1.5
 500      */
         // The Javadoc for the surrogate constants spells the backslash as its
         // own Unicode escape (U+005C) so the escape sequence is displayed as
         // text rather than translated by the compiler.
 501     public static final char MIN_HIGH_SURROGATE = '\uD800';
 502
 503     /**
 504      * The maximum value of a
 505      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 506      * Unicode high-surrogate code unit</a>
 507      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
 508      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 509      *
 510      * @since 1.5
 511      */
 512     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
 513
 514     /**
 515      * The minimum value of a
 516      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 517      * Unicode low-surrogate code unit</a>
 518      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
 519      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 520      *
 521      * @since 1.5
 522      */
         // The low-surrogate range starts directly after MAX_HIGH_SURROGATE, so
         // the two ranges together form one contiguous block of char values.
 523     public static final char MIN_LOW_SURROGATE  = '\uDC00';
 524
 525     /**
 526      * The maximum value of a
 527      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 528      * Unicode low-surrogate code unit</a>
 529      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 530      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 531      *
 532      * @since 1.5
 533      */
 534     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
 535
 536     /**
 537      * The minimum value of a Unicode surrogate code unit in the
 538      * UTF-16 encoding, constant {@code '\u005CuD800'}.
 539      *
 540      * @since 1.5
 541      */
         // Defined by reference: the overall surrogate range begins where the
         // high-surrogate range begins.
 542     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
 543
 544     /**
 545      * The maximum value of a Unicode surrogate code unit in the
 546      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 547      *
 548      * @since 1.5
 549      */
         // Defined by reference: the overall surrogate range ends where the
         // low-surrogate range ends.
 550     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
 551 
 552     /**
 553      * The minimum value of a
 554      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
 555      * Unicode supplementary code point</a>, constant {@code U+10000}.
 556      *
 557      * @since 1.5
 558      */
         // Declared as int: 0x010000 is one more than MAX_VALUE, the largest
         // value a char can hold.
 559     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
 560
 561     /**
 562      * The minimum value of a
 563      * <a href="http://www.unicode.org/glossary/#code_point">
 564      * Unicode code point</a>, constant {@code U+0000}.
 565      *
 566      * @since 1.5
 567      */
 568     public static final int MIN_CODE_POINT = 0x000000;
 569
 570     /**
 571      * The maximum value of a
 572      * <a href="http://www.unicode.org/glossary/#code_point">
 573      * Unicode code point</a>, constant {@code U+10FFFF}.
 574      *
 575      * @since 1.5
 576      */
         // The upper-case 0X prefix is an ordinary hexadecimal prefix; the value
         // is the same as 0x10FFFF.
 577     public static final int MAX_CODE_POINT = 0X10FFFF;
 578 
 579 
 580     /**
 581      * Instances of this class represent particular subsets of the Unicode
 582      * character set.  The only family of subsets defined in the
 583      * {@code Character} class is {@link Character.UnicodeBlock}.
 584      * Other portions of the Java API may define other subsets for their
 585      * own purposes.
 586      *
 587      * @since 1.2
 588      */
 589     public static class Subset  {
 590 
 591         private String name;
 592 
 593         /**
 594          * Constructs a new {@code Subset} instance.
 595          *
 596          * @param  name  The name of this subset
 597          * @exception NullPointerException if name is {@code null}
 598          */
 599         protected Subset(String name) {
 600             if (name == null) {
 601                 throw new NullPointerException("name");
 602             }
 603             this.name = name;
 604         }
 605 
 606         /**
 607          * Compares two {@code Subset} objects for equality.
 608          * This method returns {@code true} if and only if
 609          * {@code this} and the argument refer to the same
 610          * object; since this method is {@code final}, this
 611          * guarantee holds for all subclasses.
 612          */
 613         public final boolean equals(Object obj) {
 614             return (this == obj);
 615         }
 616 
 617         /**
 618          * Returns the standard hash code as defined by the
 619          * {@link Object#hashCode} method.  This method
 620          * is {@code final} in order to ensure that the
 621          * {@code equals} and {@code hashCode} methods will
 622          * be consistent in all subclasses.
 623          */
 624         public final int hashCode() {
 625             return super.hashCode();
 626         }
 627 
 628         /**
 629          * Returns the name of this subset.
 630          */
 631         public final String toString() {
 632             return name;
 633         }
 634     }
 635 
 636     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
 637     // for the latest specification of Unicode Blocks.
 638 
 639     /**
 640      * A family of character subsets representing the character blocks in the
 641      * Unicode specification. Character blocks generally define characters
 642      * used for a specific script or purpose. A character is contained by
 643      * at most one Unicode block.
 644      *
 645      * @since 1.2
 646      */
 647     public static final class UnicodeBlock extends Subset {
 648         /**
 649          * 510  - the expected number of entities
 650          * 0.75 - the default load factor of HashMap
              *
              * The initial-capacity expression below evaluates to 681, i.e. the
              * expected entry count divided by the load factor, plus one.
              * The private constructors of this class store every block in this
              * map under its identifier name and under each of its aliases.
 651          */
 652         private static Map<String, UnicodeBlock> map =
 653                 new HashMap<>((int)(510 / 0.75f + 1.0f));
 654 
 655         /**
 656          * Creates a UnicodeBlock with the given identifier name.
 657          * This name must be the same as the block identifier.
              *
              * The name is handed to the {@code Subset} constructor, and the new
              * instance is then stored in {@code map} under that name, replacing
              * any block previously stored under the same key.
              *
              * @param idName the identifier name of the block
              * @throws NullPointerException if {@code idName} is {@code null}
              *         (raised by the {@code Subset} constructor)
 658          */
 659         private UnicodeBlock(String idName) {
 660             super(idName);
                 // "this" is published to the map before the constructor returns.
 661             map.put(idName, this);
 662         }
 663 
 664         /**
 665          * Creates a UnicodeBlock with the given identifier name and
 666          * alias name.
              *
              * Delegates to the single-name constructor and then stores this
              * block in {@code map} a second time under {@code alias}.
              *
              * @param idName the identifier name of the block
              * @param alias  one additional key under which the block is stored
 667          */
 668         private UnicodeBlock(String idName, String alias) {
 669             this(idName);
 670             map.put(alias, this);
 671         }
 672 
 673         /**
 674          * Creates a UnicodeBlock with the given identifier name and
 675          * alias names.
 676          */
 677         private UnicodeBlock(String idName, String... aliases) {
 678             this(idName);
 679             for (String alias : aliases)
 680                 map.put(alias, this);
 681         }
 682 
 683         /**
 684          * Constant for the "Basic Latin" Unicode character block.
 685          * @since 1.2
 686          */
             // Every constant in this run passes three strings, which can only
             // match the varargs constructor. The first string is the identifier
             // name, the second is the upper-cased block name with its spaces,
             // and the third is that name with the spaces removed.
 687         public static final UnicodeBlock  BASIC_LATIN =
 688             new UnicodeBlock("BASIC_LATIN",
 689                              "BASIC LATIN",
 690                              "BASICLATIN");
 691
 692         /**
 693          * Constant for the "Latin-1 Supplement" Unicode character block.
 694          * @since 1.2
 695          */
 696         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
 697             new UnicodeBlock("LATIN_1_SUPPLEMENT",
 698                              "LATIN-1 SUPPLEMENT",
 699                              "LATIN-1SUPPLEMENT");
 700
 701         /**
 702          * Constant for the "Latin Extended-A" Unicode character block.
 703          * @since 1.2
 704          */
 705         public static final UnicodeBlock LATIN_EXTENDED_A =
 706             new UnicodeBlock("LATIN_EXTENDED_A",
 707                              "LATIN EXTENDED-A",
 708                              "LATINEXTENDED-A");
 709
 710         /**
 711          * Constant for the "Latin Extended-B" Unicode character block.
 712          * @since 1.2
 713          */
 714         public static final UnicodeBlock LATIN_EXTENDED_B =
 715             new UnicodeBlock("LATIN_EXTENDED_B",
 716                              "LATIN EXTENDED-B",
 717                              "LATINEXTENDED-B");
 718
 719         /**
 720          * Constant for the "IPA Extensions" Unicode character block.
 721          * @since 1.2
 722          */
 723         public static final UnicodeBlock IPA_EXTENSIONS =
 724             new UnicodeBlock("IPA_EXTENSIONS",
 725                              "IPA EXTENSIONS",
 726                              "IPAEXTENSIONS");
 727
 728         /**
 729          * Constant for the "Spacing Modifier Letters" Unicode character block.
 730          * @since 1.2
 731          */
 732         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
 733             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
 734                              "SPACING MODIFIER LETTERS",
 735                              "SPACINGMODIFIERLETTERS");
 736
 737         /**
 738          * Constant for the "Combining Diacritical Marks" Unicode character block.
 739          * @since 1.2
 740          */
 741         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
 742             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
 743                              "COMBINING DIACRITICAL MARKS",
 744                              "COMBININGDIACRITICALMARKS");
 745
 746         /**
 747          * Constant for the "Greek and Coptic" Unicode character block.
 748          * <p>
 749          * This block was previously known as the "Greek" block.
 750          *
 751          * @since 1.2
 752          */
             // The identifier keeps the earlier name GREEK; only the two aliases
             // carry the current "Greek and Coptic" block name.
 753         public static final UnicodeBlock GREEK =
 754             new UnicodeBlock("GREEK",
 755                              "GREEK AND COPTIC",
 756                              "GREEKANDCOPTIC");
 757 
 758         /**
 759          * Constant for the "Cyrillic" Unicode character block.
 760          * @since 1.2
 761          */
             // The constants in this run have one-word block names and use the
             // single-name constructor, so each block is stored under its
             // identifier name only.
 762         public static final UnicodeBlock CYRILLIC =
 763             new UnicodeBlock("CYRILLIC");
 764
 765         /**
 766          * Constant for the "Armenian" Unicode character block.
 767          * @since 1.2
 768          */
 769         public static final UnicodeBlock ARMENIAN =
 770             new UnicodeBlock("ARMENIAN");
 771
 772         /**
 773          * Constant for the "Hebrew" Unicode character block.
 774          * @since 1.2
 775          */
 776         public static final UnicodeBlock HEBREW =
 777             new UnicodeBlock("HEBREW");
 778
 779         /**
 780          * Constant for the "Arabic" Unicode character block.
 781          * @since 1.2
 782          */
 783         public static final UnicodeBlock ARABIC =
 784             new UnicodeBlock("ARABIC");
 785
 786         /**
 787          * Constant for the "Devanagari" Unicode character block.
 788          * @since 1.2
 789          */
 790         public static final UnicodeBlock DEVANAGARI =
 791             new UnicodeBlock("DEVANAGARI");
 792
 793         /**
 794          * Constant for the "Bengali" Unicode character block.
 795          * @since 1.2
 796          */
 797         public static final UnicodeBlock BENGALI =
 798             new UnicodeBlock("BENGALI");
 799
 800         /**
 801          * Constant for the "Gurmukhi" Unicode character block.
 802          * @since 1.2
 803          */
 804         public static final UnicodeBlock GURMUKHI =
 805             new UnicodeBlock("GURMUKHI");
 806
 807         /**
 808          * Constant for the "Gujarati" Unicode character block.
 809          * @since 1.2
 810          */
 811         public static final UnicodeBlock GUJARATI =
 812             new UnicodeBlock("GUJARATI");
 813
 814         /**
 815          * Constant for the "Oriya" Unicode character block.
 816          * @since 1.2
 817          */
 818         public static final UnicodeBlock ORIYA =
 819             new UnicodeBlock("ORIYA");
 820
 821         /**
 822          * Constant for the "Tamil" Unicode character block.
 823          * @since 1.2
 824          */
 825         public static final UnicodeBlock TAMIL =
 826             new UnicodeBlock("TAMIL");
 827
 828         /**
 829          * Constant for the "Telugu" Unicode character block.
 830          * @since 1.2
 831          */
 832         public static final UnicodeBlock TELUGU =
 833             new UnicodeBlock("TELUGU");
 834
 835         /**
 836          * Constant for the "Kannada" Unicode character block.
 837          * @since 1.2
 838          */
 839         public static final UnicodeBlock KANNADA =
 840             new UnicodeBlock("KANNADA");
 841
 842         /**
 843          * Constant for the "Malayalam" Unicode character block.
 844          * @since 1.2
 845          */
 846         public static final UnicodeBlock MALAYALAM =
 847             new UnicodeBlock("MALAYALAM");
 848
 849         /**
 850          * Constant for the "Thai" Unicode character block.
 851          * @since 1.2
 852          */
 853         public static final UnicodeBlock THAI =
 854             new UnicodeBlock("THAI");
 855
 856         /**
 857          * Constant for the "Lao" Unicode character block.
 858          * @since 1.2
 859          */
 860         public static final UnicodeBlock LAO =
 861             new UnicodeBlock("LAO");
 862
 863         /**
 864          * Constant for the "Tibetan" Unicode character block.
 865          * @since 1.2
 866          */
 867         public static final UnicodeBlock TIBETAN =
 868             new UnicodeBlock("TIBETAN");
 869
 870         /**
 871          * Constant for the "Georgian" Unicode character block.
 872          * @since 1.2
 873          */
 874         public static final UnicodeBlock GEORGIAN =
 875             new UnicodeBlock("GEORGIAN");
 876 
 877         /**
 878          * Constant for the "Hangul Jamo" Unicode character block.
 879          * @since 1.2
 880          */
 881         public static final UnicodeBlock HANGUL_JAMO =
 882             new UnicodeBlock("HANGUL_JAMO",
 883                              "HANGUL JAMO",
 884                              "HANGULJAMO");
 885 
 886         /**
 887          * Constant for the "Latin Extended Additional" Unicode character block.
 888          * @since 1.2
 889          */
 890         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
 891             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
 892                              "LATIN EXTENDED ADDITIONAL",
 893                              "LATINEXTENDEDADDITIONAL");
 894 
 895         /**
 896          * Constant for the "Greek Extended" Unicode character block.
 897          * @since 1.2
 898          */
 899         public static final UnicodeBlock GREEK_EXTENDED =
 900             new UnicodeBlock("GREEK_EXTENDED",
 901                              "GREEK EXTENDED",
 902                              "GREEKEXTENDED");
 903 
 904         /**
 905          * Constant for the "General Punctuation" Unicode character block.
 906          * @since 1.2
 907          */
 908         public static final UnicodeBlock GENERAL_PUNCTUATION =
 909             new UnicodeBlock("GENERAL_PUNCTUATION",
 910                              "GENERAL PUNCTUATION",
 911                              "GENERALPUNCTUATION");
 912 
 913         /**
 914          * Constant for the "Superscripts and Subscripts" Unicode character
 915          * block.
 916          * @since 1.2
 917          */
 918         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
 919             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
 920                              "SUPERSCRIPTS AND SUBSCRIPTS",
 921                              "SUPERSCRIPTSANDSUBSCRIPTS");
 922 
 923         /**
 924          * Constant for the "Currency Symbols" Unicode character block.
 925          * @since 1.2
 926          */
 927         public static final UnicodeBlock CURRENCY_SYMBOLS =
 928             new UnicodeBlock("CURRENCY_SYMBOLS",
 929                              "CURRENCY SYMBOLS",
 930                              "CURRENCYSYMBOLS");
 931 
 932         /**
 933          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
 934          * character block.
 935          * <p>
 936          * This block was previously known as "Combining Marks for Symbols".
 937          * @since 1.2
 938          */
 939         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
 940             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
 941                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
 942                              "COMBININGDIACRITICALMARKSFORSYMBOLS",
 943                              "COMBINING MARKS FOR SYMBOLS",
 944                              "COMBININGMARKSFORSYMBOLS");
 945 
 946         /**
 947          * Constant for the "Letterlike Symbols" Unicode character block.
 948          * @since 1.2
 949          */
 950         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
 951             new UnicodeBlock("LETTERLIKE_SYMBOLS",
 952                              "LETTERLIKE SYMBOLS",
 953                              "LETTERLIKESYMBOLS");
 954 
 955         /**
 956          * Constant for the "Number Forms" Unicode character block.
 957          * @since 1.2
 958          */
 959         public static final UnicodeBlock NUMBER_FORMS =
 960             new UnicodeBlock("NUMBER_FORMS",
 961                              "NUMBER FORMS",
 962                              "NUMBERFORMS");
 963 
 964         /**
 965          * Constant for the "Arrows" Unicode character block.
 966          * @since 1.2
 967          */
 968         public static final UnicodeBlock ARROWS =
 969             new UnicodeBlock("ARROWS");
 970 
 971         /**
 972          * Constant for the "Mathematical Operators" Unicode character block.
 973          * @since 1.2
 974          */
 975         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
 976             new UnicodeBlock("MATHEMATICAL_OPERATORS",
 977                              "MATHEMATICAL OPERATORS",
 978                              "MATHEMATICALOPERATORS");
 979 
 980         /**
 981          * Constant for the "Miscellaneous Technical" Unicode character block.
 982          * @since 1.2
 983          */
 984         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
 985             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
 986                              "MISCELLANEOUS TECHNICAL",
 987                              "MISCELLANEOUSTECHNICAL");
 988 
 989         /**
 990          * Constant for the "Control Pictures" Unicode character block.
 991          * @since 1.2
 992          */
 993         public static final UnicodeBlock CONTROL_PICTURES =
 994             new UnicodeBlock("CONTROL_PICTURES",
 995                              "CONTROL PICTURES",
 996                              "CONTROLPICTURES");
 997 
 998         /**
 999          * Constant for the "Optical Character Recognition" Unicode character block.
1000          * @since 1.2
1001          */
1002         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1003             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1004                              "OPTICAL CHARACTER RECOGNITION",
1005                              "OPTICALCHARACTERRECOGNITION");
1006 
1007         /**
1008          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1009          * @since 1.2
1010          */
1011         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1012             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1013                              "ENCLOSED ALPHANUMERICS",
1014                              "ENCLOSEDALPHANUMERICS");
1015 
1016         /**
1017          * Constant for the "Box Drawing" Unicode character block.
1018          * @since 1.2
1019          */
1020         public static final UnicodeBlock BOX_DRAWING =
1021             new UnicodeBlock("BOX_DRAWING",
1022                              "BOX DRAWING",
1023                              "BOXDRAWING");
1024 
1025         /**
1026          * Constant for the "Block Elements" Unicode character block.
1027          * @since 1.2
1028          */
1029         public static final UnicodeBlock BLOCK_ELEMENTS =
1030             new UnicodeBlock("BLOCK_ELEMENTS",
1031                              "BLOCK ELEMENTS",
1032                              "BLOCKELEMENTS");
1033 
1034         /**
1035          * Constant for the "Geometric Shapes" Unicode character block.
1036          * @since 1.2
1037          */
1038         public static final UnicodeBlock GEOMETRIC_SHAPES =
1039             new UnicodeBlock("GEOMETRIC_SHAPES",
1040                              "GEOMETRIC SHAPES",
1041                              "GEOMETRICSHAPES");
1042 
1043         /**
1044          * Constant for the "Miscellaneous Symbols" Unicode character block.
1045          * @since 1.2
1046          */
1047         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1048             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1049                              "MISCELLANEOUS SYMBOLS",
1050                              "MISCELLANEOUSSYMBOLS");
1051 
1052         /**
1053          * Constant for the "Dingbats" Unicode character block.
1054          * @since 1.2
1055          */
1056         public static final UnicodeBlock DINGBATS =
1057             new UnicodeBlock("DINGBATS");
1058 
1059         /**
1060          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1061          * @since 1.2
1062          */
1063         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1064             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1065                              "CJK SYMBOLS AND PUNCTUATION",
1066                              "CJKSYMBOLSANDPUNCTUATION");
1067 
1068         /**
1069          * Constant for the "Hiragana" Unicode character block.
1070          * @since 1.2
1071          */
1072         public static final UnicodeBlock HIRAGANA =
1073             new UnicodeBlock("HIRAGANA");
1074 
1075         /**
1076          * Constant for the "Katakana" Unicode character block.
1077          * @since 1.2
1078          */
1079         public static final UnicodeBlock KATAKANA =
1080             new UnicodeBlock("KATAKANA");
1081 
1082         /**
1083          * Constant for the "Bopomofo" Unicode character block.
1084          * @since 1.2
1085          */
1086         public static final UnicodeBlock BOPOMOFO =
1087             new UnicodeBlock("BOPOMOFO");
1088 
1089         /**
1090          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1091          * @since 1.2
1092          */
1093         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1094             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1095                              "HANGUL COMPATIBILITY JAMO",
1096                              "HANGULCOMPATIBILITYJAMO");
1097 
1098         /**
1099          * Constant for the "Kanbun" Unicode character block.
1100          * @since 1.2
1101          */
1102         public static final UnicodeBlock KANBUN =
1103             new UnicodeBlock("KANBUN");
1104 
1105         /**
1106          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1107          * @since 1.2
1108          */
1109         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1110             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1111                              "ENCLOSED CJK LETTERS AND MONTHS",
1112                              "ENCLOSEDCJKLETTERSANDMONTHS");
1113 
1114         /**
1115          * Constant for the "CJK Compatibility" Unicode character block.
1116          * @since 1.2
1117          */
1118         public static final UnicodeBlock CJK_COMPATIBILITY =
1119             new UnicodeBlock("CJK_COMPATIBILITY",
1120                              "CJK COMPATIBILITY",
1121                              "CJKCOMPATIBILITY");
1122 
1123         /**
1124          * Constant for the "CJK Unified Ideographs" Unicode character block.
1125          * @since 1.2
1126          */
1127         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1128             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1129                              "CJK UNIFIED IDEOGRAPHS",
1130                              "CJKUNIFIEDIDEOGRAPHS");
1131 
1132         /**
1133          * Constant for the "Hangul Syllables" Unicode character block.
1134          * @since 1.2
1135          */
1136         public static final UnicodeBlock HANGUL_SYLLABLES =
1137             new UnicodeBlock("HANGUL_SYLLABLES",
1138                              "HANGUL SYLLABLES",
1139                              "HANGULSYLLABLES");
1140 
1141         /**
1142          * Constant for the "Private Use Area" Unicode character block.
1143          * @since 1.2
1144          */
1145         public static final UnicodeBlock PRIVATE_USE_AREA =
1146             new UnicodeBlock("PRIVATE_USE_AREA",
1147                              "PRIVATE USE AREA",
1148                              "PRIVATEUSEAREA");
1149 
1150         /**
1151          * Constant for the "CJK Compatibility Ideographs" Unicode character
1152          * block.
1153          * @since 1.2
1154          */
1155         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1156             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1157                              "CJK COMPATIBILITY IDEOGRAPHS",
1158                              "CJKCOMPATIBILITYIDEOGRAPHS");
1159 
1160         /**
1161          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1162          * @since 1.2
1163          */
1164         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1165             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1166                              "ALPHABETIC PRESENTATION FORMS",
1167                              "ALPHABETICPRESENTATIONFORMS");
1168 
1169         /**
1170          * Constant for the "Arabic Presentation Forms-A" Unicode character
1171          * block.
1172          * @since 1.2
1173          */
1174         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1175             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1176                              "ARABIC PRESENTATION FORMS-A",
1177                              "ARABICPRESENTATIONFORMS-A");
1178 
1179         /**
1180          * Constant for the "Combining Half Marks" Unicode character block.
1181          * @since 1.2
1182          */
1183         public static final UnicodeBlock COMBINING_HALF_MARKS =
1184             new UnicodeBlock("COMBINING_HALF_MARKS",
1185                              "COMBINING HALF MARKS",
1186                              "COMBININGHALFMARKS");
1187 
1188         /**
1189          * Constant for the "CJK Compatibility Forms" Unicode character block.
1190          * @since 1.2
1191          */
1192         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1193             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1194                              "CJK COMPATIBILITY FORMS",
1195                              "CJKCOMPATIBILITYFORMS");
1196 
1197         /**
1198          * Constant for the "Small Form Variants" Unicode character block.
1199          * @since 1.2
1200          */
1201         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1202             new UnicodeBlock("SMALL_FORM_VARIANTS",
1203                              "SMALL FORM VARIANTS",
1204                              "SMALLFORMVARIANTS");
1205 
1206         /**
1207          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1208          * @since 1.2
1209          */
1210         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1211             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1212                              "ARABIC PRESENTATION FORMS-B",
1213                              "ARABICPRESENTATIONFORMS-B");
1214 
1215         /**
1216          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1217          * block.
1218          * @since 1.2
1219          */
1220         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1221             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1222                              "HALFWIDTH AND FULLWIDTH FORMS",
1223                              "HALFWIDTHANDFULLWIDTHFORMS");
1224 
1225         /**
1226          * Constant for the "Specials" Unicode character block.
1227          * @since 1.2
1228          */
1229         public static final UnicodeBlock SPECIALS =
1230             new UnicodeBlock("SPECIALS");
1231 
1232         /**
              * Deprecated {@code SURROGATES_AREA} block constant, superseded by
              * the three surrogate block constants named below.
              *
1233          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1234          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1235          *             {@link #LOW_SURROGATES}. These new constants match
1236          *             the block definitions of the Unicode Standard.
1237          *             The {@link #of(char)} and {@link #of(int)} methods
1238          *             return the new constants, not SURROGATES_AREA.
1239          */
1240         @Deprecated
1241         public static final UnicodeBlock SURROGATES_AREA =
1242             new UnicodeBlock("SURROGATES_AREA");
1243 
1244         /**
1245          * Constant for the "Syriac" Unicode character block.
1246          * @since 1.4
1247          */
1248         public static final UnicodeBlock SYRIAC =
1249             new UnicodeBlock("SYRIAC");
1250 
1251         /**
1252          * Constant for the "Thaana" Unicode character block.
1253          * @since 1.4
1254          */
1255         public static final UnicodeBlock THAANA =
1256             new UnicodeBlock("THAANA");
1257 
1258         /**
1259          * Constant for the "Sinhala" Unicode character block.
1260          * @since 1.4
1261          */
1262         public static final UnicodeBlock SINHALA =
1263             new UnicodeBlock("SINHALA");
1264 
1265         /**
1266          * Constant for the "Myanmar" Unicode character block.
1267          * @since 1.4
1268          */
1269         public static final UnicodeBlock MYANMAR =
1270             new UnicodeBlock("MYANMAR");
1271 
1272         /**
1273          * Constant for the "Ethiopic" Unicode character block.
1274          * @since 1.4
1275          */
1276         public static final UnicodeBlock ETHIOPIC =
1277             new UnicodeBlock("ETHIOPIC");
1278 
1279         /**
1280          * Constant for the "Cherokee" Unicode character block.
1281          * @since 1.4
1282          */
1283         public static final UnicodeBlock CHEROKEE =
1284             new UnicodeBlock("CHEROKEE");
1285 
1286         /**
1287          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1288          * @since 1.4
1289          */
1290         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1291             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1292                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1293                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
1294 
1295         /**
1296          * Constant for the "Ogham" Unicode character block.
1297          * @since 1.4
1298          */
1299         public static final UnicodeBlock OGHAM =
1300             new UnicodeBlock("OGHAM");
1301 
1302         /**
1303          * Constant for the "Runic" Unicode character block.
1304          * @since 1.4
1305          */
1306         public static final UnicodeBlock RUNIC =
1307             new UnicodeBlock("RUNIC");
1308 
1309         /**
1310          * Constant for the "Khmer" Unicode character block.
1311          * @since 1.4
1312          */
1313         public static final UnicodeBlock KHMER =
1314             new UnicodeBlock("KHMER");
1315 
1316         /**
1317          * Constant for the "Mongolian" Unicode character block.
1318          * @since 1.4
1319          */
1320         public static final UnicodeBlock MONGOLIAN =
1321             new UnicodeBlock("MONGOLIAN");
1322 
1323         /**
1324          * Constant for the "Braille Patterns" Unicode character block.
1325          * @since 1.4
1326          */
1327         public static final UnicodeBlock BRAILLE_PATTERNS =
1328             new UnicodeBlock("BRAILLE_PATTERNS",
1329                              "BRAILLE PATTERNS",
1330                              "BRAILLEPATTERNS");
1331 
1332         /**
1333          * Constant for the "CJK Radicals Supplement" Unicode character block.
1334          * @since 1.4
1335          */
1336         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1337             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1338                              "CJK RADICALS SUPPLEMENT",
1339                              "CJKRADICALSSUPPLEMENT");
1340 
1341         /**
1342          * Constant for the "Kangxi Radicals" Unicode character block.
1343          * @since 1.4
1344          */
1345         public static final UnicodeBlock KANGXI_RADICALS =
1346             new UnicodeBlock("KANGXI_RADICALS",
1347                              "KANGXI RADICALS",
1348                              "KANGXIRADICALS");
1349 
1350         /**
1351          * Constant for the "Ideographic Description Characters" Unicode character block.
1352          * @since 1.4
1353          */
1354         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1355             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1356                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1357                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1358 
1359         /**
1360          * Constant for the "Bopomofo Extended" Unicode character block.
1361          * @since 1.4
1362          */
1363         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1364             new UnicodeBlock("BOPOMOFO_EXTENDED",
1365                              "BOPOMOFO EXTENDED",
1366                              "BOPOMOFOEXTENDED");
1367 
1368         /**
1369          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1370          * @since 1.4
1371          */
1372         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1373             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1374                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1375                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1376 
1377         /**
1378          * Constant for the "Yi Syllables" Unicode character block.
1379          * @since 1.4
1380          */
1381         public static final UnicodeBlock YI_SYLLABLES =
1382             new UnicodeBlock("YI_SYLLABLES",
1383                              "YI SYLLABLES",
1384                              "YISYLLABLES");
1385 
1386         /**
1387          * Constant for the "Yi Radicals" Unicode character block.
1388          * @since 1.4
1389          */
1390         public static final UnicodeBlock YI_RADICALS =
1391             new UnicodeBlock("YI_RADICALS",
1392                              "YI RADICALS",
1393                              "YIRADICALS");
1394 
1395         /**
1396          * Constant for the "Cyrillic Supplementary" Unicode character block.
              * <p>
              * The constructor call below is also given the spellings
              * "CYRILLIC SUPPLEMENT" and "CYRILLICSUPPLEMENT", i.e. this block is
              * referred to as "Cyrillic Supplement" as well.
1397          * @since 1.5
1398          */
1399         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1400             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1401                              "CYRILLIC SUPPLEMENTARY",
1402                              "CYRILLICSUPPLEMENTARY",
1403                              "CYRILLIC SUPPLEMENT",
1404                              "CYRILLICSUPPLEMENT");
1405 
1406         /**
1407          * Constant for the "Tagalog" Unicode character block.
1408          * @since 1.5
1409          */
1410         public static final UnicodeBlock TAGALOG =
1411             new UnicodeBlock("TAGALOG");
1412 
1413         /**
1414          * Constant for the "Hanunoo" Unicode character block.
1415          * @since 1.5
1416          */
1417         public static final UnicodeBlock HANUNOO =
1418             new UnicodeBlock("HANUNOO");
1419 
1420         /**
1421          * Constant for the "Buhid" Unicode character block.
1422          * @since 1.5
1423          */
1424         public static final UnicodeBlock BUHID =
1425             new UnicodeBlock("BUHID");
1426 
1427         /**
1428          * Constant for the "Tagbanwa" Unicode character block.
1429          * @since 1.5
1430          */
1431         public static final UnicodeBlock TAGBANWA =
1432             new UnicodeBlock("TAGBANWA");
1433 
1434         /**
1435          * Constant for the "Limbu" Unicode character block.
1436          * @since 1.5
1437          */
1438         public static final UnicodeBlock LIMBU =
1439             new UnicodeBlock("LIMBU");
1440 
1441         /**
1442          * Constant for the "Tai Le" Unicode character block.
1443          * @since 1.5
1444          */
1445         public static final UnicodeBlock TAI_LE =
1446             new UnicodeBlock("TAI_LE",
1447                              "TAI LE",
1448                              "TAILE");
1449 
1450         /**
1451          * Constant for the "Khmer Symbols" Unicode character block.
1452          * @since 1.5
1453          */
1454         public static final UnicodeBlock KHMER_SYMBOLS =
1455             new UnicodeBlock("KHMER_SYMBOLS",
1456                              "KHMER SYMBOLS",
1457                              "KHMERSYMBOLS");
1458 
1459         /**
1460          * Constant for the "Phonetic Extensions" Unicode character block.
1461          * @since 1.5
1462          */
1463         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1464             new UnicodeBlock("PHONETIC_EXTENSIONS",
1465                              "PHONETIC EXTENSIONS",
1466                              "PHONETICEXTENSIONS");
1467 
1468         /**
1469          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1470          * @since 1.5
1471          */
1472         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1473             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1474                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1475                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1476 
1477         /**
1478          * Constant for the "Supplemental Arrows-A" Unicode character block.
1479          * @since 1.5
1480          */
1481         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1482             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1483                              "SUPPLEMENTAL ARROWS-A",
1484                              "SUPPLEMENTALARROWS-A");
1485 
1486         /**
1487          * Constant for the "Supplemental Arrows-B" Unicode character block.
1488          * @since 1.5
1489          */
1490         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1491             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1492                              "SUPPLEMENTAL ARROWS-B",
1493                              "SUPPLEMENTALARROWS-B");
1494 
1495         /**
1496          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1497          * character block.
1498          * @since 1.5
1499          */
1500         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1501             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1502                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1503                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1504 
1505         /**
1506          * Constant for the "Supplemental Mathematical Operators" Unicode
1507          * character block.
1508          * @since 1.5
1509          */
1510         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1511             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1512                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1513                              "SUPPLEMENTALMATHEMATICALOPERATORS");
1514 
1515         /**
1516          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1517          * block.
1518          * @since 1.5
1519          */
1520         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1521             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1522                              "MISCELLANEOUS SYMBOLS AND ARROWS",
1523                              "MISCELLANEOUSSYMBOLSANDARROWS");
1524 
1525         /**
1526          * Constant for the "Katakana Phonetic Extensions" Unicode character
1527          * block.
1528          * @since 1.5
1529          */
1530         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1531             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1532                              "KATAKANA PHONETIC EXTENSIONS",
1533                              "KATAKANAPHONETICEXTENSIONS");
1534 
1535         /**
1536          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1537          * @since 1.5
1538          */
1539         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1540             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1541                              "YIJING HEXAGRAM SYMBOLS",
1542                              "YIJINGHEXAGRAMSYMBOLS");
1543 
1544         /**
1545          * Constant for the "Variation Selectors" Unicode character block.
1546          * @since 1.5
1547          */
1548         public static final UnicodeBlock VARIATION_SELECTORS =
1549             new UnicodeBlock("VARIATION_SELECTORS",
1550                              "VARIATION SELECTORS",
1551                              "VARIATIONSELECTORS");
1552 
1553         /**
1554          * Constant for the "Linear B Syllabary" Unicode character block.
1555          * @since 1.5
1556          */
1557         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1558             new UnicodeBlock("LINEAR_B_SYLLABARY",
1559                              "LINEAR B SYLLABARY",
1560                              "LINEARBSYLLABARY");
1561 
1562         /**
1563          * Constant for the "Linear B Ideograms" Unicode character block.
1564          * @since 1.5
1565          */
1566         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1567             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1568                              "LINEAR B IDEOGRAMS",
1569                              "LINEARBIDEOGRAMS");
1570 
1571         /**
1572          * Constant for the "Aegean Numbers" Unicode character block.
1573          * @since 1.5
1574          */
1575         public static final UnicodeBlock AEGEAN_NUMBERS =
1576             new UnicodeBlock("AEGEAN_NUMBERS",
1577                              "AEGEAN NUMBERS",
1578                              "AEGEANNUMBERS");
1579 
1580         /**
1581          * Constant for the "Old Italic" Unicode character block.
1582          * @since 1.5
1583          */
1584         public static final UnicodeBlock OLD_ITALIC =
1585             new UnicodeBlock("OLD_ITALIC",
1586                              "OLD ITALIC",
1587                              "OLDITALIC");
1588 
1589         /**
1590          * Constant for the "Gothic" Unicode character block.
1591          * @since 1.5
1592          */
1593         public static final UnicodeBlock GOTHIC =
1594             new UnicodeBlock("GOTHIC");
1595 
1596         /**
1597          * Constant for the "Ugaritic" Unicode character block.
1598          * @since 1.5
1599          */
1600         public static final UnicodeBlock UGARITIC =
1601             new UnicodeBlock("UGARITIC");
1602 
1603         /**
1604          * Constant for the "Deseret" Unicode character block.
1605          * @since 1.5
1606          */
1607         public static final UnicodeBlock DESERET =
1608             new UnicodeBlock("DESERET");
1609 
1610         /**
1611          * Constant for the "Shavian" Unicode character block.
1612          * @since 1.5
1613          */
1614         public static final UnicodeBlock SHAVIAN =
1615             new UnicodeBlock("SHAVIAN");
1616 
1617         /**
1618          * Constant for the "Osmanya" Unicode character block.
1619          * @since 1.5
1620          */
1621         public static final UnicodeBlock OSMANYA =
1622             new UnicodeBlock("OSMANYA");
1623 
1624         /**
1625          * Constant for the "Cypriot Syllabary" Unicode character block.
1626          * @since 1.5
1627          */
1628         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1629             new UnicodeBlock("CYPRIOT_SYLLABARY",
1630                              "CYPRIOT SYLLABARY",
1631                              "CYPRIOTSYLLABARY");
1632 
1633         /**
1634          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1635          * @since 1.5
1636          */
1637         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1638             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1639                              "BYZANTINE MUSICAL SYMBOLS",
1640                              "BYZANTINEMUSICALSYMBOLS");
1641 
1642         /**
1643          * Constant for the "Musical Symbols" Unicode character block.
1644          * @since 1.5
1645          */
1646         public static final UnicodeBlock MUSICAL_SYMBOLS =
1647             new UnicodeBlock("MUSICAL_SYMBOLS",
1648                              "MUSICAL SYMBOLS",
1649                              "MUSICALSYMBOLS");
1650 
1651         /**
1652          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1653          * @since 1.5
1654          */
1655         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1656             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1657                              "TAI XUAN JING SYMBOLS",
1658                              "TAIXUANJINGSYMBOLS");
1659 
1660         /**
1661          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1662          * character block.
1663          * @since 1.5
1664          */
1665         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1666             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1667                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1668                              "MATHEMATICALALPHANUMERICSYMBOLS");
1669 
1670         /**
1671          * Constant for the "CJK Unified Ideographs Extension B" Unicode
1672          * character block.
1673          * @since 1.5
1674          */
1675         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1676             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1677                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1678                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1679 
1680         /**
1681          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode
              * character block.
1682          * @since 1.5
1683          */
1684         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1685             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1686                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1687                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1688 
1689         /**
1690          * Constant for the "Tags" Unicode character block.
1691          * @since 1.5
1692          */
1693         public static final UnicodeBlock TAGS =
1694             new UnicodeBlock("TAGS");
1695 
1696         /**
1697          * Constant for the "Variation Selectors Supplement" Unicode character
1698          * block.
1699          * @since 1.5
1700          */
1701         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1702             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1703                              "VARIATION SELECTORS SUPPLEMENT",
1704                              "VARIATIONSELECTORSSUPPLEMENT");
1705 
1706         /**
1707          * Constant for the "Supplementary Private Use Area-A" Unicode character
1708          * block.
1709          * @since 1.5
1710          */
1711         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1712             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1713                              "SUPPLEMENTARY PRIVATE USE AREA-A",
1714                              "SUPPLEMENTARYPRIVATEUSEAREA-A");
1715 
1716         /**
1717          * Constant for the "Supplementary Private Use Area-B" Unicode character
1718          * block.
1719          * @since 1.5
1720          */
1721         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1722             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1723                              "SUPPLEMENTARY PRIVATE USE AREA-B",
1724                              "SUPPLEMENTARYPRIVATEUSEAREA-B");
1725 
1726         /**
1727          * Constant for the "High Surrogates" Unicode character block.
1728          * This block represents code point values in the high surrogate
1729          * range: U+D800 through U+DB7F.
1730          *
1731          * @since 1.5
1732          */
1733         public static final UnicodeBlock HIGH_SURROGATES =
1734             new UnicodeBlock("HIGH_SURROGATES",
1735                              "HIGH SURROGATES",
1736                              "HIGHSURROGATES");
1737 
1738         /**
1739          * Constant for the "High Private Use Surrogates" Unicode character
1740          * block.
1741          * This block represents code point values in the private use high
1742          * surrogate range: U+DB80 through U+DBFF.
1743          *
1744          * @since 1.5
1745          */
1746         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1747             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1748                              "HIGH PRIVATE USE SURROGATES",
1749                              "HIGHPRIVATEUSESURROGATES");
1750 
1751         /**
1752          * Constant for the "Low Surrogates" Unicode character block.
1753          * This block represents code point values in the low surrogate
1754          * range: U+DC00 through U+DFFF.
1755          *
1756          * @since 1.5
1757          */
1758         public static final UnicodeBlock LOW_SURROGATES =
1759             new UnicodeBlock("LOW_SURROGATES",
1760                              "LOW SURROGATES",
1761                              "LOWSURROGATES");
1762 
1763         /**
1764          * Constant for the "Arabic Supplement" Unicode character block.
1765          * @since 1.7
1766          */
1767         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1768             new UnicodeBlock("ARABIC_SUPPLEMENT",
1769                              "ARABIC SUPPLEMENT",
1770                              "ARABICSUPPLEMENT");
1771 
1772         /**
1773          * Constant for the "NKo" Unicode character block.
1774          * @since 1.7
1775          */
1776         public static final UnicodeBlock NKO =
1777             new UnicodeBlock("NKO");
1778 
1779         /**
1780          * Constant for the "Samaritan" Unicode character block.
1781          * @since 1.7
1782          */
1783         public static final UnicodeBlock SAMARITAN =
1784             new UnicodeBlock("SAMARITAN");
1785 
1786         /**
1787          * Constant for the "Mandaic" Unicode character block.
1788          * @since 1.7
1789          */
1790         public static final UnicodeBlock MANDAIC =
1791             new UnicodeBlock("MANDAIC");
1792 
1793         /**
1794          * Constant for the "Ethiopic Supplement" Unicode character block.
1795          * @since 1.7
1796          */
1797         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1798             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1799                              "ETHIOPIC SUPPLEMENT",
1800                              "ETHIOPICSUPPLEMENT");
1801 
1802         /**
1803          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1804          * Unicode character block.
1805          * @since 1.7
1806          */
1807         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1808             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1809                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1810                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1811 
1812         /**
1813          * Constant for the "New Tai Lue" Unicode character block.
1814          * @since 1.7
1815          */
1816         public static final UnicodeBlock NEW_TAI_LUE =
1817             new UnicodeBlock("NEW_TAI_LUE",
1818                              "NEW TAI LUE",
1819                              "NEWTAILUE");
1820 
1821         /**
1822          * Constant for the "Buginese" Unicode character block.
1823          * @since 1.7
1824          */
1825         public static final UnicodeBlock BUGINESE =
1826             new UnicodeBlock("BUGINESE");
1827 
1828         /**
1829          * Constant for the "Tai Tham" Unicode character block.
1830          * @since 1.7
1831          */
1832         public static final UnicodeBlock TAI_THAM =
1833             new UnicodeBlock("TAI_THAM",
1834                              "TAI THAM",
1835                              "TAITHAM");
1836 
1837         /**
1838          * Constant for the "Balinese" Unicode character block.
1839          * @since 1.7
1840          */
1841         public static final UnicodeBlock BALINESE =
1842             new UnicodeBlock("BALINESE");
1843 
1844         /**
1845          * Constant for the "Sundanese" Unicode character block.
1846          * @since 1.7
1847          */
1848         public static final UnicodeBlock SUNDANESE =
1849             new UnicodeBlock("SUNDANESE");
1850 
1851         /**
1852          * Constant for the "Batak" Unicode character block.
1853          * @since 1.7
1854          */
1855         public static final UnicodeBlock BATAK =
1856             new UnicodeBlock("BATAK");
1857 
1858         /**
1859          * Constant for the "Lepcha" Unicode character block.
1860          * @since 1.7
1861          */
1862         public static final UnicodeBlock LEPCHA =
1863             new UnicodeBlock("LEPCHA");
1864 
1865         /**
1866          * Constant for the "Ol Chiki" Unicode character block.
1867          * @since 1.7
1868          */
1869         public static final UnicodeBlock OL_CHIKI =
1870             new UnicodeBlock("OL_CHIKI",
1871                              "OL CHIKI",
1872                              "OLCHIKI");
1873 
1874         /**
1875          * Constant for the "Vedic Extensions" Unicode character block.
1876          * @since 1.7
1877          */
1878         public static final UnicodeBlock VEDIC_EXTENSIONS =
1879             new UnicodeBlock("VEDIC_EXTENSIONS",
1880                              "VEDIC EXTENSIONS",
1881                              "VEDICEXTENSIONS");
1882 
1883         /**
1884          * Constant for the "Phonetic Extensions Supplement" Unicode character
1885          * block.
1886          * @since 1.7
1887          */
1888         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1889             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1890                              "PHONETIC EXTENSIONS SUPPLEMENT",
1891                              "PHONETICEXTENSIONSSUPPLEMENT");
1892 
1893         /**
1894          * Constant for the "Combining Diacritical Marks Supplement" Unicode
1895          * character block.
1896          * @since 1.7
1897          */
1898         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1899             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1900                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1901                              "COMBININGDIACRITICALMARKSSUPPLEMENT");
1902 
1903         /**
1904          * Constant for the "Glagolitic" Unicode character block.
1905          * @since 1.7
1906          */
1907         public static final UnicodeBlock GLAGOLITIC =
1908             new UnicodeBlock("GLAGOLITIC");
1909 
1910         /**
1911          * Constant for the "Latin Extended-C" Unicode character block.
1912          * @since 1.7
1913          */
1914         public static final UnicodeBlock LATIN_EXTENDED_C =
1915             new UnicodeBlock("LATIN_EXTENDED_C",
1916                              "LATIN EXTENDED-C",
1917                              "LATINEXTENDED-C");
1918 
1919         /**
1920          * Constant for the "Coptic" Unicode character block.
1921          * @since 1.7
1922          */
1923         public static final UnicodeBlock COPTIC =
1924             new UnicodeBlock("COPTIC");
1925 
1926         /**
1927          * Constant for the "Georgian Supplement" Unicode character block.
1928          * @since 1.7
1929          */
1930         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1931             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1932                              "GEORGIAN SUPPLEMENT",
1933                              "GEORGIANSUPPLEMENT");
1934 
1935         /**
1936          * Constant for the "Tifinagh" Unicode character block.
1937          * @since 1.7
1938          */
1939         public static final UnicodeBlock TIFINAGH =
1940             new UnicodeBlock("TIFINAGH");
1941 
1942         /**
1943          * Constant for the "Ethiopic Extended" Unicode character block.
1944          * @since 1.7
1945          */
1946         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1947             new UnicodeBlock("ETHIOPIC_EXTENDED",
1948                              "ETHIOPIC EXTENDED",
1949                              "ETHIOPICEXTENDED");
1950 
1951         /**
1952          * Constant for the "Cyrillic Extended-A" Unicode character block.
1953          * @since 1.7
1954          */
1955         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1956             new UnicodeBlock("CYRILLIC_EXTENDED_A",
1957                              "CYRILLIC EXTENDED-A",
1958                              "CYRILLICEXTENDED-A");
1959 
1960         /**
1961          * Constant for the "Supplemental Punctuation" Unicode character block.
1962          * @since 1.7
1963          */
1964         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1965             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1966                              "SUPPLEMENTAL PUNCTUATION",
1967                              "SUPPLEMENTALPUNCTUATION");
1968 
1969         /**
1970          * Constant for the "CJK Strokes" Unicode character block.
1971          * @since 1.7
1972          */
1973         public static final UnicodeBlock CJK_STROKES =
1974             new UnicodeBlock("CJK_STROKES",
1975                              "CJK STROKES",
1976                              "CJKSTROKES");
1977 
1978         /**
1979          * Constant for the "Lisu" Unicode character block.
1980          * @since 1.7
1981          */
1982         public static final UnicodeBlock LISU =
1983             new UnicodeBlock("LISU");
1984 
1985         /**
1986          * Constant for the "Vai" Unicode character block.
1987          * @since 1.7
1988          */
1989         public static final UnicodeBlock VAI =
1990             new UnicodeBlock("VAI");
1991 
1992         /**
1993          * Constant for the "Cyrillic Extended-B" Unicode character block.
1994          * @since 1.7
1995          */
1996         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
1997             new UnicodeBlock("CYRILLIC_EXTENDED_B",
1998                              "CYRILLIC EXTENDED-B",
1999                              "CYRILLICEXTENDED-B");
2000 
2001         /**
2002          * Constant for the "Bamum" Unicode character block.
2003          * @since 1.7
2004          */
2005         public static final UnicodeBlock BAMUM =
2006             new UnicodeBlock("BAMUM");
2007 
2008         /**
2009          * Constant for the "Modifier Tone Letters" Unicode character block.
2010          * @since 1.7
2011          */
2012         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2013             new UnicodeBlock("MODIFIER_TONE_LETTERS",
2014                              "MODIFIER TONE LETTERS",
2015                              "MODIFIERTONELETTERS");
2016 
2017         /**
2018          * Constant for the "Latin Extended-D" Unicode character block.
2019          * @since 1.7
2020          */
2021         public static final UnicodeBlock LATIN_EXTENDED_D =
2022             new UnicodeBlock("LATIN_EXTENDED_D",
2023                              "LATIN EXTENDED-D",
2024                              "LATINEXTENDED-D");
2025 
2026         /**
2027          * Constant for the "Syloti Nagri" Unicode character block.
2028          * @since 1.7
2029          */
2030         public static final UnicodeBlock SYLOTI_NAGRI =
2031             new UnicodeBlock("SYLOTI_NAGRI",
2032                              "SYLOTI NAGRI",
2033                              "SYLOTINAGRI");
2034 
2035         /**
2036          * Constant for the "Common Indic Number Forms" Unicode character block.
2037          * @since 1.7
2038          */
2039         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2040             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2041                              "COMMON INDIC NUMBER FORMS",
2042                              "COMMONINDICNUMBERFORMS");
2043 
2044         /**
2045          * Constant for the "Phags-pa" Unicode character block.
2046          * @since 1.7
2047          */
2048         public static final UnicodeBlock PHAGS_PA =
                 // Two names only: "PHAGS-PA" has no spaces, so the space-stripped
                 // form the sibling constants pass last would just repeat it.
2049             new UnicodeBlock("PHAGS_PA",
2050                              "PHAGS-PA");
2051 
2052         /**
2053          * Constant for the "Saurashtra" Unicode character block.
2054          * @since 1.7
2055          */
2056         public static final UnicodeBlock SAURASHTRA =
2057             new UnicodeBlock("SAURASHTRA");
2058 
2059         /**
2060          * Constant for the "Devanagari Extended" Unicode character block.
2061          * @since 1.7
2062          */
2063         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2064             new UnicodeBlock("DEVANAGARI_EXTENDED",
2065                              "DEVANAGARI EXTENDED",
2066                              "DEVANAGARIEXTENDED");
2067 
2068         /**
2069          * Constant for the "Kayah Li" Unicode character block.
2070          * @since 1.7
2071          */
2072         public static final UnicodeBlock KAYAH_LI =
2073             new UnicodeBlock("KAYAH_LI",
2074                              "KAYAH LI",
2075                              "KAYAHLI");
2076 
2077         /**
2078          * Constant for the "Rejang" Unicode character block.
2079          * @since 1.7
2080          */
2081         public static final UnicodeBlock REJANG =
2082             new UnicodeBlock("REJANG");
2083 
2084         /**
2085          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2086          * @since 1.7
2087          */
2088         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2089             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2090                              "HANGUL JAMO EXTENDED-A",
2091                              "HANGULJAMOEXTENDED-A");
2092 
2093         /**
2094          * Constant for the "Javanese" Unicode character block.
2095          * @since 1.7
2096          */
2097         public static final UnicodeBlock JAVANESE =
2098             new UnicodeBlock("JAVANESE");
2099 
2100         /**
2101          * Constant for the "Cham" Unicode character block.
2102          * @since 1.7
2103          */
2104         public static final UnicodeBlock CHAM =
2105             new UnicodeBlock("CHAM");
2106 
2107         /**
2108          * Constant for the "Myanmar Extended-A" Unicode character block.
2109          * @since 1.7
2110          */
2111         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2112             new UnicodeBlock("MYANMAR_EXTENDED_A",
2113                              "MYANMAR EXTENDED-A",
2114                              "MYANMAREXTENDED-A");
2115 
2116         /**
2117          * Constant for the "Tai Viet" Unicode character block.
2118          * @since 1.7
2119          */
2120         public static final UnicodeBlock TAI_VIET =
2121             new UnicodeBlock("TAI_VIET",
2122                              "TAI VIET",
2123                              "TAIVIET");
2124 
2125         /**
2126          * Constant for the "Ethiopic Extended-A" Unicode character block.
2127          * @since 1.7
2128          */
2129         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2130             new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2131                              "ETHIOPIC EXTENDED-A",
2132                              "ETHIOPICEXTENDED-A");
2133 
2134         /**
2135          * Constant for the "Meetei Mayek" Unicode character block.
2136          * @since 1.7
2137          */
2138         public static final UnicodeBlock MEETEI_MAYEK =
2139             new UnicodeBlock("MEETEI_MAYEK",
2140                              "MEETEI MAYEK",
2141                              "MEETEIMAYEK");
2142 
2143         /**
2144          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2145          * @since 1.7
2146          */
2147         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2148             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2149                              "HANGUL JAMO EXTENDED-B",
2150                              "HANGULJAMOEXTENDED-B");
2151 
2152         /**
2153          * Constant for the "Vertical Forms" Unicode character block.
2154          * @since 1.7
2155          */
2156         public static final UnicodeBlock VERTICAL_FORMS =
2157             new UnicodeBlock("VERTICAL_FORMS",
2158                              "VERTICAL FORMS",
2159                              "VERTICALFORMS");
2160 
2161         /**
2162          * Constant for the "Ancient Greek Numbers" Unicode character block.
2163          * @since 1.7
2164          */
2165         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2166             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2167                              "ANCIENT GREEK NUMBERS",
2168                              "ANCIENTGREEKNUMBERS");
2169 
2170         /**
2171          * Constant for the "Ancient Symbols" Unicode character block.
2172          * @since 1.7
2173          */
2174         public static final UnicodeBlock ANCIENT_SYMBOLS =
2175             new UnicodeBlock("ANCIENT_SYMBOLS",
2176                              "ANCIENT SYMBOLS",
2177                              "ANCIENTSYMBOLS");
2178 
2179         /**
2180          * Constant for the "Phaistos Disc" Unicode character block.
2181          * @since 1.7
2182          */
2183         public static final UnicodeBlock PHAISTOS_DISC =
2184             new UnicodeBlock("PHAISTOS_DISC",
2185                              "PHAISTOS DISC",
2186                              "PHAISTOSDISC");
2187 
2188         /**
2189          * Constant for the "Lycian" Unicode character block.
2190          * @since 1.7
2191          */
2192         public static final UnicodeBlock LYCIAN =
2193             new UnicodeBlock("LYCIAN");
2194 
2195         /**
2196          * Constant for the "Carian" Unicode character block.
2197          * @since 1.7
2198          */
2199         public static final UnicodeBlock CARIAN =
2200             new UnicodeBlock("CARIAN");
2201 
2202         /**
2203          * Constant for the "Old Persian" Unicode character block.
2204          * @since 1.7
2205          */
2206         public static final UnicodeBlock OLD_PERSIAN =
2207             new UnicodeBlock("OLD_PERSIAN",
2208                              "OLD PERSIAN",
2209                              "OLDPERSIAN");
2210 
2211         /**
2212          * Constant for the "Imperial Aramaic" Unicode character block.
2213          * @since 1.7
2214          */
2215         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2216             new UnicodeBlock("IMPERIAL_ARAMAIC",
2217                              "IMPERIAL ARAMAIC",
2218                              "IMPERIALARAMAIC");
2219 
2220         /**
2221          * Constant for the "Phoenician" Unicode character block.
2222          * @since 1.7
2223          */
2224         public static final UnicodeBlock PHOENICIAN =
2225             new UnicodeBlock("PHOENICIAN");
2226 
2227         /**
2228          * Constant for the "Lydian" Unicode character block.
2229          * @since 1.7
2230          */
2231         public static final UnicodeBlock LYDIAN =
2232             new UnicodeBlock("LYDIAN");
2233 
2234         /**
2235          * Constant for the "Kharoshthi" Unicode character block.
2236          * @since 1.7
2237          */
2238         public static final UnicodeBlock KHAROSHTHI =
2239             new UnicodeBlock("KHAROSHTHI");
2240 
2241         /**
2242          * Constant for the "Old South Arabian" Unicode character block.
2243          * @since 1.7
2244          */
2245         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2246             new UnicodeBlock("OLD_SOUTH_ARABIAN",
2247                              "OLD SOUTH ARABIAN",
2248                              "OLDSOUTHARABIAN");
2249 
2250         /**
2251          * Constant for the "Avestan" Unicode character block.
2252          * @since 1.7
2253          */
2254         public static final UnicodeBlock AVESTAN =
2255             new UnicodeBlock("AVESTAN");
2256 
2257         /**
2258          * Constant for the "Inscriptional Parthian" Unicode character block.
2259          * @since 1.7
2260          */
2261         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2262             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2263                              "INSCRIPTIONAL PARTHIAN",
2264                              "INSCRIPTIONALPARTHIAN");
2265 
2266         /**
2267          * Constant for the "Inscriptional Pahlavi" Unicode character block.
2268          * @since 1.7
2269          */
2270         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2271             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2272                              "INSCRIPTIONAL PAHLAVI",
2273                              "INSCRIPTIONALPAHLAVI");
2274 
2275         /**
2276          * Constant for the "Old Turkic" Unicode character block.
2277          * @since 1.7
2278          */
2279         public static final UnicodeBlock OLD_TURKIC =
2280             new UnicodeBlock("OLD_TURKIC",
2281                              "OLD TURKIC",
2282                              "OLDTURKIC");
2283 
2284         /**
2285          * Constant for the "Rumi Numeral Symbols" Unicode character block.
2286          * @since 1.7
2287          */
2288         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2289             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2290                              "RUMI NUMERAL SYMBOLS",
2291                              "RUMINUMERALSYMBOLS");
2292 
2293         /**
2294          * Constant for the "Brahmi" Unicode character block.
2295          * @since 1.7
2296          */
2297         public static final UnicodeBlock BRAHMI =
2298             new UnicodeBlock("BRAHMI");
2299 
2300         /**
2301          * Constant for the "Kaithi" Unicode character block.
2302          * @since 1.7
2303          */
2304         public static final UnicodeBlock KAITHI =
2305             new UnicodeBlock("KAITHI");
2306 
2307         /**
2308          * Constant for the "Cuneiform" Unicode character block.
2309          * @since 1.7
2310          */
2311         public static final UnicodeBlock CUNEIFORM =
2312             new UnicodeBlock("CUNEIFORM");
2313 
2314         /**
2315          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2316          * character block.
2317          * @since 1.7
2318          */
2319         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2320             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2321                              "CUNEIFORM NUMBERS AND PUNCTUATION",
2322                              "CUNEIFORMNUMBERSANDPUNCTUATION");
2323 
2324         /**
2325          * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2326          * @since 1.7
2327          */
2328         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2329             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2330                              "EGYPTIAN HIEROGLYPHS",
2331                              "EGYPTIANHIEROGLYPHS");
2332 
2333         /**
2334          * Constant for the "Bamum Supplement" Unicode character block.
2335          * @since 1.7
2336          */
2337         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2338             new UnicodeBlock("BAMUM_SUPPLEMENT",
2339                              "BAMUM SUPPLEMENT",
2340                              "BAMUMSUPPLEMENT");
2341 
2342         /**
2343          * Constant for the "Kana Supplement" Unicode character block.
2344          * @since 1.7
2345          */
2346         public static final UnicodeBlock KANA_SUPPLEMENT =
2347             new UnicodeBlock("KANA_SUPPLEMENT",
2348                              "KANA SUPPLEMENT",
2349                              "KANASUPPLEMENT");
2350 
2351         /**
2352          * Constant for the "Ancient Greek Musical Notation" Unicode character
2353          * block.
2354          * @since 1.7
2355          */
2356         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2357             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2358                              "ANCIENT GREEK MUSICAL NOTATION",
2359                              "ANCIENTGREEKMUSICALNOTATION");
2360 
2361         /**
2362          * Constant for the "Counting Rod Numerals" Unicode character block.
2363          * @since 1.7
2364          */
2365         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2366             new UnicodeBlock("COUNTING_ROD_NUMERALS",
2367                              "COUNTING ROD NUMERALS",
2368                              "COUNTINGRODNUMERALS");
2369 
2370         /**
2371          * Constant for the "Mahjong Tiles" Unicode character block.
2372          * @since 1.7
2373          */
2374         public static final UnicodeBlock MAHJONG_TILES =
2375             new UnicodeBlock("MAHJONG_TILES",
2376                              "MAHJONG TILES",
2377                              "MAHJONGTILES");
2378 
2379         /**
2380          * Constant for the "Domino Tiles" Unicode character block.
2381          * @since 1.7
2382          */
2383         public static final UnicodeBlock DOMINO_TILES =
2384             new UnicodeBlock("DOMINO_TILES",
2385                              "DOMINO TILES",
2386                              "DOMINOTILES");
2387 
2388         /**
2389          * Constant for the "Playing Cards" Unicode character block.
2390          * @since 1.7
2391          */
2392         public static final UnicodeBlock PLAYING_CARDS =
2393             new UnicodeBlock("PLAYING_CARDS",
2394                              "PLAYING CARDS",
2395                              "PLAYINGCARDS");
2396 
2397         /**
2398          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2399          * block.
2400          * @since 1.7
2401          */
2402         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2403             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2404                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2405                              "ENCLOSEDALPHANUMERICSUPPLEMENT");
2406 
2407         /**
2408          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2409          * block.
2410          * @since 1.7
2411          */
2412         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2413             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2414                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2415                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2416 
2417         /**
2418          * Constant for the "Miscellaneous Symbols and Pictographs" Unicode
2419          * character block.
2420          * @since 1.7
2421          */
2422         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2423             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2424                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2425                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2426 
2427         /**
2428          * Constant for the "Emoticons" Unicode character block.
2429          * @since 1.7
2430          */
2431         public static final UnicodeBlock EMOTICONS =
2432             new UnicodeBlock("EMOTICONS");
2433 
2434         /**
2435          * Constant for the "Transport And Map Symbols" Unicode character block.
2436          * @since 1.7
2437          */
2438         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2439             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2440                              "TRANSPORT AND MAP SYMBOLS",
2441                              "TRANSPORTANDMAPSYMBOLS");
2442 
2443         /**
2444          * Constant for the "Alchemical Symbols" Unicode character block.
2445          * @since 1.7
2446          */
2447         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2448             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2449                              "ALCHEMICAL SYMBOLS",
2450                              "ALCHEMICALSYMBOLS");
2451 
2452         /**
2453          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2454          * character block.
2455          * @since 1.7
2456          */
2457         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2458             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2459                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2460                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2461 
2462         /**
2463          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2464          * character block.
2465          * @since 1.7
2466          */
2467         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2468             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2469                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2470                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2471 
2472         /**
2473          * Constant for the "Arabic Extended-A" Unicode character block.
2474          * @since 1.8
2475          */
2476         public static final UnicodeBlock ARABIC_EXTENDED_A =
2477             new UnicodeBlock("ARABIC_EXTENDED_A",
2478                              "ARABIC EXTENDED-A",
2479                              "ARABICEXTENDED-A");
2480 
2481         /**
2482          * Constant for the "Sundanese Supplement" Unicode character block.
2483          * @since 1.8
2484          */
2485         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2486             new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2487                              "SUNDANESE SUPPLEMENT",
2488                              "SUNDANESESUPPLEMENT");
2489 
2490         /**
2491          * Constant for the "Meetei Mayek Extensions" Unicode character block.
2492          * @since 1.8
2493          */
2494         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2495             new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2496                              "MEETEI MAYEK EXTENSIONS",
2497                              "MEETEIMAYEKEXTENSIONS");
2498 
2499         /**
2500          * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2501          * @since 1.8
2502          */
2503         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2504             new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2505                              "MEROITIC HIEROGLYPHS",
2506                              "MEROITICHIEROGLYPHS");
2507 
2508         /**
2509          * Constant for the "Meroitic Cursive" Unicode character block.
2510          * @since 1.8
2511          */
2512         public static final UnicodeBlock MEROITIC_CURSIVE =
2513             new UnicodeBlock("MEROITIC_CURSIVE",
2514                              "MEROITIC CURSIVE",
2515                              "MEROITICCURSIVE");
2516 
2517         /**
2518          * Constant for the "Sora Sompeng" Unicode character block.
2519          * @since 1.8
2520          */
2521         public static final UnicodeBlock SORA_SOMPENG =
2522             new UnicodeBlock("SORA_SOMPENG",
2523                              "SORA SOMPENG",
2524                              "SORASOMPENG");
2525 
2526         /**
2527          * Constant for the "Chakma" Unicode character block.
2528          * @since 1.8
2529          */
2530         public static final UnicodeBlock CHAKMA =
2531             new UnicodeBlock("CHAKMA");
2532 
2533         /**
2534          * Constant for the "Sharada" Unicode character block.
2535          * @since 1.8
2536          */
2537         public static final UnicodeBlock SHARADA =
2538             new UnicodeBlock("SHARADA");
2539 
2540         /**
2541          * Constant for the "Takri" Unicode character block.
2542          * @since 1.8
2543          */
2544         public static final UnicodeBlock TAKRI =
2545             new UnicodeBlock("TAKRI");
2546 
2547         /**
2548          * Constant for the "Miao" Unicode character block.
2549          * @since 1.8
2550          */
2551         public static final UnicodeBlock MIAO =
2552             new UnicodeBlock("MIAO");
2553 
2554         /**
2555          * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2556          * character block.
2557          * @since 1.8
2558          */
2559         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2560             new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2561                              "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2562                              "ARABICMATHEMATICALALPHABETICSYMBOLS");
2563 
        /*
         * First code point of each consecutive code point range, listed in
         * ascending order. Entry i covers every code point from
         * blockStarts[i] up to, but not including, blockStarts[i + 1]; the
         * final entry has no successor in this table.
         *
         * The table is index-aligned with the blocks array: of(int) binary
         * searches here and then reads blocks[] at the index it found. Ranges
         * commented "unassigned" line up with a null entry in blocks.
         * Any entry added, removed or reordered here needs the matching
         * change in blocks, otherwise lookups return the wrong block.
         */
        private static final int blockStarts[] = {
            0x0000,   // 0000..007F; Basic Latin
            0x0080,   // 0080..00FF; Latin-1 Supplement
            0x0100,   // 0100..017F; Latin Extended-A
            0x0180,   // 0180..024F; Latin Extended-B
            0x0250,   // 0250..02AF; IPA Extensions
            0x02B0,   // 02B0..02FF; Spacing Modifier Letters
            0x0300,   // 0300..036F; Combining Diacritical Marks
            0x0370,   // 0370..03FF; Greek and Coptic
            0x0400,   // 0400..04FF; Cyrillic
            0x0500,   // 0500..052F; Cyrillic Supplement
            0x0530,   // 0530..058F; Armenian
            0x0590,   // 0590..05FF; Hebrew
            0x0600,   // 0600..06FF; Arabic
            0x0700,   // 0700..074F; Syriac
            0x0750,   // 0750..077F; Arabic Supplement
            0x0780,   // 0780..07BF; Thaana
            0x07C0,   // 07C0..07FF; NKo
            0x0800,   // 0800..083F; Samaritan
            0x0840,   // 0840..085F; Mandaic
            0x0860,   //             unassigned
            0x08A0,   // 08A0..08FF; Arabic Extended-A
            0x0900,   // 0900..097F; Devanagari
            0x0980,   // 0980..09FF; Bengali
            0x0A00,   // 0A00..0A7F; Gurmukhi
            0x0A80,   // 0A80..0AFF; Gujarati
            0x0B00,   // 0B00..0B7F; Oriya
            0x0B80,   // 0B80..0BFF; Tamil
            0x0C00,   // 0C00..0C7F; Telugu
            0x0C80,   // 0C80..0CFF; Kannada
            0x0D00,   // 0D00..0D7F; Malayalam
            0x0D80,   // 0D80..0DFF; Sinhala
            0x0E00,   // 0E00..0E7F; Thai
            0x0E80,   // 0E80..0EFF; Lao
            0x0F00,   // 0F00..0FFF; Tibetan
            0x1000,   // 1000..109F; Myanmar
            0x10A0,   // 10A0..10FF; Georgian
            0x1100,   // 1100..11FF; Hangul Jamo
            0x1200,   // 1200..137F; Ethiopic
            0x1380,   // 1380..139F; Ethiopic Supplement
            0x13A0,   // 13A0..13FF; Cherokee
            0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
            0x1680,   // 1680..169F; Ogham
            0x16A0,   // 16A0..16FF; Runic
            0x1700,   // 1700..171F; Tagalog
            0x1720,   // 1720..173F; Hanunoo
            0x1740,   // 1740..175F; Buhid
            0x1760,   // 1760..177F; Tagbanwa
            0x1780,   // 1780..17FF; Khmer
            0x1800,   // 1800..18AF; Mongolian
            0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
            0x1900,   // 1900..194F; Limbu
            0x1950,   // 1950..197F; Tai Le
            0x1980,   // 1980..19DF; New Tai Lue
            0x19E0,   // 19E0..19FF; Khmer Symbols
            0x1A00,   // 1A00..1A1F; Buginese
            0x1A20,   // 1A20..1AAF; Tai Tham
            0x1AB0,   //             unassigned
            0x1B00,   // 1B00..1B7F; Balinese
            0x1B80,   // 1B80..1BBF; Sundanese
            0x1BC0,   // 1BC0..1BFF; Batak
            0x1C00,   // 1C00..1C4F; Lepcha
            0x1C50,   // 1C50..1C7F; Ol Chiki
            0x1C80,   //             unassigned
            0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
            0x1CD0,   // 1CD0..1CFF; Vedic Extensions
            0x1D00,   // 1D00..1D7F; Phonetic Extensions
            0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
            0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
            0x1E00,   // 1E00..1EFF; Latin Extended Additional
            0x1F00,   // 1F00..1FFF; Greek Extended
            0x2000,   // 2000..206F; General Punctuation
            0x2070,   // 2070..209F; Superscripts and Subscripts
            0x20A0,   // 20A0..20CF; Currency Symbols
            0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
            0x2100,   // 2100..214F; Letterlike Symbols
            0x2150,   // 2150..218F; Number Forms
            0x2190,   // 2190..21FF; Arrows
            0x2200,   // 2200..22FF; Mathematical Operators
            0x2300,   // 2300..23FF; Miscellaneous Technical
            0x2400,   // 2400..243F; Control Pictures
            0x2440,   // 2440..245F; Optical Character Recognition
            0x2460,   // 2460..24FF; Enclosed Alphanumerics
            0x2500,   // 2500..257F; Box Drawing
            0x2580,   // 2580..259F; Block Elements
            0x25A0,   // 25A0..25FF; Geometric Shapes
            0x2600,   // 2600..26FF; Miscellaneous Symbols
            0x2700,   // 2700..27BF; Dingbats
            0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
            0x27F0,   // 27F0..27FF; Supplemental Arrows-A
            0x2800,   // 2800..28FF; Braille Patterns
            0x2900,   // 2900..297F; Supplemental Arrows-B
            0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
            0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
            0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
            0x2C00,   // 2C00..2C5F; Glagolitic
            0x2C60,   // 2C60..2C7F; Latin Extended-C
            0x2C80,   // 2C80..2CFF; Coptic
            0x2D00,   // 2D00..2D2F; Georgian Supplement
            0x2D30,   // 2D30..2D7F; Tifinagh
            0x2D80,   // 2D80..2DDF; Ethiopic Extended
            0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
            0x2E00,   // 2E00..2E7F; Supplemental Punctuation
            0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
            0x2F00,   // 2F00..2FDF; Kangxi Radicals
            0x2FE0,   //             unassigned
            0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
            0x3000,   // 3000..303F; CJK Symbols and Punctuation
            0x3040,   // 3040..309F; Hiragana
            0x30A0,   // 30A0..30FF; Katakana
            0x3100,   // 3100..312F; Bopomofo
            0x3130,   // 3130..318F; Hangul Compatibility Jamo
            0x3190,   // 3190..319F; Kanbun
            0x31A0,   // 31A0..31BF; Bopomofo Extended
            0x31C0,   // 31C0..31EF; CJK Strokes
            0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
            0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
            0x3300,   // 3300..33FF; CJK Compatibility
            0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
            0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
            0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
            0xA000,   // A000..A48F; Yi Syllables
            0xA490,   // A490..A4CF; Yi Radicals
            0xA4D0,   // A4D0..A4FF; Lisu
            0xA500,   // A500..A63F; Vai
            0xA640,   // A640..A69F; Cyrillic Extended-B
            0xA6A0,   // A6A0..A6FF; Bamum
            0xA700,   // A700..A71F; Modifier Tone Letters
            0xA720,   // A720..A7FF; Latin Extended-D
            0xA800,   // A800..A82F; Syloti Nagri
            0xA830,   // A830..A83F; Common Indic Number Forms
            0xA840,   // A840..A87F; Phags-pa
            0xA880,   // A880..A8DF; Saurashtra
            0xA8E0,   // A8E0..A8FF; Devanagari Extended
            0xA900,   // A900..A92F; Kayah Li
            0xA930,   // A930..A95F; Rejang
            0xA960,   // A960..A97F; Hangul Jamo Extended-A
            0xA980,   // A980..A9DF; Javanese
            0xA9E0,   //             unassigned
            0xAA00,   // AA00..AA5F; Cham
            0xAA60,   // AA60..AA7F; Myanmar Extended-A
            0xAA80,   // AA80..AADF; Tai Viet
            0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
            0xAB00,   // AB00..AB2F; Ethiopic Extended-A
            0xAB30,   //             unassigned
            0xABC0,   // ABC0..ABFF; Meetei Mayek
            0xAC00,   // AC00..D7AF; Hangul Syllables
            0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
            0xD800,   // D800..DB7F; High Surrogates
            0xDB80,   // DB80..DBFF; High Private Use Surrogates
            0xDC00,   // DC00..DFFF; Low Surrogates
            0xE000,   // E000..F8FF; Private Use Area
            0xF900,   // F900..FAFF; CJK Compatibility Ideographs
            0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
            0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
            0xFE00,   // FE00..FE0F; Variation Selectors
            0xFE10,   // FE10..FE1F; Vertical Forms
            0xFE20,   // FE20..FE2F; Combining Half Marks
            0xFE30,   // FE30..FE4F; CJK Compatibility Forms
            0xFE50,   // FE50..FE6F; Small Form Variants
            0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
            0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
            0xFFF0,   // FFF0..FFFF; Specials
            0x10000,  // 10000..1007F; Linear B Syllabary
            0x10080,  // 10080..100FF; Linear B Ideograms
            0x10100,  // 10100..1013F; Aegean Numbers
            0x10140,  // 10140..1018F; Ancient Greek Numbers
            0x10190,  // 10190..101CF; Ancient Symbols
            0x101D0,  // 101D0..101FF; Phaistos Disc
            0x10200,  //               unassigned
            0x10280,  // 10280..1029F; Lycian
            0x102A0,  // 102A0..102DF; Carian
            0x102E0,  //               unassigned
            0x10300,  // 10300..1032F; Old Italic
            0x10330,  // 10330..1034F; Gothic
            0x10350,  //               unassigned
            0x10380,  // 10380..1039F; Ugaritic
            0x103A0,  // 103A0..103DF; Old Persian
            0x103E0,  //               unassigned
            0x10400,  // 10400..1044F; Deseret
            0x10450,  // 10450..1047F; Shavian
            0x10480,  // 10480..104AF; Osmanya
            0x104B0,  //               unassigned
            0x10800,  // 10800..1083F; Cypriot Syllabary
            0x10840,  // 10840..1085F; Imperial Aramaic
            0x10860,  //               unassigned
            0x10900,  // 10900..1091F; Phoenician
            0x10920,  // 10920..1093F; Lydian
            0x10940,  //               unassigned
            0x10980,  // 10980..1099F; Meroitic Hieroglyphs
            0x109A0,  // 109A0..109FF; Meroitic Cursive
            0x10A00,  // 10A00..10A5F; Kharoshthi
            0x10A60,  // 10A60..10A7F; Old South Arabian
            0x10A80,  //               unassigned
            0x10B00,  // 10B00..10B3F; Avestan
            0x10B40,  // 10B40..10B5F; Inscriptional Parthian
            0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
            0x10B80,  //               unassigned
            0x10C00,  // 10C00..10C4F; Old Turkic
            0x10C50,  //               unassigned
            0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
            0x10E80,  //               unassigned
            0x11000,  // 11000..1107F; Brahmi
            0x11080,  // 11080..110CF; Kaithi
            0x110D0,  // 110D0..110FF; Sora Sompeng
            0x11100,  // 11100..1114F; Chakma
            0x11150,  //               unassigned
            0x11180,  // 11180..111DF; Sharada
            0x111E0,  //               unassigned
            0x11680,  // 11680..116CF; Takri
            0x116D0,  //               unassigned
            0x12000,  // 12000..123FF; Cuneiform
            0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
            0x12480,  //               unassigned
            0x13000,  // 13000..1342F; Egyptian Hieroglyphs
            0x13430,  //               unassigned
            0x16800,  // 16800..16A3F; Bamum Supplement
            0x16A40,  //               unassigned
            0x16F00,  // 16F00..16F9F; Miao
            0x16FA0,  //               unassigned
            0x1B000,  // 1B000..1B0FF; Kana Supplement
            0x1B100,  //               unassigned
            0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
            0x1D100,  // 1D100..1D1FF; Musical Symbols
            0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
            0x1D250,  //               unassigned
            0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
            0x1D360,  // 1D360..1D37F; Counting Rod Numerals
            0x1D380,  //               unassigned
            0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
            0x1D800,  //               unassigned
            0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
            0x1EF00,  //               unassigned
            0x1F000,  // 1F000..1F02F; Mahjong Tiles
            0x1F030,  // 1F030..1F09F; Domino Tiles
            0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
            0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
            0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
            0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
            0x1F600,  // 1F600..1F64F; Emoticons
            0x1F650,  //               unassigned
            0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
            0x1F700,  // 1F700..1F77F; Alchemical Symbols
            0x1F780,  //               unassigned
            0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
            0x2A6E0,  //               unassigned
            0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
            0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
            0x2B820,  //               unassigned
            0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
            0x2FA20,  //               unassigned
            0xE0000,  // E0000..E007F; Tags
            0xE0080,  //               unassigned
            0xE0100,  // E0100..E01EF; Variation Selectors Supplement
            0xE01F0,  //               unassigned
            0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
            0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
        };
2822 
        /*
         * Block constants, index-aligned with blockStarts: blocks[i] is the
         * block whose range begins at blockStarts[i], or null where that
         * range is marked unassigned. of(int) returns elements of this array
         * directly, so a null entry is what callers receive for a code
         * point that falls outside every defined block.
         */
        private static final UnicodeBlock[] blocks = {
            BASIC_LATIN,
            LATIN_1_SUPPLEMENT,
            LATIN_EXTENDED_A,
            LATIN_EXTENDED_B,
            IPA_EXTENSIONS,
            SPACING_MODIFIER_LETTERS,
            COMBINING_DIACRITICAL_MARKS,
            GREEK,  // listed as "Greek and Coptic" in blockStarts
            CYRILLIC,
            CYRILLIC_SUPPLEMENTARY,  // listed as "Cyrillic Supplement" in blockStarts
            ARMENIAN,
            HEBREW,
            ARABIC,
            SYRIAC,
            ARABIC_SUPPLEMENT,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            null,
            ARABIC_EXTENDED_A,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            LAO,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            HANGUL_JAMO,
            ETHIOPIC,
            ETHIOPIC_SUPPLEMENT,
            CHEROKEE,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
            OGHAM,
            RUNIC,
            TAGALOG,
            HANUNOO,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER_SYMBOLS,
            BUGINESE,
            TAI_THAM,
            null,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            null,
            SUNDANESE_SUPPLEMENT,
            VEDIC_EXTENSIONS,
            PHONETIC_EXTENSIONS,
            PHONETIC_EXTENSIONS_SUPPLEMENT,
            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
            LATIN_EXTENDED_ADDITIONAL,
            GREEK_EXTENDED,
            GENERAL_PUNCTUATION,
            SUPERSCRIPTS_AND_SUBSCRIPTS,
            CURRENCY_SYMBOLS,
            COMBINING_MARKS_FOR_SYMBOLS,  // listed as "Combining Diacritical Marks for Symbols" in blockStarts
            LETTERLIKE_SYMBOLS,
            NUMBER_FORMS,
            ARROWS,
            MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_TECHNICAL,
            CONTROL_PICTURES,
            OPTICAL_CHARACTER_RECOGNITION,
            ENCLOSED_ALPHANUMERICS,
            BOX_DRAWING,
            BLOCK_ELEMENTS,
            GEOMETRIC_SHAPES,
            MISCELLANEOUS_SYMBOLS,
            DINGBATS,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
            SUPPLEMENTAL_ARROWS_A,
            BRAILLE_PATTERNS,
            SUPPLEMENTAL_ARROWS_B,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
            GLAGOLITIC,
            LATIN_EXTENDED_C,
            COPTIC,
            GEORGIAN_SUPPLEMENT,
            TIFINAGH,
            ETHIOPIC_EXTENDED,
            CYRILLIC_EXTENDED_A,
            SUPPLEMENTAL_PUNCTUATION,
            CJK_RADICALS_SUPPLEMENT,
            KANGXI_RADICALS,
            null,
            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
            CJK_SYMBOLS_AND_PUNCTUATION,
            HIRAGANA,
            KATAKANA,
            BOPOMOFO,
            HANGUL_COMPATIBILITY_JAMO,
            KANBUN,
            BOPOMOFO_EXTENDED,
            CJK_STROKES,
            KATAKANA_PHONETIC_EXTENSIONS,
            ENCLOSED_CJK_LETTERS_AND_MONTHS,
            CJK_COMPATIBILITY,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
            YIJING_HEXAGRAM_SYMBOLS,
            CJK_UNIFIED_IDEOGRAPHS,
            YI_SYLLABLES,
            YI_RADICALS,
            LISU,
            VAI,
            CYRILLIC_EXTENDED_B,
            BAMUM,
            MODIFIER_TONE_LETTERS,
            LATIN_EXTENDED_D,
            SYLOTI_NAGRI,
            COMMON_INDIC_NUMBER_FORMS,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI_EXTENDED,
            KAYAH_LI,
            REJANG,
            HANGUL_JAMO_EXTENDED_A,
            JAVANESE,
            null,
            CHAM,
            MYANMAR_EXTENDED_A,
            TAI_VIET,
            MEETEI_MAYEK_EXTENSIONS,
            ETHIOPIC_EXTENDED_A,
            null,
            MEETEI_MAYEK,
            HANGUL_SYLLABLES,
            HANGUL_JAMO_EXTENDED_B,
            HIGH_SURROGATES,
            HIGH_PRIVATE_USE_SURROGATES,
            LOW_SURROGATES,
            PRIVATE_USE_AREA,
            CJK_COMPATIBILITY_IDEOGRAPHS,
            ALPHABETIC_PRESENTATION_FORMS,
            ARABIC_PRESENTATION_FORMS_A,
            VARIATION_SELECTORS,
            VERTICAL_FORMS,
            COMBINING_HALF_MARKS,
            CJK_COMPATIBILITY_FORMS,
            SMALL_FORM_VARIANTS,
            ARABIC_PRESENTATION_FORMS_B,
            HALFWIDTH_AND_FULLWIDTH_FORMS,
            SPECIALS,
            LINEAR_B_SYLLABARY,
            LINEAR_B_IDEOGRAMS,
            AEGEAN_NUMBERS,
            ANCIENT_GREEK_NUMBERS,
            ANCIENT_SYMBOLS,
            PHAISTOS_DISC,
            null,
            LYCIAN,
            CARIAN,
            null,
            OLD_ITALIC,
            GOTHIC,
            null,
            UGARITIC,
            OLD_PERSIAN,
            null,
            DESERET,
            SHAVIAN,
            OSMANYA,
            null,
            CYPRIOT_SYLLABARY,
            IMPERIAL_ARAMAIC,
            null,
            PHOENICIAN,
            LYDIAN,
            null,
            MEROITIC_HIEROGLYPHS,
            MEROITIC_CURSIVE,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            null,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            null,
            OLD_TURKIC,
            null,
            RUMI_NUMERAL_SYMBOLS,
            null,
            BRAHMI,
            KAITHI,
            SORA_SOMPENG,
            CHAKMA,
            null,
            SHARADA,
            null,
            TAKRI,
            null,
            CUNEIFORM,
            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
            null,
            EGYPTIAN_HIEROGLYPHS,
            null,
            BAMUM_SUPPLEMENT,
            null,
            MIAO,
            null,
            KANA_SUPPLEMENT,
            null,
            BYZANTINE_MUSICAL_SYMBOLS,
            MUSICAL_SYMBOLS,
            ANCIENT_GREEK_MUSICAL_NOTATION,
            null,
            TAI_XUAN_JING_SYMBOLS,
            COUNTING_ROD_NUMERALS,
            null,
            MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
            null,
            ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
            null,
            MAHJONG_TILES,
            DOMINO_TILES,
            PLAYING_CARDS,
            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
            MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
            EMOTICONS,
            null,
            TRANSPORT_AND_MAP_SYMBOLS,
            ALCHEMICAL_SYMBOLS,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
            null,
            CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
            null,
            TAGS,
            null,
            VARIATION_SELECTORS_SUPPLEMENT,
            null,
            SUPPLEMENTARY_PRIVATE_USE_AREA_A,
            SUPPLEMENTARY_PRIVATE_USE_AREA_B
        };
3081 
3082 
3083         /**
3084          * Returns the object representing the Unicode block containing the
3085          * given character, or {@code null} if the character is not a
3086          * member of a defined block.
3087          *
3088          * <p><b>Note:</b> This method cannot handle
3089          * <a href="Character.html#supplementary"> supplementary
3090          * characters</a>.  To support all Unicode characters, including
3091          * supplementary characters, use the {@link #of(int)} method.
3092          *
3093          * @param   c  The character in question
3094          * @return  The {@code UnicodeBlock} instance representing the
3095          *          Unicode block of which this character is a member, or
3096          *          {@code null} if the character is not a member of any
3097          *          Unicode block
3098          */
3099         public static UnicodeBlock of(char c) {
3100             return of((int)c);
3101         }
3102 
3103         /**
3104          * Returns the object representing the Unicode block
3105          * containing the given character (Unicode code point), or
3106          * {@code null} if the character is not a member of a
3107          * defined block.
3108          *
3109          * @param   codePoint the character (Unicode code point) in question.
3110          * @return  The {@code UnicodeBlock} instance representing the
3111          *          Unicode block of which this character is a member, or
3112          *          {@code null} if the character is not a member of any
3113          *          Unicode block
3114          * @exception IllegalArgumentException if the specified
3115          * {@code codePoint} is an invalid Unicode code point.
3116          * @see Character#isValidCodePoint(int)
3117          * @since   1.5
3118          */
3119         public static UnicodeBlock of(int codePoint) {
3120             if (!isValidCodePoint(codePoint)) {
3121                 throw new IllegalArgumentException(String.format("Not a valid Unicode code point: 0x%X", codePoint));
3122             }
3123 
3124             int top, bottom, current;
3125             bottom = 0;
3126             top = blockStarts.length;  // exclusive upper bound of the search window
3127             current = top/2;
3128 
3129             // invariant: top > current >= bottom && codePoint >= blockStarts[bottom]
3130             while (top - bottom > 1) {
3131                 if (codePoint >= blockStarts[current]) {
3132                     bottom = current;  // codePoint is at or above this start: discard the lower half
3133                 } else {
3134                     top = current;  // codePoint is below this start: discard the upper half
3135                 }
3136                 current = (top + bottom) / 2;
3137             }
3138             return blocks[current];  // window has narrowed to one entry, so current == bottom here
3139         }
3140 
3141         /**
3142          * Returns the UnicodeBlock with the given name. Block
3143          * names are determined by The Unicode Standard. The file
3144          * Blocks-&lt;version&gt;.txt defines blocks for a particular
3145          * version of the standard. The {@link Character} class specifies
3146          * the version of the standard that it supports.
3147          * <p>
3148          * This method accepts block names in the following forms:
3149          * <ol>
3150          * <li> Canonical block names as defined by the Unicode Standard.
3151          * For example, the standard defines a "Basic Latin" block. Therefore, this
3152          * method accepts "Basic Latin" as a valid block name. The documentation of
3153          * each UnicodeBlock provides the canonical name.
3154          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3155          * is a valid block name for the "Basic Latin" block.
3156          * <li>The text representation of each constant UnicodeBlock identifier.
3157          * For example, this method will return the {@link #BASIC_LATIN} block if
3158          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3159          * hyphens in the canonical name with underscores.
3160          * </ol>
3161          * Finally, character case is ignored for all of the valid block name forms.
3162          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3163          * The en_US locale's case mapping rules are used to provide case-insensitive
3164          * string comparisons for block name validation.
3165          * <p>
3166          * If the Unicode Standard changes block names, both the previous and
3167          * current names will be accepted.
3168          *
3169          * @param blockName A {@code UnicodeBlock} name.
3170          * @return The {@code UnicodeBlock} instance identified
3171          *         by {@code blockName}
3172          * @throws IllegalArgumentException if {@code blockName} is an
3173          *         invalid name
3174          * @throws NullPointerException if {@code blockName} is null
3175          * @since 1.5
3176          */
3177         public static final UnicodeBlock forName(String blockName) {
3178             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));  // NPE here if blockName is null
3179             if (block == null) {
3180                 throw new IllegalArgumentException("Not a valid block name: " + blockName);
3181             }
3182             return block;
3183         }
3184     }
3185 
3186 
3187     /**
3188      * A family of character subsets representing the character scripts
3189      * defined in the <a href="http://www.unicode.org/reports/tr24/">
3190      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3191      * character is assigned to a single Unicode script, either a specific
3192      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3193      * one of the following three special values,
3194      * {@link Character.UnicodeScript#INHERITED Inherited},
3195      * {@link Character.UnicodeScript#COMMON Common} or
3196      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3197      *
3198      * @since 1.7
3199      */
3200     public static enum UnicodeScript {
3201         /**
3202          * Unicode script "Common".
3203          */
3204         COMMON,
3205 
3206         /**
3207          * Unicode script "Latin".
3208          */
3209         LATIN,
3210 
3211         /**
3212          * Unicode script "Greek".
3213          */
3214         GREEK,
3215 
3216         /**
3217          * Unicode script "Cyrillic".
3218          */
3219         CYRILLIC,
3220 
3221         /**
3222          * Unicode script "Armenian".
3223          */
3224         ARMENIAN,
3225 
3226         /**
3227          * Unicode script "Hebrew".
3228          */
3229         HEBREW,
3230 
3231         /**
3232          * Unicode script "Arabic".
3233          */
3234         ARABIC,
3235 
3236         /**
3237          * Unicode script "Syriac".
3238          */
3239         SYRIAC,
3240 
3241         /**
3242          * Unicode script "Thaana".
3243          */
3244         THAANA,
3245 
3246         /**
3247          * Unicode script "Devanagari".
3248          */
3249         DEVANAGARI,
3250 
3251         /**
3252          * Unicode script "Bengali".
3253          */
3254         BENGALI,
3255 
3256         /**
3257          * Unicode script "Gurmukhi".
3258          */
3259         GURMUKHI,
3260 
3261         /**
3262          * Unicode script "Gujarati".
3263          */
3264         GUJARATI,
3265 
3266         /**
3267          * Unicode script "Oriya".
3268          */
3269         ORIYA,
3270 
3271         /**
3272          * Unicode script "Tamil".
3273          */
3274         TAMIL,
3275 
3276         /**
3277          * Unicode script "Telugu".
3278          */
3279         TELUGU,
3280 
3281         /**
3282          * Unicode script "Kannada".
3283          */
3284         KANNADA,
3285 
3286         /**
3287          * Unicode script "Malayalam".
3288          */
3289         MALAYALAM,
3290 
3291         /**
3292          * Unicode script "Sinhala".
3293          */
3294         SINHALA,
3295 
3296         /**
3297          * Unicode script "Thai".
3298          */
3299         THAI,
3300 
3301         /**
3302          * Unicode script "Lao".
3303          */
3304         LAO,
3305 
3306         /**
3307          * Unicode script "Tibetan".
3308          */
3309         TIBETAN,
3310 
3311         /**
3312          * Unicode script "Myanmar".
3313          */
3314         MYANMAR,
3315 
3316         /**
3317          * Unicode script "Georgian".
3318          */
3319         GEORGIAN,
3320 
3321         /**
3322          * Unicode script "Hangul".
3323          */
3324         HANGUL,
3325 
3326         /**
3327          * Unicode script "Ethiopic".
3328          */
3329         ETHIOPIC,
3330 
3331         /**
3332          * Unicode script "Cherokee".
3333          */
3334         CHEROKEE,
3335 
3336         /**
3337          * Unicode script "Canadian_Aboriginal".
3338          */
3339         CANADIAN_ABORIGINAL,
3340 
3341         /**
3342          * Unicode script "Ogham".
3343          */
3344         OGHAM,
3345 
3346         /**
3347          * Unicode script "Runic".
3348          */
3349         RUNIC,
3350 
3351         /**
3352          * Unicode script "Khmer".
3353          */
3354         KHMER,
3355 
3356         /**
3357          * Unicode script "Mongolian".
3358          */
3359         MONGOLIAN,
3360 
3361         /**
3362          * Unicode script "Hiragana".
3363          */
3364         HIRAGANA,
3365 
3366         /**
3367          * Unicode script "Katakana".
3368          */
3369         KATAKANA,
3370 
3371         /**
3372          * Unicode script "Bopomofo".
3373          */
3374         BOPOMOFO,
3375 
3376         /**
3377          * Unicode script "Han".
3378          */
3379         HAN,
3380 
3381         /**
3382          * Unicode script "Yi".
3383          */
3384         YI,
3385 
3386         /**
3387          * Unicode script "Old_Italic".
3388          */
3389         OLD_ITALIC,
3390 
3391         /**
3392          * Unicode script "Gothic".
3393          */
3394         GOTHIC,
3395 
3396         /**
3397          * Unicode script "Deseret".
3398          */
3399         DESERET,
3400 
3401         /**
3402          * Unicode script "Inherited".
3403          */
3404         INHERITED,
3405 
3406         /**
3407          * Unicode script "Tagalog".
3408          */
3409         TAGALOG,
3410 
3411         /**
3412          * Unicode script "Hanunoo".
3413          */
3414         HANUNOO,
3415 
3416         /**
3417          * Unicode script "Buhid".
3418          */
3419         BUHID,
3420 
3421         /**
3422          * Unicode script "Tagbanwa".
3423          */
3424         TAGBANWA,
3425 
3426         /**
3427          * Unicode script "Limbu".
3428          */
3429         LIMBU,
3430 
3431         /**
3432          * Unicode script "Tai_Le".
3433          */
3434         TAI_LE,
3435 
3436         /**
3437          * Unicode script "Linear_B".
3438          */
3439         LINEAR_B,
3440 
3441         /**
3442          * Unicode script "Ugaritic".
3443          */
3444         UGARITIC,
3445 
3446         /**
3447          * Unicode script "Shavian".
3448          */
3449         SHAVIAN,
3450 
3451         /**
3452          * Unicode script "Osmanya".
3453          */
3454         OSMANYA,
3455 
3456         /**
3457          * Unicode script "Cypriot".
3458          */
3459         CYPRIOT,
3460 
3461         /**
3462          * Unicode script "Braille".
3463          */
3464         BRAILLE,
3465 
3466         /**
3467          * Unicode script "Buginese".
3468          */
3469         BUGINESE,
3470 
3471         /**
3472          * Unicode script "Coptic".
3473          */
3474         COPTIC,
3475 
3476         /**
3477          * Unicode script "New_Tai_Lue".
3478          */
3479         NEW_TAI_LUE,
3480 
3481         /**
3482          * Unicode script "Glagolitic".
3483          */
3484         GLAGOLITIC,
3485 
3486         /**
3487          * Unicode script "Tifinagh".
3488          */
3489         TIFINAGH,
3490 
3491         /**
3492          * Unicode script "Syloti_Nagri".
3493          */
3494         SYLOTI_NAGRI,
3495 
3496         /**
3497          * Unicode script "Old_Persian".
3498          */
3499         OLD_PERSIAN,
3500 
3501         /**
3502          * Unicode script "Kharoshthi".
3503          */
3504         KHAROSHTHI,
3505 
3506         /**
3507          * Unicode script "Balinese".
3508          */
3509         BALINESE,
3510 
3511         /**
3512          * Unicode script "Cuneiform".
3513          */
3514         CUNEIFORM,
3515 
3516         /**
3517          * Unicode script "Phoenician".
3518          */
3519         PHOENICIAN,
3520 
3521         /**
3522          * Unicode script "Phags_Pa".
3523          */
3524         PHAGS_PA,
3525 
3526         /**
3527          * Unicode script "Nko".
3528          */
3529         NKO,
3530 
3531         /**
3532          * Unicode script "Sundanese".
3533          */
3534         SUNDANESE,
3535 
3536         /**
3537          * Unicode script "Batak".
3538          */
3539         BATAK,
3540 
3541         /**
3542          * Unicode script "Lepcha".
3543          */
3544         LEPCHA,
3545 
3546         /**
3547          * Unicode script "Ol_Chiki".
3548          */
3549         OL_CHIKI,
3550 
3551         /**
3552          * Unicode script "Vai".
3553          */
3554         VAI,
3555 
3556         /**
3557          * Unicode script "Saurashtra".
3558          */
3559         SAURASHTRA,
3560 
3561         /**
3562          * Unicode script "Kayah_Li".
3563          */
3564         KAYAH_LI,
3565 
3566         /**
3567          * Unicode script "Rejang".
3568          */
3569         REJANG,
3570 
3571         /**
3572          * Unicode script "Lycian".
3573          */
3574         LYCIAN,
3575 
3576         /**
3577          * Unicode script "Carian".
3578          */
3579         CARIAN,
3580 
3581         /**
3582          * Unicode script "Lydian".
3583          */
3584         LYDIAN,
3585 
3586         /**
3587          * Unicode script "Cham".
3588          */
3589         CHAM,
3590 
3591         /**
3592          * Unicode script "Tai_Tham".
3593          */
3594         TAI_THAM,
3595 
3596         /**
3597          * Unicode script "Tai_Viet".
3598          */
3599         TAI_VIET,
3600 
3601         /**
3602          * Unicode script "Avestan".
3603          */
3604         AVESTAN,
3605 
3606         /**
3607          * Unicode script "Egyptian_Hieroglyphs".
3608          */
3609         EGYPTIAN_HIEROGLYPHS,
3610 
3611         /**
3612          * Unicode script "Samaritan".
3613          */
3614         SAMARITAN,
3615 
3616         /**
3617          * Unicode script "Mandaic".
3618          */
3619         MANDAIC,
3620 
3621         /**
3622          * Unicode script "Lisu".
3623          */
3624         LISU,
3625 
3626         /**
3627          * Unicode script "Bamum".
3628          */
3629         BAMUM,
3630 
3631         /**
3632          * Unicode script "Javanese".
3633          */
3634         JAVANESE,
3635 
3636         /**
3637          * Unicode script "Meetei_Mayek".
3638          */
3639         MEETEI_MAYEK,
3640 
3641         /**
3642          * Unicode script "Imperial_Aramaic".
3643          */
3644         IMPERIAL_ARAMAIC,
3645 
3646         /**
3647          * Unicode script "Old_South_Arabian".
3648          */
3649         OLD_SOUTH_ARABIAN,
3650 
3651         /**
3652          * Unicode script "Inscriptional_Parthian".
3653          */
3654         INSCRIPTIONAL_PARTHIAN,
3655 
3656         /**
3657          * Unicode script "Inscriptional_Pahlavi".
3658          */
3659         INSCRIPTIONAL_PAHLAVI,
3660 
3661         /**
3662          * Unicode script "Old_Turkic".
3663          */
3664         OLD_TURKIC,
3665 
3666         /**
3667          * Unicode script "Brahmi".
3668          */
3669         BRAHMI,
3670 
3671         /**
3672          * Unicode script "Kaithi".
3673          */
3674         KAITHI,
3675 
3676         /**
3677          * Unicode script "Meroitic_Hieroglyphs".
3678          */
3679         MEROITIC_HIEROGLYPHS,
3680 
3681         /**
3682          * Unicode script "Meroitic_Cursive".
3683          */
3684         MEROITIC_CURSIVE,
3685 
3686         /**
3687          * Unicode script "Sora_Sompeng".
3688          */
3689         SORA_SOMPENG,
3690 
3691         /**
3692          * Unicode script "Chakma".
3693          */
3694         CHAKMA,
3695 
3696         /**
3697          * Unicode script "Sharada".
3698          */
3699         SHARADA,
3700 
3701         /**
3702          * Unicode script "Takri".
3703          */
3704         TAKRI,
3705 
3706         /**
3707          * Unicode script "Miao".
3708          */
3709         MIAO,
3710 
3711         /**
3712          * Unicode script "Unknown".
3713          */
3714         UNKNOWN;
3715 
3716         private static final int[] scriptStarts = {
3717             0x0000,   // 0000..0040; COMMON
3718             0x0041,   // 0041..005A; LATIN
3719             0x005B,   // 005B..0060; COMMON
3720             0x0061,   // 0061..007A; LATIN
3721             0x007B,   // 007B..00A9; COMMON
3722             0x00AA,   // 00AA..00AA; LATIN
3723             0x00AB,   // 00AB..00B9; COMMON
3724             0x00BA,   // 00BA..00BA; LATIN
3725             0x00BB,   // 00BB..00BF; COMMON
3726             0x00C0,   // 00C0..00D6; LATIN
3727             0x00D7,   // 00D7..00D7; COMMON
3728             0x00D8,   // 00D8..00F6; LATIN
3729             0x00F7,   // 00F7..00F7; COMMON
3730             0x00F8,   // 00F8..02B8; LATIN
3731             0x02B9,   // 02B9..02DF; COMMON
3732             0x02E0,   // 02E0..02E4; LATIN
3733             0x02E5,   // 02E5..02E9; COMMON
3734             0x02EA,   // 02EA..02EB; BOPOMOFO
3735             0x02EC,   // 02EC..02FF; COMMON
3736             0x0300,   // 0300..036F; INHERITED
3737             0x0370,   // 0370..0373; GREEK
3738             0x0374,   // 0374..0374; COMMON
3739             0x0375,   // 0375..037D; GREEK
3740             0x037E,   // 037E..0383; COMMON
3741             0x0384,   // 0384..0384; GREEK
3742             0x0385,   // 0385..0385; COMMON
3743             0x0386,   // 0386..0386; GREEK
3744             0x0387,   // 0387..0387; COMMON
3745             0x0388,   // 0388..03E1; GREEK
3746             0x03E2,   // 03E2..03EF; COPTIC
3747             0x03F0,   // 03F0..03FF; GREEK
3748             0x0400,   // 0400..0484; CYRILLIC
3749             0x0485,   // 0485..0486; INHERITED
3750             0x0487,   // 0487..0530; CYRILLIC
3751             0x0531,   // 0531..0588; ARMENIAN
3752             0x0589,   // 0589..0589; COMMON
3753             0x058A,   // 058A..0590; ARMENIAN
3754             0x0591,   // 0591..05FF; HEBREW
3755             0x0600,   // 0600..060B; ARABIC
3756             0x060C,   // 060C..060C; COMMON
3757             0x060D,   // 060D..061A; ARABIC
3758             0x061B,   // 061B..061D; COMMON
3759             0x061E,   // 061E..061E; ARABIC
3760             0x061F,   // 061F..061F; COMMON
3761             0x0620,   // 0620..063F; ARABIC
3762             0x0640,   // 0640..0640; COMMON
3763             0x0641,   // 0641..064A; ARABIC
3764             0x064B,   // 064B..0655; INHERITED
3765             0x0656,   // 0656..065F; ARABIC
3766             0x0660,   // 0660..0669; COMMON
3767             0x066A,   // 066A..066F; ARABIC
3768             0x0670,   // 0670..0670; INHERITED
3769             0x0671,   // 0671..06DC; ARABIC
3770             0x06DD,   // 06DD..06DD; COMMON
3771             0x06DE,   // 06DE..06FF; ARABIC
3772             0x0700,   // 0700..074F; SYRIAC
3773             0x0750,   // 0750..077F; ARABIC
3774             0x0780,   // 0780..07BF; THAANA
3775             0x07C0,   // 07C0..07FF; NKO
3776             0x0800,   // 0800..083F; SAMARITAN
3777             0x0840,   // 0840..089F; MANDAIC
3778             0x08A0,   // 08A0..08FF; ARABIC
3779             0x0900,   // 0900..0950; DEVANAGARI
3780             0x0951,   // 0951..0952; INHERITED
3781             0x0953,   // 0953..0963; DEVANAGARI
3782             0x0964,   // 0964..0965; COMMON
3783             0x0966,   // 0966..0980; DEVANAGARI
3784             0x0981,   // 0981..0A00; BENGALI
3785             0x0A01,   // 0A01..0A80; GURMUKHI
3786             0x0A81,   // 0A81..0B00; GUJARATI
3787             0x0B01,   // 0B01..0B81; ORIYA
3788             0x0B82,   // 0B82..0C00; TAMIL
3789             0x0C01,   // 0C01..0C81; TELUGU
3790             0x0C82,   // 0C82..0CF0; KANNADA
3791             0x0D02,   // 0D02..0D81; MALAYALAM
3792             0x0D82,   // 0D82..0E00; SINHALA
3793             0x0E01,   // 0E01..0E3E; THAI
3794             0x0E3F,   // 0E3F..0E3F; COMMON
3795             0x0E40,   // 0E40..0E80; THAI
3796             0x0E81,   // 0E81..0EFF; LAO
3797             0x0F00,   // 0F00..0FD4; TIBETAN
3798             0x0FD5,   // 0FD5..0FD8; COMMON
3799             0x0FD9,   // 0FD9..0FFF; TIBETAN
3800             0x1000,   // 1000..109F; MYANMAR
3801             0x10A0,   // 10A0..10FA; GEORGIAN
3802             0x10FB,   // 10FB..10FB; COMMON
3803             0x10FC,   // 10FC..10FF; GEORGIAN
3804             0x1100,   // 1100..11FF; HANGUL
3805             0x1200,   // 1200..139F; ETHIOPIC
3806             0x13A0,   // 13A0..13FF; CHEROKEE
3807             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
3808             0x1680,   // 1680..169F; OGHAM
3809             0x16A0,   // 16A0..16EA; RUNIC
3810             0x16EB,   // 16EB..16ED; COMMON
3811             0x16EE,   // 16EE..16FF; RUNIC
3812             0x1700,   // 1700..171F; TAGALOG
3813             0x1720,   // 1720..1734; HANUNOO
3814             0x1735,   // 1735..173F; COMMON
3815             0x1740,   // 1740..175F; BUHID
3816             0x1760,   // 1760..177F; TAGBANWA
3817             0x1780,   // 1780..17FF; KHMER
3818             0x1800,   // 1800..1801; MONGOLIAN
3819             0x1802,   // 1802..1803; COMMON
3820             0x1804,   // 1804..1804; MONGOLIAN
3821             0x1805,   // 1805..1805; COMMON
3822             0x1806,   // 1806..18AF; MONGOLIAN
3823             0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
3824             0x1900,   // 1900..194F; LIMBU
3825             0x1950,   // 1950..197F; TAI_LE
3826             0x1980,   // 1980..19DF; NEW_TAI_LUE
3827             0x19E0,   // 19E0..19FF; KHMER
3828             0x1A00,   // 1A00..1A1F; BUGINESE
3829             0x1A20,   // 1A20..1AFF; TAI_THAM
3830             0x1B00,   // 1B00..1B7F; BALINESE
3831             0x1B80,   // 1B80..1BBF; SUNDANESE
3832             0x1BC0,   // 1BC0..1BFF; BATAK
3833             0x1C00,   // 1C00..1C4F; LEPCHA
3834             0x1C50,   // 1C50..1CBF; OL_CHIKI
3835             0x1CC0,   // 1CC0..1CCF; SUNDANESE
3836             0x1CD0,   // 1CD0..1CD2; INHERITED
3837             0x1CD3,   // 1CD3..1CD3; COMMON
3838             0x1CD4,   // 1CD4..1CE0; INHERITED
3839             0x1CE1,   // 1CE1..1CE1; COMMON
3840             0x1CE2,   // 1CE2..1CE8; INHERITED
3841             0x1CE9,   // 1CE9..1CEC; COMMON
3842             0x1CED,   // 1CED..1CED; INHERITED
3843             0x1CEE,   // 1CEE..1CF3; COMMON
3844             0x1CF4,   // 1CF4..1CF4; INHERITED
3845             0x1CF5,   // 1CF5..1CFF; COMMON
3846             0x1D00,   // 1D00..1D25; LATIN
3847             0x1D26,   // 1D26..1D2A; GREEK
3848             0x1D2B,   // 1D2B..1D2B; CYRILLIC
3849             0x1D2C,   // 1D2C..1D5C; LATIN
3850             0x1D5D,   // 1D5D..1D61; GREEK
3851             0x1D62,   // 1D62..1D65; LATIN
3852             0x1D66,   // 1D66..1D6A; GREEK
3853             0x1D6B,   // 1D6B..1D77; LATIN
3854             0x1D78,   // 1D78..1D78; CYRILLIC
3855             0x1D79,   // 1D79..1DBE; LATIN
3856             0x1DBF,   // 1DBF..1DBF; GREEK
3857             0x1DC0,   // 1DC0..1DFF; INHERITED
3858             0x1E00,   // 1E00..1EFF; LATIN
3859             0x1F00,   // 1F00..1FFF; GREEK
3860             0x2000,   // 2000..200B; COMMON
3861             0x200C,   // 200C..200D; INHERITED
3862             0x200E,   // 200E..2070; COMMON
3863             0x2071,   // 2071..2073; LATIN
3864             0x2074,   // 2074..207E; COMMON
3865             0x207F,   // 207F..207F; LATIN
3866             0x2080,   // 2080..208F; COMMON
3867             0x2090,   // 2090..209F; LATIN
3868             0x20A0,   // 20A0..20CF; COMMON
3869             0x20D0,   // 20D0..20FF; INHERITED
3870             0x2100,   // 2100..2125; COMMON
3871             0x2126,   // 2126..2126; GREEK
3872             0x2127,   // 2127..2129; COMMON
3873             0x212A,   // 212A..212B; LATIN
3874             0x212C,   // 212C..2131; COMMON
3875             0x2132,   // 2132..2132; LATIN
3876             0x2133,   // 2133..214D; COMMON
3877             0x214E,   // 214E..214E; LATIN
3878             0x214F,   // 214F..215F; COMMON
3879             0x2160,   // 2160..2188; LATIN
3880             0x2189,   // 2189..27FF; COMMON
3881             0x2800,   // 2800..28FF; BRAILLE
3882             0x2900,   // 2900..2BFF; COMMON
3883             0x2C00,   // 2C00..2C5F; GLAGOLITIC
3884             0x2C60,   // 2C60..2C7F; LATIN
3885             0x2C80,   // 2C80..2CFF; COPTIC
3886             0x2D00,   // 2D00..2D2F; GEORGIAN
3887             0x2D30,   // 2D30..2D7F; TIFINAGH
3888             0x2D80,   // 2D80..2DDF; ETHIOPIC
3889             0x2DE0,   // 2DE0..2DFF; CYRILLIC
3890             0x2E00,   // 2E00..2E7F; COMMON
3891             0x2E80,   // 2E80..2FEF; HAN
3892             0x2FF0,   // 2FF0..3004; COMMON
3893             0x3005,   // 3005..3005; HAN
3894             0x3006,   // 3006..3006; COMMON
3895             0x3007,   // 3007..3007; HAN
3896             0x3008,   // 3008..3020; COMMON
3897             0x3021,   // 3021..3029; HAN
3898             0x302A,   // 302A..302D; INHERITED
3899             0x302E,   // 302E..302F; HANGUL
3900             0x3030,   // 3030..3037; COMMON
3901             0x3038,   // 3038..303B; HAN
3902             0x303C,   // 303C..3040; COMMON
3903             0x3041,   // 3041..3098; HIRAGANA
3904             0x3099,   // 3099..309A; INHERITED
3905             0x309B,   // 309B..309C; COMMON
3906             0x309D,   // 309D..309F; HIRAGANA
3907             0x30A0,   // 30A0..30A0; COMMON
3908             0x30A1,   // 30A1..30FA; KATAKANA
3909             0x30FB,   // 30FB..30FC; COMMON
3910             0x30FD,   // 30FD..3104; KATAKANA
3911             0x3105,   // 3105..3130; BOPOMOFO
3912             0x3131,   // 3131..318F; HANGUL
3913             0x3190,   // 3190..319F; COMMON
3914             0x31A0,   // 31A0..31BF; BOPOMOFO
3915             0x31C0,   // 31C0..31EF; COMMON
3916             0x31F0,   // 31F0..31FF; KATAKANA
3917             0x3200,   // 3200..321F; HANGUL
3918             0x3220,   // 3220..325F; COMMON
3919             0x3260,   // 3260..327E; HANGUL
3920             0x327F,   // 327F..32CF; COMMON
3921             0x32D0,   // 32D0..3357; KATAKANA
3922             0x3358,   // 3358..33FF; COMMON
3923             0x3400,   // 3400..4DBF; HAN
3924             0x4DC0,   // 4DC0..4DFF; COMMON
3925             0x4E00,   // 4E00..9FFF; HAN
3926             0xA000,   // A000..A4CF; YI
3927             0xA4D0,   // A4D0..A4FF; LISU
3928             0xA500,   // A500..A63F; VAI
3929             0xA640,   // A640..A69F; CYRILLIC
3930             0xA6A0,   // A6A0..A6FF; BAMUM
3931             0xA700,   // A700..A721; COMMON
3932             0xA722,   // A722..A787; LATIN
3933             0xA788,   // A788..A78A; COMMON
3934             0xA78B,   // A78B..A7FF; LATIN
3935             0xA800,   // A800..A82F; SYLOTI_NAGRI
3936             0xA830,   // A830..A83F; COMMON
3937             0xA840,   // A840..A87F; PHAGS_PA
3938             0xA880,   // A880..A8DF; SAURASHTRA
3939             0xA8E0,   // A8E0..A8FF; DEVANAGARI
3940             0xA900,   // A900..A92F; KAYAH_LI
3941             0xA930,   // A930..A95F; REJANG
3942             0xA960,   // A960..A97F; HANGUL
3943             0xA980,   // A980..A9FF; JAVANESE
3944             0xAA00,   // AA00..AA5F; CHAM
3945             0xAA60,   // AA60..AA7F; MYANMAR
3946             0xAA80,   // AA80..AADF; TAI_VIET
3947             0xAAE0,   // AAE0..AB00; MEETEI_MAYEK
3948             0xAB01,   // AB01..ABBF; ETHIOPIC
3949             0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
3950             0xAC00,   // AC00..D7FB; HANGUL
3951             0xD7FC,   // D7FC..F8FF; UNKNOWN
3952             0xF900,   // F900..FAFF; HAN
3953             0xFB00,   // FB00..FB12; LATIN
3954             0xFB13,   // FB13..FB1C; ARMENIAN
3955             0xFB1D,   // FB1D..FB4F; HEBREW
3956             0xFB50,   // FB50..FD3D; ARABIC
3957             0xFD3E,   // FD3E..FD4F; COMMON
3958             0xFD50,   // FD50..FDFC; ARABIC
3959             0xFDFD,   // FDFD..FDFF; COMMON
3960             0xFE00,   // FE00..FE0F; INHERITED
3961             0xFE10,   // FE10..FE1F; COMMON
3962             0xFE20,   // FE20..FE2F; INHERITED
3963             0xFE30,   // FE30..FE6F; COMMON
3964             0xFE70,   // FE70..FEFE; ARABIC
3965             0xFEFF,   // FEFF..FF20; COMMON
3966             0xFF21,   // FF21..FF3A; LATIN
3967             0xFF3B,   // FF3B..FF40; COMMON
3968             0xFF41,   // FF41..FF5A; LATIN
3969             0xFF5B,   // FF5B..FF65; COMMON
3970             0xFF66,   // FF66..FF6F; KATAKANA
3971             0xFF70,   // FF70..FF70; COMMON
3972             0xFF71,   // FF71..FF9D; KATAKANA
3973             0xFF9E,   // FF9E..FF9F; COMMON
3974             0xFFA0,   // FFA0..FFDF; HANGUL
3975             0xFFE0,   // FFE0..FFFF; COMMON
3976             0x10000,  // 10000..100FF; LINEAR_B
3977             0x10100,  // 10100..1013F; COMMON
3978             0x10140,  // 10140..1018F; GREEK
3979             0x10190,  // 10190..101FC; COMMON
3980             0x101FD,  // 101FD..1027F; INHERITED
3981             0x10280,  // 10280..1029F; LYCIAN
3982             0x102A0,  // 102A0..102FF; CARIAN
3983             0x10300,  // 10300..1032F; OLD_ITALIC
3984             0x10330,  // 10330..1037F; GOTHIC
3985             0x10380,  // 10380..1039F; UGARITIC
3986             0x103A0,  // 103A0..103FF; OLD_PERSIAN
3987             0x10400,  // 10400..1044F; DESERET
3988             0x10450,  // 10450..1047F; SHAVIAN
3989             0x10480,  // 10480..107FF; OSMANYA
3990             0x10800,  // 10800..1083F; CYPRIOT
3991             0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
3992             0x10900,  // 10900..1091F; PHOENICIAN
3993             0x10920,  // 10920..1097F; LYDIAN
3994             0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
3995             0x109A0,  // 109A0..109FF; MEROITIC_CURSIVE
3996             0x10A00,  // 10A00..10A5F; KHAROSHTHI
3997             0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
3998             0x10B00,  // 10B00..10B3F; AVESTAN
3999             0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
4000             0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
4001             0x10C00,  // 10C00..10E5F; OLD_TURKIC
4002             0x10E60,  // 10E60..10FFF; ARABIC
4003             0x11000,  // 11000..1107F; BRAHMI
4004             0x11080,  // 11080..110CF; KAITHI
4005             0x110D0,  // 110D0..110FF; SORA_SOMPENG
4006             0x11100,  // 11100..1117F; CHAKMA
4007             0x11180,  // 11180..1167F; SHARADA
4008             0x11680,  // 11680..116CF; TAKRI
4009             0x12000,  // 12000..12FFF; CUNEIFORM
4010             0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
4011             0x16800,  // 16800..16A38; BAMUM
4012             0x16F00,  // 16F00..16F9F; MIAO
4013             0x1B000,  // 1B000..1B000; KATAKANA
4014             0x1B001,  // 1B001..1CFFF; HIRAGANA
4015             0x1D000,  // 1D000..1D166; COMMON
4016             0x1D167,  // 1D167..1D169; INHERITED
4017             0x1D16A,  // 1D16A..1D17A; COMMON
4018             0x1D17B,  // 1D17B..1D182; INHERITED
4019             0x1D183,  // 1D183..1D184; COMMON
4020             0x1D185,  // 1D185..1D18B; INHERITED
4021             0x1D18C,  // 1D18C..1D1A9; COMMON
4022             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
4023             0x1D1AE,  // 1D1AE..1D1FF; COMMON
4024             0x1D200,  // 1D200..1D2FF; GREEK
4025             0x1D300,  // 1D300..1EDFF; COMMON
4026             0x1EE00,  // 1EE00..1EFFF; ARABIC
4027             0x1F000,  // 1F000..1F1FF; COMMON
4028             0x1F200,  // 1F200..1F200; HIRAGANA
4029             0x1F201,  // 1F201..1FFFF; COMMON
4030             0x20000,  // 20000..E0000; HAN
4031             0xE0001,  // E0001..E00FF; COMMON
4032             0xE0100,  // E0100..E01EF; INHERITED
4033             0xE01F0   // E01F0..10FFFF; UNKNOWN
4034 
4035         };
4036 
// Script lookup table used by of(int): the index produced by the binary
// search over scriptStarts selects the entry returned from this array, so
// the two arrays must stay index-aligned (one entry here per range start).
private static final UnicodeScript[] scripts = {
    COMMON,
    LATIN,
    COMMON,
    LATIN,
    COMMON,
    LATIN,
    COMMON,
    LATIN,
    COMMON,
    LATIN,
    COMMON,
    LATIN,
    COMMON,
    LATIN,
    COMMON,
    LATIN,
    COMMON,
    BOPOMOFO,
    COMMON,
    INHERITED,
    GREEK,
    COMMON,
    GREEK,
    COMMON,
    GREEK,
    COMMON,
    GREEK,
    COMMON,
    GREEK,
    COPTIC,
    GREEK,
    CYRILLIC,
    INHERITED,
    CYRILLIC,
    ARMENIAN,
    COMMON,
    ARMENIAN,
    HEBREW,
    ARABIC,
    COMMON,
    ARABIC,
    COMMON,
    ARABIC,
    COMMON,
    ARABIC,
    COMMON,
    ARABIC,
    INHERITED,
    ARABIC,
    COMMON,
    ARABIC,
    INHERITED,
    ARABIC,
    COMMON,
    ARABIC,
    SYRIAC,
    ARABIC,
    THAANA,
    NKO,
    SAMARITAN,
    MANDAIC,
    ARABIC,
    DEVANAGARI,
    INHERITED,
    DEVANAGARI,
    COMMON,
    DEVANAGARI,
    BENGALI,
    GURMUKHI,
    GUJARATI,
    ORIYA,
    TAMIL,
    TELUGU,
    KANNADA,
    MALAYALAM,
    SINHALA,
    THAI,
    COMMON,
    THAI,
    LAO,
    TIBETAN,
    COMMON,
    TIBETAN,
    MYANMAR,
    GEORGIAN,
    COMMON,
    GEORGIAN,
    HANGUL,
    ETHIOPIC,
    CHEROKEE,
    CANADIAN_ABORIGINAL,
    OGHAM,
    RUNIC,
    COMMON,
    RUNIC,
    TAGALOG,
    HANUNOO,
    COMMON,
    BUHID,
    TAGBANWA,
    KHMER,
    MONGOLIAN,
    COMMON,
    MONGOLIAN,
    COMMON,
    MONGOLIAN,
    CANADIAN_ABORIGINAL,
    LIMBU,
    TAI_LE,
    NEW_TAI_LUE,
    KHMER,
    BUGINESE,
    TAI_THAM,
    BALINESE,
    SUNDANESE,
    BATAK,
    LEPCHA,
    OL_CHIKI,
    SUNDANESE,
    INHERITED,
    COMMON,
    INHERITED,
    COMMON,
    INHERITED,
    COMMON,
    INHERITED,
    COMMON,
    INHERITED,
    COMMON,
    LATIN,
    GREEK,
    CYRILLIC,
    LATIN,
    GREEK,
    LATIN,
    GREEK,
    LATIN,
    CYRILLIC,
    LATIN,
    GREEK,
    INHERITED,
    LATIN,
    GREEK,
    COMMON,
    INHERITED,
    COMMON,
    LATIN,
    COMMON,
    LATIN,
    COMMON,
    LATIN,
    COMMON,
    INHERITED,
    COMMON,
    GREEK,
    COMMON,
    LATIN,
    COMMON,
    LATIN,
    COMMON,
    LATIN,
    COMMON,
    LATIN,
    COMMON,
    BRAILLE,
    COMMON,
    GLAGOLITIC,
    LATIN,
    COPTIC,
    GEORGIAN,
    TIFINAGH,
    ETHIOPIC,
    CYRILLIC,
    COMMON,
    HAN,
    COMMON,
    HAN,
    COMMON,
    HAN,
    COMMON,
    HAN,
    INHERITED,
    HANGUL,
    COMMON,
    HAN,
    COMMON,
    HIRAGANA,
    INHERITED,
    COMMON,
    HIRAGANA,
    COMMON,
    KATAKANA,
    COMMON,
    KATAKANA,
    BOPOMOFO,
    HANGUL,
    COMMON,
    BOPOMOFO,
    COMMON,
    KATAKANA,
    HANGUL,
    COMMON,
    HANGUL,
    COMMON,
    KATAKANA,
    COMMON,
    HAN,
    COMMON,
    HAN,
    YI,
    LISU,
    VAI,
    CYRILLIC,
    BAMUM,
    COMMON,
    LATIN,
    COMMON,
    LATIN,
    SYLOTI_NAGRI,
    COMMON,
    PHAGS_PA,
    SAURASHTRA,
    DEVANAGARI,
    KAYAH_LI,
    REJANG,
    HANGUL,
    JAVANESE,
    CHAM,
    MYANMAR,
    TAI_VIET,
    MEETEI_MAYEK,
    ETHIOPIC,
    MEETEI_MAYEK,
    HANGUL,
    UNKNOWN     ,
    HAN,
    LATIN,
    ARMENIAN,
    HEBREW,
    ARABIC,
    COMMON,
    ARABIC,
    COMMON,
    INHERITED,
    COMMON,
    INHERITED,
    COMMON,
    ARABIC,
    COMMON,
    LATIN,
    COMMON,
    LATIN,
    COMMON,
    KATAKANA,
    COMMON,
    KATAKANA,
    COMMON,
    HANGUL,
    COMMON,
    LINEAR_B,
    COMMON,
    GREEK,
    COMMON,
    INHERITED,
    LYCIAN,
    CARIAN,
    OLD_ITALIC,
    GOTHIC,
    UGARITIC,
    OLD_PERSIAN,
    DESERET,
    SHAVIAN,
    OSMANYA,
    CYPRIOT,
    IMPERIAL_ARAMAIC,
    PHOENICIAN,
    LYDIAN,
    MEROITIC_HIEROGLYPHS,
    MEROITIC_CURSIVE,
    KHAROSHTHI,
    OLD_SOUTH_ARABIAN,
    AVESTAN,
    INSCRIPTIONAL_PARTHIAN,
    INSCRIPTIONAL_PAHLAVI,
    OLD_TURKIC,
    ARABIC,
    BRAHMI,
    KAITHI,
    SORA_SOMPENG,
    CHAKMA,
    SHARADA,
    TAKRI,
    CUNEIFORM,
    EGYPTIAN_HIEROGLYPHS,
    BAMUM,
    MIAO,
    KATAKANA,
    HIRAGANA,
    COMMON,
    INHERITED,
    COMMON,
    INHERITED,
    COMMON,
    INHERITED,
    COMMON,
    INHERITED,
    COMMON,
    GREEK,
    COMMON,
    ARABIC,
    COMMON,
    HIRAGANA,
    COMMON,
    HAN,
    COMMON,
    INHERITED,
    UNKNOWN
};
4356 
4357         private static HashMap<String, Character.UnicodeScript> aliases;
4358         static {
4359             aliases = new HashMap<>(128);
4360             aliases.put("ARAB", ARABIC);
4361             aliases.put("ARMI", IMPERIAL_ARAMAIC);
4362             aliases.put("ARMN", ARMENIAN);
4363             aliases.put("AVST", AVESTAN);
4364             aliases.put("BALI", BALINESE);
4365             aliases.put("BAMU", BAMUM);
4366             aliases.put("BATK", BATAK);
4367             aliases.put("BENG", BENGALI);
4368             aliases.put("BOPO", BOPOMOFO);
4369             aliases.put("BRAI", BRAILLE);
4370             aliases.put("BRAH", BRAHMI);
4371             aliases.put("BUGI", BUGINESE);
4372             aliases.put("BUHD", BUHID);
4373             aliases.put("CAKM", CHAKMA);
4374             aliases.put("CANS", CANADIAN_ABORIGINAL);
4375             aliases.put("CARI", CARIAN);
4376             aliases.put("CHAM", CHAM);
4377             aliases.put("CHER", CHEROKEE);
4378             aliases.put("COPT", COPTIC);
4379             aliases.put("CPRT", CYPRIOT);
4380             aliases.put("CYRL", CYRILLIC);
4381             aliases.put("DEVA", DEVANAGARI);
4382             aliases.put("DSRT", DESERET);
4383             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4384             aliases.put("ETHI", ETHIOPIC);
4385             aliases.put("GEOR", GEORGIAN);
4386             aliases.put("GLAG", GLAGOLITIC);
4387             aliases.put("GOTH", GOTHIC);
4388             aliases.put("GREK", GREEK);
4389             aliases.put("GUJR", GUJARATI);
4390             aliases.put("GURU", GURMUKHI);
4391             aliases.put("HANG", HANGUL);
4392             aliases.put("HANI", HAN);
4393             aliases.put("HANO", HANUNOO);
4394             aliases.put("HEBR", HEBREW);
4395             aliases.put("HIRA", HIRAGANA);
4396             // it appears we don't have the KATAKANA_OR_HIRAGANA
4397             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4398             aliases.put("ITAL", OLD_ITALIC);
4399             aliases.put("JAVA", JAVANESE);
4400             aliases.put("KALI", KAYAH_LI);
4401             aliases.put("KANA", KATAKANA);
4402             aliases.put("KHAR", KHAROSHTHI);
4403             aliases.put("KHMR", KHMER);
4404             aliases.put("KNDA", KANNADA);
4405             aliases.put("KTHI", KAITHI);
4406             aliases.put("LANA", TAI_THAM);
4407             aliases.put("LAOO", LAO);
4408             aliases.put("LATN", LATIN);
4409             aliases.put("LEPC", LEPCHA);
4410             aliases.put("LIMB", LIMBU);
4411             aliases.put("LINB", LINEAR_B);
4412             aliases.put("LISU", LISU);
4413             aliases.put("LYCI", LYCIAN);
4414             aliases.put("LYDI", LYDIAN);
4415             aliases.put("MAND", MANDAIC);
4416             aliases.put("MERC", MEROITIC_CURSIVE);
4417             aliases.put("MERO", MEROITIC_HIEROGLYPHS);
4418             aliases.put("MLYM", MALAYALAM);
4419             aliases.put("MONG", MONGOLIAN);
4420             aliases.put("MTEI", MEETEI_MAYEK);
4421             aliases.put("MYMR", MYANMAR);
4422             aliases.put("NKOO", NKO);
4423             aliases.put("OGAM", OGHAM);
4424             aliases.put("OLCK", OL_CHIKI);
4425             aliases.put("ORKH", OLD_TURKIC);
4426             aliases.put("ORYA", ORIYA);
4427             aliases.put("OSMA", OSMANYA);
4428             aliases.put("PHAG", PHAGS_PA);
4429             aliases.put("PLRD", MIAO);
4430             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4431             aliases.put("PHNX", PHOENICIAN);
4432             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4433             aliases.put("RJNG", REJANG);
4434             aliases.put("RUNR", RUNIC);
4435             aliases.put("SAMR", SAMARITAN);
4436             aliases.put("SARB", OLD_SOUTH_ARABIAN);
4437             aliases.put("SAUR", SAURASHTRA);
4438             aliases.put("SHAW", SHAVIAN);
4439             aliases.put("SHRD", SHARADA);
4440             aliases.put("SINH", SINHALA);
4441             aliases.put("SORA", SORA_SOMPENG);
4442             aliases.put("SUND", SUNDANESE);
4443             aliases.put("SYLO", SYLOTI_NAGRI);
4444             aliases.put("SYRC", SYRIAC);
4445             aliases.put("TAGB", TAGBANWA);
4446             aliases.put("TALE", TAI_LE);
4447             aliases.put("TAKR", TAKRI);
4448             aliases.put("TALU", NEW_TAI_LUE);
4449             aliases.put("TAML", TAMIL);
4450             aliases.put("TAVT", TAI_VIET);
4451             aliases.put("TELU", TELUGU);
4452             aliases.put("TFNG", TIFINAGH);
4453             aliases.put("TGLG", TAGALOG);
4454             aliases.put("THAA", THAANA);
4455             aliases.put("THAI", THAI);
4456             aliases.put("TIBT", TIBETAN);
4457             aliases.put("UGAR", UGARITIC);
4458             aliases.put("VAII", VAI);
4459             aliases.put("XPEO", OLD_PERSIAN);
4460             aliases.put("XSUX", CUNEIFORM);
4461             aliases.put("YIII", YI);
4462             aliases.put("ZINH", INHERITED);
4463             aliases.put("ZYYY", COMMON);
4464             aliases.put("ZZZZ", UNKNOWN);
4465         }
4466 
4467         /**
4468          * Returns the enum constant representing the Unicode script of which
4469          * the given character (Unicode code point) is assigned to.
4470          *
4471          * @param   codePoint the character (Unicode code point) in question.
4472          * @return  The {@code UnicodeScript} constant representing the
4473          *          Unicode script of which this character is assigned to.
4474          *
4475          * @exception IllegalArgumentException if the specified
4476          * {@code codePoint} is an invalid Unicode code point.
4477          * @see Character#isValidCodePoint(int)
4478          *
4479          */
4480         public static UnicodeScript of(int codePoint) {
4481             if (!isValidCodePoint(codePoint))
4482                 throw new IllegalArgumentException();
4483             int type = getType(codePoint);
4484             // leave SURROGATE and PRIVATE_USE for table lookup
4485             if (type == UNASSIGNED)
4486                 return UNKNOWN;
4487             int index = Arrays.binarySearch(scriptStarts, codePoint);
4488             if (index < 0)
4489                 index = -index - 2;
4490             return scripts[index];
4491         }
4492 
4493         /**
4494          * Returns the UnicodeScript constant with the given Unicode script
4495          * name or the script name alias. Script names and their aliases are
4496          * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4497          * and PropertyValueAliases&lt;version&gt;.txt define script names
4498          * and the script name aliases for a particular version of the
4499          * standard. The {@link Character} class specifies the version of
4500          * the standard that it supports.
4501          * <p>
4502          * Character case is ignored for all of the valid script names.
4503          * The en_US locale's case mapping rules are used to provide
4504          * case-insensitive string comparisons for script name validation.
4505          *
4506          * @param scriptName A {@code UnicodeScript} name.
4507          * @return The {@code UnicodeScript} constant identified
4508          *         by {@code scriptName}
4509          * @throws IllegalArgumentException if {@code scriptName} is an
4510          *         invalid name
4511          * @throws NullPointerException if {@code scriptName} is null
4512          */
4513         public static final UnicodeScript forName(String scriptName) {
4514             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4515                                  //.replace(' ', '_'));
4516             UnicodeScript sc = aliases.get(scriptName);
4517             if (sc != null)
4518                 return sc;
4519             return valueOf(scriptName);
4520         }
4521     }
4522 
/**
 * The primitive {@code char} value wrapped by this {@code Character}.
 * It is assigned once, by the constructor, and never changes.
 *
 * @serial
 */
private final char value;

/**
 * Serialization version identifier. The value is the one used by
 * JDK 1.0.2 and is kept unchanged for interoperability.
 */
private static final long serialVersionUID = 3786198910865385080L;
4532 
/**
 * Constructs a newly allocated {@code Character} object that
 * represents the specified {@code char} value.
 * <p>
 * Every call creates a distinct object; {@link #valueOf(char)} can
 * return a cached instance instead.
 *
 * @param  value   the value to be represented by the
 *                  {@code Character} object.
 */
public Character(char value) {
    this.value = value;
}
4543 
4544     private static class CharacterCache {
4545         private CharacterCache(){}
4546 
4547         static final Character cache[] = new Character[127 + 1];
4548 
4549         static {
4550             for (int i = 0; i < cache.length; i++)
4551                 cache[i] = new Character((char)i);
4552         }
4553     }
4554 
4555     /**
4556      * Returns a <tt>Character</tt> instance representing the specified
4557      * <tt>char</tt> value.
4558      * If a new <tt>Character</tt> instance is not required, this method
4559      * should generally be used in preference to the constructor
4560      * {@link #Character(char)}, as this method is likely to yield
4561      * significantly better space and time performance by caching
4562      * frequently requested values.
4563      *
4564      * This method will always cache values in the range {@code
4565      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
4566      * cache other values outside of this range.
4567      *
4568      * @param  c a char value.
4569      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
4570      * @since  1.5
4571      */
4572     public static Character valueOf(char c) {
4573         if (c <= 127) { // must cache
4574             return CharacterCache.cache[(int)c];
4575         }
4576         return new Character(c);
4577     }
4578 
4579     /**
4580      * Returns the value of this {@code Character} object.
4581      * @return  the primitive {@code char} value represented by
4582      *          this object.
4583      */
4584     public char charValue() {
4585         return value;
4586     }
4587 
4588     /**
4589      * Returns a hash code for this {@code Character}; equal to the result
4590      * of invoking {@code charValue()}.
4591      *
4592      * @return a hash code value for this {@code Character}
4593      */
4594     @Override
4595     public int hashCode() {
4596         return Character.hashCode(value);
4597     }
4598 
4599     /**
4600      * Returns a hash code for a {@code char} value; compatible with
4601      * {@code Character.hashCode()}.
4602      *
4603      * @since 1.8
4604      *
4605      * @param value The {@code char} for which to return a hash code.
4606      * @return a hash code value for a {@code char} value.
4607      */
4608     public static int hashCode(char value) {
4609         return (int)value;
4610     }
4611 
4612     /**
4613      * Compares this object against the specified object.
4614      * The result is {@code true} if and only if the argument is not
4615      * {@code null} and is a {@code Character} object that
4616      * represents the same {@code char} value as this object.
4617      *
4618      * @param   obj   the object to compare with.
4619      * @return  {@code true} if the objects are the same;
4620      *          {@code false} otherwise.
4621      */
4622     public boolean equals(Object obj) {
4623         if (obj instanceof Character) {
4624             return value == ((Character)obj).charValue();
4625         }
4626         return false;
4627     }
4628 
4629     /**
4630      * Returns a {@code String} object representing this
4631      * {@code Character}'s value.  The result is a string of
4632      * length 1 whose sole component is the primitive
4633      * {@code char} value represented by this
4634      * {@code Character} object.
4635      *
4636      * @return  a string representation of this object.
4637      */
4638     public String toString() {
4639         char buf[] = {value};
4640         return String.valueOf(buf);
4641     }
4642 
4643     /**
4644      * Returns a {@code String} object representing the
4645      * specified {@code char}.  The result is a string of length
4646      * 1 consisting solely of the specified {@code char}.
4647      *
4648      * @param c the {@code char} to be converted
4649      * @return the string representation of the specified {@code char}
4650      * @since 1.4
4651      */
4652     public static String toString(char c) {
4653         return String.valueOf(c);
4654     }
4655 
4656     /**
4657      * Determines whether the specified code point is a valid
4658      * <a href="http://www.unicode.org/glossary/#code_point">
4659      * Unicode code point value</a>.
4660      *
4661      * @param  codePoint the Unicode code point to be tested
4662      * @return {@code true} if the specified code point value is between
4663      *         {@link #MIN_CODE_POINT} and
4664      *         {@link #MAX_CODE_POINT} inclusive;
4665      *         {@code false} otherwise.
4666      * @since  1.5
4667      */
4668     public static boolean isValidCodePoint(int codePoint) {
4669         // Optimized form of:
4670         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4671         int plane = codePoint >>> 16;
4672         return plane < ((MAX_CODE_POINT + 1) >>> 16);
4673     }
4674 
4675     /**
4676      * Determines whether the specified character (Unicode code point)
4677      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4678      * Such code points can be represented using a single {@code char}.
4679      *
4680      * @param  codePoint the character (Unicode code point) to be tested
4681      * @return {@code true} if the specified code point is between
4682      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4683      *         {@code false} otherwise.
4684      * @since  1.7
4685      */
4686     public static boolean isBmpCodePoint(int codePoint) {
4687         return codePoint >>> 16 == 0;
4688         // Optimized form of:
4689         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4690         // We consistently use logical shift (>>>) to facilitate
4691         // additional runtime optimizations.
4692     }
4693 
4694     /**
4695      * Determines whether the specified character (Unicode code point)
4696      * is in the <a href="#supplementary">supplementary character</a> range.
4697      *
4698      * @param  codePoint the character (Unicode code point) to be tested
4699      * @return {@code true} if the specified code point is between
4700      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4701      *         {@link #MAX_CODE_POINT} inclusive;
4702      *         {@code false} otherwise.
4703      * @since  1.5
4704      */
4705     public static boolean isSupplementaryCodePoint(int codePoint) {
4706         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4707             && codePoint <  MAX_CODE_POINT + 1;
4708     }
4709 
4710     /**
4711      * Determines if the given {@code char} value is a
4712      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4713      * Unicode high-surrogate code unit</a>
4714      * (also known as <i>leading-surrogate code unit</i>).
4715      *
4716      * <p>Such values do not represent characters by themselves,
4717      * but are used in the representation of
4718      * <a href="#supplementary">supplementary characters</a>
4719      * in the UTF-16 encoding.
4720      *
4721      * @param  ch the {@code char} value to be tested.
4722      * @return {@code true} if the {@code char} value is between
4723      *         {@link #MIN_HIGH_SURROGATE} and
4724      *         {@link #MAX_HIGH_SURROGATE} inclusive;
4725      *         {@code false} otherwise.
4726      * @see    Character#isLowSurrogate(char)
4727      * @see    Character.UnicodeBlock#of(int)
4728      * @since  1.5
4729      */
4730     public static boolean isHighSurrogate(char ch) {
4731         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4732         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4733     }
4734 
4735     /**
4736      * Determines if the given {@code char} value is a
4737      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4738      * Unicode low-surrogate code unit</a>
4739      * (also known as <i>trailing-surrogate code unit</i>).
4740      *
4741      * <p>Such values do not represent characters by themselves,
4742      * but are used in the representation of
4743      * <a href="#supplementary">supplementary characters</a>
4744      * in the UTF-16 encoding.
4745      *
4746      * @param  ch the {@code char} value to be tested.
4747      * @return {@code true} if the {@code char} value is between
4748      *         {@link #MIN_LOW_SURROGATE} and
4749      *         {@link #MAX_LOW_SURROGATE} inclusive;
4750      *         {@code false} otherwise.
4751      * @see    Character#isHighSurrogate(char)
4752      * @since  1.5
4753      */
4754     public static boolean isLowSurrogate(char ch) {
4755         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4756     }
4757 
4758     /**
4759      * Determines if the given {@code char} value is a Unicode
4760      * <i>surrogate code unit</i>.
4761      *
4762      * <p>Such values do not represent characters by themselves,
4763      * but are used in the representation of
4764      * <a href="#supplementary">supplementary characters</a>
4765      * in the UTF-16 encoding.
4766      *
4767      * <p>A char value is a surrogate code unit if and only if it is either
4768      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4769      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4770      *
4771      * @param  ch the {@code char} value to be tested.
4772      * @return {@code true} if the {@code char} value is between
4773      *         {@link #MIN_SURROGATE} and
4774      *         {@link #MAX_SURROGATE} inclusive;
4775      *         {@code false} otherwise.
4776      * @since  1.7
4777      */
4778     public static boolean isSurrogate(char ch) {
4779         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4780     }
4781 
4782     /**
4783      * Determines whether the specified pair of {@code char}
4784      * values is a valid
4785      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4786      * Unicode surrogate pair</a>.
4787 
4788      * <p>This method is equivalent to the expression:
4789      * <blockquote><pre>{@code
4790      * isHighSurrogate(high) && isLowSurrogate(low)
4791      * }</pre></blockquote>
4792      *
4793      * @param  high the high-surrogate code value to be tested
4794      * @param  low the low-surrogate code value to be tested
4795      * @return {@code true} if the specified high and
4796      * low-surrogate code values represent a valid surrogate pair;
4797      * {@code false} otherwise.
4798      * @since  1.5
4799      */
4800     public static boolean isSurrogatePair(char high, char low) {
4801         return isHighSurrogate(high) && isLowSurrogate(low);
4802     }
4803 
4804     /**
4805      * Determines the number of {@code char} values needed to
4806      * represent the specified character (Unicode code point). If the
4807      * specified character is equal to or greater than 0x10000, then
4808      * the method returns 2. Otherwise, the method returns 1.
4809      *
4810      * <p>This method doesn't validate the specified character to be a
4811      * valid Unicode code point. The caller must validate the
4812      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
4813      * if necessary.
4814      *
4815      * @param   codePoint the character (Unicode code point) to be tested.
4816      * @return  2 if the character is a valid supplementary character; 1 otherwise.
4817      * @see     Character#isSupplementaryCodePoint(int)
4818      * @since   1.5
4819      */
4820     public static int charCount(int codePoint) {
4821         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4822     }
4823 
4824     /**
4825      * Converts the specified surrogate pair to its supplementary code
4826      * point value. This method does not validate the specified
4827      * surrogate pair. The caller must validate it using {@link
4828      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4829      *
4830      * @param  high the high-surrogate code unit
4831      * @param  low the low-surrogate code unit
4832      * @return the supplementary code point composed from the
4833      *         specified surrogate pair.
4834      * @since  1.5
4835      */
4836     public static int toCodePoint(char high, char low) {
4837         // Optimized form of:
4838         // return ((high - MIN_HIGH_SURROGATE) << 10)
4839         //         + (low - MIN_LOW_SURROGATE)
4840         //         + MIN_SUPPLEMENTARY_CODE_POINT;
4841         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4842                                        - (MIN_HIGH_SURROGATE << 10)
4843                                        - MIN_LOW_SURROGATE);
4844     }
4845 
4846     /**
4847      * Returns the code point at the given index of the
4848      * {@code CharSequence}. If the {@code char} value at
4849      * the given index in the {@code CharSequence} is in the
4850      * high-surrogate range, the following index is less than the
4851      * length of the {@code CharSequence}, and the
4852      * {@code char} value at the following index is in the
4853      * low-surrogate range, then the supplementary code point
4854      * corresponding to this surrogate pair is returned. Otherwise,
4855      * the {@code char} value at the given index is returned.
4856      *
4857      * @param seq a sequence of {@code char} values (Unicode code
4858      * units)
4859      * @param index the index to the {@code char} values (Unicode
4860      * code units) in {@code seq} to be converted
4861      * @return the Unicode code point at the given index
4862      * @exception NullPointerException if {@code seq} is null.
4863      * @exception IndexOutOfBoundsException if the value
4864      * {@code index} is negative or not less than
4865      * {@link CharSequence#length() seq.length()}.
4866      * @since  1.5
4867      */
4868     public static int codePointAt(CharSequence seq, int index) {
4869         char c1 = seq.charAt(index);
4870         if (isHighSurrogate(c1) && ++index < seq.length()) {
4871             char c2 = seq.charAt(index);
4872             if (isLowSurrogate(c2)) {
4873                 return toCodePoint(c1, c2);
4874             }
4875         }
4876         return c1;
4877     }
4878 
4879     /**
4880      * Returns the code point at the given index of the
4881      * {@code char} array. If the {@code char} value at
4882      * the given index in the {@code char} array is in the
4883      * high-surrogate range, the following index is less than the
4884      * length of the {@code char} array, and the
4885      * {@code char} value at the following index is in the
4886      * low-surrogate range, then the supplementary code point
4887      * corresponding to this surrogate pair is returned. Otherwise,
4888      * the {@code char} value at the given index is returned.
4889      *
4890      * @param a the {@code char} array
4891      * @param index the index to the {@code char} values (Unicode
4892      * code units) in the {@code char} array to be converted
4893      * @return the Unicode code point at the given index
4894      * @exception NullPointerException if {@code a} is null.
4895      * @exception IndexOutOfBoundsException if the value
4896      * {@code index} is negative or not less than
4897      * the length of the {@code char} array.
4898      * @since  1.5
4899      */
4900     public static int codePointAt(char[] a, int index) {
4901         return codePointAtImpl(a, index, a.length);
4902     }
4903 
4904     /**
4905      * Returns the code point at the given index of the
4906      * {@code char} array, where only array elements with
4907      * {@code index} less than {@code limit} can be used. If
4908      * the {@code char} value at the given index in the
4909      * {@code char} array is in the high-surrogate range, the
4910      * following index is less than the {@code limit}, and the
4911      * {@code char} value at the following index is in the
4912      * low-surrogate range, then the supplementary code point
4913      * corresponding to this surrogate pair is returned. Otherwise,
4914      * the {@code char} value at the given index is returned.
4915      *
4916      * @param a the {@code char} array
4917      * @param index the index to the {@code char} values (Unicode
4918      * code units) in the {@code char} array to be converted
4919      * @param limit the index after the last array element that
4920      * can be used in the {@code char} array
4921      * @return the Unicode code point at the given index
4922      * @exception NullPointerException if {@code a} is null.
4923      * @exception IndexOutOfBoundsException if the {@code index}
4924      * argument is negative or not less than the {@code limit}
4925      * argument, or if the {@code limit} argument is negative or
4926      * greater than the length of the {@code char} array.
4927      * @since  1.5
4928      */
4929     public static int codePointAt(char[] a, int index, int limit) {
4930         if (index >= limit || limit < 0 || limit > a.length) {
4931             throw new IndexOutOfBoundsException();
4932         }
4933         return codePointAtImpl(a, index, limit);
4934     }
4935 
4936     // throws ArrayIndexOutOfBoundsException if index out of bounds
4937     static int codePointAtImpl(char[] a, int index, int limit) {
4938         char c1 = a[index];
4939         if (isHighSurrogate(c1) && ++index < limit) {
4940             char c2 = a[index];
4941             if (isLowSurrogate(c2)) {
4942                 return toCodePoint(c1, c2);
4943             }
4944         }
4945         return c1;
4946     }
4947 
4948     /**
4949      * Returns the code point preceding the given index of the
4950      * {@code CharSequence}. If the {@code char} value at
4951      * {@code (index - 1)} in the {@code CharSequence} is in
4952      * the low-surrogate range, {@code (index - 2)} is not
4953      * negative, and the {@code char} value at {@code (index - 2)}
4954      * in the {@code CharSequence} is in the
4955      * high-surrogate range, then the supplementary code point
4956      * corresponding to this surrogate pair is returned. Otherwise,
4957      * the {@code char} value at {@code (index - 1)} is
4958      * returned.
4959      *
4960      * @param seq the {@code CharSequence} instance
4961      * @param index the index following the code point that should be returned
4962      * @return the Unicode code point value before the given index.
4963      * @exception NullPointerException if {@code seq} is null.
4964      * @exception IndexOutOfBoundsException if the {@code index}
4965      * argument is less than 1 or greater than {@link
4966      * CharSequence#length() seq.length()}.
4967      * @since  1.5
4968      */
4969     public static int codePointBefore(CharSequence seq, int index) {
4970         char c2 = seq.charAt(--index);
4971         if (isLowSurrogate(c2) && index > 0) {
4972             char c1 = seq.charAt(--index);
4973             if (isHighSurrogate(c1)) {
4974                 return toCodePoint(c1, c2);
4975             }
4976         }
4977         return c2;
4978     }
4979 
4980     /**
4981      * Returns the code point preceding the given index of the
4982      * {@code char} array. If the {@code char} value at
4983      * {@code (index - 1)} in the {@code char} array is in
4984      * the low-surrogate range, {@code (index - 2)} is not
4985      * negative, and the {@code char} value at {@code (index - 2)}
4986      * in the {@code char} array is in the
4987      * high-surrogate range, then the supplementary code point
4988      * corresponding to this surrogate pair is returned. Otherwise,
4989      * the {@code char} value at {@code (index - 1)} is
4990      * returned.
4991      *
4992      * @param a the {@code char} array
4993      * @param index the index following the code point that should be returned
4994      * @return the Unicode code point value before the given index.
4995      * @exception NullPointerException if {@code a} is null.
4996      * @exception IndexOutOfBoundsException if the {@code index}
4997      * argument is less than 1 or greater than the length of the
4998      * {@code char} array.
4999      * @since  1.5
5000      */
5001     public static int codePointBefore(char[] a, int index) {
5002         return codePointBeforeImpl(a, index, 0);
5003     }
5004 
5005     /**
5006      * Returns the code point preceding the given index of the
5007      * {@code char} array, where only array elements with
5008      * {@code index} greater than or equal to {@code start}
5009      * can be used. If the {@code char} value at {@code (index - 1)}
5010      * in the {@code char} array is in the
5011      * low-surrogate range, {@code (index - 2)} is not less than
5012      * {@code start}, and the {@code char} value at
5013      * {@code (index - 2)} in the {@code char} array is in
5014      * the high-surrogate range, then the supplementary code point
5015      * corresponding to this surrogate pair is returned. Otherwise,
5016      * the {@code char} value at {@code (index - 1)} is
5017      * returned.
5018      *
5019      * @param a the {@code char} array
5020      * @param index the index following the code point that should be returned
5021      * @param start the index of the first array element in the
5022      * {@code char} array
5023      * @return the Unicode code point value before the given index.
5024      * @exception NullPointerException if {@code a} is null.
5025      * @exception IndexOutOfBoundsException if the {@code index}
5026      * argument is not greater than the {@code start} argument or
5027      * is greater than the length of the {@code char} array, or
5028      * if the {@code start} argument is negative or not less than
5029      * the length of the {@code char} array.
5030      * @since  1.5
5031      */
5032     public static int codePointBefore(char[] a, int index, int start) {
5033         if (index <= start || start < 0 || start >= a.length) {
5034             throw new IndexOutOfBoundsException();
5035         }
5036         return codePointBeforeImpl(a, index, start);
5037     }
5038 
5039     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
5040     static int codePointBeforeImpl(char[] a, int index, int start) {
5041         char c2 = a[--index];
5042         if (isLowSurrogate(c2) && index > start) {
5043             char c1 = a[--index];
5044             if (isHighSurrogate(c1)) {
5045                 return toCodePoint(c1, c2);
5046             }
5047         }
5048         return c2;
5049     }
5050 
5051     /**
5052      * Returns the leading surrogate (a
5053      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
5054      * high surrogate code unit</a>) of the
5055      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5056      * surrogate pair</a>
5057      * representing the specified supplementary character (Unicode
5058      * code point) in the UTF-16 encoding.  If the specified character
5059      * is not a
5060      * <a href="Character.html#supplementary">supplementary character</a>,
5061      * an unspecified {@code char} is returned.
5062      *
5063      * <p>If
5064      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5065      * is {@code true}, then
5066      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
5067      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
5068      * are also always {@code true}.
5069      *
5070      * @param   codePoint a supplementary character (Unicode code point)
5071      * @return  the leading surrogate code unit used to represent the
5072      *          character in the UTF-16 encoding
5073      * @since   1.7
5074      */
5075     public static char highSurrogate(int codePoint) {
5076         return (char) ((codePoint >>> 10)
5077             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
5078     }
5079 
5080     /**
5081      * Returns the trailing surrogate (a
5082      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
5083      * low surrogate code unit</a>) of the
5084      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5085      * surrogate pair</a>
5086      * representing the specified supplementary character (Unicode
5087      * code point) in the UTF-16 encoding.  If the specified character
5088      * is not a
5089      * <a href="Character.html#supplementary">supplementary character</a>,
5090      * an unspecified {@code char} is returned.
5091      *
5092      * <p>If
5093      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5094      * is {@code true}, then
5095      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
5096      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
5097      * are also always {@code true}.
5098      *
5099      * @param   codePoint a supplementary character (Unicode code point)
5100      * @return  the trailing surrogate code unit used to represent the
5101      *          character in the UTF-16 encoding
5102      * @since   1.7
5103      */
5104     public static char lowSurrogate(int codePoint) {
5105         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
5106     }
5107 
5108     /**
5109      * Converts the specified character (Unicode code point) to its
5110      * UTF-16 representation. If the specified code point is a BMP
5111      * (Basic Multilingual Plane or Plane 0) value, the same value is
5112      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
5113      * specified code point is a supplementary character, its
5114      * surrogate values are stored in {@code dst[dstIndex]}
5115      * (high-surrogate) and {@code dst[dstIndex+1]}
5116      * (low-surrogate), and 2 is returned.
5117      *
5118      * @param  codePoint the character (Unicode code point) to be converted.
5119      * @param  dst an array of {@code char} in which the
5120      * {@code codePoint}'s UTF-16 value is stored.
5121      * @param dstIndex the start index into the {@code dst}
5122      * array where the converted value is stored.
5123      * @return 1 if the code point is a BMP code point, 2 if the
5124      * code point is a supplementary code point.
5125      * @exception IllegalArgumentException if the specified
5126      * {@code codePoint} is not a valid Unicode code point.
5127      * @exception NullPointerException if the specified {@code dst} is null.
5128      * @exception IndexOutOfBoundsException if {@code dstIndex}
5129      * is negative or not less than {@code dst.length}, or if
5130      * {@code dst} at {@code dstIndex} doesn't have enough
5131      * array element(s) to store the resulting {@code char}
5132      * value(s). (If {@code dstIndex} is equal to
5133      * {@code dst.length-1} and the specified
5134      * {@code codePoint} is a supplementary character, the
5135      * high-surrogate value is not stored in
5136      * {@code dst[dstIndex]}.)
5137      * @since  1.5
5138      */
5139     public static int toChars(int codePoint, char[] dst, int dstIndex) {
5140         if (isBmpCodePoint(codePoint)) {
5141             dst[dstIndex] = (char) codePoint;
5142             return 1;
5143         } else if (isValidCodePoint(codePoint)) {
5144             toSurrogates(codePoint, dst, dstIndex);
5145             return 2;
5146         } else {
5147             throw new IllegalArgumentException();
5148         }
5149     }
5150 
5151     /**
5152      * Converts the specified character (Unicode code point) to its
5153      * UTF-16 representation stored in a {@code char} array. If
5154      * the specified code point is a BMP (Basic Multilingual Plane or
5155      * Plane 0) value, the resulting {@code char} array has
5156      * the same value as {@code codePoint}. If the specified code
5157      * point is a supplementary code point, the resulting
5158      * {@code char} array has the corresponding surrogate pair.
5159      *
5160      * @param  codePoint a Unicode code point
5161      * @return a {@code char} array having
5162      *         {@code codePoint}'s UTF-16 representation.
5163      * @exception IllegalArgumentException if the specified
5164      * {@code codePoint} is not a valid Unicode code point.
5165      * @since  1.5
5166      */
5167     public static char[] toChars(int codePoint) {
5168         if (isBmpCodePoint(codePoint)) {
5169             return new char[] { (char) codePoint };
5170         } else if (isValidCodePoint(codePoint)) {
5171             char[] result = new char[2];
5172             toSurrogates(codePoint, result, 0);
5173             return result;
5174         } else {
5175             throw new IllegalArgumentException();
5176         }
5177     }
5178 
5179     static void toSurrogates(int codePoint, char[] dst, int index) {
5180         // We write elements "backwards" to guarantee all-or-nothing
5181         dst[index+1] = lowSurrogate(codePoint);
5182         dst[index] = highSurrogate(codePoint);
5183     }
5184 
5185     /**
5186      * Returns the number of Unicode code points in the text range of
5187      * the specified char sequence. The text range begins at the
5188      * specified {@code beginIndex} and extends to the
5189      * {@code char} at index {@code endIndex - 1}. Thus the
5190      * length (in {@code char}s) of the text range is
5191      * {@code endIndex-beginIndex}. Unpaired surrogates within
5192      * the text range count as one code point each.
5193      *
5194      * @param seq the char sequence
5195      * @param beginIndex the index to the first {@code char} of
5196      * the text range.
5197      * @param endIndex the index after the last {@code char} of
5198      * the text range.
5199      * @return the number of Unicode code points in the specified text
5200      * range
5201      * @exception NullPointerException if {@code seq} is null.
5202      * @exception IndexOutOfBoundsException if the
5203      * {@code beginIndex} is negative, or {@code endIndex}
5204      * is larger than the length of the given sequence, or
5205      * {@code beginIndex} is larger than {@code endIndex}.
5206      * @since  1.5
5207      */
5208     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5209         int length = seq.length();
5210         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5211             throw new IndexOutOfBoundsException();
5212         }
5213         int n = endIndex - beginIndex;
5214         for (int i = beginIndex; i < endIndex; ) {
5215             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5216                 isLowSurrogate(seq.charAt(i))) {
5217                 n--;
5218                 i++;
5219             }
5220         }
5221         return n;
5222     }
5223 
5224     /**
5225      * Returns the number of Unicode code points in a subarray of the
5226      * {@code char} array argument. The {@code offset}
5227      * argument is the index of the first {@code char} of the
5228      * subarray and the {@code count} argument specifies the
5229      * length of the subarray in {@code char}s. Unpaired
5230      * surrogates within the subarray count as one code point each.
5231      *
5232      * @param a the {@code char} array
5233      * @param offset the index of the first {@code char} in the
5234      * given {@code char} array
5235      * @param count the length of the subarray in {@code char}s
5236      * @return the number of Unicode code points in the specified subarray
5237      * @exception NullPointerException if {@code a} is null.
5238      * @exception IndexOutOfBoundsException if {@code offset} or
5239      * {@code count} is negative, or if {@code offset +
5240      * count} is larger than the length of the given array.
5241      * @since  1.5
5242      */
5243     public static int codePointCount(char[] a, int offset, int count) {
5244         if (count > a.length - offset || offset < 0 || count < 0) {
5245             throw new IndexOutOfBoundsException();
5246         }
5247         return codePointCountImpl(a, offset, count);
5248     }
5249 
5250     static int codePointCountImpl(char[] a, int offset, int count) {
5251         int endIndex = offset + count;
5252         int n = count;
5253         for (int i = offset; i < endIndex; ) {
5254             if (isHighSurrogate(a[i++]) && i < endIndex &&
5255                 isLowSurrogate(a[i])) {
5256                 n--;
5257                 i++;
5258             }
5259         }
5260         return n;
5261     }
5262 
5263     /**
5264      * Returns the index within the given char sequence that is offset
5265      * from the given {@code index} by {@code codePointOffset}
5266      * code points. Unpaired surrogates within the text range given by
5267      * {@code index} and {@code codePointOffset} count as
5268      * one code point each.
5269      *
5270      * @param seq the char sequence
5271      * @param index the index to be offset
5272      * @param codePointOffset the offset in code points
5273      * @return the index within the char sequence
5274      * @exception NullPointerException if {@code seq} is null.
5275      * @exception IndexOutOfBoundsException if {@code index}
5276      *   is negative or larger than the length of the char sequence,
5277      *   or if {@code codePointOffset} is positive and the
5278      *   subsequence starting with {@code index} has fewer than
5279      *   {@code codePointOffset} code points, or if
5280      *   {@code codePointOffset} is negative and the subsequence
5281      *   before {@code index} has fewer than the absolute value
5282      *   of {@code codePointOffset} code points.
5283      * @since 1.5
5284      */
5285     public static int offsetByCodePoints(CharSequence seq, int index,
5286                                          int codePointOffset) {
5287         int length = seq.length();
5288         if (index < 0 || index > length) {
5289             throw new IndexOutOfBoundsException();
5290         }
5291 
5292         int x = index;
5293         if (codePointOffset >= 0) {
5294             int i;
5295             for (i = 0; x < length && i < codePointOffset; i++) {
5296                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5297                     isLowSurrogate(seq.charAt(x))) {
5298                     x++;
5299                 }
5300             }
5301             if (i < codePointOffset) {
5302                 throw new IndexOutOfBoundsException();
5303             }
5304         } else {
5305             int i;
5306             for (i = codePointOffset; x > 0 && i < 0; i++) {
5307                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5308                     isHighSurrogate(seq.charAt(x-1))) {
5309                     x--;
5310                 }
5311             }
5312             if (i < 0) {
5313                 throw new IndexOutOfBoundsException();
5314             }
5315         }
5316         return x;
5317     }
5318 
5319     /**
5320      * Returns the index within the given {@code char} subarray
5321      * that is offset from the given {@code index} by
5322      * {@code codePointOffset} code points. The
5323      * {@code start} and {@code count} arguments specify a
5324      * subarray of the {@code char} array. Unpaired surrogates
5325      * within the text range given by {@code index} and
5326      * {@code codePointOffset} count as one code point each.
5327      *
5328      * @param a the {@code char} array
5329      * @param start the index of the first {@code char} of the
5330      * subarray
5331      * @param count the length of the subarray in {@code char}s
5332      * @param index the index to be offset
5333      * @param codePointOffset the offset in code points
5334      * @return the index within the subarray
5335      * @exception NullPointerException if {@code a} is null.
5336      * @exception IndexOutOfBoundsException
5337      *   if {@code start} or {@code count} is negative,
5338      *   or if {@code start + count} is larger than the length of
5339      *   the given array,
5340      *   or if {@code index} is less than {@code start} or
5341      *   larger than {@code start + count},
5342      *   or if {@code codePointOffset} is positive and the text range
5343      *   starting with {@code index} and ending with {@code start + count - 1}
5344      *   has fewer than {@code codePointOffset} code
5345      *   points,
5346      *   or if {@code codePointOffset} is negative and the text range
5347      *   starting with {@code start} and ending with {@code index - 1}
5348      *   has fewer than the absolute value of
5349      *   {@code codePointOffset} code points.
5350      * @since 1.5
5351      */
5352     public static int offsetByCodePoints(char[] a, int start, int count,
5353                                          int index, int codePointOffset) {
5354         if (count > a.length-start || start < 0 || count < 0
5355             || index < start || index > start+count) {
5356             throw new IndexOutOfBoundsException();
5357         }
5358         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5359     }
5360 
5361     static int offsetByCodePointsImpl(char[]a, int start, int count,
5362                                       int index, int codePointOffset) {
5363         int x = index;
5364         if (codePointOffset >= 0) {
5365             int limit = start + count;
5366             int i;
5367             for (i = 0; x < limit && i < codePointOffset; i++) {
5368                 if (isHighSurrogate(a[x++]) && x < limit &&
5369                     isLowSurrogate(a[x])) {
5370                     x++;
5371                 }
5372             }
5373             if (i < codePointOffset) {
5374                 throw new IndexOutOfBoundsException();
5375             }
5376         } else {
5377             int i;
5378             for (i = codePointOffset; x > start && i < 0; i++) {
5379                 if (isLowSurrogate(a[--x]) && x > start &&
5380                     isHighSurrogate(a[x-1])) {
5381                     x--;
5382                 }
5383             }
5384             if (i < 0) {
5385                 throw new IndexOutOfBoundsException();
5386             }
5387         }
5388         return x;
5389     }
5390 
5391     /**
5392      * Determines if the specified character is a lowercase character.
5393      * <p>
5394      * A character is lowercase if its general category type, provided
5395      * by {@code Character.getType(ch)}, is
5396      * {@code LOWERCASE_LETTER}, or it has contributory property
5397      * Other_Lowercase as defined by the Unicode Standard.
5398      * <p>
5399      * The following are examples of lowercase characters:
5400      * <blockquote><pre>
5401      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5402      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
5403      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
5404      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
5405      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
5406      * </pre></blockquote>
5407      * <p> Many other Unicode characters are lowercase too.
5408      *
5409      * <p><b>Note:</b> This method cannot handle <a
5410      * href="#supplementary"> supplementary characters</a>. To support
5411      * all Unicode characters, including supplementary characters, use
5412      * the {@link #isLowerCase(int)} method.
5413      *
5414      * @param   ch   the character to be tested.
5415      * @return  {@code true} if the character is lowercase;
5416      *          {@code false} otherwise.
5417      * @see     Character#isUpperCase(char)
5418      * @see     Character#isTitleCase(char)
5419      * @see     Character#toLowerCase(char)
5420      * @see     Character#getType(char)
5421      */
5422     public static boolean isLowerCase(char ch) {
5423         return isLowerCase((int)ch);
5424     }
5425 
5426     /**
5427      * Determines if the specified character (Unicode code point) is a
5428      * lowercase character.
5429      * <p>
5430      * A character is lowercase if its general category type, provided
5431      * by {@link Character#getType getType(codePoint)}, is
5432      * {@code LOWERCASE_LETTER}, or it has contributory property
5433      * Other_Lowercase as defined by the Unicode Standard.
5434      * <p>
5435      * The following are examples of lowercase characters:
5436      * <blockquote><pre>
5437      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5438      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
5439      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
5440      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
5441      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
5442      * </pre></blockquote>
5443      * <p> Many other Unicode characters are lowercase too.
5444      *
5445      * @param   codePoint the character (Unicode code point) to be tested.
5446      * @return  {@code true} if the character is lowercase;
5447      *          {@code false} otherwise.
5448      * @see     Character#isUpperCase(int)
5449      * @see     Character#isTitleCase(int)
5450      * @see     Character#toLowerCase(int)
5451      * @see     Character#getType(int)
5452      * @since   1.5
5453      */
5454     public static boolean isLowerCase(int codePoint) {
5455         return getType(codePoint) == Character.LOWERCASE_LETTER ||
5456                CharacterData.of(codePoint).isOtherLowercase(codePoint);
5457     }
5458 
5459     /**
5460      * Determines if the specified character is an uppercase character.
5461      * <p>
5462      * A character is uppercase if its general category type, provided by
5463      * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
5464      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5465      * <p>
5466      * The following are examples of uppercase characters:
5467      * <blockquote><pre>
5468      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5469      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
5470      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
5471      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
5472      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
5473      * </pre></blockquote>
5474      * <p> Many other Unicode characters are uppercase too.
5475      *
5476      * <p><b>Note:</b> This method cannot handle <a
5477      * href="#supplementary"> supplementary characters</a>. To support
5478      * all Unicode characters, including supplementary characters, use
5479      * the {@link #isUpperCase(int)} method.
5480      *
5481      * @param   ch   the character to be tested.
5482      * @return  {@code true} if the character is uppercase;
5483      *          {@code false} otherwise.
5484      * @see     Character#isLowerCase(char)
5485      * @see     Character#isTitleCase(char)
5486      * @see     Character#toUpperCase(char)
5487      * @see     Character#getType(char)
5488      * @since   1.0
5489      */
5490     public static boolean isUpperCase(char ch) {
5491         return isUpperCase((int)ch);
5492     }
5493 
5494     /**
5495      * Determines if the specified character (Unicode code point) is an uppercase character.
5496      * <p>
5497      * A character is uppercase if its general category type, provided by
5498      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5499      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5500      * <p>
5501      * The following are examples of uppercase characters:
5502      * <blockquote><pre>
5503      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5504      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
5505      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
5506      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
5507      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
5508      * </pre></blockquote>
5509      * <p> Many other Unicode characters are uppercase too.
5510      *
5511      * @param   codePoint the character (Unicode code point) to be tested.
5512      * @return  {@code true} if the character is uppercase;
5513      *          {@code false} otherwise.
5514      * @see     Character#isLowerCase(int)
5515      * @see     Character#isTitleCase(int)
5516      * @see     Character#toUpperCase(int)
5517      * @see     Character#getType(int)
5518      * @since   1.5
5519      */
5520     public static boolean isUpperCase(int codePoint) {
5521         return getType(codePoint) == Character.UPPERCASE_LETTER ||
5522                CharacterData.of(codePoint).isOtherUppercase(codePoint);
5523     }
5524 
5525     /**
5526      * Determines if the specified character is a titlecase character.
5527      * <p>
5528      * A character is a titlecase character if its general
5529      * category type, provided by {@code Character.getType(ch)},
5530      * is {@code TITLECASE_LETTER}.
5531      * <p>
5532      * Some characters look like pairs of Latin letters. For example, there
5533      * is an uppercase letter that looks like "LJ" and has a corresponding
5534      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5535      * is the appropriate form to use when rendering a word in lowercase
5536      * with initial capitals, as for a book title.
5537      * <p>
5538      * These are some of the Unicode characters for which this method returns
5539      * {@code true}:
5540      * <ul>
5541      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5542      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5543      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5544      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5545      * </ul>
5546      * <p> Many other Unicode characters are titlecase too.
5547      *
5548      * <p><b>Note:</b> This method cannot handle <a
5549      * href="#supplementary"> supplementary characters</a>. To support
5550      * all Unicode characters, including supplementary characters, use
5551      * the {@link #isTitleCase(int)} method.
5552      *
5553      * @param   ch   the character to be tested.
5554      * @return  {@code true} if the character is titlecase;
5555      *          {@code false} otherwise.
5556      * @see     Character#isLowerCase(char)
5557      * @see     Character#isUpperCase(char)
5558      * @see     Character#toTitleCase(char)
5559      * @see     Character#getType(char)
5560      * @since   1.0.2
5561      */
5562     public static boolean isTitleCase(char ch) {
5563         return isTitleCase((int)ch);
5564     }
5565 
5566     /**
5567      * Determines if the specified character (Unicode code point) is a titlecase character.
5568      * <p>
5569      * A character is a titlecase character if its general
5570      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5571      * is {@code TITLECASE_LETTER}.
5572      * <p>
5573      * Some characters look like pairs of Latin letters. For example, there
5574      * is an uppercase letter that looks like "LJ" and has a corresponding
5575      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5576      * is the appropriate form to use when rendering a word in lowercase
5577      * with initial capitals, as for a book title.
5578      * <p>
5579      * These are some of the Unicode characters for which this method returns
5580      * {@code true}:
5581      * <ul>
5582      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5583      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5584      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5585      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5586      * </ul>
5587      * <p> Many other Unicode characters are titlecase too.
5588      *
5589      * @param   codePoint the character (Unicode code point) to be tested.
5590      * @return  {@code true} if the character is titlecase;
5591      *          {@code false} otherwise.
5592      * @see     Character#isLowerCase(int)
5593      * @see     Character#isUpperCase(int)
5594      * @see     Character#toTitleCase(int)
5595      * @see     Character#getType(int)
5596      * @since   1.5
5597      */
5598     public static boolean isTitleCase(int codePoint) {
5599         return getType(codePoint) == Character.TITLECASE_LETTER; // true only for category TITLECASE_LETTER
5600     }
5601 
5602     /**
5603      * Determines if the specified character is a digit.
5604      * <p>
5605      * A character is a digit if its general category type, provided
5606      * by {@code Character.getType(ch)}, is
5607      * {@code DECIMAL_DIGIT_NUMBER}.
5608      * <p>
5609      * Some Unicode character ranges that contain digits:
5610      * <ul>
5611      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5612      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5613      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5614      *     Arabic-Indic digits
5615      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5616      *     Extended Arabic-Indic digits
5617      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5618      *     Devanagari digits
5619      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5620      *     Fullwidth digits
5621      * </ul>
5622      *
5623      * Many other character ranges contain digits as well.
5624      *
5625      * <p><b>Note:</b> This method cannot handle <a
5626      * href="#supplementary"> supplementary characters</a>. To support
5627      * all Unicode characters, including supplementary characters, use
5628      * the {@link #isDigit(int)} method.
5629      *
5630      * @param   ch   the character to be tested.
5631      * @return  {@code true} if the character is a digit;
5632      *          {@code false} otherwise.
5633      * @see     Character#digit(char, int)
5634      * @see     Character#forDigit(int, int)
5635      * @see     Character#getType(char)
5636      */
5637     public static boolean isDigit(char ch) {
5638         return isDigit((int)ch); // widen the char to int and defer to the code point overload
5639     }
5640 
5641     /**
5642      * Determines if the specified character (Unicode code point) is a digit.
5643      * <p>
5644      * A character is a digit if its general category type, provided
5645      * by {@link Character#getType(int) getType(codePoint)}, is
5646      * {@code DECIMAL_DIGIT_NUMBER}.
5647      * <p>
5648      * Some Unicode character ranges that contain digits:
5649      * <ul>
5650      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5651      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5652      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5653      *     Arabic-Indic digits
5654      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5655      *     Extended Arabic-Indic digits
5656      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5657      *     Devanagari digits
5658      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5659      *     Fullwidth digits
5660      * </ul>
5661      *
5662      * Many other character ranges contain digits as well.
5663      *
5664      * @param   codePoint the character (Unicode code point) to be tested.
5665      * @return  {@code true} if the character is a digit;
5666      *          {@code false} otherwise.
5667      * @see     Character#forDigit(int, int)
5668      * @see     Character#getType(int)
5669      * @since   1.5
5670      */
5671     public static boolean isDigit(int codePoint) {
5672         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER; // true only for category DECIMAL_DIGIT_NUMBER
5673     }
5674 
5675     /**
5676      * Determines if a character is defined in Unicode.
5677      * <p>
5678      * A character is defined if at least one of the following is true:
5679      * <ul>
5680      * <li>It has an entry in the UnicodeData file.
5681      * <li>It has a value in a range defined by the UnicodeData file.
5682      * </ul>
5683      *
5684      * <p><b>Note:</b> This method cannot handle <a
5685      * href="#supplementary"> supplementary characters</a>. To support
5686      * all Unicode characters, including supplementary characters, use
5687      * the {@link #isDefined(int)} method.
5688      *
5689      * @param   ch   the character to be tested
5690      * @return  {@code true} if the character has a defined meaning
5691      *          in Unicode; {@code false} otherwise.
5692      * @see     Character#isDigit(char)
5693      * @see     Character#isLetter(char)
5694      * @see     Character#isLetterOrDigit(char)
5695      * @see     Character#isLowerCase(char)
5696      * @see     Character#isTitleCase(char)
5697      * @see     Character#isUpperCase(char)
5698      * @since   1.0.2
5699      */
5700     public static boolean isDefined(char ch) {
5701         return isDefined((int)ch); // widen the char to int and defer to the code point overload
5702     }
5703 
5704     /**
5705      * Determines if a character (Unicode code point) is defined in Unicode.
5706      * <p>
5707      * A character is defined if at least one of the following is true:
5708      * <ul>
5709      * <li>It has an entry in the UnicodeData file.
5710      * <li>It has a value in a range defined by the UnicodeData file.
5711      * </ul>
5712      *
5713      * @param   codePoint the character (Unicode code point) to be tested.
5714      * @return  {@code true} if the character has a defined meaning
5715      *          in Unicode; {@code false} otherwise.
5716      * @see     Character#isDigit(int)
5717      * @see     Character#isLetter(int)
5718      * @see     Character#isLetterOrDigit(int)
5719      * @see     Character#isLowerCase(int)
5720      * @see     Character#isTitleCase(int)
5721      * @see     Character#isUpperCase(int)
5722      * @since   1.5
5723      */
5724     public static boolean isDefined(int codePoint) {
5725         return getType(codePoint) != Character.UNASSIGNED; // defined = any category other than UNASSIGNED
5726     }
5727 
5728     /**
5729      * Determines if the specified character is a letter.
5730      * <p>
5731      * A character is considered to be a letter if its general
5732      * category type, provided by {@code Character.getType(ch)},
5733      * is any of the following:
5734      * <ul>
5735      * <li> {@code UPPERCASE_LETTER}
5736      * <li> {@code LOWERCASE_LETTER}
5737      * <li> {@code TITLECASE_LETTER}
5738      * <li> {@code MODIFIER_LETTER}
5739      * <li> {@code OTHER_LETTER}
5740      * </ul>
5741      *
5742      * Not all letters have case. Many characters are
5743      * letters but are neither uppercase nor lowercase nor titlecase.
5744      *
5745      * <p><b>Note:</b> This method cannot handle <a
5746      * href="#supplementary"> supplementary characters</a>. To support
5747      * all Unicode characters, including supplementary characters, use
5748      * the {@link #isLetter(int)} method.
5749      *
5750      * @param   ch   the character to be tested.
5751      * @return  {@code true} if the character is a letter;
5752      *          {@code false} otherwise.
5753      * @see     Character#isDigit(char)
5754      * @see     Character#isJavaIdentifierStart(char)
5755      * @see     Character#isJavaLetter(char)
5756      * @see     Character#isJavaLetterOrDigit(char)
5757      * @see     Character#isLetterOrDigit(char)
5758      * @see     Character#isLowerCase(char)
5759      * @see     Character#isTitleCase(char)
5760      * @see     Character#isUnicodeIdentifierStart(char)
5761      * @see     Character#isUpperCase(char)
5762      */
5763     public static boolean isLetter(char ch) {
5764         return isLetter((int)ch); // widen the char to int and defer to the code point overload
5765     }
5766 
5767     /**
5768      * Determines if the specified character (Unicode code point) is a letter.
5769      * <p>
5770      * A character is considered to be a letter if its general
5771      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5772      * is any of the following:
5773      * <ul>
5774      * <li> {@code UPPERCASE_LETTER}
5775      * <li> {@code LOWERCASE_LETTER}
5776      * <li> {@code TITLECASE_LETTER}
5777      * <li> {@code MODIFIER_LETTER}
5778      * <li> {@code OTHER_LETTER}
5779      * </ul>
5780      *
5781      * Not all letters have case. Many characters are
5782      * letters but are neither uppercase nor lowercase nor titlecase.
5783      *
5784      * @param   codePoint the character (Unicode code point) to be tested.
5785      * @return  {@code true} if the character is a letter;
5786      *          {@code false} otherwise.
5787      * @see     Character#isDigit(int)
5788      * @see     Character#isJavaIdentifierStart(int)
5789      * @see     Character#isLetterOrDigit(int)
5790      * @see     Character#isLowerCase(int)
5791      * @see     Character#isTitleCase(int)
5792      * @see     Character#isUnicodeIdentifierStart(int)
5793      * @see     Character#isUpperCase(int)
5794      * @since   1.5
5795      */
5796     public static boolean isLetter(int codePoint) {
5797         // Bit n of the mask is set when general category n counts as a letter.
5798         final int letterCategories = (1 << Character.UPPERCASE_LETTER)
5799             | (1 << Character.LOWERCASE_LETTER) | (1 << Character.TITLECASE_LETTER)
5800             | (1 << Character.MODIFIER_LETTER) | (1 << Character.OTHER_LETTER);
5801         final int category = getType(codePoint);
5802         return ((letterCategories >> category) & 1) != 0;
5803     }
5804 
5805     /**
5806      * Determines if the specified character is a letter or digit.
5807      * <p>
5808      * A character is considered to be a letter or digit if either
5809      * {@code Character.isLetter(char ch)} or
5810      * {@code Character.isDigit(char ch)} returns
5811      * {@code true} for the character.
5812      *
5813      * <p><b>Note:</b> This method cannot handle <a
5814      * href="#supplementary"> supplementary characters</a>. To support
5815      * all Unicode characters, including supplementary characters, use
5816      * the {@link #isLetterOrDigit(int)} method.
5817      *
5818      * @param   ch   the character to be tested.
5819      * @return  {@code true} if the character is a letter or digit;
5820      *          {@code false} otherwise.
5821      * @see     Character#isDigit(char)
5822      * @see     Character#isJavaIdentifierPart(char)
5823      * @see     Character#isJavaLetter(char)
5824      * @see     Character#isJavaLetterOrDigit(char)
5825      * @see     Character#isLetter(char)
5826      * @see     Character#isUnicodeIdentifierPart(char)
5827      * @since   1.0.2
5828      */
5829     public static boolean isLetterOrDigit(char ch) {
5830         return isLetterOrDigit((int)ch); // widen the char to int and defer to the code point overload
5831     }
5832 
5833     /**
5834      * Determines if the specified character (Unicode code point) is a letter or digit.
5835      * <p>
5836      * A character is considered to be a letter or digit if either
5837      * {@link #isLetter(int) isLetter(codePoint)} or
5838      * {@link #isDigit(int) isDigit(codePoint)} returns
5839      * {@code true} for the character.
5840      *
5841      * @param   codePoint the character (Unicode code point) to be tested.
5842      * @return  {@code true} if the character is a letter or digit;
5843      *          {@code false} otherwise.
5844      * @see     Character#isDigit(int)
5845      * @see     Character#isJavaIdentifierPart(int)
5846      * @see     Character#isLetter(int)
5847      * @see     Character#isUnicodeIdentifierPart(int)
5848      * @since   1.5
5849      */
5850     public static boolean isLetterOrDigit(int codePoint) {
5851         // Bit n of the mask is set when general category n is a letter category or a decimal digit.
5852         final int letterOrDigitCategories = (1 << Character.UPPERCASE_LETTER)
5853             | (1 << Character.LOWERCASE_LETTER) | (1 << Character.TITLECASE_LETTER)
5854             | (1 << Character.MODIFIER_LETTER) | (1 << Character.OTHER_LETTER)
5855             | (1 << Character.DECIMAL_DIGIT_NUMBER);
5856         final int category = getType(codePoint);
5857         return ((letterOrDigitCategories >> category) & 1) != 0;
5858     }
5859 
5860     /**
5861      * Determines if the specified character is permissible as the first
5862      * character in a Java identifier.
5863      * <p>
5864      * A character may start a Java identifier if and only if
5865      * one of the following is true:
5866      * <ul>
5867      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5868      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5869      * <li> {@code ch} is a currency symbol (such as {@code '$'})
5870      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5871      * </ul>
5872      *
5873      * @param   ch the character to be tested.
5874      * @return  {@code true} if the character may start a Java
5875      *          identifier; {@code false} otherwise.
5876      * @see     Character#isJavaLetterOrDigit(char)
5877      * @see     Character#isJavaIdentifierStart(char)
5878      * @see     Character#isJavaIdentifierPart(char)
5879      * @see     Character#isLetter(char)
5880      * @see     Character#isLetterOrDigit(char)
5881      * @see     Character#isUnicodeIdentifierStart(char)
5882      * @since   1.0.2
5883      * @deprecated Replaced by isJavaIdentifierStart(char).
5884      */
5885     @Deprecated
5886     public static boolean isJavaLetter(char ch) {
5887         return isJavaIdentifierStart(ch); // deprecated name: forwards the char unchanged to its replacement
5888     }
5889 
5890     /**
5891      * Determines if the specified character may be part of a Java
5892      * identifier as other than the first character.
5893      * <p>
5894      * A character may be part of a Java identifier if and only if any
5895      * of the following are true:
5896      * <ul>
5897      * <li>  it is a letter
5898      * <li>  it is a currency symbol (such as {@code '$'})
5899      * <li>  it is a connecting punctuation character (such as {@code '_'})
5900      * <li>  it is a digit
5901      * <li>  it is a numeric letter (such as a Roman numeral character)
5902      * <li>  it is a combining mark
5903      * <li>  it is a non-spacing mark
5904      * <li> {@code isIdentifierIgnorable} returns
5905      * {@code true} for the character.
5906      * </ul>
5907      *
5908      * @param   ch the character to be tested.
5909      * @return  {@code true} if the character may be part of a
5910      *          Java identifier; {@code false} otherwise.
5911      * @see     Character#isJavaLetter(char)
5912      * @see     Character#isJavaIdentifierStart(char)
5913      * @see     Character#isJavaIdentifierPart(char)
5914      * @see     Character#isLetter(char)
5915      * @see     Character#isLetterOrDigit(char)
5916      * @see     Character#isUnicodeIdentifierPart(char)
5917      * @see     Character#isIdentifierIgnorable(char)
5918      * @since   1.0.2
5919      * @deprecated Replaced by isJavaIdentifierPart(char).
5920      */
5921     @Deprecated
5922     public static boolean isJavaLetterOrDigit(char ch) {
5923         return isJavaIdentifierPart(ch); // deprecated name: forwards the char unchanged to its replacement
5924     }
5925 
5926     /**
5927      * Determines if the specified character (Unicode code point) is alphabetic.
5928      * <p>
5929      * A character is considered to be alphabetic if its general category type,
5930      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
5931      * the following:
5932      * <ul>
5933      * <li> <code>UPPERCASE_LETTER</code>
5934      * <li> <code>LOWERCASE_LETTER</code>
5935      * <li> <code>TITLECASE_LETTER</code>
5936      * <li> <code>MODIFIER_LETTER</code>
5937      * <li> <code>OTHER_LETTER</code>
5938      * <li> <code>LETTER_NUMBER</code>
5939      * </ul>
5940      * or it has contributory property Other_Alphabetic as defined by the
5941      * Unicode Standard.
5942      *
5943      * @param   codePoint the character (Unicode code point) to be tested.
5944      * @return  <code>true</code> if the character is a Unicode alphabetic
5945      *          character, <code>false</code> otherwise.
5946      * @since   1.7
5947      */
5948     public static boolean isAlphabetic(int codePoint) {
5949         // Bit n of the mask is set when general category n is alphabetic by category alone.
5950         final int alphabeticCategories = (1 << Character.UPPERCASE_LETTER)
5951             | (1 << Character.LOWERCASE_LETTER) | (1 << Character.TITLECASE_LETTER)
5952             | (1 << Character.MODIFIER_LETTER) | (1 << Character.OTHER_LETTER)
5953             | (1 << Character.LETTER_NUMBER);
5954         final boolean byCategory = ((alphabeticCategories >> getType(codePoint)) & 1) != 0;
5955         return byCategory || CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
5956     }
5957 
5958     /**
5959      * Determines if the specified character (Unicode code point) is a CJKV
5960      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
5961      * the Unicode Standard.
5962      *
5963      * @param   codePoint the character (Unicode code point) to be tested.
5964      * @return  <code>true</code> if the character is a Unicode ideograph
5965      *          character, <code>false</code> otherwise.
5966      * @since   1.7
5967      */
5968     public static boolean isIdeographic(int codePoint) {
5969         return CharacterData.of(codePoint).isIdeographic(codePoint); // answered by the CharacterData that of(codePoint) returns
5970     }
5971 
5972     /**
5973      * Determines if the specified character is
5974      * permissible as the first character in a Java identifier.
5975      * <p>
5976      * A character may start a Java identifier if and only if
5977      * one of the following conditions is true:
5978      * <ul>
5979      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5980      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5981      * <li> {@code ch} is a currency symbol (such as {@code '$'})
5982      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5983      * </ul>
5984      *
5985      * <p><b>Note:</b> This method cannot handle <a
5986      * href="#supplementary"> supplementary characters</a>. To support
5987      * all Unicode characters, including supplementary characters, use
5988      * the {@link #isJavaIdentifierStart(int)} method.
5989      *
5990      * @param   ch the character to be tested.
5991      * @return  {@code true} if the character may start a Java identifier;
5992      *          {@code false} otherwise.
5993      * @see     Character#isJavaIdentifierPart(char)
5994      * @see     Character#isLetter(char)
5995      * @see     Character#isUnicodeIdentifierStart(char)
5996      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5997      * @since   1.1
5998      */
5999     public static boolean isJavaIdentifierStart(char ch) {
6000         return isJavaIdentifierStart((int)ch); // widen the char to int and defer to the code point overload
6001     }
6002 
6003     /**
6004      * Determines if the character (Unicode code point) is
6005      * permissible as the first character in a Java identifier.
6006      * <p>
6007      * A character may start a Java identifier if and only if
6008      * one of the following conditions is true:
6009      * <ul>
6010      * <li> {@link #isLetter(int) isLetter(codePoint)}
6011      *      returns {@code true}
6012      * <li> {@link #getType(int) getType(codePoint)}
6013      *      returns {@code LETTER_NUMBER}
6014      * <li> the referenced character is a currency symbol (such as {@code '$'})
6015      * <li> the referenced character is a connecting punctuation character
6016      *      (such as {@code '_'}).
6017      * </ul>
6018      *
6019      * @param   codePoint the character (Unicode code point) to be tested.
6020      * @return  {@code true} if the character may start a Java identifier;
6021      *          {@code false} otherwise.
6022      * @see     Character#isJavaIdentifierPart(int)
6023      * @see     Character#isLetter(int)
6024      * @see     Character#isUnicodeIdentifierStart(int)
6025      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6026      * @since   1.5
6027      */
6028     public static boolean isJavaIdentifierStart(int codePoint) {
6029         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); // answered by the CharacterData that of(codePoint) returns
6030     }
6031 
6032     /**
6033      * Determines if the specified character may be part of a Java
6034      * identifier as other than the first character.
6035      * <p>
6036      * A character may be part of a Java identifier if any of the following
6037      * are true:
6038      * <ul>
6039      * <li>  it is a letter
6040      * <li>  it is a currency symbol (such as {@code '$'})
6041      * <li>  it is a connecting punctuation character (such as {@code '_'})
6042      * <li>  it is a digit
6043      * <li>  it is a numeric letter (such as a Roman numeral character)
6044      * <li>  it is a combining mark
6045      * <li>  it is a non-spacing mark
6046      * <li> {@code isIdentifierIgnorable} returns
6047      * {@code true} for the character
6048      * </ul>
6049      *
6050      * <p><b>Note:</b> This method cannot handle <a
6051      * href="#supplementary"> supplementary characters</a>. To support
6052      * all Unicode characters, including supplementary characters, use
6053      * the {@link #isJavaIdentifierPart(int)} method.
6054      *
6055      * @param   ch      the character to be tested.
6056      * @return {@code true} if the character may be part of a
6057      *          Java identifier; {@code false} otherwise.
6058      * @see     Character#isIdentifierIgnorable(char)
6059      * @see     Character#isJavaIdentifierStart(char)
6060      * @see     Character#isLetterOrDigit(char)
6061      * @see     Character#isUnicodeIdentifierPart(char)
6062      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6063      * @since   1.1
6064      */
6065     public static boolean isJavaIdentifierPart(char ch) {
6066         return isJavaIdentifierPart((int)ch); // widen the char to int and defer to the code point overload
6067     }
6068 
6069     /**
6070      * Determines if the character (Unicode code point) may be part of a Java
6071      * identifier as other than the first character.
6072      * <p>
6073      * A character may be part of a Java identifier if any of the following
6074      * are true:
6075      * <ul>
6076      * <li>  it is a letter
6077      * <li>  it is a currency symbol (such as {@code '$'})
6078      * <li>  it is a connecting punctuation character (such as {@code '_'})
6079      * <li>  it is a digit
6080      * <li>  it is a numeric letter (such as a Roman numeral character)
6081      * <li>  it is a combining mark
6082      * <li>  it is a non-spacing mark
6083      * <li> {@link #isIdentifierIgnorable(int)
6084      * isIdentifierIgnorable(codePoint)} returns {@code true} for
6085      * the character
6086      * </ul>
6087      *
6088      * @param   codePoint the character (Unicode code point) to be tested.
6089      * @return {@code true} if the character may be part of a
6090      *          Java identifier; {@code false} otherwise.
6091      * @see     Character#isIdentifierIgnorable(int)
6092      * @see     Character#isJavaIdentifierStart(int)
6093      * @see     Character#isLetterOrDigit(int)
6094      * @see     Character#isUnicodeIdentifierPart(int)
6095      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6096      * @since   1.5
6097      */
6098     public static boolean isJavaIdentifierPart(int codePoint) {
6099         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); // answered by the CharacterData that of(codePoint) returns
6100     }
6101 
6102     /**
6103      * Determines if the specified character is permissible as the
6104      * first character in a Unicode identifier.
6105      * <p>
6106      * A character may start a Unicode identifier if and only if
6107      * one of the following conditions is true:
6108      * <ul>
6109      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6110      * <li> {@link #getType(char) getType(ch)} returns
6111      *      {@code LETTER_NUMBER}.
6112      * </ul>
6113      *
6114      * <p><b>Note:</b> This method cannot handle <a
6115      * href="#supplementary"> supplementary characters</a>. To support
6116      * all Unicode characters, including supplementary characters, use
6117      * the {@link #isUnicodeIdentifierStart(int)} method.
6118      *
6119      * @param   ch      the character to be tested.
6120      * @return  {@code true} if the character may start a Unicode
6121      *          identifier; {@code false} otherwise.
6122      * @see     Character#isJavaIdentifierStart(char)
6123      * @see     Character#isLetter(char)
6124      * @see     Character#isUnicodeIdentifierPart(char)
6125      * @since   1.1
6126      */
6127     public static boolean isUnicodeIdentifierStart(char ch) {
6128         return isUnicodeIdentifierStart((int)ch); // widen the char to int and defer to the code point overload
6129     }
6130 
6131     /**
6132      * Determines if the specified character (Unicode code point) is permissible as the
6133      * first character in a Unicode identifier.
6134      * <p>
6135      * A character may start a Unicode identifier if and only if
6136      * one of the following conditions is true:
6137      * <ul>
6138      * <li> {@link #isLetter(int) isLetter(codePoint)}
6139      *      returns {@code true}
6140      * <li> {@link #getType(int) getType(codePoint)}
6141      *      returns {@code LETTER_NUMBER}.
6142      * </ul>
6143      * @param   codePoint the character (Unicode code point) to be tested.
6144      * @return  {@code true} if the character may start a Unicode
6145      *          identifier; {@code false} otherwise.
6146      * @see     Character#isJavaIdentifierStart(int)
6147      * @see     Character#isLetter(int)
6148      * @see     Character#isUnicodeIdentifierPart(int)
6149      * @since   1.5
6150      */
6151     public static boolean isUnicodeIdentifierStart(int codePoint) {
6152         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); // answered by the CharacterData that of(codePoint) returns
6153     }
6154 
6155     /**
6156      * Determines if the specified character may be part of a Unicode
6157      * identifier as other than the first character.
6158      * <p>
6159      * A character may be part of a Unicode identifier if and only if
6160      * one of the following statements is true:
6161      * <ul>
6162      * <li>  it is a letter
6163      * <li>  it is a connecting punctuation character (such as {@code '_'})
6164      * <li>  it is a digit
6165      * <li>  it is a numeric letter (such as a Roman numeral character)
6166      * <li>  it is a combining mark
6167      * <li>  it is a non-spacing mark
6168      * <li> {@code isIdentifierIgnorable} returns
6169      * {@code true} for this character.
6170      * </ul>
6171      *
6172      * <p><b>Note:</b> This method cannot handle <a
6173      * href="#supplementary"> supplementary characters</a>. To support
6174      * all Unicode characters, including supplementary characters, use
6175      * the {@link #isUnicodeIdentifierPart(int)} method.
6176      *
6177      * @param   ch      the character to be tested.
6178      * @return  {@code true} if the character may be part of a
6179      *          Unicode identifier; {@code false} otherwise.
6180      * @see     Character#isIdentifierIgnorable(char)
6181      * @see     Character#isJavaIdentifierPart(char)
6182      * @see     Character#isLetterOrDigit(char)
6183      * @see     Character#isUnicodeIdentifierStart(char)
6184      * @since   1.1
6185      */
6186     public static boolean isUnicodeIdentifierPart(char ch) {
6187         return isUnicodeIdentifierPart((int)ch); // widen the char to int and defer to the code point overload
6188     }
6189 
6190     /**
6191      * Determines if the specified character (Unicode code point) may be part of a Unicode
6192      * identifier as other than the first character.
6193      * <p>
6194      * A character may be part of a Unicode identifier if and only if
6195      * one of the following statements is true:
6196      * <ul>
6197      * <li>  it is a letter
6198      * <li>  it is a connecting punctuation character (such as {@code '_'})
6199      * <li>  it is a digit
6200      * <li>  it is a numeric letter (such as a Roman numeral character)
6201      * <li>  it is a combining mark
6202      * <li>  it is a non-spacing mark
6203      * <li> {@code isIdentifierIgnorable} returns
6204      * {@code true} for this character.
6205      * </ul>
6206      * @param   codePoint the character (Unicode code point) to be tested.
6207      * @return  {@code true} if the character may be part of a
6208      *          Unicode identifier; {@code false} otherwise.
6209      * @see     Character#isIdentifierIgnorable(int)
6210      * @see     Character#isJavaIdentifierPart(int)
6211      * @see     Character#isLetterOrDigit(int)
6212      * @see     Character#isUnicodeIdentifierStart(int)
6213      * @since   1.5
6214      */
6215     public static boolean isUnicodeIdentifierPart(int codePoint) {
6216         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); // answered by the CharacterData that of(codePoint) returns
6217     }
6218 
6219     /**
6220      * Determines if the specified character should be regarded as
6221      * an ignorable character in a Java identifier or a Unicode identifier.
6222      * <p>
6223      * The following Unicode characters are ignorable in a Java identifier
6224      * or a Unicode identifier:
6225      * <ul>
6226      * <li>ISO control characters that are not whitespace
6227      * <ul>
6228      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6229      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6230      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6231      * </ul>
6232      *
6233      * <li>all characters that have the {@code FORMAT} general
6234      * category value
6235      * </ul>
6236      *
6237      * <p><b>Note:</b> This method cannot handle <a
6238      * href="#supplementary"> supplementary characters</a>. To support
6239      * all Unicode characters, including supplementary characters, use
6240      * the {@link #isIdentifierIgnorable(int)} method.
6241      *
6242      * @param   ch      the character to be tested.
6243      * @return  {@code true} if the character is an ignorable control
6244      *          character that may be part of a Java or Unicode identifier;
6245      *           {@code false} otherwise.
6246      * @see     Character#isJavaIdentifierPart(char)
6247      * @see     Character#isUnicodeIdentifierPart(char)
6248      * @since   1.1
6249      */
6250     public static boolean isIdentifierIgnorable(char ch) {
6251         return isIdentifierIgnorable((int)ch); // widen the char to int and defer to the code point overload
6252     }
6253 
6254     /**
6255      * Determines if the specified character (Unicode code point) should be regarded as
6256      * an ignorable character in a Java identifier or a Unicode identifier.
6257      * <p>
6258      * The following Unicode characters are ignorable in a Java identifier
6259      * or a Unicode identifier:
6260      * <ul>
6261      * <li>ISO control characters that are not whitespace
6262      * <ul>
6263      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6264      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6265      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6266      * </ul>
6267      *
6268      * <li>all characters that have the {@code FORMAT} general
6269      * category value
6270      * </ul>
6271      *
6272      * @param   codePoint the character (Unicode code point) to be tested.
6273      * @return  {@code true} if the character is an ignorable control
6274      *          character that may be part of a Java or Unicode identifier;
6275      *          {@code false} otherwise.
6276      * @see     Character#isJavaIdentifierPart(int)
6277      * @see     Character#isUnicodeIdentifierPart(int)
6278      * @since   1.5
6279      */
6280     public static boolean isIdentifierIgnorable(int codePoint) {
6281         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); // answered by the CharacterData that of(codePoint) returns
6282     }
6283 
6284     /**
6285      * Converts the character argument to lowercase using case
6286      * mapping information from the UnicodeData file.
6287      * <p>
6288      * Note that
6289      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
6290      * does not always return {@code true} for some ranges of
6291      * characters, particularly those that are symbols or ideographs.
6292      *
6293      * <p>In general, {@link String#toLowerCase()} should be used to map
6294      * characters to lowercase. {@code String} case mapping methods
6295      * have several benefits over {@code Character} case mapping methods.
6296      * {@code String} case mapping methods can perform locale-sensitive
6297      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6298      * the {@code Character} case mapping methods cannot.
6299      *
6300      * <p><b>Note:</b> This method cannot handle <a
6301      * href="#supplementary"> supplementary characters</a>. To support
6302      * all Unicode characters, including supplementary characters, use
6303      * the {@link #toLowerCase(int)} method.
6304      *
6305      * @param   ch   the character to be converted.
6306      * @return  the lowercase equivalent of the character, if any;
6307      *          otherwise, the character itself.
6308      * @see     Character#isLowerCase(char)
6309      * @see     String#toLowerCase()
6310      */
6311     public static char toLowerCase(char ch) {
6312         return (char)toLowerCase((int)ch);
6313     }
6314 
6315     /**
6316      * Converts the character (Unicode code point) argument to
6317      * lowercase using case mapping information from the UnicodeData
6318      * file.
6319      *
6320      * <p> Note that
6321      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
6322      * does not always return {@code true} for some ranges of
6323      * characters, particularly those that are symbols or ideographs.
6324      *
6325      * <p>In general, {@link String#toLowerCase()} should be used to map
6326      * characters to lowercase. {@code String} case mapping methods
6327      * have several benefits over {@code Character} case mapping methods.
6328      * {@code String} case mapping methods can perform locale-sensitive
6329      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6330      * the {@code Character} case mapping methods cannot.
6331      *
6332      * @param   codePoint   the character (Unicode code point) to be converted.
6333      * @return  the lowercase equivalent of the character (Unicode code
6334      *          point), if any; otherwise, the character itself.
6335      * @see     Character#isLowerCase(int)
6336      * @see     String#toLowerCase()
6337      *
6338      * @since   1.5
6339      */
6340     public static int toLowerCase(int codePoint) {
6341         return CharacterData.of(codePoint).toLowerCase(codePoint);
6342     }
6343 
6344     /**
6345      * Converts the character argument to uppercase using case mapping
6346      * information from the UnicodeData file.
6347      * <p>
6348      * Note that
6349      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
6350      * does not always return {@code true} for some ranges of
6351      * characters, particularly those that are symbols or ideographs.
6352      *
6353      * <p>In general, {@link String#toUpperCase()} should be used to map
6354      * characters to uppercase. {@code String} case mapping methods
6355      * have several benefits over {@code Character} case mapping methods.
6356      * {@code String} case mapping methods can perform locale-sensitive
6357      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6358      * the {@code Character} case mapping methods cannot.
6359      *
6360      * <p><b>Note:</b> This method cannot handle <a
6361      * href="#supplementary"> supplementary characters</a>. To support
6362      * all Unicode characters, including supplementary characters, use
6363      * the {@link #toUpperCase(int)} method.
6364      *
6365      * @param   ch   the character to be converted.
6366      * @return  the uppercase equivalent of the character, if any;
6367      *          otherwise, the character itself.
6368      * @see     Character#isUpperCase(char)
6369      * @see     String#toUpperCase()
6370      */
6371     public static char toUpperCase(char ch) {
6372         return (char)toUpperCase((int)ch);
6373     }
6374 
6375     /**
6376      * Converts the character (Unicode code point) argument to
6377      * uppercase using case mapping information from the UnicodeData
6378      * file.
6379      *
6380      * <p>Note that
6381      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
6382      * does not always return {@code true} for some ranges of
6383      * characters, particularly those that are symbols or ideographs.
6384      *
6385      * <p>In general, {@link String#toUpperCase()} should be used to map
6386      * characters to uppercase. {@code String} case mapping methods
6387      * have several benefits over {@code Character} case mapping methods.
6388      * {@code String} case mapping methods can perform locale-sensitive
6389      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6390      * the {@code Character} case mapping methods cannot.
6391      *
6392      * @param   codePoint   the character (Unicode code point) to be converted.
6393      * @return  the uppercase equivalent of the character, if any;
6394      *          otherwise, the character itself.
6395      * @see     Character#isUpperCase(int)
6396      * @see     String#toUpperCase()
6397      *
6398      * @since   1.5
6399      */
6400     public static int toUpperCase(int codePoint) {
6401         return CharacterData.of(codePoint).toUpperCase(codePoint);
6402     }
6403 
6404     /**
6405      * Converts the character argument to titlecase using case mapping
6406      * information from the UnicodeData file. If a character has no
6407      * explicit titlecase mapping and is not itself a titlecase char
6408      * according to UnicodeData, then the uppercase mapping is
6409      * returned as an equivalent titlecase mapping. If the
6410      * {@code char} argument is already a titlecase
6411      * {@code char}, the same {@code char} value will be
6412      * returned.
6413      * <p>
6414      * Note that
6415      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
6416      * does not always return {@code true} for some ranges of
6417      * characters.
6418      *
6419      * <p><b>Note:</b> This method cannot handle <a
6420      * href="#supplementary"> supplementary characters</a>. To support
6421      * all Unicode characters, including supplementary characters, use
6422      * the {@link #toTitleCase(int)} method.
6423      *
6424      * @param   ch   the character to be converted.
6425      * @return  the titlecase equivalent of the character, if any;
6426      *          otherwise, the character itself.
6427      * @see     Character#isTitleCase(char)
6428      * @see     Character#toLowerCase(char)
6429      * @see     Character#toUpperCase(char)
6430      * @since   1.0.2
6431      */
6432     public static char toTitleCase(char ch) {
6433         return (char)toTitleCase((int)ch);
6434     }
6435 
6436     /**
6437      * Converts the character (Unicode code point) argument to titlecase using case mapping
6438      * information from the UnicodeData file. If a character has no
6439      * explicit titlecase mapping and is not itself a titlecase char
6440      * according to UnicodeData, then the uppercase mapping is
6441      * returned as an equivalent titlecase mapping. If the
6442      * character argument is already a titlecase
6443      * character, the same character value will be
6444      * returned.
6445      *
6446      * <p>Note that
6447      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
6448      * does not always return {@code true} for some ranges of
6449      * characters.
6450      *
6451      * @param   codePoint   the character (Unicode code point) to be converted.
6452      * @return  the titlecase equivalent of the character, if any;
6453      *          otherwise, the character itself.
6454      * @see     Character#isTitleCase(int)
6455      * @see     Character#toLowerCase(int)
6456      * @see     Character#toUpperCase(int)
6457      * @since   1.5
6458      */
6459     public static int toTitleCase(int codePoint) {
6460         return CharacterData.of(codePoint).toTitleCase(codePoint);
6461     }
6462 
6463     /**
6464      * Returns the numeric value of the character {@code ch} in the
6465      * specified radix.
6466      * <p>
6467      * If the radix is not in the range {@code MIN_RADIX} &le;
6468      * {@code radix} &le; {@code MAX_RADIX} or if the
6469      * value of {@code ch} is not a valid digit in the specified
6470      * radix, {@code -1} is returned. A character is a valid digit
6471      * if at least one of the following is true:
6472      * <ul>
6473      * <li>The method {@code isDigit} is {@code true} of the character
6474      *     and the Unicode decimal digit value of the character (or its
6475      *     single-character decomposition) is less than the specified radix.
6476      *     In this case the decimal digit value is returned.
6477      * <li>The character is one of the uppercase Latin letters
6478      *     {@code 'A'} through {@code 'Z'} and its code is less than
6479      *     {@code radix + 'A' - 10}.
6480      *     In this case, {@code ch - 'A' + 10}
6481      *     is returned.
6482      * <li>The character is one of the lowercase Latin letters
6483      *     {@code 'a'} through {@code 'z'} and its code is less than
6484      *     {@code radix + 'a' - 10}.
6485      *     In this case, {@code ch - 'a' + 10}
6486      *     is returned.
6487      * <li>The character is one of the fullwidth uppercase Latin letters A
6488      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6489      *     and its code is less than
6490      *     {@code radix + '\u005CuFF21' - 10}.
6491      *     In this case, {@code ch - '\u005CuFF21' + 10}
6492      *     is returned.
6493      * <li>The character is one of the fullwidth lowercase Latin letters a
6494      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6495      *     and its code is less than
6496      *     {@code radix + '\u005CuFF41' - 10}.
6497      *     In this case, {@code ch - '\u005CuFF41' + 10}
6498      *     is returned.
6499      * </ul>
6500      *
6501      * <p><b>Note:</b> This method cannot handle <a
6502      * href="#supplementary"> supplementary characters</a>. To support
6503      * all Unicode characters, including supplementary characters, use
6504      * the {@link #digit(int, int)} method.
6505      *
6506      * @param   ch      the character to be converted.
6507      * @param   radix   the radix.
6508      * @return  the numeric value represented by the character in the
6509      *          specified radix.
6510      * @see     Character#forDigit(int, int)
6511      * @see     Character#isDigit(char)
6512      */
6513     public static int digit(char ch, int radix) {
6514         return digit((int)ch, radix);
6515     }
6516 
6517     /**
6518      * Returns the numeric value of the specified character (Unicode
6519      * code point) in the specified radix.
6520      *
6521      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
6522      * {@code radix} &le; {@code MAX_RADIX} or if the
6523      * character is not a valid digit in the specified
6524      * radix, {@code -1} is returned. A character is a valid digit
6525      * if at least one of the following is true:
6526      * <ul>
6527      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
6528      *     and the Unicode decimal digit value of the character (or its
6529      *     single-character decomposition) is less than the specified radix.
6530      *     In this case the decimal digit value is returned.
6531      * <li>The character is one of the uppercase Latin letters
6532      *     {@code 'A'} through {@code 'Z'} and its code is less than
6533      *     {@code radix + 'A' - 10}.
6534      *     In this case, {@code codePoint - 'A' + 10}
6535      *     is returned.
6536      * <li>The character is one of the lowercase Latin letters
6537      *     {@code 'a'} through {@code 'z'} and its code is less than
6538      *     {@code radix + 'a' - 10}.
6539      *     In this case, {@code codePoint - 'a' + 10}
6540      *     is returned.
6541      * <li>The character is one of the fullwidth uppercase Latin letters A
6542      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6543      *     and its code is less than
6544      *     {@code radix + '\u005CuFF21' - 10}.
6545      *     In this case,
6546      *     {@code codePoint - '\u005CuFF21' + 10}
6547      *     is returned.
6548      * <li>The character is one of the fullwidth lowercase Latin letters a
6549      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6550      *     and its code is less than
6551      *     {@code radix + '\u005CuFF41'- 10}.
6552      *     In this case,
6553      *     {@code codePoint - '\u005CuFF41' + 10}
6554      *     is returned.
6555      * </ul>
6556      *
6557      * @param   codePoint the character (Unicode code point) to be converted.
6558      * @param   radix   the radix.
6559      * @return  the numeric value represented by the character in the
6560      *          specified radix.
6561      * @see     Character#forDigit(int, int)
6562      * @see     Character#isDigit(int)
6563      * @since   1.5
6564      */
6565     public static int digit(int codePoint, int radix) {
6566         return CharacterData.of(codePoint).digit(codePoint, radix);
6567     }
6568 
6569     /**
6570      * Returns the {@code int} value that the specified Unicode
6571      * character represents. For example, the character
6572      * {@code '\u005Cu216C'} (the roman numeral fifty) will return
6573      * an int with a value of 50.
6574      * <p>
6575      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6576      * {@code '\u005Cu005A'}), lowercase
6577      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6578      * full width variant ({@code '\u005CuFF21'} through
6579      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6580      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6581      * through 35. This is independent of the Unicode specification,
6582      * which does not assign numeric values to these {@code char}
6583      * values.
6584      * <p>
6585      * If the character does not have a numeric value, then -1 is returned.
6586      * If the character has a numeric value that cannot be represented as a
6587      * nonnegative integer (for example, a fractional value), then -2
6588      * is returned.
6589      *
6590      * <p><b>Note:</b> This method cannot handle <a
6591      * href="#supplementary"> supplementary characters</a>. To support
6592      * all Unicode characters, including supplementary characters, use
6593      * the {@link #getNumericValue(int)} method.
6594      *
6595      * @param   ch      the character to be converted.
6596      * @return  the numeric value of the character, as a nonnegative {@code int}
6597      *           value; -2 if the character has a numeric value that is not a
6598      *          nonnegative integer; -1 if the character has no numeric value.
6599      * @see     Character#forDigit(int, int)
6600      * @see     Character#isDigit(char)
6601      * @since   1.1
6602      */
6603     public static int getNumericValue(char ch) {
6604         return getNumericValue((int)ch);
6605     }
6606 
6607     /**
6608      * Returns the {@code int} value that the specified
6609      * character (Unicode code point) represents. For example, the character
6610      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6611      * an {@code int} with a value of 50.
6612      * <p>
6613      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6614      * {@code '\u005Cu005A'}), lowercase
6615      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6616      * full width variant ({@code '\u005CuFF21'} through
6617      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6618      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6619      * through 35. This is independent of the Unicode specification,
6620      * which does not assign numeric values to these {@code char}
6621      * values.
6622      * <p>
6623      * If the character does not have a numeric value, then -1 is returned.
6624      * If the character has a numeric value that cannot be represented as a
6625      * nonnegative integer (for example, a fractional value), then -2
6626      * is returned.
6627      *
6628      * @param   codePoint the character (Unicode code point) to be converted.
6629      * @return  the numeric value of the character, as a nonnegative {@code int}
6630      *          value; -2 if the character has a numeric value that is not a
6631      *          nonnegative integer; -1 if the character has no numeric value.
6632      * @see     Character#forDigit(int, int)
6633      * @see     Character#isDigit(int)
6634      * @since   1.5
6635      */
6636     public static int getNumericValue(int codePoint) {
6637         return CharacterData.of(codePoint).getNumericValue(codePoint);
6638     }
6639 
6640     /**
6641      * Determines if the specified character is ISO-LATIN-1 white space.
6642      * This method returns {@code true} for the following five
6643      * characters only:
6644      * <table summary="truechars">
6645      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
6646      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
6647      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
6648      *     <td>{@code NEW LINE}</td></tr>
6649      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
6650      *     <td>{@code FORM FEED}</td></tr>
6651      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
6652      *     <td>{@code CARRIAGE RETURN}</td></tr>
6653      * <tr><td>{@code '&nbsp;'}</td>  <td>{@code U+0020}</td>
6654      *     <td>{@code SPACE}</td></tr>
6655      * </table>
6656      *
6657      * @param      ch   the character to be tested.
6658      * @return     {@code true} if the character is ISO-LATIN-1 white
6659      *             space; {@code false} otherwise.
6660      * @see        Character#isSpaceChar(char)
6661      * @see        Character#isWhitespace(char)
6662      * @deprecated Replaced by isWhitespace(char).
6663      */
6664     @Deprecated
6665     public static boolean isSpace(char ch) {
6666         return (ch <= 0x0020) &&
6667             (((((1L << 0x0009) |
6668             (1L << 0x000A) |
6669             (1L << 0x000C) |
6670             (1L << 0x000D) |
6671             (1L << 0x0020)) >> ch) & 1L) != 0);
6672     }
6673 
6674 
6675     /**
6676      * Determines if the specified character is a Unicode space character.
6677      * A character is considered to be a space character if and only if
6678      * it is specified to be a space character by the Unicode Standard. This
6679      * method returns true if the character's general category type is any of
6680      * the following:
6681      * <ul>
6682      * <li> {@code SPACE_SEPARATOR}
6683      * <li> {@code LINE_SEPARATOR}
6684      * <li> {@code PARAGRAPH_SEPARATOR}
6685      * </ul>
6686      *
6687      * <p><b>Note:</b> This method cannot handle <a
6688      * href="#supplementary"> supplementary characters</a>. To support
6689      * all Unicode characters, including supplementary characters, use
6690      * the {@link #isSpaceChar(int)} method.
6691      *
6692      * @param   ch      the character to be tested.
6693      * @return  {@code true} if the character is a space character;
6694      *          {@code false} otherwise.
6695      * @see     Character#isWhitespace(char)
6696      * @since   1.1
6697      */
6698     public static boolean isSpaceChar(char ch) {
6699         return isSpaceChar((int)ch);
6700     }
6701 
6702     /**
6703      * Determines if the specified character (Unicode code point) is a
6704      * Unicode space character.  A character is considered to be a
6705      * space character if and only if it is specified to be a space
6706      * character by the Unicode Standard. This method returns true if
6707      * the character's general category type is any of the following:
6708      *
6709      * <ul>
6710      * <li> {@link #SPACE_SEPARATOR}
6711      * <li> {@link #LINE_SEPARATOR}
6712      * <li> {@link #PARAGRAPH_SEPARATOR}
6713      * </ul>
6714      *
6715      * @param   codePoint the character (Unicode code point) to be tested.
6716      * @return  {@code true} if the character is a space character;
6717      *          {@code false} otherwise.
6718      * @see     Character#isWhitespace(int)
6719      * @since   1.5
6720      */
6721     public static boolean isSpaceChar(int codePoint) {
6722         return ((((1 << Character.SPACE_SEPARATOR) |
6723                   (1 << Character.LINE_SEPARATOR) |
6724                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
6725             != 0;
6726     }
6727 
6728     /**
6729      * Determines if the specified character is white space according to Java.
6730      * A character is a Java whitespace character if and only if it satisfies
6731      * one of the following criteria:
6732      * <ul>
6733      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
6734      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
6735      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6736      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6737      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6738      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6739      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6740      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6741      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6742      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6743      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6744      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6745      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6746      * </ul>
6747      *
6748      * <p><b>Note:</b> This method cannot handle <a
6749      * href="#supplementary"> supplementary characters</a>. To support
6750      * all Unicode characters, including supplementary characters, use
6751      * the {@link #isWhitespace(int)} method.
6752      *
6753      * @param   ch the character to be tested.
6754      * @return  {@code true} if the character is a Java whitespace
6755      *          character; {@code false} otherwise.
6756      * @see     Character#isSpaceChar(char)
6757      * @since   1.1
6758      */
6759     public static boolean isWhitespace(char ch) {
6760         return isWhitespace((int)ch);
6761     }
6762 
6763     /**
6764      * Determines if the specified character (Unicode code point) is
6765      * white space according to Java.  A character is a Java
6766      * whitespace character if and only if it satisfies one of the
6767      * following criteria:
6768      * <ul>
6769      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6770      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6771      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6772      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6773      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6774      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6775      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6776      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6777      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6778      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6779      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6780      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6781      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6782      * </ul>
6783      *
6784      * @param   codePoint the character (Unicode code point) to be tested.
6785      * @return  {@code true} if the character is a Java whitespace
6786      *          character; {@code false} otherwise.
6787      * @see     Character#isSpaceChar(int)
6788      * @since   1.5
6789      */
6790     public static boolean isWhitespace(int codePoint) {
6791         return CharacterData.of(codePoint).isWhitespace(codePoint);
6792     }
6793 
6794     /**
6795      * Determines if the specified character is an ISO control
6796      * character.  A character is considered to be an ISO control
6797      * character if its code is in the range {@code '\u005Cu0000'}
6798      * through {@code '\u005Cu001F'} or in the range
6799      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6800      *
6801      * <p><b>Note:</b> This method cannot handle <a
6802      * href="#supplementary"> supplementary characters</a>. To support
6803      * all Unicode characters, including supplementary characters, use
6804      * the {@link #isISOControl(int)} method.
6805      *
6806      * @param   ch      the character to be tested.
6807      * @return  {@code true} if the character is an ISO control character;
6808      *          {@code false} otherwise.
6809      *
6810      * @see     Character#isSpaceChar(char)
6811      * @see     Character#isWhitespace(char)
6812      * @since   1.1
6813      */
6814     public static boolean isISOControl(char ch) {
6815         return isISOControl((int)ch);
6816     }
6817 
6818     /**
6819      * Determines if the referenced character (Unicode code point) is an ISO control
6820      * character.  A character is considered to be an ISO control
6821      * character if its code is in the range {@code '\u005Cu0000'}
6822      * through {@code '\u005Cu001F'} or in the range
6823      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6824      *
6825      * @param   codePoint the character (Unicode code point) to be tested.
6826      * @return  {@code true} if the character is an ISO control character;
6827      *          {@code false} otherwise.
6828      * @see     Character#isSpaceChar(int)
6829      * @see     Character#isWhitespace(int)
6830      * @since   1.5
6831      */
6832     public static boolean isISOControl(int codePoint) {
6833         // Optimized form of:
6834         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
6835         //     (codePoint >= 0x7F && codePoint <= 0x9F);
6836         return codePoint <= 0x9F &&
6837             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
6838     }
6839 
6840     /**
6841      * Returns a value indicating a character's general category.
6842      *
6843      * <p><b>Note:</b> This method cannot handle <a
6844      * href="#supplementary"> supplementary characters</a>. To support
6845      * all Unicode characters, including supplementary characters, use
6846      * the {@link #getType(int)} method.
6847      *
6848      * @param   ch      the character to be tested.
6849      * @return  a value of type {@code int} representing the
6850      *          character's general category.
6851      * @see     Character#COMBINING_SPACING_MARK
6852      * @see     Character#CONNECTOR_PUNCTUATION
6853      * @see     Character#CONTROL
6854      * @see     Character#CURRENCY_SYMBOL
6855      * @see     Character#DASH_PUNCTUATION
6856      * @see     Character#DECIMAL_DIGIT_NUMBER
6857      * @see     Character#ENCLOSING_MARK
6858      * @see     Character#END_PUNCTUATION
6859      * @see     Character#FINAL_QUOTE_PUNCTUATION
6860      * @see     Character#FORMAT
6861      * @see     Character#INITIAL_QUOTE_PUNCTUATION
6862      * @see     Character#LETTER_NUMBER
6863      * @see     Character#LINE_SEPARATOR
6864      * @see     Character#LOWERCASE_LETTER
6865      * @see     Character#MATH_SYMBOL
6866      * @see     Character#MODIFIER_LETTER
6867      * @see     Character#MODIFIER_SYMBOL
6868      * @see     Character#NON_SPACING_MARK
6869      * @see     Character#OTHER_LETTER
6870      * @see     Character#OTHER_NUMBER
6871      * @see     Character#OTHER_PUNCTUATION
6872      * @see     Character#OTHER_SYMBOL
6873      * @see     Character#PARAGRAPH_SEPARATOR
6874      * @see     Character#PRIVATE_USE
6875      * @see     Character#SPACE_SEPARATOR
6876      * @see     Character#START_PUNCTUATION
6877      * @see     Character#SURROGATE
6878      * @see     Character#TITLECASE_LETTER
6879      * @see     Character#UNASSIGNED
6880      * @see     Character#UPPERCASE_LETTER
6881      * @since   1.1
6882      */
6883     public static int getType(char ch) {
6884         return getType((int)ch);
6885     }
6886 
6887     /**
6888      * Returns a value indicating a character's general category.
6889      *
6890      * @param   codePoint the character (Unicode code point) to be tested.
6891      * @return  a value of type {@code int} representing the
6892      *          character's general category.
6893      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
6894      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
6895      * @see     Character#CONTROL CONTROL
6896      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
6897      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
6898      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
6899      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
6900      * @see     Character#END_PUNCTUATION END_PUNCTUATION
6901      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
6902      * @see     Character#FORMAT FORMAT
6903      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
6904      * @see     Character#LETTER_NUMBER LETTER_NUMBER
6905      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
6906      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
6907      * @see     Character#MATH_SYMBOL MATH_SYMBOL
6908      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
6909      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
6910      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
6911      * @see     Character#OTHER_LETTER OTHER_LETTER
6912      * @see     Character#OTHER_NUMBER OTHER_NUMBER
6913      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
6914      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
6915      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
6916      * @see     Character#PRIVATE_USE PRIVATE_USE
6917      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
6918      * @see     Character#START_PUNCTUATION START_PUNCTUATION
6919      * @see     Character#SURROGATE SURROGATE
6920      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
6921      * @see     Character#UNASSIGNED UNASSIGNED
6922      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
6923      * @since   1.5
6924      */
6925     public static int getType(int codePoint) {
6926         return CharacterData.of(codePoint).getType(codePoint);
6927     }
6928 
6929     /**
6930      * Determines the character representation for a specific digit in
6931      * the specified radix. If the value of {@code radix} is not a
6932      * valid radix, or the value of {@code digit} is not a valid
6933      * digit in the specified radix, the null character
6934      * ({@code '\u005Cu0000'}) is returned.
6935      * <p>
6936      * The {@code radix} argument is valid if it is greater than or
6937      * equal to {@code MIN_RADIX} and less than or equal to
6938      * {@code MAX_RADIX}. The {@code digit} argument is valid if
6939      * {@code 0 <= digit < radix}.
6940      * <p>
6941      * If the digit is less than 10, then
6942      * {@code '0' + digit} is returned. Otherwise, the value
6943      * {@code 'a' + digit - 10} is returned.
6944      *
6945      * @param   digit   the number to convert to a character.
6946      * @param   radix   the radix.
6947      * @return  the {@code char} representation of the specified digit
6948      *          in the specified radix.
6949      * @see     Character#MIN_RADIX
6950      * @see     Character#MAX_RADIX
6951      * @see     Character#digit(char, int)
6952      */
6953     public static char forDigit(int digit, int radix) {
6954         if ((digit >= radix) || (digit < 0)) {
6955             return '\0';
6956         }
6957         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
6958             return '\0';
6959         }
6960         if (digit < 10) {
6961             return (char)('0' + digit);
6962         }
6963         return (char)('a' - 10 + digit);
6964     }
6965 
6966     /**
6967      * Returns the Unicode directionality property for the given
6968      * character.  Character directionality is used to calculate the
6969      * visual ordering of text. The directionality value of undefined
6970      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
6971      *
6972      * <p><b>Note:</b> This method cannot handle <a
6973      * href="#supplementary"> supplementary characters</a>. To support
6974      * all Unicode characters, including supplementary characters, use
6975      * the {@link #getDirectionality(int)} method.
6976      *
6977      * @param  ch {@code char} for which the directionality property
6978      *            is requested.
6979      * @return the directionality property of the {@code char} value.
6980      *
6981      * @see Character#DIRECTIONALITY_UNDEFINED
6982      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
6983      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
6984      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
6985      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
6986      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
6987      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
6988      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
6989      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
6990      * @see Character#DIRECTIONALITY_NONSPACING_MARK
6991      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
6992      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
6993      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
6994      * @see Character#DIRECTIONALITY_WHITESPACE
6995      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
6996      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
6997      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
6998      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
6999      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7000      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7001      * @since 1.4
7002      */
7003     public static byte getDirectionality(char ch) {
7004         return getDirectionality((int)ch);
7005     }
7006 
7007     /**
7008      * Returns the Unicode directionality property for the given
7009      * character (Unicode code point).  Character directionality is
7010      * used to calculate the visual ordering of text. The
7011      * directionality value of undefined character is {@link
7012      * #DIRECTIONALITY_UNDEFINED}.
7013      *
7014      * @param   codePoint the character (Unicode code point) for which
7015      *          the directionality property is requested.
7016      * @return the directionality property of the character.
7017      *
7018      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
7019      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
7020      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
7021      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7022      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
7023      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7024      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7025      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
7026      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7027      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
7028      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
7029      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
7030      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
7031      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
7032      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
7033      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7034      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7035      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7036      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7037      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7038      * @since    1.5
7039      */
7040     public static byte getDirectionality(int codePoint) {
7041         return CharacterData.of(codePoint).getDirectionality(codePoint);
7042     }
7043 
7044     /**
7045      * Determines whether the character is mirrored according to the
7046      * Unicode specification.  Mirrored characters should have their
7047      * glyphs horizontally mirrored when displayed in text that is
7048      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
7049      * PARENTHESIS is semantically defined to be an <i>opening
7050      * parenthesis</i>.  This will appear as a "(" in text that is
7051      * left-to-right but as a ")" in text that is right-to-left.
7052      *
7053      * <p><b>Note:</b> This method cannot handle <a
7054      * href="#supplementary"> supplementary characters</a>. To support
7055      * all Unicode characters, including supplementary characters, use
7056      * the {@link #isMirrored(int)} method.
7057      *
7058      * @param  ch {@code char} for which the mirrored property is requested
7059      * @return {@code true} if the char is mirrored, {@code false}
7060      *         if the {@code char} is not mirrored or is not defined.
7061      * @since 1.4
7062      */
7063     public static boolean isMirrored(char ch) {
7064         return isMirrored((int)ch);
7065     }
7066 
7067     /**
7068      * Determines whether the specified character (Unicode code point)
7069      * is mirrored according to the Unicode specification.  Mirrored
7070      * characters should have their glyphs horizontally mirrored when
7071      * displayed in text that is right-to-left.  For example,
7072      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
7073      * defined to be an <i>opening parenthesis</i>.  This will appear
7074      * as a "(" in text that is left-to-right but as a ")" in text
7075      * that is right-to-left.
7076      *
7077      * @param   codePoint the character (Unicode code point) to be tested.
7078      * @return  {@code true} if the character is mirrored, {@code false}
7079      *          if the character is not mirrored or is not defined.
7080      * @since   1.5
7081      */
7082     public static boolean isMirrored(int codePoint) {
7083         return CharacterData.of(codePoint).isMirrored(codePoint);
7084     }
7085 
7086     /**
7087      * Compares two {@code Character} objects numerically.
7088      *
7089      * @param   anotherCharacter   the {@code Character} to be compared.
7090 
7091      * @return  the value {@code 0} if the argument {@code Character}
7092      *          is equal to this {@code Character}; a value less than
7093      *          {@code 0} if this {@code Character} is numerically less
7094      *          than the {@code Character} argument; and a value greater than
7095      *          {@code 0} if this {@code Character} is numerically greater
7096      *          than the {@code Character} argument (unsigned comparison).
7097      *          Note that this is strictly a numerical comparison; it is not
7098      *          locale-dependent.
7099      * @since   1.2
7100      */
7101     public int compareTo(Character anotherCharacter) {
7102         return compare(this.value, anotherCharacter.value);
7103     }
7104 
7105     /**
7106      * Compares two {@code char} values numerically.
7107      * The value returned is identical to what would be returned by:
7108      * <pre>
7109      *    Character.valueOf(x).compareTo(Character.valueOf(y))
7110      * </pre>
7111      *
7112      * @param  x the first {@code char} to compare
7113      * @param  y the second {@code char} to compare
7114      * @return the value {@code 0} if {@code x == y};
7115      *         a value less than {@code 0} if {@code x < y}; and
7116      *         a value greater than {@code 0} if {@code x > y}
7117      * @since 1.7
7118      */
7119     public static int compare(char x, char y) {
7120         return x - y;
7121     }
7122 
7123     /**
7124      * Converts the character (Unicode code point) argument to uppercase using
7125      * information from the UnicodeData file.
7126      *
7127      * @param   codePoint   the character (Unicode code point) to be converted.
7128      * @return  either the uppercase equivalent of the character, if
7129      *          any, or an error flag ({@code Character.ERROR})
7130      *          that indicates that a 1:M {@code char} mapping exists.
7131      * @see     Character#isLowerCase(char)
7132      * @see     Character#isUpperCase(char)
7133      * @see     Character#toLowerCase(char)
7134      * @see     Character#toTitleCase(char)
7135      * @since 1.4
7136      */
7137     static int toUpperCaseEx(int codePoint) {
7138         assert isValidCodePoint(codePoint);
7139         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
7140     }
7141 
7142     /**
7143      * Converts the character (Unicode code point) argument to uppercase using case
7144      * mapping information from the SpecialCasing file in the Unicode
7145      * specification. If a character has no explicit uppercase
7146      * mapping, then the {@code char} itself is returned in the
7147      * {@code char[]}.
7148      *
7149      * @param   codePoint   the character (Unicode code point) to be converted.
7150      * @return a {@code char[]} with the uppercased character.
7151      * @since 1.4
7152      */
7153     static char[] toUpperCaseCharArray(int codePoint) {
7154         // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
7155         assert isBmpCodePoint(codePoint);
7156         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
7157     }
7158 
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
7166 
    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form, derived as {@link #SIZE} divided by {@link Byte#SIZE}.
     *
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
7174 
7175     /**
7176      * Returns the value obtained by reversing the order of the bytes in the
7177      * specified <tt>char</tt> value.
7178      *
7179      * @param ch The {@code char} of which to reverse the byte order.
7180      * @return the value obtained by reversing (or, equivalently, swapping)
7181      *     the bytes in the specified <tt>char</tt> value.
7182      * @since 1.5
7183      */
7184     public static char reverseBytes(char ch) {
7185         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
7186     }
7187 
7188     /**
7189      * Returns the Unicode name of the specified character
7190      * {@code codePoint}, or null if the code point is
7191      * {@link #UNASSIGNED unassigned}.
7192      * <p>
7193      * Note: if the specified character is not assigned a name by
7194      * the <i>UnicodeData</i> file (part of the Unicode Character
7195      * Database maintained by the Unicode Consortium), the returned
7196      * name is the same as the result of expression.
7197      *
7198      * <blockquote>{@code
7199      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
7200      *     + " "
7201      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7202      *
7203      * }</blockquote>
7204      *
7205      * @param  codePoint the character (Unicode code point)
7206      *
7207      * @return the Unicode name of the specified character, or null if
7208      *         the code point is unassigned.
7209      *
7210      * @exception IllegalArgumentException if the specified
7211      *            {@code codePoint} is not a valid Unicode
7212      *            code point.
7213      *
7214      * @since 1.7
7215      */
7216     public static String getName(int codePoint) {
7217         if (!isValidCodePoint(codePoint)) {
7218             throw new IllegalArgumentException();
7219         }
7220         String name = CharacterName.get(codePoint);
7221         if (name != null)
7222             return name;
7223         if (getType(codePoint) == UNASSIGNED)
7224             return null;
7225         UnicodeBlock block = UnicodeBlock.of(codePoint);
7226         if (block != null)
7227             return block.toString().replace('_', ' ') + " "
7228                    + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7229         // should never come here
7230         return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7231     }
7232 }