1 /*
   2  * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Map;
  30 import java.util.HashMap;
  31 import java.util.Locale;
  32 
  33 /**
  34  * The {@code Character} class wraps a value of the primitive
  35  * type {@code char} in an object. An object of type
  36  * {@code Character} contains a single field whose type is
  37  * {@code char}.
  38  * <p>
  39  * In addition, this class provides several methods for determining
  40  * a character's category (lowercase letter, digit, etc.) and for converting
  41  * characters from uppercase to lowercase and vice versa.
  42  * <p>
  43  * Character information is based on the Unicode Standard, version 6.2.0.
  44  * <p>
  45  * The methods and data of class {@code Character} are defined by
  46  * the information in the <i>UnicodeData</i> file that is part of the
  47  * Unicode Character Database maintained by the Unicode
  48  * Consortium. This file specifies various properties including name
  49  * and general category for every defined Unicode code point or
  50  * character range.
  51  * <p>
  52  * The file and its description are available from the Unicode Consortium at:
  53  * <ul>
  54  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
  55  * </ul>
  56  *
  57  * <h3><a name="unicode">Unicode Character Representations</a></h3>
  58  *
  59  * <p>The {@code char} data type (and therefore the value that a
 * {@code Character} object encapsulates) is based on the
  61  * original Unicode specification, which defined characters as
  62  * fixed-width 16-bit entities. The Unicode Standard has since been
  63  * changed to allow for characters whose representation requires more
  64  * than 16 bits.  The range of legal <em>code point</em>s is now
  65  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
  66  * (Refer to the <a
  67  * href="http://www.unicode.org/reports/tr27/#notation"><i>
  68  * definition</i></a> of the U+<i>n</i> notation in the Unicode
  69  * Standard.)
  70  *
  71  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
  72  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
  73  * <a name="supplementary">Characters</a> whose code points are greater
  74  * than U+FFFF are called <em>supplementary character</em>s.  The Java
  75  * platform uses the UTF-16 representation in {@code char} arrays and
  76  * in the {@code String} and {@code StringBuffer} classes. In
  77  * this representation, supplementary characters are represented as a pair
  78  * of {@code char} values, the first from the <em>high-surrogates</em>
  79  * range, (\uD800-\uDBFF), the second from the
  80  * <em>low-surrogates</em> range (\uDC00-\uDFFF).
  81  *
  82  * <p>A {@code char} value, therefore, represents Basic
  83  * Multilingual Plane (BMP) code points, including the surrogate
  84  * code points, or code units of the UTF-16 encoding. An
  85  * {@code int} value represents all Unicode code points,
  86  * including supplementary code points. The lower (least significant)
  87  * 21 bits of {@code int} are used to represent Unicode code
  88  * points and the upper (most significant) 11 bits must be zero.
  89  * Unless otherwise specified, the behavior with respect to
  90  * supplementary characters and surrogate {@code char} values is
  91  * as follows:
  92  *
  93  * <ul>
  94  * <li>The methods that only accept a {@code char} value cannot support
  95  * supplementary characters. They treat {@code char} values from the
  96  * surrogate ranges as undefined characters. For example,
  97  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
 * this specific value, if followed by any low-surrogate value in a string,
 * would represent a letter.
 100  *
 101  * <li>The methods that accept an {@code int} value support all
 102  * Unicode characters, including supplementary characters. For
 103  * example, {@code Character.isLetter(0x2F81A)} returns
 104  * {@code true} because the code point value represents a letter
 105  * (a CJK ideograph).
 106  * </ul>
 107  *
 108  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
 109  * used for character values in the range between U+0000 and U+10FFFF,
 110  * and <em>Unicode code unit</em> is used for 16-bit
 111  * {@code char} values that are code units of the <em>UTF-16</em>
 112  * encoding. For more information on Unicode terminology, refer to the
 113  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
 114  *
 115  * @author  Lee Boynton
 116  * @author  Guy Steele
 117  * @author  Akira Tanaka
 118  * @author  Martin Buchholz
 119  * @author  Ulf Zibis
 120  * @since   1.0
 121  */
 122 public final
 123 class Character implements java.io.Serializable, Comparable<Character> {
 124     /**
 125      * The minimum radix available for conversion to and from strings.
 126      * The constant value of this field is the smallest value permitted
 127      * for the radix argument in radix-conversion methods such as the
 128      * {@code digit} method, the {@code forDigit} method, and the
 129      * {@code toString} method of class {@code Integer}.
 130      *
 131      * @see     Character#digit(char, int)
 132      * @see     Character#forDigit(int, int)
 133      * @see     Integer#toString(int, int)
 134      * @see     Integer#valueOf(String)
 135      */
 136     public static final int MIN_RADIX = 2;
 137 
 138     /**
 139      * The maximum radix available for conversion to and from strings.
 140      * The constant value of this field is the largest value permitted
 141      * for the radix argument in radix-conversion methods such as the
 142      * {@code digit} method, the {@code forDigit} method, and the
 143      * {@code toString} method of class {@code Integer}.
 144      *
 145      * @see     Character#digit(char, int)
 146      * @see     Character#forDigit(int, int)
 147      * @see     Integer#toString(int, int)
 148      * @see     Integer#valueOf(String)
 149      */
 150     public static final int MAX_RADIX = 36;
 151 
 152     /**
 153      * The constant value of this field is the smallest value of type
 154      * {@code char}, {@code '\u005Cu0000'}.
 155      *
 156      * @since   1.0.2
 157      */
 158     public static final char MIN_VALUE = '\u0000';
 159 
 160     /**
 161      * The constant value of this field is the largest value of type
 162      * {@code char}, {@code '\u005CuFFFF'}.
 163      *
 164      * @since   1.0.2
 165      */
 166     public static final char MAX_VALUE = '\uFFFF';
 167 
 168     /**
 169      * The {@code Class} instance representing the primitive type
 170      * {@code char}.
 171      *
 172      * @since   1.1
 173      */
 174     @SuppressWarnings("unchecked")
 175     public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
 176 
 177     /*
 178      * Normative general types
 179      */
 180 
 181     /*
 182      * General character types
 183      */
 184 
 185     /**
 186      * General category "Cn" in the Unicode specification.
 187      * @since   1.1
 188      */
 189     public static final byte UNASSIGNED = 0;
 190 
 191     /**
 192      * General category "Lu" in the Unicode specification.
 193      * @since   1.1
 194      */
 195     public static final byte UPPERCASE_LETTER = 1;
 196 
 197     /**
 198      * General category "Ll" in the Unicode specification.
 199      * @since   1.1
 200      */
 201     public static final byte LOWERCASE_LETTER = 2;
 202 
 203     /**
 204      * General category "Lt" in the Unicode specification.
 205      * @since   1.1
 206      */
 207     public static final byte TITLECASE_LETTER = 3;
 208 
 209     /**
 210      * General category "Lm" in the Unicode specification.
 211      * @since   1.1
 212      */
 213     public static final byte MODIFIER_LETTER = 4;
 214 
 215     /**
 216      * General category "Lo" in the Unicode specification.
 217      * @since   1.1
 218      */
 219     public static final byte OTHER_LETTER = 5;
 220 
 221     /**
 222      * General category "Mn" in the Unicode specification.
 223      * @since   1.1
 224      */
 225     public static final byte NON_SPACING_MARK = 6;
 226 
 227     /**
 228      * General category "Me" in the Unicode specification.
 229      * @since   1.1
 230      */
 231     public static final byte ENCLOSING_MARK = 7;
 232 
 233     /**
 234      * General category "Mc" in the Unicode specification.
 235      * @since   1.1
 236      */
 237     public static final byte COMBINING_SPACING_MARK = 8;
 238 
 239     /**
 240      * General category "Nd" in the Unicode specification.
 241      * @since   1.1
 242      */
 243     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
 244 
 245     /**
 246      * General category "Nl" in the Unicode specification.
 247      * @since   1.1
 248      */
 249     public static final byte LETTER_NUMBER = 10;
 250 
 251     /**
 252      * General category "No" in the Unicode specification.
 253      * @since   1.1
 254      */
 255     public static final byte OTHER_NUMBER = 11;
 256 
 257     /**
 258      * General category "Zs" in the Unicode specification.
 259      * @since   1.1
 260      */
 261     public static final byte SPACE_SEPARATOR = 12;
 262 
 263     /**
 264      * General category "Zl" in the Unicode specification.
 265      * @since   1.1
 266      */
 267     public static final byte LINE_SEPARATOR = 13;
 268 
 269     /**
 270      * General category "Zp" in the Unicode specification.
 271      * @since   1.1
 272      */
 273     public static final byte PARAGRAPH_SEPARATOR = 14;
 274 
 275     /**
 276      * General category "Cc" in the Unicode specification.
 277      * @since   1.1
 278      */
 279     public static final byte CONTROL = 15;
 280 
 281     /**
 282      * General category "Cf" in the Unicode specification.
 283      * @since   1.1
 284      */
 285     public static final byte FORMAT = 16;
 286 
 287     /**
 288      * General category "Co" in the Unicode specification.
 289      * @since   1.1
 290      */
 291     public static final byte PRIVATE_USE = 18;
 292 
 293     /**
 294      * General category "Cs" in the Unicode specification.
 295      * @since   1.1
 296      */
 297     public static final byte SURROGATE = 19;
 298 
 299     /**
 300      * General category "Pd" in the Unicode specification.
 301      * @since   1.1
 302      */
 303     public static final byte DASH_PUNCTUATION = 20;
 304 
 305     /**
 306      * General category "Ps" in the Unicode specification.
 307      * @since   1.1
 308      */
 309     public static final byte START_PUNCTUATION = 21;
 310 
 311     /**
 312      * General category "Pe" in the Unicode specification.
 313      * @since   1.1
 314      */
 315     public static final byte END_PUNCTUATION = 22;
 316 
 317     /**
 318      * General category "Pc" in the Unicode specification.
 319      * @since   1.1
 320      */
 321     public static final byte CONNECTOR_PUNCTUATION = 23;
 322 
 323     /**
 324      * General category "Po" in the Unicode specification.
 325      * @since   1.1
 326      */
 327     public static final byte OTHER_PUNCTUATION = 24;
 328 
 329     /**
 330      * General category "Sm" in the Unicode specification.
 331      * @since   1.1
 332      */
 333     public static final byte MATH_SYMBOL = 25;
 334 
 335     /**
 336      * General category "Sc" in the Unicode specification.
 337      * @since   1.1
 338      */
 339     public static final byte CURRENCY_SYMBOL = 26;
 340 
 341     /**
 342      * General category "Sk" in the Unicode specification.
 343      * @since   1.1
 344      */
 345     public static final byte MODIFIER_SYMBOL = 27;
 346 
 347     /**
 348      * General category "So" in the Unicode specification.
 349      * @since   1.1
 350      */
 351     public static final byte OTHER_SYMBOL = 28;
 352 
 353     /**
 354      * General category "Pi" in the Unicode specification.
 355      * @since   1.4
 356      */
 357     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
 358 
 359     /**
 360      * General category "Pf" in the Unicode specification.
 361      * @since   1.4
 362      */
 363     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
 364 
 365     /**
 366      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
 367      */
 368     static final int ERROR = 0xFFFFFFFF;
 369 
 370 
 371     /**
 372      * Undefined bidirectional character type. Undefined {@code char}
 373      * values have undefined directionality in the Unicode specification.
 374      * @since 1.4
 375      */
 376     public static final byte DIRECTIONALITY_UNDEFINED = -1;
 377 
 378     /**
 379      * Strong bidirectional character type "L" in the Unicode specification.
 380      * @since 1.4
 381      */
 382     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
 383 
 384     /**
 385      * Strong bidirectional character type "R" in the Unicode specification.
 386      * @since 1.4
 387      */
 388     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
 389 
 390     /**
     * Strong bidirectional character type "AL" in the Unicode specification.
 392      * @since 1.4
 393      */
 394     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
 395 
 396     /**
 397      * Weak bidirectional character type "EN" in the Unicode specification.
 398      * @since 1.4
 399      */
 400     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
 401 
 402     /**
 403      * Weak bidirectional character type "ES" in the Unicode specification.
 404      * @since 1.4
 405      */
 406     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
 407 
 408     /**
 409      * Weak bidirectional character type "ET" in the Unicode specification.
 410      * @since 1.4
 411      */
 412     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
 413 
 414     /**
 415      * Weak bidirectional character type "AN" in the Unicode specification.
 416      * @since 1.4
 417      */
 418     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
 419 
 420     /**
 421      * Weak bidirectional character type "CS" in the Unicode specification.
 422      * @since 1.4
 423      */
 424     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
 425 
 426     /**
 427      * Weak bidirectional character type "NSM" in the Unicode specification.
 428      * @since 1.4
 429      */
 430     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
 431 
 432     /**
 433      * Weak bidirectional character type "BN" in the Unicode specification.
 434      * @since 1.4
 435      */
 436     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
 437 
 438     /**
 439      * Neutral bidirectional character type "B" in the Unicode specification.
 440      * @since 1.4
 441      */
 442     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
 443 
 444     /**
 445      * Neutral bidirectional character type "S" in the Unicode specification.
 446      * @since 1.4
 447      */
 448     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
 449 
 450     /**
 451      * Neutral bidirectional character type "WS" in the Unicode specification.
 452      * @since 1.4
 453      */
 454     public static final byte DIRECTIONALITY_WHITESPACE = 12;
 455 
 456     /**
 457      * Neutral bidirectional character type "ON" in the Unicode specification.
 458      * @since 1.4
 459      */
 460     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
 461 
 462     /**
 463      * Strong bidirectional character type "LRE" in the Unicode specification.
 464      * @since 1.4
 465      */
 466     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
 467 
 468     /**
 469      * Strong bidirectional character type "LRO" in the Unicode specification.
 470      * @since 1.4
 471      */
 472     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
 473 
 474     /**
 475      * Strong bidirectional character type "RLE" in the Unicode specification.
 476      * @since 1.4
 477      */
 478     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
 479 
 480     /**
 481      * Strong bidirectional character type "RLO" in the Unicode specification.
 482      * @since 1.4
 483      */
 484     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
 485 
 486     /**
 487      * Weak bidirectional character type "PDF" in the Unicode specification.
 488      * @since 1.4
 489      */
 490     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 491 
 492     /**
 493      * The minimum value of a
 494      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 495      * Unicode high-surrogate code unit</a>
 496      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
 497      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 498      *
 499      * @since 1.5
 500      */
 501     public static final char MIN_HIGH_SURROGATE = '\uD800';
 502 
 503     /**
 504      * The maximum value of a
 505      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 506      * Unicode high-surrogate code unit</a>
 507      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
 508      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 509      *
 510      * @since 1.5
 511      */
 512     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
 513 
 514     /**
 515      * The minimum value of a
 516      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 517      * Unicode low-surrogate code unit</a>
 518      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
 519      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 520      *
 521      * @since 1.5
 522      */
 523     public static final char MIN_LOW_SURROGATE  = '\uDC00';
 524 
 525     /**
 526      * The maximum value of a
 527      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 528      * Unicode low-surrogate code unit</a>
 529      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 530      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 531      *
 532      * @since 1.5
 533      */
 534     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
 535 
 536     /**
 537      * The minimum value of a Unicode surrogate code unit in the
 538      * UTF-16 encoding, constant {@code '\u005CuD800'}.
 539      *
 540      * @since 1.5
 541      */
 542     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
 543 
 544     /**
 545      * The maximum value of a Unicode surrogate code unit in the
 546      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 547      *
 548      * @since 1.5
 549      */
 550     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
 551 
 552     /**
 553      * The minimum value of a
 554      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
 555      * Unicode supplementary code point</a>, constant {@code U+10000}.
 556      *
 557      * @since 1.5
 558      */
 559     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
 560 
 561     /**
 562      * The minimum value of a
 563      * <a href="http://www.unicode.org/glossary/#code_point">
 564      * Unicode code point</a>, constant {@code U+0000}.
 565      *
 566      * @since 1.5
 567      */
 568     public static final int MIN_CODE_POINT = 0x000000;
 569 
 570     /**
 571      * The maximum value of a
 572      * <a href="http://www.unicode.org/glossary/#code_point">
 573      * Unicode code point</a>, constant {@code U+10FFFF}.
 574      *
 575      * @since 1.5
 576      */
 577     public static final int MAX_CODE_POINT = 0X10FFFF;
 578 
 579 
 580     /**
 581      * Instances of this class represent particular subsets of the Unicode
 582      * character set.  The only family of subsets defined in the
 583      * {@code Character} class is {@link Character.UnicodeBlock}.
 584      * Other portions of the Java API may define other subsets for their
 585      * own purposes.
 586      *
 587      * @since 1.2
 588      */
 589     public static class Subset  {
 590 
 591         private String name;
 592 
 593         /**
 594          * Constructs a new {@code Subset} instance.
 595          *
 596          * @param  name  The name of this subset
 597          * @exception NullPointerException if name is {@code null}
 598          */
 599         protected Subset(String name) {
 600             if (name == null) {
 601                 throw new NullPointerException("name");
 602             }
 603             this.name = name;
 604         }
 605 
 606         /**
 607          * Compares two {@code Subset} objects for equality.
 608          * This method returns {@code true} if and only if
 609          * {@code this} and the argument refer to the same
 610          * object; since this method is {@code final}, this
 611          * guarantee holds for all subclasses.
 612          */
 613         public final boolean equals(Object obj) {
 614             return (this == obj);
 615         }
 616 
 617         /**
 618          * Returns the standard hash code as defined by the
 619          * {@link Object#hashCode} method.  This method
 620          * is {@code final} in order to ensure that the
 621          * {@code equals} and {@code hashCode} methods will
 622          * be consistent in all subclasses.
 623          */
 624         public final int hashCode() {
 625             return super.hashCode();
 626         }
 627 
 628         /**
 629          * Returns the name of this subset.
 630          */
 631         public final String toString() {
 632             return name;
 633         }
 634     }
 635 
 636     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
 637     // for the latest specification of Unicode Blocks.
 638 
 639     /**
 640      * A family of character subsets representing the character blocks in the
 641      * Unicode specification. Character blocks generally define characters
 642      * used for a specific script or purpose. A character is contained by
 643      * at most one Unicode block.
 644      *
 645      * @since 1.2
 646      */
 647     public static final class UnicodeBlock extends Subset {
 648 
 649         private static Map<String, UnicodeBlock> map = new HashMap<>(256);
 650 
 651         /**
 652          * Creates a UnicodeBlock with the given identifier name.
 653          * This name must be the same as the block identifier.
 654          */
 655         private UnicodeBlock(String idName) {
 656             super(idName);
 657             map.put(idName, this);
 658         }
 659 
 660         /**
 661          * Creates a UnicodeBlock with the given identifier name and
 662          * alias name.
 663          */
 664         private UnicodeBlock(String idName, String alias) {
 665             this(idName);
 666             map.put(alias, this);
 667         }
 668 
 669         /**
 670          * Creates a UnicodeBlock with the given identifier name and
 671          * alias names.
 672          */
 673         private UnicodeBlock(String idName, String... aliases) {
 674             this(idName);
 675             for (String alias : aliases)
 676                 map.put(alias, this);
 677         }
 678 
 679         /**
 680          * Constant for the "Basic Latin" Unicode character block.
 681          * @since 1.2
 682          */
 683         public static final UnicodeBlock  BASIC_LATIN =
 684             new UnicodeBlock("BASIC_LATIN",
 685                              "BASIC LATIN",
 686                              "BASICLATIN");
 687 
 688         /**
 689          * Constant for the "Latin-1 Supplement" Unicode character block.
 690          * @since 1.2
 691          */
 692         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
 693             new UnicodeBlock("LATIN_1_SUPPLEMENT",
 694                              "LATIN-1 SUPPLEMENT",
 695                              "LATIN-1SUPPLEMENT");
 696 
 697         /**
 698          * Constant for the "Latin Extended-A" Unicode character block.
 699          * @since 1.2
 700          */
 701         public static final UnicodeBlock LATIN_EXTENDED_A =
 702             new UnicodeBlock("LATIN_EXTENDED_A",
 703                              "LATIN EXTENDED-A",
 704                              "LATINEXTENDED-A");
 705 
 706         /**
 707          * Constant for the "Latin Extended-B" Unicode character block.
 708          * @since 1.2
 709          */
 710         public static final UnicodeBlock LATIN_EXTENDED_B =
 711             new UnicodeBlock("LATIN_EXTENDED_B",
 712                              "LATIN EXTENDED-B",
 713                              "LATINEXTENDED-B");
 714 
 715         /**
 716          * Constant for the "IPA Extensions" Unicode character block.
 717          * @since 1.2
 718          */
 719         public static final UnicodeBlock IPA_EXTENSIONS =
 720             new UnicodeBlock("IPA_EXTENSIONS",
 721                              "IPA EXTENSIONS",
 722                              "IPAEXTENSIONS");
 723 
 724         /**
 725          * Constant for the "Spacing Modifier Letters" Unicode character block.
 726          * @since 1.2
 727          */
 728         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
 729             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
 730                              "SPACING MODIFIER LETTERS",
 731                              "SPACINGMODIFIERLETTERS");
 732 
 733         /**
 734          * Constant for the "Combining Diacritical Marks" Unicode character block.
 735          * @since 1.2
 736          */
 737         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
 738             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
 739                              "COMBINING DIACRITICAL MARKS",
 740                              "COMBININGDIACRITICALMARKS");
 741 
 742         /**
 743          * Constant for the "Greek and Coptic" Unicode character block.
 744          * <p>
 745          * This block was previously known as the "Greek" block.
 746          *
 747          * @since 1.2
 748          */
 749         public static final UnicodeBlock GREEK =
 750             new UnicodeBlock("GREEK",
 751                              "GREEK AND COPTIC",
 752                              "GREEKANDCOPTIC");
 753 
 754         /**
 755          * Constant for the "Cyrillic" Unicode character block.
 756          * @since 1.2
 757          */
 758         public static final UnicodeBlock CYRILLIC =
 759             new UnicodeBlock("CYRILLIC");
 760 
 761         /**
 762          * Constant for the "Armenian" Unicode character block.
 763          * @since 1.2
 764          */
 765         public static final UnicodeBlock ARMENIAN =
 766             new UnicodeBlock("ARMENIAN");
 767 
 768         /**
 769          * Constant for the "Hebrew" Unicode character block.
 770          * @since 1.2
 771          */
 772         public static final UnicodeBlock HEBREW =
 773             new UnicodeBlock("HEBREW");
 774 
 775         /**
 776          * Constant for the "Arabic" Unicode character block.
 777          * @since 1.2
 778          */
 779         public static final UnicodeBlock ARABIC =
 780             new UnicodeBlock("ARABIC");
 781 
 782         /**
 783          * Constant for the "Devanagari" Unicode character block.
 784          * @since 1.2
 785          */
 786         public static final UnicodeBlock DEVANAGARI =
 787             new UnicodeBlock("DEVANAGARI");
 788 
 789         /**
 790          * Constant for the "Bengali" Unicode character block.
 791          * @since 1.2
 792          */
 793         public static final UnicodeBlock BENGALI =
 794             new UnicodeBlock("BENGALI");
 795 
 796         /**
 797          * Constant for the "Gurmukhi" Unicode character block.
 798          * @since 1.2
 799          */
 800         public static final UnicodeBlock GURMUKHI =
 801             new UnicodeBlock("GURMUKHI");
 802 
 803         /**
 804          * Constant for the "Gujarati" Unicode character block.
 805          * @since 1.2
 806          */
 807         public static final UnicodeBlock GUJARATI =
 808             new UnicodeBlock("GUJARATI");
 809 
 810         /**
 811          * Constant for the "Oriya" Unicode character block.
 812          * @since 1.2
 813          */
 814         public static final UnicodeBlock ORIYA =
 815             new UnicodeBlock("ORIYA");
 816 
 817         /**
 818          * Constant for the "Tamil" Unicode character block.
 819          * @since 1.2
 820          */
 821         public static final UnicodeBlock TAMIL =
 822             new UnicodeBlock("TAMIL");
 823 
 824         /**
 825          * Constant for the "Telugu" Unicode character block.
 826          * @since 1.2
 827          */
 828         public static final UnicodeBlock TELUGU =
 829             new UnicodeBlock("TELUGU");
 830 
 831         /**
 832          * Constant for the "Kannada" Unicode character block.
 833          * @since 1.2
 834          */
 835         public static final UnicodeBlock KANNADA =
 836             new UnicodeBlock("KANNADA");
 837 
 838         /**
 839          * Constant for the "Malayalam" Unicode character block.
 840          * @since 1.2
 841          */
 842         public static final UnicodeBlock MALAYALAM =
 843             new UnicodeBlock("MALAYALAM");
 844 
 845         /**
 846          * Constant for the "Thai" Unicode character block.
 847          * @since 1.2
 848          */
 849         public static final UnicodeBlock THAI =
 850             new UnicodeBlock("THAI");
 851 
 852         /**
 853          * Constant for the "Lao" Unicode character block.
 854          * @since 1.2
 855          */
 856         public static final UnicodeBlock LAO =
 857             new UnicodeBlock("LAO");
 858 
 859         /**
 860          * Constant for the "Tibetan" Unicode character block.
 861          * @since 1.2
 862          */
 863         public static final UnicodeBlock TIBETAN =
 864             new UnicodeBlock("TIBETAN");
 865 
 866         /**
 867          * Constant for the "Georgian" Unicode character block.
 868          * @since 1.2
 869          */
 870         public static final UnicodeBlock GEORGIAN =
 871             new UnicodeBlock("GEORGIAN");
 872 
 873         /**
 874          * Constant for the "Hangul Jamo" Unicode character block.
 875          * @since 1.2
 876          */
 877         public static final UnicodeBlock HANGUL_JAMO =
 878             new UnicodeBlock("HANGUL_JAMO",
                                  // The same block name in three spellings: with
                                  // underscores, with spaces, and with separators removed.
                                  // NOTE(review): presumably so that name lookups accept
                                  // any of them -- confirm against the constructors.
 879                              "HANGUL JAMO",
 880                              "HANGULJAMO");
 881 
 882         /**
 883          * Constant for the "Latin Extended Additional" Unicode character block.
 884          * @since 1.2
 885          */
 886         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
 887             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
 888                              "LATIN EXTENDED ADDITIONAL",
 889                              "LATINEXTENDEDADDITIONAL");
 890 
 891         /**
 892          * Constant for the "Greek Extended" Unicode character block.
 893          * @since 1.2
 894          */
 895         public static final UnicodeBlock GREEK_EXTENDED =
 896             new UnicodeBlock("GREEK_EXTENDED",
 897                              "GREEK EXTENDED",
 898                              "GREEKEXTENDED");
 899 
 900         /**
 901          * Constant for the "General Punctuation" Unicode character block.
 902          * @since 1.2
 903          */
 904         public static final UnicodeBlock GENERAL_PUNCTUATION =
 905             new UnicodeBlock("GENERAL_PUNCTUATION",
 906                              "GENERAL PUNCTUATION",
 907                              "GENERALPUNCTUATION");
 908 
 909         /**
 910          * Constant for the "Superscripts and Subscripts" Unicode character
 911          * block.
 912          * @since 1.2
 913          */
 914         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
 915             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
 916                              "SUPERSCRIPTS AND SUBSCRIPTS",
 917                              "SUPERSCRIPTSANDSUBSCRIPTS");
 918 
 919         /**
 920          * Constant for the "Currency Symbols" Unicode character block.
 921          * @since 1.2
 922          */
 923         public static final UnicodeBlock CURRENCY_SYMBOLS =
 924             new UnicodeBlock("CURRENCY_SYMBOLS",
 925                              "CURRENCY SYMBOLS",
 926                              "CURRENCYSYMBOLS");
 927 
 928         /**
 929          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
 930          * character block.
 931          * <p>
 932          * This block was previously known as "Combining Marks for Symbols".
 933          * @since 1.2
 934          */
 935         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
 936             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
 937                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
 938                              "COMBININGDIACRITICALMARKSFORSYMBOLS",
 939                              "COMBINING MARKS FOR SYMBOLS",
 940                              "COMBININGMARKSFORSYMBOLS");
 941 
 942         /**
 943          * Constant for the "Letterlike Symbols" Unicode character block.
 944          * @since 1.2
 945          */
 946         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
 947             new UnicodeBlock("LETTERLIKE_SYMBOLS",
 948                              "LETTERLIKE SYMBOLS",
 949                              "LETTERLIKESYMBOLS");
 950 
 951         /**
 952          * Constant for the "Number Forms" Unicode character block.
 953          * @since 1.2
 954          */
 955         public static final UnicodeBlock NUMBER_FORMS =
 956             new UnicodeBlock("NUMBER_FORMS",
 957                              "NUMBER FORMS",
 958                              "NUMBERFORMS");
 959 
 960         /**
 961          * Constant for the "Arrows" Unicode character block.
 962          * @since 1.2
 963          */
 964         public static final UnicodeBlock ARROWS =
 965             new UnicodeBlock("ARROWS");
 966 
 967         /**
 968          * Constant for the "Mathematical Operators" Unicode character block.
 969          * @since 1.2
 970          */
 971         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
 972             new UnicodeBlock("MATHEMATICAL_OPERATORS",
 973                              "MATHEMATICAL OPERATORS",
 974                              "MATHEMATICALOPERATORS");
 975 
 976         /**
 977          * Constant for the "Miscellaneous Technical" Unicode character block.
 978          * @since 1.2
 979          */
 980         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
 981             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
 982                              "MISCELLANEOUS TECHNICAL",
 983                              "MISCELLANEOUSTECHNICAL");
 984 
 985         /**
 986          * Constant for the "Control Pictures" Unicode character block.
 987          * @since 1.2
 988          */
 989         public static final UnicodeBlock CONTROL_PICTURES =
 990             new UnicodeBlock("CONTROL_PICTURES",
 991                              "CONTROL PICTURES",
 992                              "CONTROLPICTURES");
 993 
 994         /**
 995          * Constant for the "Optical Character Recognition" Unicode character
              * block.
 996          * @since 1.2
 997          */
 998         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
 999             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1000                              "OPTICAL CHARACTER RECOGNITION",
1001                              "OPTICALCHARACTERRECOGNITION");
1002 
1003         /**
1004          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1005          * @since 1.2
1006          */
1007         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1008             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1009                              "ENCLOSED ALPHANUMERICS",
1010                              "ENCLOSEDALPHANUMERICS");
1011 
1012         /**
1013          * Constant for the "Box Drawing" Unicode character block.
1014          * @since 1.2
1015          */
1016         public static final UnicodeBlock BOX_DRAWING =
1017             new UnicodeBlock("BOX_DRAWING",
1018                              "BOX DRAWING",
1019                              "BOXDRAWING");
1020 
1021         /**
1022          * Constant for the "Block Elements" Unicode character block.
1023          * @since 1.2
1024          */
1025         public static final UnicodeBlock BLOCK_ELEMENTS =
1026             new UnicodeBlock("BLOCK_ELEMENTS",
1027                              "BLOCK ELEMENTS",
1028                              "BLOCKELEMENTS");
1029 
1030         /**
1031          * Constant for the "Geometric Shapes" Unicode character block.
1032          * @since 1.2
1033          */
1034         public static final UnicodeBlock GEOMETRIC_SHAPES =
1035             new UnicodeBlock("GEOMETRIC_SHAPES",
1036                              "GEOMETRIC SHAPES",
1037                              "GEOMETRICSHAPES");
1038 
1039         /**
1040          * Constant for the "Miscellaneous Symbols" Unicode character block.
1041          * @since 1.2
1042          */
1043         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1044             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1045                              "MISCELLANEOUS SYMBOLS",
1046                              "MISCELLANEOUSSYMBOLS");
1047 
1048         /**
1049          * Constant for the "Dingbats" Unicode character block.
1050          * @since 1.2
1051          */
1052         public static final UnicodeBlock DINGBATS =
1053             new UnicodeBlock("DINGBATS");
1054 
1055         /**
1056          * Constant for the "CJK Symbols and Punctuation" Unicode character
              * block.
1057          * @since 1.2
1058          */
1059         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1060             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1061                              "CJK SYMBOLS AND PUNCTUATION",
1062                              "CJKSYMBOLSANDPUNCTUATION");
1063 
1064         /**
1065          * Constant for the "Hiragana" Unicode character block.
1066          * @since 1.2
1067          */
1068         public static final UnicodeBlock HIRAGANA =
1069             new UnicodeBlock("HIRAGANA");
1070 
1071         /**
1072          * Constant for the "Katakana" Unicode character block.
1073          * @since 1.2
1074          */
1075         public static final UnicodeBlock KATAKANA =
1076             new UnicodeBlock("KATAKANA");
1077 
1078         /**
1079          * Constant for the "Bopomofo" Unicode character block.
1080          * @since 1.2
1081          */
1082         public static final UnicodeBlock BOPOMOFO =
1083             new UnicodeBlock("BOPOMOFO");
1084 
1085         /**
1086          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1087          * @since 1.2
1088          */
1089         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1090             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1091                              "HANGUL COMPATIBILITY JAMO",
1092                              "HANGULCOMPATIBILITYJAMO");
1093 
1094         /**
1095          * Constant for the "Kanbun" Unicode character block.
1096          * @since 1.2
1097          */
1098         public static final UnicodeBlock KANBUN =
1099             new UnicodeBlock("KANBUN");
1100 
1101         /**
1102          * Constant for the "Enclosed CJK Letters and Months" Unicode character
              * block.
1103          * @since 1.2
1104          */
1105         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1106             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1107                              "ENCLOSED CJK LETTERS AND MONTHS",
1108                              "ENCLOSEDCJKLETTERSANDMONTHS");
1109 
1110         /**
1111          * Constant for the "CJK Compatibility" Unicode character block.
1112          * @since 1.2
1113          */
1114         public static final UnicodeBlock CJK_COMPATIBILITY =
1115             new UnicodeBlock("CJK_COMPATIBILITY",
1116                              "CJK COMPATIBILITY",
1117                              "CJKCOMPATIBILITY");
1118 
1119         /**
1120          * Constant for the "CJK Unified Ideographs" Unicode character block.
1121          * @since 1.2
1122          */
1123         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1124             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1125                              "CJK UNIFIED IDEOGRAPHS",
1126                              "CJKUNIFIEDIDEOGRAPHS");
1127 
1128         /**
1129          * Constant for the "Hangul Syllables" Unicode character block.
1130          * @since 1.2
1131          */
1132         public static final UnicodeBlock HANGUL_SYLLABLES =
1133             new UnicodeBlock("HANGUL_SYLLABLES",
1134                              "HANGUL SYLLABLES",
1135                              "HANGULSYLLABLES");
1136 
1137         /**
1138          * Constant for the "Private Use Area" Unicode character block.
1139          * @since 1.2
1140          */
1141         public static final UnicodeBlock PRIVATE_USE_AREA =
1142             new UnicodeBlock("PRIVATE_USE_AREA",
1143                              "PRIVATE USE AREA",
1144                              "PRIVATEUSEAREA");
1145 
1146         /**
1147          * Constant for the "CJK Compatibility Ideographs" Unicode character
1148          * block.
1149          * @since 1.2
1150          */
1151         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1152             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1153                              "CJK COMPATIBILITY IDEOGRAPHS",
1154                              "CJKCOMPATIBILITYIDEOGRAPHS");
1155 
1156         /**
1157          * Constant for the "Alphabetic Presentation Forms" Unicode character
              * block.
1158          * @since 1.2
1159          */
1160         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1161             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1162                              "ALPHABETIC PRESENTATION FORMS",
1163                              "ALPHABETICPRESENTATIONFORMS");
1164 
1165         /**
1166          * Constant for the "Arabic Presentation Forms-A" Unicode character
1167          * block.
1168          * @since 1.2
1169          */
1170         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1171             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
                                  // Only the first name replaces the hyphen of "Forms-A"
                                  // with an underscore; the other two spellings keep it.
1172                              "ARABIC PRESENTATION FORMS-A",
1173                              "ARABICPRESENTATIONFORMS-A");
1174 
1175         /**
1176          * Constant for the "Combining Half Marks" Unicode character block.
1177          * @since 1.2
1178          */
1179         public static final UnicodeBlock COMBINING_HALF_MARKS =
1180             new UnicodeBlock("COMBINING_HALF_MARKS",
1181                              "COMBINING HALF MARKS",
1182                              "COMBININGHALFMARKS");
1183 
1184         /**
1185          * Constant for the "CJK Compatibility Forms" Unicode character block.
1186          * @since 1.2
1187          */
1188         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1189             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1190                              "CJK COMPATIBILITY FORMS",
1191                              "CJKCOMPATIBILITYFORMS");
1192 
1193         /**
1194          * Constant for the "Small Form Variants" Unicode character block.
1195          * @since 1.2
1196          */
1197         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1198             new UnicodeBlock("SMALL_FORM_VARIANTS",
1199                              "SMALL FORM VARIANTS",
1200                              "SMALLFORMVARIANTS");
1201 
1202         /**
1203          * Constant for the "Arabic Presentation Forms-B" Unicode character
              * block.
1204          * @since 1.2
1205          */
1206         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1207             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
                                  // As with Forms-A, the hyphen is kept in the spaced and
                                  // space-free spellings.
1208                              "ARABIC PRESENTATION FORMS-B",
1209                              "ARABICPRESENTATIONFORMS-B");
1210 
1211         /**
1212          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1213          * block.
1214          * @since 1.2
1215          */
1216         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1217             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1218                              "HALFWIDTH AND FULLWIDTH FORMS",
1219                              "HALFWIDTHANDFULLWIDTHFORMS");
1220 
1221         /**
1222          * Constant for the "Specials" Unicode character block.
1223          * @since 1.2
1224          */
1225         public static final UnicodeBlock SPECIALS =
1226             new UnicodeBlock("SPECIALS");
1227 
1228         /**
              * Constant for the {@code SURROGATES_AREA} character block, which has
              * been superseded by three more specific surrogate block constants.
              *
1229          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1230          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1231          *             {@link #LOW_SURROGATES}. These new constants match
1232          *             the block definitions of the Unicode Standard.
1233          *             The {@link #of(char)} and {@link #of(int)} methods
1234          *             return the new constants, not SURROGATES_AREA.
1235          */
1236         @Deprecated
1237         public static final UnicodeBlock SURROGATES_AREA =
1238             new UnicodeBlock("SURROGATES_AREA");
1239 
1240         /**
1241          * Constant for the "Syriac" Unicode character block.
1242          * @since 1.4
1243          */
1244         public static final UnicodeBlock SYRIAC =
1245             new UnicodeBlock("SYRIAC");
1246 
1247         /**
1248          * Constant for the "Thaana" Unicode character block.
1249          * @since 1.4
1250          */
1251         public static final UnicodeBlock THAANA =
1252             new UnicodeBlock("THAANA");
1253 
1254         /**
1255          * Constant for the "Sinhala" Unicode character block.
1256          * @since 1.4
1257          */
1258         public static final UnicodeBlock SINHALA =
1259             new UnicodeBlock("SINHALA");
1260 
1261         /**
1262          * Constant for the "Myanmar" Unicode character block.
1263          * @since 1.4
1264          */
1265         public static final UnicodeBlock MYANMAR =
1266             new UnicodeBlock("MYANMAR");
1267 
1268         /**
1269          * Constant for the "Ethiopic" Unicode character block.
1270          * @since 1.4
1271          */
1272         public static final UnicodeBlock ETHIOPIC =
1273             new UnicodeBlock("ETHIOPIC");
1274 
1275         /**
1276          * Constant for the "Cherokee" Unicode character block.
1277          * @since 1.4
1278          */
1279         public static final UnicodeBlock CHEROKEE =
1280             new UnicodeBlock("CHEROKEE");
1281 
1282         /**
1283          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode
              * character block.
1284          * @since 1.4
1285          */
1286         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1287             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1288                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1289                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
1290 
1291         /**
1292          * Constant for the "Ogham" Unicode character block.
1293          * @since 1.4
1294          */
1295         public static final UnicodeBlock OGHAM =
1296             new UnicodeBlock("OGHAM");
1297 
1298         /**
1299          * Constant for the "Runic" Unicode character block.
1300          * @since 1.4
1301          */
1302         public static final UnicodeBlock RUNIC =
1303             new UnicodeBlock("RUNIC");
1304 
1305         /**
1306          * Constant for the "Khmer" Unicode character block.
1307          * @since 1.4
1308          */
1309         public static final UnicodeBlock KHMER =
1310             new UnicodeBlock("KHMER");
1311 
1312         /**
1313          * Constant for the "Mongolian" Unicode character block.
1314          * @since 1.4
1315          */
1316         public static final UnicodeBlock MONGOLIAN =
1317             new UnicodeBlock("MONGOLIAN");
1318 
1319         /**
1320          * Constant for the "Braille Patterns" Unicode character block.
1321          * @since 1.4
1322          */
1323         public static final UnicodeBlock BRAILLE_PATTERNS =
1324             new UnicodeBlock("BRAILLE_PATTERNS",
1325                              "BRAILLE PATTERNS",
1326                              "BRAILLEPATTERNS");
1327 
1328         /**
1329          * Constant for the "CJK Radicals Supplement" Unicode character block.
1330          * @since 1.4
1331          */
1332         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1333             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1334                              "CJK RADICALS SUPPLEMENT",
1335                              "CJKRADICALSSUPPLEMENT");
1336 
1337         /**
1338          * Constant for the "Kangxi Radicals" Unicode character block.
1339          * @since 1.4
1340          */
1341         public static final UnicodeBlock KANGXI_RADICALS =
1342             new UnicodeBlock("KANGXI_RADICALS",
1343                              "KANGXI RADICALS",
1344                              "KANGXIRADICALS");
1345 
1346         /**
1347          * Constant for the "Ideographic Description Characters" Unicode
              * character block.
1348          * @since 1.4
1349          */
1350         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1351             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1352                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1353                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1354 
1355         /**
1356          * Constant for the "Bopomofo Extended" Unicode character block.
1357          * @since 1.4
1358          */
1359         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1360             new UnicodeBlock("BOPOMOFO_EXTENDED",
1361                              "BOPOMOFO EXTENDED",
1362                              "BOPOMOFOEXTENDED");
1363 
1364         /**
1365          * Constant for the "CJK Unified Ideographs Extension A" Unicode
              * character block.
1366          * @since 1.4
1367          */
1368         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1369             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1370                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1371                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1372 
1373         /**
1374          * Constant for the "Yi Syllables" Unicode character block.
1375          * @since 1.4
1376          */
1377         public static final UnicodeBlock YI_SYLLABLES =
1378             new UnicodeBlock("YI_SYLLABLES",
1379                              "YI SYLLABLES",
1380                              "YISYLLABLES");
1381 
1382         /**
1383          * Constant for the "Yi Radicals" Unicode character block.
1384          * @since 1.4
1385          */
1386         public static final UnicodeBlock YI_RADICALS =
1387             new UnicodeBlock("YI_RADICALS",
1388                              "YI RADICALS",
1389                              "YIRADICALS");
1390 
1391         /**
1392          * Constant for the "Cyrillic Supplementary" Unicode character block.
              * <p>
              * The name "Cyrillic Supplement" is also supplied as an alternative
              * name for this block.
1393          * @since 1.5
1394          */
1395         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1396             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1397                              "CYRILLIC SUPPLEMENTARY",
1398                              "CYRILLICSUPPLEMENTARY",
                                  // Second name pair ("Supplement" rather than
                                  // "Supplementary"), in spaced and space-free form.
1399                              "CYRILLIC SUPPLEMENT",
1400                              "CYRILLICSUPPLEMENT");
1401 
1402         /**
1403          * Constant for the "Tagalog" Unicode character block.
1404          * @since 1.5
1405          */
1406         public static final UnicodeBlock TAGALOG =
1407             new UnicodeBlock("TAGALOG");
1408 
1409         /**
1410          * Constant for the "Hanunoo" Unicode character block.
1411          * @since 1.5
1412          */
1413         public static final UnicodeBlock HANUNOO =
1414             new UnicodeBlock("HANUNOO");
1415 
1416         /**
1417          * Constant for the "Buhid" Unicode character block.
1418          * @since 1.5
1419          */
1420         public static final UnicodeBlock BUHID =
1421             new UnicodeBlock("BUHID");
1422 
1423         /**
1424          * Constant for the "Tagbanwa" Unicode character block.
1425          * @since 1.5
1426          */
1427         public static final UnicodeBlock TAGBANWA =
1428             new UnicodeBlock("TAGBANWA");
1429 
1430         /**
1431          * Constant for the "Limbu" Unicode character block.
1432          * @since 1.5
1433          */
1434         public static final UnicodeBlock LIMBU =
1435             new UnicodeBlock("LIMBU");
1436 
1437         /**
1438          * Constant for the "Tai Le" Unicode character block.
1439          * @since 1.5
1440          */
1441         public static final UnicodeBlock TAI_LE =
1442             new UnicodeBlock("TAI_LE",
1443                              "TAI LE",
1444                              "TAILE");
1445 
1446         /**
1447          * Constant for the "Khmer Symbols" Unicode character block.
1448          * @since 1.5
1449          */
1450         public static final UnicodeBlock KHMER_SYMBOLS =
1451             new UnicodeBlock("KHMER_SYMBOLS",
1452                              "KHMER SYMBOLS",
1453                              "KHMERSYMBOLS");
1454 
1455         /**
1456          * Constant for the "Phonetic Extensions" Unicode character block.
1457          * @since 1.5
1458          */
1459         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1460             new UnicodeBlock("PHONETIC_EXTENSIONS",
1461                              "PHONETIC EXTENSIONS",
1462                              "PHONETICEXTENSIONS");
1463 
1464         /**
1465          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode
              * character block.
1466          * @since 1.5
1467          */
1468         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1469             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
                                  // Only the first name replaces the hyphen of "Symbols-A"
                                  // with an underscore; the other two spellings keep it.
1470                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1471                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1472 
1473         /**
1474          * Constant for the "Supplemental Arrows-A" Unicode character block.
1475          * @since 1.5
1476          */
1477         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1478             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
                                  // Hyphen of "Arrows-A" is kept in both remaining spellings.
1479                              "SUPPLEMENTAL ARROWS-A",
1480                              "SUPPLEMENTALARROWS-A");
1481 
1482         /**
1483          * Constant for the "Supplemental Arrows-B" Unicode character block.
1484          * @since 1.5
1485          */
1486         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1487             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
                                  // Hyphen of "Arrows-B" is kept in both remaining spellings.
1488                              "SUPPLEMENTAL ARROWS-B",
1489                              "SUPPLEMENTALARROWS-B");
1490 
1491         /**
1492          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1493          * character block.
1494          * @since 1.5
1495          */
1496         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1497             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
                                  // Hyphen of "Symbols-B" is kept in both remaining spellings.
1498                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1499                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1500 
1501         /**
1502          * Constant for the "Supplemental Mathematical Operators" Unicode
1503          * character block.
1504          * @since 1.5
1505          */
1506         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1507             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1508                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1509                              "SUPPLEMENTALMATHEMATICALOPERATORS");
1510 
1511         /**
1512          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1513          * block.
1514          * @since 1.5
1515          */
1516         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1517             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1518                              "MISCELLANEOUS SYMBOLS AND ARROWS",
1519                              "MISCELLANEOUSSYMBOLSANDARROWS");
1520 
1521         /**
1522          * Constant for the "Katakana Phonetic Extensions" Unicode character
1523          * block.
1524          * @since 1.5
1525          */
1526         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1527             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1528                              "KATAKANA PHONETIC EXTENSIONS",
1529                              "KATAKANAPHONETICEXTENSIONS");
1530 
1531         /**
1532          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1533          * @since 1.5
1534          */
1535         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1536             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1537                              "YIJING HEXAGRAM SYMBOLS",
1538                              "YIJINGHEXAGRAMSYMBOLS");
1539 
1540         /**
1541          * Constant for the "Variation Selectors" Unicode character block.
1542          * @since 1.5
1543          */
1544         public static final UnicodeBlock VARIATION_SELECTORS =
1545             new UnicodeBlock("VARIATION_SELECTORS",
1546                              "VARIATION SELECTORS",
1547                              "VARIATIONSELECTORS");
1548 
1549         /**
1550          * Constant for the "Linear B Syllabary" Unicode character block.
1551          * @since 1.5
1552          */
1553         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1554             new UnicodeBlock("LINEAR_B_SYLLABARY",
1555                              "LINEAR B SYLLABARY",
1556                              "LINEARBSYLLABARY");
1557 
1558         /**
1559          * Constant for the "Linear B Ideograms" Unicode character block.
1560          * @since 1.5
1561          */
1562         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1563             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1564                              "LINEAR B IDEOGRAMS",
1565                              "LINEARBIDEOGRAMS");
1566 
1567         /**
1568          * Constant for the "Aegean Numbers" Unicode character block.
1569          * @since 1.5
1570          */
1571         public static final UnicodeBlock AEGEAN_NUMBERS =
1572             new UnicodeBlock("AEGEAN_NUMBERS",
1573                              "AEGEAN NUMBERS",
1574                              "AEGEANNUMBERS");
1575 
1576         /**
1577          * Constant for the "Old Italic" Unicode character block.
1578          * @since 1.5
1579          */
1580         public static final UnicodeBlock OLD_ITALIC =
1581             new UnicodeBlock("OLD_ITALIC",
1582                              "OLD ITALIC",
1583                              "OLDITALIC");
1584 
1585         /**
1586          * Constant for the "Gothic" Unicode character block.
1587          * @since 1.5
1588          */
1589         public static final UnicodeBlock GOTHIC =
1590             new UnicodeBlock("GOTHIC");
1591 
1592         /**
1593          * Constant for the "Ugaritic" Unicode character block.
1594          * @since 1.5
1595          */
1596         public static final UnicodeBlock UGARITIC =
1597             new UnicodeBlock("UGARITIC");
1598 
1599         /**
1600          * Constant for the "Deseret" Unicode character block.
1601          * @since 1.5
1602          */
1603         public static final UnicodeBlock DESERET =
1604             new UnicodeBlock("DESERET");
1605 
1606         /**
1607          * Constant for the "Shavian" Unicode character block.
1608          * @since 1.5
1609          */
1610         public static final UnicodeBlock SHAVIAN =
1611             new UnicodeBlock("SHAVIAN");
1612 
1613         /**
1614          * Constant for the "Osmanya" Unicode character block.
1615          * @since 1.5
1616          */
1617         public static final UnicodeBlock OSMANYA =
1618             new UnicodeBlock("OSMANYA");
1619 
1620         /**
1621          * Constant for the "Cypriot Syllabary" Unicode character block.
1622          * @since 1.5
1623          */
1624         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1625             new UnicodeBlock("CYPRIOT_SYLLABARY",
1626                              "CYPRIOT SYLLABARY",
1627                              "CYPRIOTSYLLABARY");
1628 
1629         /**
1630          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1631          * @since 1.5
1632          */
1633         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1634             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1635                              "BYZANTINE MUSICAL SYMBOLS",
1636                              "BYZANTINEMUSICALSYMBOLS");
1637 
1638         /**
1639          * Constant for the "Musical Symbols" Unicode character block.
1640          * @since 1.5
1641          */
1642         public static final UnicodeBlock MUSICAL_SYMBOLS =
1643             new UnicodeBlock("MUSICAL_SYMBOLS",
1644                              "MUSICAL SYMBOLS",
1645                              "MUSICALSYMBOLS");
1646 
1647         /**
1648          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1649          * @since 1.5
1650          */
1651         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1652             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1653                              "TAI XUAN JING SYMBOLS",
1654                              "TAIXUANJINGSYMBOLS");
1655 
1656         /**
1657          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1658          * character block.
1659          * @since 1.5
1660          */
1661         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1662             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1663                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1664                              "MATHEMATICALALPHANUMERICSYMBOLS");
1665 
1666         /**
1667          * Constant for the "CJK Unified Ideographs Extension B" Unicode
1668          * character block.
1669          * @since 1.5
1670          */
1671         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1672             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1673                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1674                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1675 
1676         /**
1677          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1678          * @since 1.5
1679          */
1680         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1681             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1682                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1683                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1684 
1685         /**
1686          * Constant for the "Tags" Unicode character block.
1687          * @since 1.5
1688          */
1689         public static final UnicodeBlock TAGS =
1690             new UnicodeBlock("TAGS");
1691 
1692         /**
1693          * Constant for the "Variation Selectors Supplement" Unicode character
1694          * block.
1695          * @since 1.5
1696          */
1697         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1698             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1699                              "VARIATION SELECTORS SUPPLEMENT",
1700                              "VARIATIONSELECTORSSUPPLEMENT");
1701 
1702         /**
1703          * Constant for the "Supplementary Private Use Area-A" Unicode character
1704          * block.
1705          * @since 1.5
1706          */
1707         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1708             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1709                              "SUPPLEMENTARY PRIVATE USE AREA-A",
1710                              "SUPPLEMENTARYPRIVATEUSEAREA-A");
1711 
1712         /**
1713          * Constant for the "Supplementary Private Use Area-B" Unicode character
1714          * block.
1715          * @since 1.5
1716          */
1717         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1718             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1719                              "SUPPLEMENTARY PRIVATE USE AREA-B",
1720                              "SUPPLEMENTARYPRIVATEUSEAREA-B");
1721 
1722         /**
1723          * Constant for the "High Surrogates" Unicode character block.
1724          * This block represents codepoint values in the high surrogate
1725          * range: U+D800 through U+DB7F
1726          *
1727          * @since 1.5
1728          */
1729         public static final UnicodeBlock HIGH_SURROGATES =
1730             new UnicodeBlock("HIGH_SURROGATES",
1731                              "HIGH SURROGATES",
1732                              "HIGHSURROGATES");
1733 
1734         /**
1735          * Constant for the "High Private Use Surrogates" Unicode character
1736          * block.
1737          * This block represents codepoint values in the private use high
1738          * surrogate range: U+DB80 through U+DBFF
1739          *
1740          * @since 1.5
1741          */
1742         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1743             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1744                              "HIGH PRIVATE USE SURROGATES",
1745                              "HIGHPRIVATEUSESURROGATES");
1746 
1747         /**
1748          * Constant for the "Low Surrogates" Unicode character block.
1749          * This block represents codepoint values in the low surrogate
1750          * range: U+DC00 through U+DFFF
1751          *
1752          * @since 1.5
1753          */
1754         public static final UnicodeBlock LOW_SURROGATES =
1755             new UnicodeBlock("LOW_SURROGATES",
1756                              "LOW SURROGATES",
1757                              "LOWSURROGATES");
1758 
1759         /**
1760          * Constant for the "Arabic Supplement" Unicode character block.
1761          * @since 1.7
1762          */
1763         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1764             new UnicodeBlock("ARABIC_SUPPLEMENT",
1765                              "ARABIC SUPPLEMENT",
1766                              "ARABICSUPPLEMENT");
1767 
1768         /**
1769          * Constant for the "NKo" Unicode character block.
1770          * @since 1.7
1771          */
1772         public static final UnicodeBlock NKO =
1773             new UnicodeBlock("NKO");
1774 
1775         /**
1776          * Constant for the "Samaritan" Unicode character block.
1777          * @since 1.7
1778          */
1779         public static final UnicodeBlock SAMARITAN =
1780             new UnicodeBlock("SAMARITAN");
1781 
1782         /**
1783          * Constant for the "Mandaic" Unicode character block.
1784          * @since 1.7
1785          */
1786         public static final UnicodeBlock MANDAIC =
1787             new UnicodeBlock("MANDAIC");
1788 
1789         /**
1790          * Constant for the "Ethiopic Supplement" Unicode character block.
1791          * @since 1.7
1792          */
1793         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1794             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1795                              "ETHIOPIC SUPPLEMENT",
1796                              "ETHIOPICSUPPLEMENT");
1797 
1798         /**
1799          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1800          * Unicode character block.
1801          * @since 1.7
1802          */
1803         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1804             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1805                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1806                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1807 
1808         /**
1809          * Constant for the "New Tai Lue" Unicode character block.
1810          * @since 1.7
1811          */
1812         public static final UnicodeBlock NEW_TAI_LUE =
1813             new UnicodeBlock("NEW_TAI_LUE",
1814                              "NEW TAI LUE",
1815                              "NEWTAILUE");
1816 
1817         /**
1818          * Constant for the "Buginese" Unicode character block.
1819          * @since 1.7
1820          */
1821         public static final UnicodeBlock BUGINESE =
1822             new UnicodeBlock("BUGINESE");
1823 
1824         /**
1825          * Constant for the "Tai Tham" Unicode character block.
1826          * @since 1.7
1827          */
1828         public static final UnicodeBlock TAI_THAM =
1829             new UnicodeBlock("TAI_THAM",
1830                              "TAI THAM",
1831                              "TAITHAM");
1832 
1833         /**
1834          * Constant for the "Balinese" Unicode character block.
1835          * @since 1.7
1836          */
1837         public static final UnicodeBlock BALINESE =
1838             new UnicodeBlock("BALINESE");
1839 
1840         /**
1841          * Constant for the "Sundanese" Unicode character block.
1842          * @since 1.7
1843          */
1844         public static final UnicodeBlock SUNDANESE =
1845             new UnicodeBlock("SUNDANESE");
1846 
1847         /**
1848          * Constant for the "Batak" Unicode character block.
1849          * @since 1.7
1850          */
1851         public static final UnicodeBlock BATAK =
1852             new UnicodeBlock("BATAK");
1853 
1854         /**
1855          * Constant for the "Lepcha" Unicode character block.
1856          * @since 1.7
1857          */
1858         public static final UnicodeBlock LEPCHA =
1859             new UnicodeBlock("LEPCHA");
1860 
1861         /**
1862          * Constant for the "Ol Chiki" Unicode character block.
1863          * @since 1.7
1864          */
1865         public static final UnicodeBlock OL_CHIKI =
1866             new UnicodeBlock("OL_CHIKI",
1867                              "OL CHIKI",
1868                              "OLCHIKI");
1869 
1870         /**
1871          * Constant for the "Vedic Extensions" Unicode character block.
1872          * @since 1.7
1873          */
1874         public static final UnicodeBlock VEDIC_EXTENSIONS =
1875             new UnicodeBlock("VEDIC_EXTENSIONS",
1876                              "VEDIC EXTENSIONS",
1877                              "VEDICEXTENSIONS");
1878 
1879         /**
1880          * Constant for the "Phonetic Extensions Supplement" Unicode character
1881          * block.
1882          * @since 1.7
1883          */
1884         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1885             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1886                              "PHONETIC EXTENSIONS SUPPLEMENT",
1887                              "PHONETICEXTENSIONSSUPPLEMENT");
1888 
1889         /**
1890          * Constant for the "Combining Diacritical Marks Supplement" Unicode
1891          * character block.
1892          * @since 1.7
1893          */
1894         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1895             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1896                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1897                              "COMBININGDIACRITICALMARKSSUPPLEMENT");
1898 
1899         /**
1900          * Constant for the "Glagolitic" Unicode character block.
1901          * @since 1.7
1902          */
1903         public static final UnicodeBlock GLAGOLITIC =
1904             new UnicodeBlock("GLAGOLITIC");
1905 
1906         /**
1907          * Constant for the "Latin Extended-C" Unicode character block.
1908          * @since 1.7
1909          */
1910         public static final UnicodeBlock LATIN_EXTENDED_C =
1911             new UnicodeBlock("LATIN_EXTENDED_C",
1912                              "LATIN EXTENDED-C",
1913                              "LATINEXTENDED-C");
1914 
1915         /**
1916          * Constant for the "Coptic" Unicode character block.
1917          * @since 1.7
1918          */
1919         public static final UnicodeBlock COPTIC =
1920             new UnicodeBlock("COPTIC");
1921 
1922         /**
1923          * Constant for the "Georgian Supplement" Unicode character block.
1924          * @since 1.7
1925          */
1926         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1927             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1928                              "GEORGIAN SUPPLEMENT",
1929                              "GEORGIANSUPPLEMENT");
1930 
1931         /**
1932          * Constant for the "Tifinagh" Unicode character block.
1933          * @since 1.7
1934          */
1935         public static final UnicodeBlock TIFINAGH =
1936             new UnicodeBlock("TIFINAGH");
1937 
1938         /**
1939          * Constant for the "Ethiopic Extended" Unicode character block.
1940          * @since 1.7
1941          */
1942         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1943             new UnicodeBlock("ETHIOPIC_EXTENDED",
1944                              "ETHIOPIC EXTENDED",
1945                              "ETHIOPICEXTENDED");
1946 
1947         /**
1948          * Constant for the "Cyrillic Extended-A" Unicode character block.
1949          * @since 1.7
1950          */
1951         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1952             new UnicodeBlock("CYRILLIC_EXTENDED_A",
1953                              "CYRILLIC EXTENDED-A",
1954                              "CYRILLICEXTENDED-A");
1955 
1956         /**
1957          * Constant for the "Supplemental Punctuation" Unicode character block.
1958          * @since 1.7
1959          */
1960         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1961             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1962                              "SUPPLEMENTAL PUNCTUATION",
1963                              "SUPPLEMENTALPUNCTUATION");
1964 
1965         /**
1966          * Constant for the "CJK Strokes" Unicode character block.
1967          * @since 1.7
1968          */
1969         public static final UnicodeBlock CJK_STROKES =
1970             new UnicodeBlock("CJK_STROKES",
1971                              "CJK STROKES",
1972                              "CJKSTROKES");
1973 
1974         /**
1975          * Constant for the "Lisu" Unicode character block.
1976          * @since 1.7
1977          */
1978         public static final UnicodeBlock LISU =
1979             new UnicodeBlock("LISU");
1980 
1981         /**
1982          * Constant for the "Vai" Unicode character block.
1983          * @since 1.7
1984          */
1985         public static final UnicodeBlock VAI =
1986             new UnicodeBlock("VAI");
1987 
1988         /**
1989          * Constant for the "Cyrillic Extended-B" Unicode character block.
1990          * @since 1.7
1991          */
1992         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
1993             new UnicodeBlock("CYRILLIC_EXTENDED_B",
1994                              "CYRILLIC EXTENDED-B",
1995                              "CYRILLICEXTENDED-B");
1996 
1997         /**
1998          * Constant for the "Bamum" Unicode character block.
1999          * @since 1.7
2000          */
2001         public static final UnicodeBlock BAMUM =
2002             new UnicodeBlock("BAMUM");
2003 
2004         /**
2005          * Constant for the "Modifier Tone Letters" Unicode character block.
2006          * @since 1.7
2007          */
2008         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2009             new UnicodeBlock("MODIFIER_TONE_LETTERS",
2010                              "MODIFIER TONE LETTERS",
2011                              "MODIFIERTONELETTERS");
2012 
2013         /**
2014          * Constant for the "Latin Extended-D" Unicode character block.
2015          * @since 1.7
2016          */
2017         public static final UnicodeBlock LATIN_EXTENDED_D =
2018             new UnicodeBlock("LATIN_EXTENDED_D",
2019                              "LATIN EXTENDED-D",
2020                              "LATINEXTENDED-D");
2021 
2022         /**
2023          * Constant for the "Syloti Nagri" Unicode character block.
2024          * @since 1.7
2025          */
2026         public static final UnicodeBlock SYLOTI_NAGRI =
2027             new UnicodeBlock("SYLOTI_NAGRI",
2028                              "SYLOTI NAGRI",
2029                              "SYLOTINAGRI");
2030 
2031         /**
2032          * Constant for the "Common Indic Number Forms" Unicode character block.
2033          * @since 1.7
2034          */
2035         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2036             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2037                              "COMMON INDIC NUMBER FORMS",
2038                              "COMMONINDICNUMBERFORMS");
2039 
2040         /**
2041          * Constant for the "Phags-pa" Unicode character block.
2042          * @since 1.7
2043          */
2044         public static final UnicodeBlock PHAGS_PA =
2045             new UnicodeBlock("PHAGS_PA",
2046                              "PHAGS-PA");
2047 
2048         /**
2049          * Constant for the "Saurashtra" Unicode character block.
2050          * @since 1.7
2051          */
2052         public static final UnicodeBlock SAURASHTRA =
2053             new UnicodeBlock("SAURASHTRA");
2054 
2055         /**
2056          * Constant for the "Devanagari Extended" Unicode character block.
2057          * @since 1.7
2058          */
2059         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2060             new UnicodeBlock("DEVANAGARI_EXTENDED",
2061                              "DEVANAGARI EXTENDED",
2062                              "DEVANAGARIEXTENDED");
2063 
2064         /**
2065          * Constant for the "Kayah Li" Unicode character block.
2066          * @since 1.7
2067          */
2068         public static final UnicodeBlock KAYAH_LI =
2069             new UnicodeBlock("KAYAH_LI",
2070                              "KAYAH LI",
2071                              "KAYAHLI");
2072 
2073         /**
2074          * Constant for the "Rejang" Unicode character block.
2075          * @since 1.7
2076          */
2077         public static final UnicodeBlock REJANG =
2078             new UnicodeBlock("REJANG");
2079 
2080         /**
2081          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2082          * @since 1.7
2083          */
2084         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2085             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2086                              "HANGUL JAMO EXTENDED-A",
2087                              "HANGULJAMOEXTENDED-A");
2088 
2089         /**
2090          * Constant for the "Javanese" Unicode character block.
2091          * @since 1.7
2092          */
2093         public static final UnicodeBlock JAVANESE =
2094             new UnicodeBlock("JAVANESE");
2095 
2096         /**
2097          * Constant for the "Cham" Unicode character block.
2098          * @since 1.7
2099          */
2100         public static final UnicodeBlock CHAM =
2101             new UnicodeBlock("CHAM");
2102 
2103         /**
2104          * Constant for the "Myanmar Extended-A" Unicode character block.
2105          * @since 1.7
2106          */
2107         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2108             new UnicodeBlock("MYANMAR_EXTENDED_A",
2109                              "MYANMAR EXTENDED-A",
2110                              "MYANMAREXTENDED-A");
2111 
2112         /**
2113          * Constant for the "Tai Viet" Unicode character block.
2114          * @since 1.7
2115          */
2116         public static final UnicodeBlock TAI_VIET =
2117             new UnicodeBlock("TAI_VIET",
2118                              "TAI VIET",
2119                              "TAIVIET");
2120 
2121         /**
2122          * Constant for the "Ethiopic Extended-A" Unicode character block.
2123          * @since 1.7
2124          */
2125         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2126             new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2127                              "ETHIOPIC EXTENDED-A",
2128                              "ETHIOPICEXTENDED-A");
2129 
2130         /**
2131          * Constant for the "Meetei Mayek" Unicode character block.
2132          * @since 1.7
2133          */
2134         public static final UnicodeBlock MEETEI_MAYEK =
2135             new UnicodeBlock("MEETEI_MAYEK",
2136                              "MEETEI MAYEK",
2137                              "MEETEIMAYEK");
2138 
2139         /**
2140          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2141          * @since 1.7
2142          */
2143         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2144             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2145                              "HANGUL JAMO EXTENDED-B",
2146                              "HANGULJAMOEXTENDED-B");
2147 
2148         /**
2149          * Constant for the "Vertical Forms" Unicode character block.
2150          * @since 1.7
2151          */
2152         public static final UnicodeBlock VERTICAL_FORMS =
2153             new UnicodeBlock("VERTICAL_FORMS",
2154                              "VERTICAL FORMS",
2155                              "VERTICALFORMS");
2156 
2157         /**
2158          * Constant for the "Ancient Greek Numbers" Unicode character block.
2159          * @since 1.7
2160          */
2161         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2162             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2163                              "ANCIENT GREEK NUMBERS",
2164                              "ANCIENTGREEKNUMBERS");
2165 
2166         /**
2167          * Constant for the "Ancient Symbols" Unicode character block.
2168          * @since 1.7
2169          */
2170         public static final UnicodeBlock ANCIENT_SYMBOLS =
2171             new UnicodeBlock("ANCIENT_SYMBOLS",
2172                              "ANCIENT SYMBOLS",
2173                              "ANCIENTSYMBOLS");
2174 
2175         /**
2176          * Constant for the "Phaistos Disc" Unicode character block.
2177          * @since 1.7
2178          */
2179         public static final UnicodeBlock PHAISTOS_DISC =
2180             new UnicodeBlock("PHAISTOS_DISC",
2181                              "PHAISTOS DISC",
2182                              "PHAISTOSDISC");
2183 
2184         /**
2185          * Constant for the "Lycian" Unicode character block.
2186          * @since 1.7
2187          */
2188         public static final UnicodeBlock LYCIAN =
2189             new UnicodeBlock("LYCIAN");
2190 
2191         /**
2192          * Constant for the "Carian" Unicode character block.
2193          * @since 1.7
2194          */
2195         public static final UnicodeBlock CARIAN =
2196             new UnicodeBlock("CARIAN");
2197 
2198         /**
2199          * Constant for the "Old Persian" Unicode character block.
2200          * @since 1.7
2201          */
2202         public static final UnicodeBlock OLD_PERSIAN =
2203             new UnicodeBlock("OLD_PERSIAN",
2204                              "OLD PERSIAN",
2205                              "OLDPERSIAN");
2206 
2207         /**
2208          * Constant for the "Imperial Aramaic" Unicode character block.
2209          * @since 1.7
2210          */
2211         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2212             new UnicodeBlock("IMPERIAL_ARAMAIC",
2213                              "IMPERIAL ARAMAIC",
2214                              "IMPERIALARAMAIC");
2215 
2216         /**
2217          * Constant for the "Phoenician" Unicode character block.
2218          * @since 1.7
2219          */
2220         public static final UnicodeBlock PHOENICIAN =
2221             new UnicodeBlock("PHOENICIAN");
2222 
2223         /**
2224          * Constant for the "Lydian" Unicode character block.
2225          * @since 1.7
2226          */
2227         public static final UnicodeBlock LYDIAN =
2228             new UnicodeBlock("LYDIAN");
2229 
2230         /**
2231          * Constant for the "Kharoshthi" Unicode character block.
2232          * @since 1.7
2233          */
2234         public static final UnicodeBlock KHAROSHTHI =
2235             new UnicodeBlock("KHAROSHTHI");
2236 
2237         /**
2238          * Constant for the "Old South Arabian" Unicode character block.
2239          * @since 1.7
2240          */
2241         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2242             new UnicodeBlock("OLD_SOUTH_ARABIAN",
2243                              "OLD SOUTH ARABIAN",
2244                              "OLDSOUTHARABIAN");
2245 
2246         /**
2247          * Constant for the "Avestan" Unicode character block.
2248          * @since 1.7
2249          */
2250         public static final UnicodeBlock AVESTAN =
2251             new UnicodeBlock("AVESTAN");
2252 
2253         /**
2254          * Constant for the "Inscriptional Parthian" Unicode character block.
2255          * @since 1.7
2256          */
2257         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2258             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2259                              "INSCRIPTIONAL PARTHIAN",
2260                              "INSCRIPTIONALPARTHIAN");
2261 
2262         /**
2263          * Constant for the "Inscriptional Pahlavi" Unicode character block.
2264          * @since 1.7
2265          */
2266         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2267             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2268                              "INSCRIPTIONAL PAHLAVI",
2269                              "INSCRIPTIONALPAHLAVI");
2270 
2271         /**
2272          * Constant for the "Old Turkic" Unicode character block.
2273          * @since 1.7
2274          */
2275         public static final UnicodeBlock OLD_TURKIC =
2276             new UnicodeBlock("OLD_TURKIC",
2277                              "OLD TURKIC",
2278                              "OLDTURKIC");
2279 
2280         /**
2281          * Constant for the "Rumi Numeral Symbols" Unicode character block.
2282          * @since 1.7
2283          */
2284         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2285             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2286                              "RUMI NUMERAL SYMBOLS",
2287                              "RUMINUMERALSYMBOLS");
2288 
2289         /**
2290          * Constant for the "Brahmi" Unicode character block.
2291          * @since 1.7
2292          */
2293         public static final UnicodeBlock BRAHMI =
2294             new UnicodeBlock("BRAHMI");
2295 
2296         /**
2297          * Constant for the "Kaithi" Unicode character block.
2298          * @since 1.7
2299          */
2300         public static final UnicodeBlock KAITHI =
2301             new UnicodeBlock("KAITHI");
2302 
2303         /**
2304          * Constant for the "Cuneiform" Unicode character block.
2305          * @since 1.7
2306          */
2307         public static final UnicodeBlock CUNEIFORM =
2308             new UnicodeBlock("CUNEIFORM");
2309 
2310         /**
2311          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2312          * character block.
2313          * @since 1.7
2314          */
2315         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2316             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2317                              "CUNEIFORM NUMBERS AND PUNCTUATION",
2318                              "CUNEIFORMNUMBERSANDPUNCTUATION");
2319 
2320         /**
2321          * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2322          * @since 1.7
2323          */
2324         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2325             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2326                              "EGYPTIAN HIEROGLYPHS",
2327                              "EGYPTIANHIEROGLYPHS");
2328 
2329         /**
2330          * Constant for the "Bamum Supplement" Unicode character block.
2331          * @since 1.7
2332          */
2333         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2334             new UnicodeBlock("BAMUM_SUPPLEMENT",
2335                              "BAMUM SUPPLEMENT",
2336                              "BAMUMSUPPLEMENT");
2337 
2338         /**
2339          * Constant for the "Kana Supplement" Unicode character block.
2340          * @since 1.7
2341          */
2342         public static final UnicodeBlock KANA_SUPPLEMENT =
2343             new UnicodeBlock("KANA_SUPPLEMENT",
2344                              "KANA SUPPLEMENT",
2345                              "KANASUPPLEMENT");
2346 
2347         /**
2348          * Constant for the "Ancient Greek Musical Notation" Unicode character
2349          * block.
2350          * @since 1.7
2351          */
2352         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2353             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2354                              "ANCIENT GREEK MUSICAL NOTATION",
2355                              "ANCIENTGREEKMUSICALNOTATION");
2356 
2357         /**
2358          * Constant for the "Counting Rod Numerals" Unicode character block.
2359          * @since 1.7
2360          */
2361         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2362             new UnicodeBlock("COUNTING_ROD_NUMERALS",
2363                              "COUNTING ROD NUMERALS",
2364                              "COUNTINGRODNUMERALS");
2365 
2366         /**
2367          * Constant for the "Mahjong Tiles" Unicode character block.
2368          * @since 1.7
2369          */
2370         public static final UnicodeBlock MAHJONG_TILES =
2371             new UnicodeBlock("MAHJONG_TILES",
2372                              "MAHJONG TILES",
2373                              "MAHJONGTILES");
2374 
2375         /**
2376          * Constant for the "Domino Tiles" Unicode character block.
2377          * @since 1.7
2378          */
2379         public static final UnicodeBlock DOMINO_TILES =
2380             new UnicodeBlock("DOMINO_TILES",
2381                              "DOMINO TILES",
2382                              "DOMINOTILES");
2383 
2384         /**
2385          * Constant for the "Playing Cards" Unicode character block.
2386          * @since 1.7
2387          */
2388         public static final UnicodeBlock PLAYING_CARDS =
2389             new UnicodeBlock("PLAYING_CARDS",
2390                              "PLAYING CARDS",
2391                              "PLAYINGCARDS");
2392 
2393         /**
2394          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2395          * block.
2396          * @since 1.7
2397          */
2398         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2399             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2400                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2401                              "ENCLOSEDALPHANUMERICSUPPLEMENT");
2402 
2403         /**
2404          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2405          * block.
2406          * @since 1.7
2407          */
2408         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2409             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2410                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2411                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2412 
2413         /**
2414          * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2415          * character block.
2416          * @since 1.7
2417          */
2418         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2419             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2420                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2421                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2422 
2423         /**
2424          * Constant for the "Emoticons" Unicode character block.
2425          * @since 1.7
2426          */
2427         public static final UnicodeBlock EMOTICONS =
2428             new UnicodeBlock("EMOTICONS");
2429 
2430         /**
2431          * Constant for the "Transport And Map Symbols" Unicode character block.
2432          * @since 1.7
2433          */
2434         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2435             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2436                              "TRANSPORT AND MAP SYMBOLS",
2437                              "TRANSPORTANDMAPSYMBOLS");
2438 
2439         /**
2440          * Constant for the "Alchemical Symbols" Unicode character block.
2441          * @since 1.7
2442          */
2443         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2444             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2445                              "ALCHEMICAL SYMBOLS",
2446                              "ALCHEMICALSYMBOLS");
2447 
2448         /**
2449          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2450          * character block.
2451          * @since 1.7
2452          */
2453         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2454             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2455                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2456                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2457 
2458         /**
2459          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2460          * character block.
2461          * @since 1.7
2462          */
2463         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2464             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2465                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2466                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2467 
2468         /**
2469          * Constant for the "Arabic Extended-A" Unicode character block.
2470          * @since 1.8
2471          */
2472         public static final UnicodeBlock ARABIC_EXTENDED_A =
2473             new UnicodeBlock("ARABIC_EXTENDED_A",
2474                              "ARABIC EXTENDED-A",
2475                              "ARABICEXTENDED-A");
2476 
2477         /**
2478          * Constant for the "Sundanese Supplement" Unicode character block.
2479          * @since 1.8
2480          */
2481         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2482             new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2483                              "SUNDANESE SUPPLEMENT",
2484                              "SUNDANESESUPPLEMENT");
2485 
2486         /**
2487          * Constant for the "Meetei Mayek Extensions" Unicode character block.
2488          * @since 1.8
2489          */
2490         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2491             new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2492                              "MEETEI MAYEK EXTENSIONS",
2493                              "MEETEIMAYEKEXTENSIONS");
2494 
2495         /**
2496          * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2497          * @since 1.8
2498          */
2499         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2500             new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2501                              "MEROITIC HIEROGLYPHS",
2502                              "MEROITICHIEROGLYPHS");
2503 
2504         /**
2505          * Constant for the "Meroitic Cursive" Unicode character block.
2506          * @since 1.8
2507          */
2508         public static final UnicodeBlock MEROITIC_CURSIVE =
2509             new UnicodeBlock("MEROITIC_CURSIVE",
2510                              "MEROITIC CURSIVE",
2511                              "MEROITICCURSIVE");
2512 
2513         /**
2514          * Constant for the "Sora Sompeng" Unicode character block.
2515          * @since 1.8
2516          */
2517         public static final UnicodeBlock SORA_SOMPENG =
2518             new UnicodeBlock("SORA_SOMPENG",
2519                              "SORA SOMPENG",
2520                              "SORASOMPENG");
2521 
2522         /**
2523          * Constant for the "Chakma" Unicode character block.
2524          * @since 1.8
2525          */
2526         public static final UnicodeBlock CHAKMA =
2527             new UnicodeBlock("CHAKMA");
2528 
2529         /**
2530          * Constant for the "Sharada" Unicode character block.
2531          * @since 1.8
2532          */
2533         public static final UnicodeBlock SHARADA =
2534             new UnicodeBlock("SHARADA");
2535 
2536         /**
2537          * Constant for the "Takri" Unicode character block.
2538          * @since 1.8
2539          */
2540         public static final UnicodeBlock TAKRI =
2541             new UnicodeBlock("TAKRI");
2542 
2543         /**
2544          * Constant for the "Miao" Unicode character block.
2545          * @since 1.8
2546          */
2547         public static final UnicodeBlock MIAO =
2548             new UnicodeBlock("MIAO");
2549 
2550         /**
2551          * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2552          * character block.
2553          * @since 1.8
2554          */
2555         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2556             new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2557                              "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2558                              "ARABICMATHEMATICALALPHABETICSYMBOLS");
2559 
2560         private static final int blockStarts[] = {
2561             0x0000,   // 0000..007F; Basic Latin
2562             0x0080,   // 0080..00FF; Latin-1 Supplement
2563             0x0100,   // 0100..017F; Latin Extended-A
2564             0x0180,   // 0180..024F; Latin Extended-B
2565             0x0250,   // 0250..02AF; IPA Extensions
2566             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2567             0x0300,   // 0300..036F; Combining Diacritical Marks
2568             0x0370,   // 0370..03FF; Greek and Coptic
2569             0x0400,   // 0400..04FF; Cyrillic
2570             0x0500,   // 0500..052F; Cyrillic Supplement
2571             0x0530,   // 0530..058F; Armenian
2572             0x0590,   // 0590..05FF; Hebrew
2573             0x0600,   // 0600..06FF; Arabic
2574             0x0700,   // 0700..074F; Syriac
2575             0x0750,   // 0750..077F; Arabic Supplement
2576             0x0780,   // 0780..07BF; Thaana
2577             0x07C0,   // 07C0..07FF; NKo
2578             0x0800,   // 0800..083F; Samaritan
2579             0x0840,   // 0840..085F; Mandaic
2580             0x0860,   //             unassigned
2581             0x08A0,   // 08A0..08FF; Arabic Extended-A
2582             0x0900,   // 0900..097F; Devanagari
2583             0x0980,   // 0980..09FF; Bengali
2584             0x0A00,   // 0A00..0A7F; Gurmukhi
2585             0x0A80,   // 0A80..0AFF; Gujarati
2586             0x0B00,   // 0B00..0B7F; Oriya
2587             0x0B80,   // 0B80..0BFF; Tamil
2588             0x0C00,   // 0C00..0C7F; Telugu
2589             0x0C80,   // 0C80..0CFF; Kannada
2590             0x0D00,   // 0D00..0D7F; Malayalam
2591             0x0D80,   // 0D80..0DFF; Sinhala
2592             0x0E00,   // 0E00..0E7F; Thai
2593             0x0E80,   // 0E80..0EFF; Lao
2594             0x0F00,   // 0F00..0FFF; Tibetan
2595             0x1000,   // 1000..109F; Myanmar
2596             0x10A0,   // 10A0..10FF; Georgian
2597             0x1100,   // 1100..11FF; Hangul Jamo
2598             0x1200,   // 1200..137F; Ethiopic
2599             0x1380,   // 1380..139F; Ethiopic Supplement
2600             0x13A0,   // 13A0..13FF; Cherokee
2601             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2602             0x1680,   // 1680..169F; Ogham
2603             0x16A0,   // 16A0..16FF; Runic
2604             0x1700,   // 1700..171F; Tagalog
2605             0x1720,   // 1720..173F; Hanunoo
2606             0x1740,   // 1740..175F; Buhid
2607             0x1760,   // 1760..177F; Tagbanwa
2608             0x1780,   // 1780..17FF; Khmer
2609             0x1800,   // 1800..18AF; Mongolian
2610             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2611             0x1900,   // 1900..194F; Limbu
2612             0x1950,   // 1950..197F; Tai Le
2613             0x1980,   // 1980..19DF; New Tai Lue
2614             0x19E0,   // 19E0..19FF; Khmer Symbols
2615             0x1A00,   // 1A00..1A1F; Buginese
2616             0x1A20,   // 1A20..1AAF; Tai Tham
2617             0x1AB0,   //             unassigned
2618             0x1B00,   // 1B00..1B7F; Balinese
2619             0x1B80,   // 1B80..1BBF; Sundanese
2620             0x1BC0,   // 1BC0..1BFF; Batak
2621             0x1C00,   // 1C00..1C4F; Lepcha
2622             0x1C50,   // 1C50..1C7F; Ol Chiki
2623             0x1C80,   //             unassigned
2624             0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
2625             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
2626             0x1D00,   // 1D00..1D7F; Phonetic Extensions
2627             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2628             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2629             0x1E00,   // 1E00..1EFF; Latin Extended Additional
2630             0x1F00,   // 1F00..1FFF; Greek Extended
2631             0x2000,   // 2000..206F; General Punctuation
2632             0x2070,   // 2070..209F; Superscripts and Subscripts
2633             0x20A0,   // 20A0..20CF; Currency Symbols
2634             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2635             0x2100,   // 2100..214F; Letterlike Symbols
2636             0x2150,   // 2150..218F; Number Forms
2637             0x2190,   // 2190..21FF; Arrows
2638             0x2200,   // 2200..22FF; Mathematical Operators
2639             0x2300,   // 2300..23FF; Miscellaneous Technical
2640             0x2400,   // 2400..243F; Control Pictures
2641             0x2440,   // 2440..245F; Optical Character Recognition
2642             0x2460,   // 2460..24FF; Enclosed Alphanumerics
2643             0x2500,   // 2500..257F; Box Drawing
2644             0x2580,   // 2580..259F; Block Elements
2645             0x25A0,   // 25A0..25FF; Geometric Shapes
2646             0x2600,   // 2600..26FF; Miscellaneous Symbols
2647             0x2700,   // 2700..27BF; Dingbats
2648             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2649             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2650             0x2800,   // 2800..28FF; Braille Patterns
2651             0x2900,   // 2900..297F; Supplemental Arrows-B
2652             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2653             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2654             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2655             0x2C00,   // 2C00..2C5F; Glagolitic
2656             0x2C60,   // 2C60..2C7F; Latin Extended-C
2657             0x2C80,   // 2C80..2CFF; Coptic
2658             0x2D00,   // 2D00..2D2F; Georgian Supplement
2659             0x2D30,   // 2D30..2D7F; Tifinagh
2660             0x2D80,   // 2D80..2DDF; Ethiopic Extended
2661             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2662             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2663             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2664             0x2F00,   // 2F00..2FDF; Kangxi Radicals
2665             0x2FE0,   //             unassigned
2666             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2667             0x3000,   // 3000..303F; CJK Symbols and Punctuation
2668             0x3040,   // 3040..309F; Hiragana
2669             0x30A0,   // 30A0..30FF; Katakana
2670             0x3100,   // 3100..312F; Bopomofo
2671             0x3130,   // 3130..318F; Hangul Compatibility Jamo
2672             0x3190,   // 3190..319F; Kanbun
2673             0x31A0,   // 31A0..31BF; Bopomofo Extended
2674             0x31C0,   // 31C0..31EF; CJK Strokes
2675             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2676             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2677             0x3300,   // 3300..33FF; CJK Compatibility
2678             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2679             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2680             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2681             0xA000,   // A000..A48F; Yi Syllables
2682             0xA490,   // A490..A4CF; Yi Radicals
2683             0xA4D0,   // A4D0..A4FF; Lisu
2684             0xA500,   // A500..A63F; Vai
2685             0xA640,   // A640..A69F; Cyrillic Extended-B
2686             0xA6A0,   // A6A0..A6FF; Bamum
2687             0xA700,   // A700..A71F; Modifier Tone Letters
2688             0xA720,   // A720..A7FF; Latin Extended-D
2689             0xA800,   // A800..A82F; Syloti Nagri
2690             0xA830,   // A830..A83F; Common Indic Number Forms
2691             0xA840,   // A840..A87F; Phags-pa
2692             0xA880,   // A880..A8DF; Saurashtra
2693             0xA8E0,   // A8E0..A8FF; Devanagari Extended
2694             0xA900,   // A900..A92F; Kayah Li
2695             0xA930,   // A930..A95F; Rejang
2696             0xA960,   // A960..A97F; Hangul Jamo Extended-A
2697             0xA980,   // A980..A9DF; Javanese
2698             0xA9E0,   //             unassigned
2699             0xAA00,   // AA00..AA5F; Cham
2700             0xAA60,   // AA60..AA7F; Myanmar Extended-A
2701             0xAA80,   // AA80..AADF; Tai Viet
2702             0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
2703             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
2704             0xAB30,   //             unassigned
2705             0xABC0,   // ABC0..ABFF; Meetei Mayek
2706             0xAC00,   // AC00..D7AF; Hangul Syllables
2707             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
2708             0xD800,   // D800..DB7F; High Surrogates
2709             0xDB80,   // DB80..DBFF; High Private Use Surrogates
2710             0xDC00,   // DC00..DFFF; Low Surrogates
2711             0xE000,   // E000..F8FF; Private Use Area
2712             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2713             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2714             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2715             0xFE00,   // FE00..FE0F; Variation Selectors
2716             0xFE10,   // FE10..FE1F; Vertical Forms
2717             0xFE20,   // FE20..FE2F; Combining Half Marks
2718             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2719             0xFE50,   // FE50..FE6F; Small Form Variants
2720             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2721             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2722             0xFFF0,   // FFF0..FFFF; Specials
2723             0x10000,  // 10000..1007F; Linear B Syllabary
2724             0x10080,  // 10080..100FF; Linear B Ideograms
2725             0x10100,  // 10100..1013F; Aegean Numbers
2726             0x10140,  // 10140..1018F; Ancient Greek Numbers
2727             0x10190,  // 10190..101CF; Ancient Symbols
2728             0x101D0,  // 101D0..101FF; Phaistos Disc
2729             0x10200,  //               unassigned
2730             0x10280,  // 10280..1029F; Lycian
2731             0x102A0,  // 102A0..102DF; Carian
2732             0x102E0,  //               unassigned
2733             0x10300,  // 10300..1032F; Old Italic
2734             0x10330,  // 10330..1034F; Gothic
2735             0x10350,  //               unassigned
2736             0x10380,  // 10380..1039F; Ugaritic
2737             0x103A0,  // 103A0..103DF; Old Persian
2738             0x103E0,  //               unassigned
2739             0x10400,  // 10400..1044F; Deseret
2740             0x10450,  // 10450..1047F; Shavian
2741             0x10480,  // 10480..104AF; Osmanya
2742             0x104B0,  //               unassigned
2743             0x10800,  // 10800..1083F; Cypriot Syllabary
2744             0x10840,  // 10840..1085F; Imperial Aramaic
2745             0x10860,  //               unassigned
2746             0x10900,  // 10900..1091F; Phoenician
2747             0x10920,  // 10920..1093F; Lydian
2748             0x10940,  //               unassigned
2749             0x10980,  // 10980..1099F; Meroitic Hieroglyphs
2750             0x109A0,  // 109A0..109FF; Meroitic Cursive
2751             0x10A00,  // 10A00..10A5F; Kharoshthi
2752             0x10A60,  // 10A60..10A7F; Old South Arabian
2753             0x10A80,  //               unassigned
2754             0x10B00,  // 10B00..10B3F; Avestan
2755             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
2756             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
2757             0x10B80,  //               unassigned
2758             0x10C00,  // 10C00..10C4F; Old Turkic
2759             0x10C50,  //               unassigned
2760             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
2761             0x10E80,  //               unassigned
2762             0x11000,  // 11000..1107F; Brahmi
2763             0x11080,  // 11080..110CF; Kaithi
2764             0x110D0,  // 110D0..110FF; Sora Sompeng
2765             0x11100,  // 11100..1114F; Chakma
2766             0x11150,  //               unassigned
2767             0x11180,  // 11180..111DF; Sharada
2768             0x111E0,  //               unassigned
2769             0x11680,  // 11680..116CF; Takri
2770             0x116D0,  //               unassigned
2771             0x12000,  // 12000..123FF; Cuneiform
2772             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2773             0x12480,  //               unassigned
2774             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
2775             0x13430,  //               unassigned
2776             0x16800,  // 16800..16A3F; Bamum Supplement
2777             0x16A40,  //               unassigned
2778             0x16F00,  // 16F00..16F9F; Miao
2779             0x16FA0,  //               unassigned
2780             0x1B000,  // 1B000..1B0FF; Kana Supplement
2781             0x1B100,  //               unassigned
2782             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2783             0x1D100,  // 1D100..1D1FF; Musical Symbols
2784             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2785             0x1D250,  //               unassigned
2786             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2787             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2788             0x1D380,  //               unassigned
2789             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2790             0x1D800,  //               unassigned
2791             0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
2792             0x1EF00,  //               unassigned
2793             0x1F000,  // 1F000..1F02F; Mahjong Tiles
2794             0x1F030,  // 1F030..1F09F; Domino Tiles
2795             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
2796             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2797             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
2798             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2799             0x1F600,  // 1F600..1F64F; Emoticons
2800             0x1F650,  //               unassigned
2801             0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
2802             0x1F700,  // 1F700..1F77F; Alchemical Symbols
2803             0x1F780,  //               unassigned
2804             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2805             0x2A6E0,  //               unassigned
2806             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
2807             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
2808             0x2B820,  //               unassigned
2809             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2810             0x2FA20,  //               unassigned
2811             0xE0000,  // E0000..E007F; Tags
2812             0xE0080,  //               unassigned
2813             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2814             0xE01F0,  //               unassigned
2815             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2816             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
2817         };
2818 
2819         private static final UnicodeBlock[] blocks = {
2820             BASIC_LATIN,
2821             LATIN_1_SUPPLEMENT,
2822             LATIN_EXTENDED_A,
2823             LATIN_EXTENDED_B,
2824             IPA_EXTENSIONS,
2825             SPACING_MODIFIER_LETTERS,
2826             COMBINING_DIACRITICAL_MARKS,
2827             GREEK,
2828             CYRILLIC,
2829             CYRILLIC_SUPPLEMENTARY,
2830             ARMENIAN,
2831             HEBREW,
2832             ARABIC,
2833             SYRIAC,
2834             ARABIC_SUPPLEMENT,
2835             THAANA,
2836             NKO,
2837             SAMARITAN,
2838             MANDAIC,
2839             null,
2840             ARABIC_EXTENDED_A,
2841             DEVANAGARI,
2842             BENGALI,
2843             GURMUKHI,
2844             GUJARATI,
2845             ORIYA,
2846             TAMIL,
2847             TELUGU,
2848             KANNADA,
2849             MALAYALAM,
2850             SINHALA,
2851             THAI,
2852             LAO,
2853             TIBETAN,
2854             MYANMAR,
2855             GEORGIAN,
2856             HANGUL_JAMO,
2857             ETHIOPIC,
2858             ETHIOPIC_SUPPLEMENT,
2859             CHEROKEE,
2860             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2861             OGHAM,
2862             RUNIC,
2863             TAGALOG,
2864             HANUNOO,
2865             BUHID,
2866             TAGBANWA,
2867             KHMER,
2868             MONGOLIAN,
2869             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
2870             LIMBU,
2871             TAI_LE,
2872             NEW_TAI_LUE,
2873             KHMER_SYMBOLS,
2874             BUGINESE,
2875             TAI_THAM,
2876             null,
2877             BALINESE,
2878             SUNDANESE,
2879             BATAK,
2880             LEPCHA,
2881             OL_CHIKI,
2882             null,
2883             SUNDANESE_SUPPLEMENT,
2884             VEDIC_EXTENSIONS,
2885             PHONETIC_EXTENSIONS,
2886             PHONETIC_EXTENSIONS_SUPPLEMENT,
2887             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2888             LATIN_EXTENDED_ADDITIONAL,
2889             GREEK_EXTENDED,
2890             GENERAL_PUNCTUATION,
2891             SUPERSCRIPTS_AND_SUBSCRIPTS,
2892             CURRENCY_SYMBOLS,
2893             COMBINING_MARKS_FOR_SYMBOLS,
2894             LETTERLIKE_SYMBOLS,
2895             NUMBER_FORMS,
2896             ARROWS,
2897             MATHEMATICAL_OPERATORS,
2898             MISCELLANEOUS_TECHNICAL,
2899             CONTROL_PICTURES,
2900             OPTICAL_CHARACTER_RECOGNITION,
2901             ENCLOSED_ALPHANUMERICS,
2902             BOX_DRAWING,
2903             BLOCK_ELEMENTS,
2904             GEOMETRIC_SHAPES,
2905             MISCELLANEOUS_SYMBOLS,
2906             DINGBATS,
2907             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
2908             SUPPLEMENTAL_ARROWS_A,
2909             BRAILLE_PATTERNS,
2910             SUPPLEMENTAL_ARROWS_B,
2911             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
2912             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
2913             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
2914             GLAGOLITIC,
2915             LATIN_EXTENDED_C,
2916             COPTIC,
2917             GEORGIAN_SUPPLEMENT,
2918             TIFINAGH,
2919             ETHIOPIC_EXTENDED,
2920             CYRILLIC_EXTENDED_A,
2921             SUPPLEMENTAL_PUNCTUATION,
2922             CJK_RADICALS_SUPPLEMENT,
2923             KANGXI_RADICALS,
2924             null,
2925             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
2926             CJK_SYMBOLS_AND_PUNCTUATION,
2927             HIRAGANA,
2928             KATAKANA,
2929             BOPOMOFO,
2930             HANGUL_COMPATIBILITY_JAMO,
2931             KANBUN,
2932             BOPOMOFO_EXTENDED,
2933             CJK_STROKES,
2934             KATAKANA_PHONETIC_EXTENSIONS,
2935             ENCLOSED_CJK_LETTERS_AND_MONTHS,
2936             CJK_COMPATIBILITY,
2937             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2938             YIJING_HEXAGRAM_SYMBOLS,
2939             CJK_UNIFIED_IDEOGRAPHS,
2940             YI_SYLLABLES,
2941             YI_RADICALS,
2942             LISU,
2943             VAI,
2944             CYRILLIC_EXTENDED_B,
2945             BAMUM,
2946             MODIFIER_TONE_LETTERS,
2947             LATIN_EXTENDED_D,
2948             SYLOTI_NAGRI,
2949             COMMON_INDIC_NUMBER_FORMS,
2950             PHAGS_PA,
2951             SAURASHTRA,
2952             DEVANAGARI_EXTENDED,
2953             KAYAH_LI,
2954             REJANG,
2955             HANGUL_JAMO_EXTENDED_A,
2956             JAVANESE,
2957             null,
2958             CHAM,
2959             MYANMAR_EXTENDED_A,
2960             TAI_VIET,
2961             MEETEI_MAYEK_EXTENSIONS,
2962             ETHIOPIC_EXTENDED_A,
2963             null,
2964             MEETEI_MAYEK,
2965             HANGUL_SYLLABLES,
2966             HANGUL_JAMO_EXTENDED_B,
2967             HIGH_SURROGATES,
2968             HIGH_PRIVATE_USE_SURROGATES,
2969             LOW_SURROGATES,
2970             PRIVATE_USE_AREA,
2971             CJK_COMPATIBILITY_IDEOGRAPHS,
2972             ALPHABETIC_PRESENTATION_FORMS,
2973             ARABIC_PRESENTATION_FORMS_A,
2974             VARIATION_SELECTORS,
2975             VERTICAL_FORMS,
2976             COMBINING_HALF_MARKS,
2977             CJK_COMPATIBILITY_FORMS,
2978             SMALL_FORM_VARIANTS,
2979             ARABIC_PRESENTATION_FORMS_B,
2980             HALFWIDTH_AND_FULLWIDTH_FORMS,
2981             SPECIALS,
2982             LINEAR_B_SYLLABARY,
2983             LINEAR_B_IDEOGRAMS,
2984             AEGEAN_NUMBERS,
2985             ANCIENT_GREEK_NUMBERS,
2986             ANCIENT_SYMBOLS,
2987             PHAISTOS_DISC,
2988             null,
2989             LYCIAN,
2990             CARIAN,
2991             null,
2992             OLD_ITALIC,
2993             GOTHIC,
2994             null,
2995             UGARITIC,
2996             OLD_PERSIAN,
2997             null,
2998             DESERET,
2999             SHAVIAN,
3000             OSMANYA,
3001             null,
3002             CYPRIOT_SYLLABARY,
3003             IMPERIAL_ARAMAIC,
3004             null,
3005             PHOENICIAN,
3006             LYDIAN,
3007             null,
3008             MEROITIC_HIEROGLYPHS,
3009             MEROITIC_CURSIVE,
3010             KHAROSHTHI,
3011             OLD_SOUTH_ARABIAN,
3012             null,
3013             AVESTAN,
3014             INSCRIPTIONAL_PARTHIAN,
3015             INSCRIPTIONAL_PAHLAVI,
3016             null,
3017             OLD_TURKIC,
3018             null,
3019             RUMI_NUMERAL_SYMBOLS,
3020             null,
3021             BRAHMI,
3022             KAITHI,
3023             SORA_SOMPENG,
3024             CHAKMA,
3025             null,
3026             SHARADA,
3027             null,
3028             TAKRI,
3029             null,
3030             CUNEIFORM,
3031             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
3032             null,
3033             EGYPTIAN_HIEROGLYPHS,
3034             null,
3035             BAMUM_SUPPLEMENT,
3036             null,
3037             MIAO,
3038             null,
3039             KANA_SUPPLEMENT,
3040             null,
3041             BYZANTINE_MUSICAL_SYMBOLS,
3042             MUSICAL_SYMBOLS,
3043             ANCIENT_GREEK_MUSICAL_NOTATION,
3044             null,
3045             TAI_XUAN_JING_SYMBOLS,
3046             COUNTING_ROD_NUMERALS,
3047             null,
3048             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
3049             null,
3050             ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
3051             null,
3052             MAHJONG_TILES,
3053             DOMINO_TILES,
3054             PLAYING_CARDS,
3055             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
3056             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
3057             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
3058             EMOTICONS,
3059             null,
3060             TRANSPORT_AND_MAP_SYMBOLS,
3061             ALCHEMICAL_SYMBOLS,
3062             null,
3063             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
3064             null,
3065             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
3066             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
3067             null,
3068             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
3069             null,
3070             TAGS,
3071             null,
3072             VARIATION_SELECTORS_SUPPLEMENT,
3073             null,
3074             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
3075             SUPPLEMENTARY_PRIVATE_USE_AREA_B
3076         };
3077 
3078 
3079         /**
3080          * Returns the object representing the Unicode block containing the
3081          * given character, or {@code null} if the character is not a
3082          * member of a defined block.
3083          *
3084          * <p><b>Note:</b> This method cannot handle
3085          * <a href="Character.html#supplementary"> supplementary
3086          * characters</a>.  To support all Unicode characters, including
3087          * supplementary characters, use the {@link #of(int)} method.
3088          *
3089          * @param   c  The character in question
3090          * @return  The {@code UnicodeBlock} instance representing the
3091          *          Unicode block of which this character is a member, or
3092          *          {@code null} if the character is not a member of any
3093          *          Unicode block
3094          */
3095         public static UnicodeBlock of(char c) {
3096             return of((int)c);
3097         }
3098 
3099         /**
3100          * Returns the object representing the Unicode block
3101          * containing the given character (Unicode code point), or
3102          * {@code null} if the character is not a member of a
3103          * defined block.
3104          *
3105          * @param   codePoint the character (Unicode code point) in question.
3106          * @return  The {@code UnicodeBlock} instance representing the
3107          *          Unicode block of which this character is a member, or
3108          *          {@code null} if the character is not a member of any
3109          *          Unicode block
3110          * @exception IllegalArgumentException if the specified
3111          * {@code codePoint} is an invalid Unicode code point.
3112          * @see Character#isValidCodePoint(int)
3113          * @since   1.5
3114          */
3115         public static UnicodeBlock of(int codePoint) {
3116             if (!isValidCodePoint(codePoint)) {
3117                 throw new IllegalArgumentException();
3118             }
3119 
3120             int top, bottom, current;
3121             bottom = 0;
3122             top = blockStarts.length;
3123             current = top/2;
3124 
3125             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3126             while (top - bottom > 1) {
3127                 if (codePoint >= blockStarts[current]) {
3128                     bottom = current;
3129                 } else {
3130                     top = current;
3131                 }
3132                 current = (top + bottom) / 2;
3133             }
3134             return blocks[current];
3135         }
3136 
3137         /**
3138          * Returns the UnicodeBlock with the given name. Block
3139          * names are determined by The Unicode Standard. The file
3140          * Blocks-&lt;version&gt;.txt defines blocks for a particular
3141          * version of the standard. The {@link Character} class specifies
3142          * the version of the standard that it supports.
3143          * <p>
3144          * This method accepts block names in the following forms:
3145          * <ol>
3146          * <li> Canonical block names as defined by the Unicode Standard.
3147          * For example, the standard defines a "Basic Latin" block. Therefore, this
3148          * method accepts "Basic Latin" as a valid block name. The documentation of
3149          * each UnicodeBlock provides the canonical name.
3150          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3151          * is a valid block name for the "Basic Latin" block.
3152          * <li>The text representation of each constant UnicodeBlock identifier.
3153          * For example, this method will return the {@link #BASIC_LATIN} block if
3154          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3155          * hyphens in the canonical name with underscores.
3156          * </ol>
3157          * Finally, character case is ignored for all of the valid block name forms.
3158          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3159          * The en_US locale's case mapping rules are used to provide case-insensitive
3160          * string comparisons for block name validation.
3161          * <p>
3162          * If the Unicode Standard changes block names, both the previous and
3163          * current names will be accepted.
3164          *
3165          * @param blockName A {@code UnicodeBlock} name.
3166          * @return The {@code UnicodeBlock} instance identified
3167          *         by {@code blockName}
3168          * @throws IllegalArgumentException if {@code blockName} is an
3169          *         invalid name
3170          * @throws NullPointerException if {@code blockName} is null
3171          * @since 1.5
3172          */
3173         public static final UnicodeBlock forName(String blockName) {
3174             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3175             if (block == null) {
3176                 throw new IllegalArgumentException();
3177             }
3178             return block;
3179         }
3180     }
3181 
3182 
3183     /**
3184      * A family of character subsets representing the character scripts
3185      * defined in the <a href="http://www.unicode.org/reports/tr24/">
3186      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3187      * character is assigned to a single Unicode script, either a specific
3188      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3189      * one of the following three special values,
3190      * {@link Character.UnicodeScript#INHERITED Inherited},
3191      * {@link Character.UnicodeScript#COMMON Common} or
3192      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3193      *
3194      * @since 1.7
3195      */
3196     public static enum UnicodeScript {
3197         /**
3198          * Unicode script "Common".
3199          */
3200         COMMON,
3201 
3202         /**
3203          * Unicode script "Latin".
3204          */
3205         LATIN,
3206 
3207         /**
3208          * Unicode script "Greek".
3209          */
3210         GREEK,
3211 
3212         /**
3213          * Unicode script "Cyrillic".
3214          */
3215         CYRILLIC,
3216 
3217         /**
3218          * Unicode script "Armenian".
3219          */
3220         ARMENIAN,
3221 
3222         /**
3223          * Unicode script "Hebrew".
3224          */
3225         HEBREW,
3226 
3227         /**
3228          * Unicode script "Arabic".
3229          */
3230         ARABIC,
3231 
3232         /**
3233          * Unicode script "Syriac".
3234          */
3235         SYRIAC,
3236 
3237         /**
3238          * Unicode script "Thaana".
3239          */
3240         THAANA,
3241 
3242         /**
3243          * Unicode script "Devanagari".
3244          */
3245         DEVANAGARI,
3246 
3247         /**
3248          * Unicode script "Bengali".
3249          */
3250         BENGALI,
3251 
3252         /**
3253          * Unicode script "Gurmukhi".
3254          */
3255         GURMUKHI,
3256 
3257         /**
3258          * Unicode script "Gujarati".
3259          */
3260         GUJARATI,
3261 
3262         /**
3263          * Unicode script "Oriya".
3264          */
3265         ORIYA,
3266 
3267         /**
3268          * Unicode script "Tamil".
3269          */
3270         TAMIL,
3271 
3272         /**
3273          * Unicode script "Telugu".
3274          */
3275         TELUGU,
3276 
3277         /**
3278          * Unicode script "Kannada".
3279          */
3280         KANNADA,
3281 
3282         /**
3283          * Unicode script "Malayalam".
3284          */
3285         MALAYALAM,
3286 
3287         /**
3288          * Unicode script "Sinhala".
3289          */
3290         SINHALA,
3291 
3292         /**
3293          * Unicode script "Thai".
3294          */
3295         THAI,
3296 
3297         /**
3298          * Unicode script "Lao".
3299          */
3300         LAO,
3301 
3302         /**
3303          * Unicode script "Tibetan".
3304          */
3305         TIBETAN,
3306 
3307         /**
3308          * Unicode script "Myanmar".
3309          */
3310         MYANMAR,
3311 
3312         /**
3313          * Unicode script "Georgian".
3314          */
3315         GEORGIAN,
3316 
3317         /**
3318          * Unicode script "Hangul".
3319          */
3320         HANGUL,
3321 
3322         /**
3323          * Unicode script "Ethiopic".
3324          */
3325         ETHIOPIC,
3326 
3327         /**
3328          * Unicode script "Cherokee".
3329          */
3330         CHEROKEE,
3331 
3332         /**
3333          * Unicode script "Canadian_Aboriginal".
3334          */
3335         CANADIAN_ABORIGINAL,
3336 
3337         /**
3338          * Unicode script "Ogham".
3339          */
3340         OGHAM,
3341 
3342         /**
3343          * Unicode script "Runic".
3344          */
3345         RUNIC,
3346 
3347         /**
3348          * Unicode script "Khmer".
3349          */
3350         KHMER,
3351 
3352         /**
3353          * Unicode script "Mongolian".
3354          */
3355         MONGOLIAN,
3356 
3357         /**
3358          * Unicode script "Hiragana".
3359          */
3360         HIRAGANA,
3361 
3362         /**
3363          * Unicode script "Katakana".
3364          */
3365         KATAKANA,
3366 
3367         /**
3368          * Unicode script "Bopomofo".
3369          */
3370         BOPOMOFO,
3371 
3372         /**
3373          * Unicode script "Han".
3374          */
3375         HAN,
3376 
3377         /**
3378          * Unicode script "Yi".
3379          */
3380         YI,
3381 
3382         /**
3383          * Unicode script "Old_Italic".
3384          */
3385         OLD_ITALIC,
3386 
3387         /**
3388          * Unicode script "Gothic".
3389          */
3390         GOTHIC,
3391 
3392         /**
3393          * Unicode script "Deseret".
3394          */
3395         DESERET,
3396 
3397         /**
3398          * Unicode script "Inherited".
3399          */
3400         INHERITED,
3401 
3402         /**
3403          * Unicode script "Tagalog".
3404          */
3405         TAGALOG,
3406 
3407         /**
3408          * Unicode script "Hanunoo".
3409          */
3410         HANUNOO,
3411 
3412         /**
3413          * Unicode script "Buhid".
3414          */
3415         BUHID,
3416 
3417         /**
3418          * Unicode script "Tagbanwa".
3419          */
3420         TAGBANWA,
3421 
3422         /**
3423          * Unicode script "Limbu".
3424          */
3425         LIMBU,
3426 
3427         /**
3428          * Unicode script "Tai_Le".
3429          */
3430         TAI_LE,
3431 
3432         /**
3433          * Unicode script "Linear_B".
3434          */
3435         LINEAR_B,
3436 
3437         /**
3438          * Unicode script "Ugaritic".
3439          */
3440         UGARITIC,
3441 
3442         /**
3443          * Unicode script "Shavian".
3444          */
3445         SHAVIAN,
3446 
3447         /**
3448          * Unicode script "Osmanya".
3449          */
3450         OSMANYA,
3451 
3452         /**
3453          * Unicode script "Cypriot".
3454          */
3455         CYPRIOT,
3456 
3457         /**
3458          * Unicode script "Braille".
3459          */
3460         BRAILLE,
3461 
3462         /**
3463          * Unicode script "Buginese".
3464          */
3465         BUGINESE,
3466 
3467         /**
3468          * Unicode script "Coptic".
3469          */
3470         COPTIC,
3471 
3472         /**
3473          * Unicode script "New_Tai_Lue".
3474          */
3475         NEW_TAI_LUE,
3476 
3477         /**
3478          * Unicode script "Glagolitic".
3479          */
3480         GLAGOLITIC,
3481 
3482         /**
3483          * Unicode script "Tifinagh".
3484          */
3485         TIFINAGH,
3486 
3487         /**
3488          * Unicode script "Syloti_Nagri".
3489          */
3490         SYLOTI_NAGRI,
3491 
3492         /**
3493          * Unicode script "Old_Persian".
3494          */
3495         OLD_PERSIAN,
3496 
3497         /**
3498          * Unicode script "Kharoshthi".
3499          */
3500         KHAROSHTHI,
3501 
3502         /**
3503          * Unicode script "Balinese".
3504          */
3505         BALINESE,
3506 
3507         /**
3508          * Unicode script "Cuneiform".
3509          */
3510         CUNEIFORM,
3511 
3512         /**
3513          * Unicode script "Phoenician".
3514          */
3515         PHOENICIAN,
3516 
3517         /**
3518          * Unicode script "Phags_Pa".
3519          */
3520         PHAGS_PA,
3521 
3522         /**
3523          * Unicode script "Nko".
3524          */
3525         NKO,
3526 
3527         /**
3528          * Unicode script "Sundanese".
3529          */
3530         SUNDANESE,
3531 
3532         /**
3533          * Unicode script "Batak".
3534          */
3535         BATAK,
3536 
3537         /**
3538          * Unicode script "Lepcha".
3539          */
3540         LEPCHA,
3541 
3542         /**
3543          * Unicode script "Ol_Chiki".
3544          */
3545         OL_CHIKI,
3546 
3547         /**
3548          * Unicode script "Vai".
3549          */
3550         VAI,
3551 
3552         /**
3553          * Unicode script "Saurashtra".
3554          */
3555         SAURASHTRA,
3556 
3557         /**
3558          * Unicode script "Kayah_Li".
3559          */
3560         KAYAH_LI,
3561 
3562         /**
3563          * Unicode script "Rejang".
3564          */
3565         REJANG,
3566 
3567         /**
3568          * Unicode script "Lycian".
3569          */
3570         LYCIAN,
3571 
3572         /**
3573          * Unicode script "Carian".
3574          */
3575         CARIAN,
3576 
3577         /**
3578          * Unicode script "Lydian".
3579          */
3580         LYDIAN,
3581 
3582         /**
3583          * Unicode script "Cham".
3584          */
3585         CHAM,
3586 
3587         /**
3588          * Unicode script "Tai_Tham".
3589          */
3590         TAI_THAM,
3591 
3592         /**
3593          * Unicode script "Tai_Viet".
3594          */
3595         TAI_VIET,
3596 
3597         /**
3598          * Unicode script "Avestan".
3599          */
3600         AVESTAN,
3601 
3602         /**
3603          * Unicode script "Egyptian_Hieroglyphs".
3604          */
3605         EGYPTIAN_HIEROGLYPHS,
3606 
3607         /**
3608          * Unicode script "Samaritan".
3609          */
3610         SAMARITAN,
3611 
3612         /**
3613          * Unicode script "Mandaic".
3614          */
3615         MANDAIC,
3616 
3617         /**
3618          * Unicode script "Lisu".
3619          */
3620         LISU,
3621 
3622         /**
3623          * Unicode script "Bamum".
3624          */
3625         BAMUM,
3626 
3627         /**
3628          * Unicode script "Javanese".
3629          */
3630         JAVANESE,
3631 
3632         /**
3633          * Unicode script "Meetei_Mayek".
3634          */
3635         MEETEI_MAYEK,
3636 
3637         /**
3638          * Unicode script "Imperial_Aramaic".
3639          */
3640         IMPERIAL_ARAMAIC,
3641 
3642         /**
3643          * Unicode script "Old_South_Arabian".
3644          */
3645         OLD_SOUTH_ARABIAN,
3646 
3647         /**
3648          * Unicode script "Inscriptional_Parthian".
3649          */
3650         INSCRIPTIONAL_PARTHIAN,
3651 
3652         /**
3653          * Unicode script "Inscriptional_Pahlavi".
3654          */
3655         INSCRIPTIONAL_PAHLAVI,
3656 
3657         /**
3658          * Unicode script "Old_Turkic".
3659          */
3660         OLD_TURKIC,
3661 
3662         /**
3663          * Unicode script "Brahmi".
3664          */
3665         BRAHMI,
3666 
3667         /**
3668          * Unicode script "Kaithi".
3669          */
3670         KAITHI,
3671 
3672         /**
3673          * Unicode script "Meroitic_Hieroglyphs".
3674          */
3675         MEROITIC_HIEROGLYPHS,
3676 
3677         /**
3678          * Unicode script "Meroitic_Cursive".
3679          */
3680         MEROITIC_CURSIVE,
3681 
3682         /**
3683          * Unicode script "Sora_Sompeng".
3684          */
3685         SORA_SOMPENG,
3686 
3687         /**
3688          * Unicode script "Chakma".
3689          */
3690         CHAKMA,
3691 
3692         /**
3693          * Unicode script "Sharada".
3694          */
3695         SHARADA,
3696 
3697         /**
3698          * Unicode script "Takri".
3699          */
3700         TAKRI,
3701 
3702         /**
3703          * Unicode script "Miao".
3704          */
3705         MIAO,
3706 
3707         /**
3708          * Unicode script "Unknown".
3709          */
3710         UNKNOWN;
3711 
3712         private static final int[] scriptStarts = {
3713             0x0000,   // 0000..0040; COMMON
3714             0x0041,   // 0041..005A; LATIN
3715             0x005B,   // 005B..0060; COMMON
3716             0x0061,   // 0061..007A; LATIN
3717             0x007B,   // 007B..00A9; COMMON
3718             0x00AA,   // 00AA..00AA; LATIN
3719             0x00AB,   // 00AB..00B9; COMMON
3720             0x00BA,   // 00BA..00BA; LATIN
3721             0x00BB,   // 00BB..00BF; COMMON
3722             0x00C0,   // 00C0..00D6; LATIN
3723             0x00D7,   // 00D7..00D7; COMMON
3724             0x00D8,   // 00D8..00F6; LATIN
3725             0x00F7,   // 00F7..00F7; COMMON
3726             0x00F8,   // 00F8..02B8; LATIN
3727             0x02B9,   // 02B9..02DF; COMMON
3728             0x02E0,   // 02E0..02E4; LATIN
3729             0x02E5,   // 02E5..02E9; COMMON
3730             0x02EA,   // 02EA..02EB; BOPOMOFO
3731             0x02EC,   // 02EC..02FF; COMMON
3732             0x0300,   // 0300..036F; INHERITED
3733             0x0370,   // 0370..0373; GREEK
3734             0x0374,   // 0374..0374; COMMON
3735             0x0375,   // 0375..037D; GREEK
3736             0x037E,   // 037E..0383; COMMON
3737             0x0384,   // 0384..0384; GREEK
3738             0x0385,   // 0385..0385; COMMON
3739             0x0386,   // 0386..0386; GREEK
3740             0x0387,   // 0387..0387; COMMON
3741             0x0388,   // 0388..03E1; GREEK
3742             0x03E2,   // 03E2..03EF; COPTIC
3743             0x03F0,   // 03F0..03FF; GREEK
3744             0x0400,   // 0400..0484; CYRILLIC
3745             0x0485,   // 0485..0486; INHERITED
3746             0x0487,   // 0487..0530; CYRILLIC
3747             0x0531,   // 0531..0588; ARMENIAN
3748             0x0589,   // 0589..0589; COMMON
3749             0x058A,   // 058A..0590; ARMENIAN
3750             0x0591,   // 0591..05FF; HEBREW
3751             0x0600,   // 0600..060B; ARABIC
3752             0x060C,   // 060C..060C; COMMON
3753             0x060D,   // 060D..061A; ARABIC
3754             0x061B,   // 061B..061D; COMMON
3755             0x061E,   // 061E..061E; ARABIC
3756             0x061F,   // 061F..061F; COMMON
3757             0x0620,   // 0620..063F; ARABIC
3758             0x0640,   // 0640..0640; COMMON
3759             0x0641,   // 0641..064A; ARABIC
3760             0x064B,   // 064B..0655; INHERITED
3761             0x0656,   // 0656..065F; ARABIC
3762             0x0660,   // 0660..0669; COMMON
3763             0x066A,   // 066A..066F; ARABIC
3764             0x0670,   // 0670..0670; INHERITED
3765             0x0671,   // 0671..06DC; ARABIC
3766             0x06DD,   // 06DD..06DD; COMMON
3767             0x06DE,   // 06DE..06FF; ARABIC
3768             0x0700,   // 0700..074F; SYRIAC
3769             0x0750,   // 0750..077F; ARABIC
3770             0x0780,   // 0780..07BF; THAANA
3771             0x07C0,   // 07C0..07FF; NKO
3772             0x0800,   // 0800..083F; SAMARITAN
3773             0x0840,   // 0840..089F; MANDAIC
3774             0x08A0,   // 08A0..08FF; ARABIC
3775             0x0900,   // 0900..0950; DEVANAGARI
3776             0x0951,   // 0951..0952; INHERITED
3777             0x0953,   // 0953..0963; DEVANAGARI
3778             0x0964,   // 0964..0965; COMMON
3779             0x0966,   // 0966..0980; DEVANAGARI
3780             0x0981,   // 0981..0A00; BENGALI
3781             0x0A01,   // 0A01..0A80; GURMUKHI
3782             0x0A81,   // 0A81..0B00; GUJARATI
3783             0x0B01,   // 0B01..0B81; ORIYA
3784             0x0B82,   // 0B82..0C00; TAMIL
3785             0x0C01,   // 0C01..0C81; TELUGU
3786             0x0C82,   // 0C82..0CF0; KANNADA
3787             0x0D02,   // 0D02..0D81; MALAYALAM
3788             0x0D82,   // 0D82..0E00; SINHALA
3789             0x0E01,   // 0E01..0E3E; THAI
3790             0x0E3F,   // 0E3F..0E3F; COMMON
3791             0x0E40,   // 0E40..0E80; THAI
3792             0x0E81,   // 0E81..0EFF; LAO
3793             0x0F00,   // 0F00..0FD4; TIBETAN
3794             0x0FD5,   // 0FD5..0FD8; COMMON
3795             0x0FD9,   // 0FD9..0FFF; TIBETAN
3796             0x1000,   // 1000..109F; MYANMAR
3797             0x10A0,   // 10A0..10FA; GEORGIAN
3798             0x10FB,   // 10FB..10FB; COMMON
3799             0x10FC,   // 10FC..10FF; GEORGIAN
3800             0x1100,   // 1100..11FF; HANGUL
3801             0x1200,   // 1200..139F; ETHIOPIC
3802             0x13A0,   // 13A0..13FF; CHEROKEE
3803             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
3804             0x1680,   // 1680..169F; OGHAM
3805             0x16A0,   // 16A0..16EA; RUNIC
3806             0x16EB,   // 16EB..16ED; COMMON
3807             0x16EE,   // 16EE..16FF; RUNIC
3808             0x1700,   // 1700..171F; TAGALOG
3809             0x1720,   // 1720..1734; HANUNOO
3810             0x1735,   // 1735..173F; COMMON
3811             0x1740,   // 1740..175F; BUHID
3812             0x1760,   // 1760..177F; TAGBANWA
3813             0x1780,   // 1780..17FF; KHMER
3814             0x1800,   // 1800..1801; MONGOLIAN
3815             0x1802,   // 1802..1803; COMMON
3816             0x1804,   // 1804..1804; MONGOLIAN
3817             0x1805,   // 1805..1805; COMMON
3818             0x1806,   // 1806..18AF; MONGOLIAN
3819             0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
3820             0x1900,   // 1900..194F; LIMBU
3821             0x1950,   // 1950..197F; TAI_LE
3822             0x1980,   // 1980..19DF; NEW_TAI_LUE
3823             0x19E0,   // 19E0..19FF; KHMER
3824             0x1A00,   // 1A00..1A1F; BUGINESE
3825             0x1A20,   // 1A20..1AFF; TAI_THAM
3826             0x1B00,   // 1B00..1B7F; BALINESE
3827             0x1B80,   // 1B80..1BBF; SUNDANESE
3828             0x1BC0,   // 1BC0..1BFF; BATAK
3829             0x1C00,   // 1C00..1C4F; LEPCHA
3830             0x1C50,   // 1C50..1CBF; OL_CHIKI
3831             0x1CC0,   // 1CC0..1CCF; SUNDANESE
3832             0x1CD0,   // 1CD0..1CD2; INHERITED
3833             0x1CD3,   // 1CD3..1CD3; COMMON
3834             0x1CD4,   // 1CD4..1CE0; INHERITED
3835             0x1CE1,   // 1CE1..1CE1; COMMON
3836             0x1CE2,   // 1CE2..1CE8; INHERITED
3837             0x1CE9,   // 1CE9..1CEC; COMMON
3838             0x1CED,   // 1CED..1CED; INHERITED
3839             0x1CEE,   // 1CEE..1CF3; COMMON
3840             0x1CF4,   // 1CF4..1CF4; INHERITED
3841             0x1CF5,   // 1CF5..1CFF; COMMON
3842             0x1D00,   // 1D00..1D25; LATIN
3843             0x1D26,   // 1D26..1D2A; GREEK
3844             0x1D2B,   // 1D2B..1D2B; CYRILLIC
3845             0x1D2C,   // 1D2C..1D5C; LATIN
3846             0x1D5D,   // 1D5D..1D61; GREEK
3847             0x1D62,   // 1D62..1D65; LATIN
3848             0x1D66,   // 1D66..1D6A; GREEK
3849             0x1D6B,   // 1D6B..1D77; LATIN
3850             0x1D78,   // 1D78..1D78; CYRILLIC
3851             0x1D79,   // 1D79..1DBE; LATIN
3852             0x1DBF,   // 1DBF..1DBF; GREEK
3853             0x1DC0,   // 1DC0..1DFF; INHERITED
3854             0x1E00,   // 1E00..1EFF; LATIN
3855             0x1F00,   // 1F00..1FFF; GREEK
3856             0x2000,   // 2000..200B; COMMON
3857             0x200C,   // 200C..200D; INHERITED
3858             0x200E,   // 200E..2070; COMMON
3859             0x2071,   // 2071..2073; LATIN
3860             0x2074,   // 2074..207E; COMMON
3861             0x207F,   // 207F..207F; LATIN
3862             0x2080,   // 2080..208F; COMMON
3863             0x2090,   // 2090..209F; LATIN
3864             0x20A0,   // 20A0..20CF; COMMON
3865             0x20D0,   // 20D0..20FF; INHERITED
3866             0x2100,   // 2100..2125; COMMON
3867             0x2126,   // 2126..2126; GREEK
3868             0x2127,   // 2127..2129; COMMON
3869             0x212A,   // 212A..212B; LATIN
3870             0x212C,   // 212C..2131; COMMON
3871             0x2132,   // 2132..2132; LATIN
3872             0x2133,   // 2133..214D; COMMON
3873             0x214E,   // 214E..214E; LATIN
3874             0x214F,   // 214F..215F; COMMON
3875             0x2160,   // 2160..2188; LATIN
3876             0x2189,   // 2189..27FF; COMMON
3877             0x2800,   // 2800..28FF; BRAILLE
3878             0x2900,   // 2900..2BFF; COMMON
3879             0x2C00,   // 2C00..2C5F; GLAGOLITIC
3880             0x2C60,   // 2C60..2C7F; LATIN
3881             0x2C80,   // 2C80..2CFF; COPTIC
3882             0x2D00,   // 2D00..2D2F; GEORGIAN
3883             0x2D30,   // 2D30..2D7F; TIFINAGH
3884             0x2D80,   // 2D80..2DDF; ETHIOPIC
3885             0x2DE0,   // 2DE0..2DFF; CYRILLIC
3886             0x2E00,   // 2E00..2E7F; COMMON
3887             0x2E80,   // 2E80..2FEF; HAN
3888             0x2FF0,   // 2FF0..3004; COMMON
3889             0x3005,   // 3005..3005; HAN
3890             0x3006,   // 3006..3006; COMMON
3891             0x3007,   // 3007..3007; HAN
3892             0x3008,   // 3008..3020; COMMON
3893             0x3021,   // 3021..3029; HAN
3894             0x302A,   // 302A..302D; INHERITED
3895             0x302E,   // 302E..302F; HANGUL
3896             0x3030,   // 3030..3037; COMMON
3897             0x3038,   // 3038..303B; HAN
3898             0x303C,   // 303C..3040; COMMON
3899             0x3041,   // 3041..3098; HIRAGANA
3900             0x3099,   // 3099..309A; INHERITED
3901             0x309B,   // 309B..309C; COMMON
3902             0x309D,   // 309D..309F; HIRAGANA
3903             0x30A0,   // 30A0..30A0; COMMON
3904             0x30A1,   // 30A1..30FA; KATAKANA
3905             0x30FB,   // 30FB..30FC; COMMON
3906             0x30FD,   // 30FD..3104; KATAKANA
3907             0x3105,   // 3105..3130; BOPOMOFO
3908             0x3131,   // 3131..318F; HANGUL
3909             0x3190,   // 3190..319F; COMMON
3910             0x31A0,   // 31A0..31BF; BOPOMOFO
3911             0x31C0,   // 31C0..31EF; COMMON
3912             0x31F0,   // 31F0..31FF; KATAKANA
3913             0x3200,   // 3200..321F; HANGUL
3914             0x3220,   // 3220..325F; COMMON
3915             0x3260,   // 3260..327E; HANGUL
3916             0x327F,   // 327F..32CF; COMMON
3917             0x32D0,   // 32D0..3357; KATAKANA
3918             0x3358,   // 3358..33FF; COMMON
3919             0x3400,   // 3400..4DBF; HAN
3920             0x4DC0,   // 4DC0..4DFF; COMMON
3921             0x4E00,   // 4E00..9FFF; HAN
3922             0xA000,   // A000..A4CF; YI
3923             0xA4D0,   // A4D0..A4FF; LISU
3924             0xA500,   // A500..A63F; VAI
3925             0xA640,   // A640..A69F; CYRILLIC
3926             0xA6A0,   // A6A0..A6FF; BAMUM
3927             0xA700,   // A700..A721; COMMON
3928             0xA722,   // A722..A787; LATIN
3929             0xA788,   // A788..A78A; COMMON
3930             0xA78B,   // A78B..A7FF; LATIN
3931             0xA800,   // A800..A82F; SYLOTI_NAGRI
3932             0xA830,   // A830..A83F; COMMON
3933             0xA840,   // A840..A87F; PHAGS_PA
3934             0xA880,   // A880..A8DF; SAURASHTRA
3935             0xA8E0,   // A8E0..A8FF; DEVANAGARI
3936             0xA900,   // A900..A92F; KAYAH_LI
3937             0xA930,   // A930..A95F; REJANG
3938             0xA960,   // A960..A97F; HANGUL
3939             0xA980,   // A980..A9FF; JAVANESE
3940             0xAA00,   // AA00..AA5F; CHAM
3941             0xAA60,   // AA60..AA7F; MYANMAR
3942             0xAA80,   // AA80..AADF; TAI_VIET
3943             0xAAE0,   // AAE0..AB00; MEETEI_MAYEK
3944             0xAB01,   // AB01..ABBF; ETHIOPIC
3945             0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
3946             0xAC00,   // AC00..D7FB; HANGUL
3947             0xD7FC,   // D7FC..F8FF; UNKNOWN
3948             0xF900,   // F900..FAFF; HAN
3949             0xFB00,   // FB00..FB12; LATIN
3950             0xFB13,   // FB13..FB1C; ARMENIAN
3951             0xFB1D,   // FB1D..FB4F; HEBREW
3952             0xFB50,   // FB50..FD3D; ARABIC
3953             0xFD3E,   // FD3E..FD4F; COMMON
3954             0xFD50,   // FD50..FDFC; ARABIC
3955             0xFDFD,   // FDFD..FDFF; COMMON
3956             0xFE00,   // FE00..FE0F; INHERITED
3957             0xFE10,   // FE10..FE1F; COMMON
3958             0xFE20,   // FE20..FE2F; INHERITED
3959             0xFE30,   // FE30..FE6F; COMMON
3960             0xFE70,   // FE70..FEFE; ARABIC
3961             0xFEFF,   // FEFF..FF20; COMMON
3962             0xFF21,   // FF21..FF3A; LATIN
3963             0xFF3B,   // FF3B..FF40; COMMON
3964             0xFF41,   // FF41..FF5A; LATIN
3965             0xFF5B,   // FF5B..FF65; COMMON
3966             0xFF66,   // FF66..FF6F; KATAKANA
3967             0xFF70,   // FF70..FF70; COMMON
3968             0xFF71,   // FF71..FF9D; KATAKANA
3969             0xFF9E,   // FF9E..FF9F; COMMON
3970             0xFFA0,   // FFA0..FFDF; HANGUL
3971             0xFFE0,   // FFE0..FFFF; COMMON
3972             0x10000,  // 10000..100FF; LINEAR_B
3973             0x10100,  // 10100..1013F; COMMON
3974             0x10140,  // 10140..1018F; GREEK
3975             0x10190,  // 10190..101FC; COMMON
3976             0x101FD,  // 101FD..1027F; INHERITED
3977             0x10280,  // 10280..1029F; LYCIAN
3978             0x102A0,  // 102A0..102FF; CARIAN
3979             0x10300,  // 10300..1032F; OLD_ITALIC
3980             0x10330,  // 10330..1037F; GOTHIC
3981             0x10380,  // 10380..1039F; UGARITIC
3982             0x103A0,  // 103A0..103FF; OLD_PERSIAN
3983             0x10400,  // 10400..1044F; DESERET
3984             0x10450,  // 10450..1047F; SHAVIAN
3985             0x10480,  // 10480..107FF; OSMANYA
3986             0x10800,  // 10800..1083F; CYPRIOT
3987             0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
3988             0x10900,  // 10900..1091F; PHOENICIAN
3989             0x10920,  // 10920..1097F; LYDIAN
3990             0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
3991             0x109A0,  // 109A0..109FF; MEROITIC_CURSIVE
3992             0x10A00,  // 10A00..10A5F; KHAROSHTHI
3993             0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
3994             0x10B00,  // 10B00..10B3F; AVESTAN
3995             0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
3996             0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
3997             0x10C00,  // 10C00..10E5F; OLD_TURKIC
3998             0x10E60,  // 10E60..10FFF; ARABIC
3999             0x11000,  // 11000..1107F; BRAHMI
4000             0x11080,  // 11080..110CF; KAITHI
4001             0x110D0,  // 110D0..110FF; SORA_SOMPENG
4002             0x11100,  // 11100..1117F; CHAKMA
4003             0x11180,  // 11180..1167F; SHARADA
4004             0x11680,  // 11680..116CF; TAKRI
4005             0x12000,  // 12000..12FFF; CUNEIFORM
4006             0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
4007             0x16800,  // 16800..16A38; BAMUM
4008             0x16F00,  // 16F00..16F9F; MIAO
4009             0x1B000,  // 1B000..1B000; KATAKANA
4010             0x1B001,  // 1B001..1CFFF; HIRAGANA
4011             0x1D000,  // 1D000..1D166; COMMON
4012             0x1D167,  // 1D167..1D169; INHERITED
4013             0x1D16A,  // 1D16A..1D17A; COMMON
4014             0x1D17B,  // 1D17B..1D182; INHERITED
4015             0x1D183,  // 1D183..1D184; COMMON
4016             0x1D185,  // 1D185..1D18B; INHERITED
4017             0x1D18C,  // 1D18C..1D1A9; COMMON
4018             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
4019             0x1D1AE,  // 1D1AE..1D1FF; COMMON
4020             0x1D200,  // 1D200..1D2FF; GREEK
4021             0x1D300,  // 1D300..1EDFF; COMMON
4022             0x1EE00,  // 1EE00..1EFFF; ARABIC
4023             0x1F000,  // 1F000..1F1FF; COMMON
4024             0x1F200,  // 1F200..1F200; HIRAGANA
4025             0x1F201,  // 1F201..1FFFF; COMMON
4026             0x20000,  // 20000..E0000; HAN
4027             0xE0001,  // E0001..E00FF; COMMON
4028             0xE0100,  // E0100..E01EF; INHERITED
4029             0xE01F0   // E01F0..10FFFF; UNKNOWN
4030 
4031         };
4032 
4033         private static final UnicodeScript[] scripts = {
                 // Parallel to scriptStarts: of(int) binary-searches scriptStarts
                 // for the range containing a code point and uses the resulting
                 // index to read this array, so entry i is the script reported
                 // for the range that begins at scriptStarts[i]. The entries
                 // must therefore stay in exactly the same order as scriptStarts.
4034             COMMON,
4035             LATIN,
4036             COMMON,
4037             LATIN,
4038             COMMON,
4039             LATIN,
4040             COMMON,
4041             LATIN,
4042             COMMON,
4043             LATIN,
4044             COMMON,
4045             LATIN,
4046             COMMON,
4047             LATIN,
4048             COMMON,
4049             LATIN,
4050             COMMON,
4051             BOPOMOFO,
4052             COMMON,
4053             INHERITED,
4054             GREEK,
4055             COMMON,
4056             GREEK,
4057             COMMON,
4058             GREEK,
4059             COMMON,
4060             GREEK,
4061             COMMON,
4062             GREEK,
4063             COPTIC,
4064             GREEK,
4065             CYRILLIC,
4066             INHERITED,
4067             CYRILLIC,
4068             ARMENIAN,
4069             COMMON,
4070             ARMENIAN,
4071             HEBREW,
4072             ARABIC,
4073             COMMON,
4074             ARABIC,
4075             COMMON,
4076             ARABIC,
4077             COMMON,
4078             ARABIC,
4079             COMMON,
4080             ARABIC,
4081             INHERITED,
4082             ARABIC,
4083             COMMON,
4084             ARABIC,
4085             INHERITED,
4086             ARABIC,
4087             COMMON,
4088             ARABIC,
4089             SYRIAC,
4090             ARABIC,
4091             THAANA,
4092             NKO,
4093             SAMARITAN,
4094             MANDAIC,
4095             ARABIC,
4096             DEVANAGARI,
4097             INHERITED,
4098             DEVANAGARI,
4099             COMMON,
4100             DEVANAGARI,
4101             BENGALI,
4102             GURMUKHI,
4103             GUJARATI,
4104             ORIYA,
4105             TAMIL,
4106             TELUGU,
4107             KANNADA,
4108             MALAYALAM,
4109             SINHALA,
4110             THAI,
4111             COMMON,
4112             THAI,
4113             LAO,
4114             TIBETAN,
4115             COMMON,
4116             TIBETAN,
4117             MYANMAR,
4118             GEORGIAN,
4119             COMMON,
4120             GEORGIAN,
4121             HANGUL,
4122             ETHIOPIC,
4123             CHEROKEE,
4124             CANADIAN_ABORIGINAL,
4125             OGHAM,
4126             RUNIC,
4127             COMMON,
4128             RUNIC,
4129             TAGALOG,
4130             HANUNOO,
4131             COMMON,
4132             BUHID,
4133             TAGBANWA,
4134             KHMER,
4135             MONGOLIAN,
4136             COMMON,
4137             MONGOLIAN,
4138             COMMON,
4139             MONGOLIAN,
4140             CANADIAN_ABORIGINAL,
4141             LIMBU,
4142             TAI_LE,
4143             NEW_TAI_LUE,
4144             KHMER,
4145             BUGINESE,
4146             TAI_THAM,
4147             BALINESE,
4148             SUNDANESE,
4149             BATAK,
4150             LEPCHA,
4151             OL_CHIKI,
4152             SUNDANESE,
4153             INHERITED,
4154             COMMON,
4155             INHERITED,
4156             COMMON,
4157             INHERITED,
4158             COMMON,
4159             INHERITED,
4160             COMMON,
4161             INHERITED,
4162             COMMON,
4163             LATIN,
4164             GREEK,
4165             CYRILLIC,
4166             LATIN,
4167             GREEK,
4168             LATIN,
4169             GREEK,
4170             LATIN,
4171             CYRILLIC,
4172             LATIN,
4173             GREEK,
4174             INHERITED,
4175             LATIN,
4176             GREEK,
4177             COMMON,
4178             INHERITED,
4179             COMMON,
4180             LATIN,
4181             COMMON,
4182             LATIN,
4183             COMMON,
4184             LATIN,
4185             COMMON,
4186             INHERITED,
4187             COMMON,
4188             GREEK,
4189             COMMON,
4190             LATIN,
4191             COMMON,
4192             LATIN,
4193             COMMON,
4194             LATIN,
4195             COMMON,
4196             LATIN,
4197             COMMON,
4198             BRAILLE,
4199             COMMON,
4200             GLAGOLITIC,
4201             LATIN,
4202             COPTIC,
4203             GEORGIAN,
4204             TIFINAGH,
4205             ETHIOPIC,
4206             CYRILLIC,
4207             COMMON,
4208             HAN,
4209             COMMON,
4210             HAN,
4211             COMMON,
4212             HAN,
4213             COMMON,
4214             HAN,
4215             INHERITED,
4216             HANGUL,
4217             COMMON,
4218             HAN,
4219             COMMON,
4220             HIRAGANA,
4221             INHERITED,
4222             COMMON,
4223             HIRAGANA,
4224             COMMON,
4225             KATAKANA,
4226             COMMON,
4227             KATAKANA,
4228             BOPOMOFO,
4229             HANGUL,
4230             COMMON,
4231             BOPOMOFO,
4232             COMMON,
4233             KATAKANA,
4234             HANGUL,
4235             COMMON,
4236             HANGUL,
4237             COMMON,
4238             KATAKANA,
4239             COMMON,
4240             HAN,
4241             COMMON,
4242             HAN,
4243             YI,
4244             LISU,
4245             VAI,
4246             CYRILLIC,
4247             BAMUM,
4248             COMMON,
4249             LATIN,
4250             COMMON,
4251             LATIN,
4252             SYLOTI_NAGRI,
4253             COMMON,
4254             PHAGS_PA,
4255             SAURASHTRA,
4256             DEVANAGARI,
4257             KAYAH_LI,
4258             REJANG,
4259             HANGUL,
4260             JAVANESE,
4261             CHAM,
4262             MYANMAR,
4263             TAI_VIET,
4264             MEETEI_MAYEK,
4265             ETHIOPIC,
4266             MEETEI_MAYEK,
4267             HANGUL,
4268             UNKNOWN     ,
4269             HAN,
4270             LATIN,
4271             ARMENIAN,
4272             HEBREW,
4273             ARABIC,
4274             COMMON,
4275             ARABIC,
4276             COMMON,
4277             INHERITED,
4278             COMMON,
4279             INHERITED,
4280             COMMON,
4281             ARABIC,
4282             COMMON,
4283             LATIN,
4284             COMMON,
4285             LATIN,
4286             COMMON,
4287             KATAKANA,
4288             COMMON,
4289             KATAKANA,
4290             COMMON,
4291             HANGUL,
4292             COMMON,
4293             LINEAR_B,
4294             COMMON,
4295             GREEK,
4296             COMMON,
4297             INHERITED,
4298             LYCIAN,
4299             CARIAN,
4300             OLD_ITALIC,
4301             GOTHIC,
4302             UGARITIC,
4303             OLD_PERSIAN,
4304             DESERET,
4305             SHAVIAN,
4306             OSMANYA,
4307             CYPRIOT,
4308             IMPERIAL_ARAMAIC,
4309             PHOENICIAN,
4310             LYDIAN,
4311             MEROITIC_HIEROGLYPHS,
4312             MEROITIC_CURSIVE,
4313             KHAROSHTHI,
4314             OLD_SOUTH_ARABIAN,
4315             AVESTAN,
4316             INSCRIPTIONAL_PARTHIAN,
4317             INSCRIPTIONAL_PAHLAVI,
4318             OLD_TURKIC,
4319             ARABIC,
4320             BRAHMI,
4321             KAITHI,
4322             SORA_SOMPENG,
4323             CHAKMA,
4324             SHARADA,
4325             TAKRI,
4326             CUNEIFORM,
4327             EGYPTIAN_HIEROGLYPHS,
4328             BAMUM,
4329             MIAO,
4330             KATAKANA,
4331             HIRAGANA,
4332             COMMON,
4333             INHERITED,
4334             COMMON,
4335             INHERITED,
4336             COMMON,
4337             INHERITED,
4338             COMMON,
4339             INHERITED,
4340             COMMON,
4341             GREEK,
4342             COMMON,
4343             ARABIC,
4344             COMMON,
4345             HIRAGANA,
4346             COMMON,
4347             HAN,
4348             COMMON,
4349             INHERITED,
4350             UNKNOWN   // pairs with the final scriptStarts entry (E01F0..10FFFF)
4351         };
4352 
4353         private static HashMap<String, Character.UnicodeScript> aliases;
4354         static {
4355             aliases = new HashMap<>(128);
4356             aliases.put("ARAB", ARABIC);
4357             aliases.put("ARMI", IMPERIAL_ARAMAIC);
4358             aliases.put("ARMN", ARMENIAN);
4359             aliases.put("AVST", AVESTAN);
4360             aliases.put("BALI", BALINESE);
4361             aliases.put("BAMU", BAMUM);
4362             aliases.put("BATK", BATAK);
4363             aliases.put("BENG", BENGALI);
4364             aliases.put("BOPO", BOPOMOFO);
4365             aliases.put("BRAI", BRAILLE);
4366             aliases.put("BRAH", BRAHMI);
4367             aliases.put("BUGI", BUGINESE);
4368             aliases.put("BUHD", BUHID);
4369             aliases.put("CAKM", CHAKMA);
4370             aliases.put("CANS", CANADIAN_ABORIGINAL);
4371             aliases.put("CARI", CARIAN);
4372             aliases.put("CHAM", CHAM);
4373             aliases.put("CHER", CHEROKEE);
4374             aliases.put("COPT", COPTIC);
4375             aliases.put("CPRT", CYPRIOT);
4376             aliases.put("CYRL", CYRILLIC);
4377             aliases.put("DEVA", DEVANAGARI);
4378             aliases.put("DSRT", DESERET);
4379             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4380             aliases.put("ETHI", ETHIOPIC);
4381             aliases.put("GEOR", GEORGIAN);
4382             aliases.put("GLAG", GLAGOLITIC);
4383             aliases.put("GOTH", GOTHIC);
4384             aliases.put("GREK", GREEK);
4385             aliases.put("GUJR", GUJARATI);
4386             aliases.put("GURU", GURMUKHI);
4387             aliases.put("HANG", HANGUL);
4388             aliases.put("HANI", HAN);
4389             aliases.put("HANO", HANUNOO);
4390             aliases.put("HEBR", HEBREW);
4391             aliases.put("HIRA", HIRAGANA);
4392             // it appears we don't have the KATAKANA_OR_HIRAGANA
4393             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4394             aliases.put("ITAL", OLD_ITALIC);
4395             aliases.put("JAVA", JAVANESE);
4396             aliases.put("KALI", KAYAH_LI);
4397             aliases.put("KANA", KATAKANA);
4398             aliases.put("KHAR", KHAROSHTHI);
4399             aliases.put("KHMR", KHMER);
4400             aliases.put("KNDA", KANNADA);
4401             aliases.put("KTHI", KAITHI);
4402             aliases.put("LANA", TAI_THAM);
4403             aliases.put("LAOO", LAO);
4404             aliases.put("LATN", LATIN);
4405             aliases.put("LEPC", LEPCHA);
4406             aliases.put("LIMB", LIMBU);
4407             aliases.put("LINB", LINEAR_B);
4408             aliases.put("LISU", LISU);
4409             aliases.put("LYCI", LYCIAN);
4410             aliases.put("LYDI", LYDIAN);
4411             aliases.put("MAND", MANDAIC);
4412             aliases.put("MERC", MEROITIC_CURSIVE);
4413             aliases.put("MERO", MEROITIC_HIEROGLYPHS);
4414             aliases.put("MLYM", MALAYALAM);
4415             aliases.put("MONG", MONGOLIAN);
4416             aliases.put("MTEI", MEETEI_MAYEK);
4417             aliases.put("MYMR", MYANMAR);
4418             aliases.put("NKOO", NKO);
4419             aliases.put("OGAM", OGHAM);
4420             aliases.put("OLCK", OL_CHIKI);
4421             aliases.put("ORKH", OLD_TURKIC);
4422             aliases.put("ORYA", ORIYA);
4423             aliases.put("OSMA", OSMANYA);
4424             aliases.put("PHAG", PHAGS_PA);
4425             aliases.put("PLRD", MIAO);
4426             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4427             aliases.put("PHNX", PHOENICIAN);
4428             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4429             aliases.put("RJNG", REJANG);
4430             aliases.put("RUNR", RUNIC);
4431             aliases.put("SAMR", SAMARITAN);
4432             aliases.put("SARB", OLD_SOUTH_ARABIAN);
4433             aliases.put("SAUR", SAURASHTRA);
4434             aliases.put("SHAW", SHAVIAN);
4435             aliases.put("SHRD", SHARADA);
4436             aliases.put("SINH", SINHALA);
4437             aliases.put("SORA", SORA_SOMPENG);
4438             aliases.put("SUND", SUNDANESE);
4439             aliases.put("SYLO", SYLOTI_NAGRI);
4440             aliases.put("SYRC", SYRIAC);
4441             aliases.put("TAGB", TAGBANWA);
4442             aliases.put("TALE", TAI_LE);
4443             aliases.put("TAKR", TAKRI);
4444             aliases.put("TALU", NEW_TAI_LUE);
4445             aliases.put("TAML", TAMIL);
4446             aliases.put("TAVT", TAI_VIET);
4447             aliases.put("TELU", TELUGU);
4448             aliases.put("TFNG", TIFINAGH);
4449             aliases.put("TGLG", TAGALOG);
4450             aliases.put("THAA", THAANA);
4451             aliases.put("THAI", THAI);
4452             aliases.put("TIBT", TIBETAN);
4453             aliases.put("UGAR", UGARITIC);
4454             aliases.put("VAII", VAI);
4455             aliases.put("XPEO", OLD_PERSIAN);
4456             aliases.put("XSUX", CUNEIFORM);
4457             aliases.put("YIII", YI);
4458             aliases.put("ZINH", INHERITED);
4459             aliases.put("ZYYY", COMMON);
4460             aliases.put("ZZZZ", UNKNOWN);
4461         }
4462 
4463         /**
4464          * Returns the enum constant representing the Unicode script of which
4465          * the given character (Unicode code point) is assigned to.
4466          *
4467          * @param   codePoint the character (Unicode code point) in question.
4468          * @return  The {@code UnicodeScript} constant representing the
4469          *          Unicode script of which this character is assigned to.
4470          *
4471          * @exception IllegalArgumentException if the specified
4472          * {@code codePoint} is an invalid Unicode code point.
4473          * @see Character#isValidCodePoint(int)
4474          *
4475          */
4476         public static UnicodeScript of(int codePoint) {
4477             if (!isValidCodePoint(codePoint))
4478                 throw new IllegalArgumentException();
4479             int type = getType(codePoint);
4480             // leave SURROGATE and PRIVATE_USE for table lookup
4481             if (type == UNASSIGNED)
4482                 return UNKNOWN;
4483             int index = Arrays.binarySearch(scriptStarts, codePoint);
4484             if (index < 0)
4485                 index = -index - 2;
4486             return scripts[index];
4487         }
4488 
4489         /**
4490          * Returns the UnicodeScript constant with the given Unicode script
4491          * name or the script name alias. Script names and their aliases are
4492          * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4493          * and PropertyValueAliases&lt;version&gt;.txt define script names
4494          * and the script name aliases for a particular version of the
4495          * standard. The {@link Character} class specifies the version of
4496          * the standard that it supports.
4497          * <p>
4498          * Character case is ignored for all of the valid script names.
4499          * The en_US locale's case mapping rules are used to provide
4500          * case-insensitive string comparisons for script name validation.
4501          *
4502          * @param scriptName A {@code UnicodeScript} name.
4503          * @return The {@code UnicodeScript} constant identified
4504          *         by {@code scriptName}
4505          * @throws IllegalArgumentException if {@code scriptName} is an
4506          *         invalid name
4507          * @throws NullPointerException if {@code scriptName} is null
4508          */
4509         public static final UnicodeScript forName(String scriptName) {
4510             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4511                                  //.replace(' ', '_'));
4512             UnicodeScript sc = aliases.get(scriptName);
4513             if (sc != null)
4514                 return sc;
4515             return valueOf(scriptName);
4516         }
4517     }
4518 
4519     /**
4520      * The value of the {@code Character}.
4521      *
4522      * @serial
4523      */
4524     private final char value;
4525 
4526     /** use serialVersionUID from JDK 1.0.2 for interoperability */
4527     private static final long serialVersionUID = 3786198910865385080L;
4528 
4529     /**
4530      * Constructs a newly allocated {@code Character} object that
4531      * represents the specified {@code char} value.
4532      *
4533      * @param  value   the value to be represented by the
4534      *                  {@code Character} object.
4535      */
4536     public Character(char value) {
4537         this.value = value;
4538     }
4539 
4540     private static class CharacterCache {
4541         private CharacterCache(){}
4542 
4543         static final Character cache[] = new Character[127 + 1];
4544 
4545         static {
4546             for (int i = 0; i < cache.length; i++)
4547                 cache[i] = new Character((char)i);
4548         }
4549     }
4550 
4551     /**
4552      * Returns a <tt>Character</tt> instance representing the specified
4553      * <tt>char</tt> value.
4554      * If a new <tt>Character</tt> instance is not required, this method
4555      * should generally be used in preference to the constructor
4556      * {@link #Character(char)}, as this method is likely to yield
4557      * significantly better space and time performance by caching
4558      * frequently requested values.
4559      *
4560      * This method will always cache values in the range {@code
4561      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
4562      * cache other values outside of this range.
4563      *
4564      * @param  c a char value.
4565      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
4566      * @since  1.5
4567      */
4568     public static Character valueOf(char c) {
4569         if (c <= 127) { // must cache
4570             return CharacterCache.cache[(int)c];
4571         }
4572         return new Character(c);
4573     }
4574 
4575     /**
4576      * Returns the value of this {@code Character} object.
4577      * @return  the primitive {@code char} value represented by
4578      *          this object.
4579      */
4580     public char charValue() {
4581         return value;
4582     }
4583 
4584     /**
4585      * Returns a hash code for this {@code Character}; equal to the result
4586      * of invoking {@code charValue()}.
4587      *
4588      * @return a hash code value for this {@code Character}
4589      */
4590     @Override
4591     public int hashCode() {
4592         return Character.hashCode(value);
4593     }
4594 
4595     /**
4596      * Returns a hash code for a {@code char} value; compatible with
4597      * {@code Character.hashCode()}.
4598      *
4599      * @since 1.8
4600      *
4601      * @param value The {@code char} for which to return a hash code.
4602      * @return a hash code value for a {@code char} value.
4603      */
4604     public static int hashCode(char value) {
4605         return (int)value;
4606     }
4607 
4608     /**
4609      * Compares this object against the specified object.
4610      * The result is {@code true} if and only if the argument is not
4611      * {@code null} and is a {@code Character} object that
4612      * represents the same {@code char} value as this object.
4613      *
4614      * @param   obj   the object to compare with.
4615      * @return  {@code true} if the objects are the same;
4616      *          {@code false} otherwise.
4617      */
4618     public boolean equals(Object obj) {
4619         if (obj instanceof Character) {
4620             return value == ((Character)obj).charValue();
4621         }
4622         return false;
4623     }
4624 
4625     /**
4626      * Returns a {@code String} object representing this
4627      * {@code Character}'s value.  The result is a string of
4628      * length 1 whose sole component is the primitive
4629      * {@code char} value represented by this
4630      * {@code Character} object.
4631      *
4632      * @return  a string representation of this object.
4633      */
4634     public String toString() {
4635         char buf[] = {value};
4636         return String.valueOf(buf);
4637     }
4638 
4639     /**
4640      * Returns a {@code String} object representing the
4641      * specified {@code char}.  The result is a string of length
4642      * 1 consisting solely of the specified {@code char}.
4643      *
4644      * @param c the {@code char} to be converted
4645      * @return the string representation of the specified {@code char}
4646      * @since 1.4
4647      */
4648     public static String toString(char c) {
4649         return String.valueOf(c);
4650     }
4651 
4652     /**
4653      * Determines whether the specified code point is a valid
4654      * <a href="http://www.unicode.org/glossary/#code_point">
4655      * Unicode code point value</a>.
4656      *
4657      * @param  codePoint the Unicode code point to be tested
4658      * @return {@code true} if the specified code point value is between
4659      *         {@link #MIN_CODE_POINT} and
4660      *         {@link #MAX_CODE_POINT} inclusive;
4661      *         {@code false} otherwise.
4662      * @since  1.5
4663      */
4664     public static boolean isValidCodePoint(int codePoint) {
4665         // Optimized form of:
4666         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4667         int plane = codePoint >>> 16;
4668         return plane < ((MAX_CODE_POINT + 1) >>> 16);
4669     }
4670 
4671     /**
4672      * Determines whether the specified character (Unicode code point)
4673      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4674      * Such code points can be represented using a single {@code char}.
4675      *
4676      * @param  codePoint the character (Unicode code point) to be tested
4677      * @return {@code true} if the specified code point is between
4678      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4679      *         {@code false} otherwise.
4680      * @since  1.7
4681      */
4682     public static boolean isBmpCodePoint(int codePoint) {
4683         return codePoint >>> 16 == 0;
4684         // Optimized form of:
4685         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4686         // We consistently use logical shift (>>>) to facilitate
4687         // additional runtime optimizations.
4688     }
4689 
4690     /**
4691      * Determines whether the specified character (Unicode code point)
4692      * is in the <a href="#supplementary">supplementary character</a> range.
4693      *
4694      * @param  codePoint the character (Unicode code point) to be tested
4695      * @return {@code true} if the specified code point is between
4696      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4697      *         {@link #MAX_CODE_POINT} inclusive;
4698      *         {@code false} otherwise.
4699      * @since  1.5
4700      */
4701     public static boolean isSupplementaryCodePoint(int codePoint) {
4702         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4703             && codePoint <  MAX_CODE_POINT + 1;
4704     }
4705 
4706     /**
4707      * Determines if the given {@code char} value is a
4708      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4709      * Unicode high-surrogate code unit</a>
4710      * (also known as <i>leading-surrogate code unit</i>).
4711      *
4712      * <p>Such values do not represent characters by themselves,
4713      * but are used in the representation of
4714      * <a href="#supplementary">supplementary characters</a>
4715      * in the UTF-16 encoding.
4716      *
4717      * @param  ch the {@code char} value to be tested.
4718      * @return {@code true} if the {@code char} value is between
4719      *         {@link #MIN_HIGH_SURROGATE} and
4720      *         {@link #MAX_HIGH_SURROGATE} inclusive;
4721      *         {@code false} otherwise.
4722      * @see    Character#isLowSurrogate(char)
4723      * @see    Character.UnicodeBlock#of(int)
4724      * @since  1.5
4725      */
4726     public static boolean isHighSurrogate(char ch) {
4727         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4728         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4729     }
4730 
4731     /**
4732      * Determines if the given {@code char} value is a
4733      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4734      * Unicode low-surrogate code unit</a>
4735      * (also known as <i>trailing-surrogate code unit</i>).
4736      *
4737      * <p>Such values do not represent characters by themselves,
4738      * but are used in the representation of
4739      * <a href="#supplementary">supplementary characters</a>
4740      * in the UTF-16 encoding.
4741      *
4742      * @param  ch the {@code char} value to be tested.
4743      * @return {@code true} if the {@code char} value is between
4744      *         {@link #MIN_LOW_SURROGATE} and
4745      *         {@link #MAX_LOW_SURROGATE} inclusive;
4746      *         {@code false} otherwise.
4747      * @see    Character#isHighSurrogate(char)
4748      * @since  1.5
4749      */
4750     public static boolean isLowSurrogate(char ch) {
4751         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4752     }
4753 
4754     /**
4755      * Determines if the given {@code char} value is a Unicode
4756      * <i>surrogate code unit</i>.
4757      *
4758      * <p>Such values do not represent characters by themselves,
4759      * but are used in the representation of
4760      * <a href="#supplementary">supplementary characters</a>
4761      * in the UTF-16 encoding.
4762      *
4763      * <p>A char value is a surrogate code unit if and only if it is either
4764      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4765      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4766      *
4767      * @param  ch the {@code char} value to be tested.
4768      * @return {@code true} if the {@code char} value is between
4769      *         {@link #MIN_SURROGATE} and
4770      *         {@link #MAX_SURROGATE} inclusive;
4771      *         {@code false} otherwise.
4772      * @since  1.7
4773      */
4774     public static boolean isSurrogate(char ch) {
4775         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4776     }
4777 
4778     /**
4779      * Determines whether the specified pair of {@code char}
4780      * values is a valid
4781      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4782      * Unicode surrogate pair</a>.
4783 
4784      * <p>This method is equivalent to the expression:
4785      * <blockquote><pre>{@code
4786      * isHighSurrogate(high) && isLowSurrogate(low)
4787      * }</pre></blockquote>
4788      *
4789      * @param  high the high-surrogate code value to be tested
4790      * @param  low the low-surrogate code value to be tested
4791      * @return {@code true} if the specified high and
4792      * low-surrogate code values represent a valid surrogate pair;
4793      * {@code false} otherwise.
4794      * @since  1.5
4795      */
4796     public static boolean isSurrogatePair(char high, char low) {
4797         return isHighSurrogate(high) && isLowSurrogate(low);
4798     }
4799 
4800     /**
4801      * Determines the number of {@code char} values needed to
4802      * represent the specified character (Unicode code point). If the
4803      * specified character is equal to or greater than 0x10000, then
4804      * the method returns 2. Otherwise, the method returns 1.
4805      *
4806      * <p>This method doesn't validate the specified character to be a
4807      * valid Unicode code point. The caller must validate the
4808      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
4809      * if necessary.
4810      *
4811      * @param   codePoint the character (Unicode code point) to be tested.
4812      * @return  2 if the character is a valid supplementary character; 1 otherwise.
4813      * @see     Character#isSupplementaryCodePoint(int)
4814      * @since   1.5
4815      */
4816     public static int charCount(int codePoint) {
4817         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4818     }
4819 
4820     /**
4821      * Converts the specified surrogate pair to its supplementary code
4822      * point value. This method does not validate the specified
4823      * surrogate pair. The caller must validate it using {@link
4824      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4825      *
4826      * @param  high the high-surrogate code unit
4827      * @param  low the low-surrogate code unit
4828      * @return the supplementary code point composed from the
4829      *         specified surrogate pair.
4830      * @since  1.5
4831      */
4832     public static int toCodePoint(char high, char low) {
4833         // Optimized form of:
4834         // return ((high - MIN_HIGH_SURROGATE) << 10)
4835         //         + (low - MIN_LOW_SURROGATE)
4836         //         + MIN_SUPPLEMENTARY_CODE_POINT;
4837         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4838                                        - (MIN_HIGH_SURROGATE << 10)
4839                                        - MIN_LOW_SURROGATE);
4840     }
4841 
4842     /**
4843      * Returns the code point at the given index of the
4844      * {@code CharSequence}. If the {@code char} value at
4845      * the given index in the {@code CharSequence} is in the
4846      * high-surrogate range, the following index is less than the
4847      * length of the {@code CharSequence}, and the
4848      * {@code char} value at the following index is in the
4849      * low-surrogate range, then the supplementary code point
4850      * corresponding to this surrogate pair is returned. Otherwise,
4851      * the {@code char} value at the given index is returned.
4852      *
4853      * @param seq a sequence of {@code char} values (Unicode code
4854      * units)
4855      * @param index the index to the {@code char} values (Unicode
4856      * code units) in {@code seq} to be converted
4857      * @return the Unicode code point at the given index
4858      * @exception NullPointerException if {@code seq} is null.
4859      * @exception IndexOutOfBoundsException if the value
4860      * {@code index} is negative or not less than
4861      * {@link CharSequence#length() seq.length()}.
4862      * @since  1.5
4863      */
4864     public static int codePointAt(CharSequence seq, int index) {
4865         char c1 = seq.charAt(index);
4866         if (isHighSurrogate(c1) && ++index < seq.length()) {
4867             char c2 = seq.charAt(index);
4868             if (isLowSurrogate(c2)) {
4869                 return toCodePoint(c1, c2);
4870             }
4871         }
4872         return c1;
4873     }
4874 
4875     /**
4876      * Returns the code point at the given index of the
4877      * {@code char} array. If the {@code char} value at
4878      * the given index in the {@code char} array is in the
4879      * high-surrogate range, the following index is less than the
4880      * length of the {@code char} array, and the
4881      * {@code char} value at the following index is in the
4882      * low-surrogate range, then the supplementary code point
4883      * corresponding to this surrogate pair is returned. Otherwise,
4884      * the {@code char} value at the given index is returned.
4885      *
4886      * @param a the {@code char} array
4887      * @param index the index to the {@code char} values (Unicode
4888      * code units) in the {@code char} array to be converted
4889      * @return the Unicode code point at the given index
4890      * @exception NullPointerException if {@code a} is null.
4891      * @exception IndexOutOfBoundsException if the value
4892      * {@code index} is negative or not less than
4893      * the length of the {@code char} array.
4894      * @since  1.5
4895      */
4896     public static int codePointAt(char[] a, int index) {
4897         return codePointAtImpl(a, index, a.length);
4898     }
4899 
4900     /**
4901      * Returns the code point at the given index of the
4902      * {@code char} array, where only array elements with
4903      * {@code index} less than {@code limit} can be used. If
4904      * the {@code char} value at the given index in the
4905      * {@code char} array is in the high-surrogate range, the
4906      * following index is less than the {@code limit}, and the
4907      * {@code char} value at the following index is in the
4908      * low-surrogate range, then the supplementary code point
4909      * corresponding to this surrogate pair is returned. Otherwise,
4910      * the {@code char} value at the given index is returned.
4911      *
4912      * @param a the {@code char} array
4913      * @param index the index to the {@code char} values (Unicode
4914      * code units) in the {@code char} array to be converted
4915      * @param limit the index after the last array element that
4916      * can be used in the {@code char} array
4917      * @return the Unicode code point at the given index
4918      * @exception NullPointerException if {@code a} is null.
4919      * @exception IndexOutOfBoundsException if the {@code index}
4920      * argument is negative or not less than the {@code limit}
4921      * argument, or if the {@code limit} argument is negative or
4922      * greater than the length of the {@code char} array.
4923      * @since  1.5
4924      */
4925     public static int codePointAt(char[] a, int index, int limit) {
4926         if (index >= limit || limit < 0 || limit > a.length) {
4927             throw new IndexOutOfBoundsException();
4928         }
4929         return codePointAtImpl(a, index, limit);
4930     }
4931 
4932     // throws ArrayIndexOutOfBoundsException if index out of bounds
4933     static int codePointAtImpl(char[] a, int index, int limit) {
4934         char c1 = a[index];
4935         if (isHighSurrogate(c1) && ++index < limit) {
4936             char c2 = a[index];
4937             if (isLowSurrogate(c2)) {
4938                 return toCodePoint(c1, c2);
4939             }
4940         }
4941         return c1;
4942     }
4943 
4944     /**
4945      * Returns the code point preceding the given index of the
4946      * {@code CharSequence}. If the {@code char} value at
4947      * {@code (index - 1)} in the {@code CharSequence} is in
4948      * the low-surrogate range, {@code (index - 2)} is not
4949      * negative, and the {@code char} value at {@code (index - 2)}
4950      * in the {@code CharSequence} is in the
4951      * high-surrogate range, then the supplementary code point
4952      * corresponding to this surrogate pair is returned. Otherwise,
4953      * the {@code char} value at {@code (index - 1)} is
4954      * returned.
4955      *
4956      * @param seq the {@code CharSequence} instance
4957      * @param index the index following the code point that should be returned
4958      * @return the Unicode code point value before the given index.
4959      * @exception NullPointerException if {@code seq} is null.
4960      * @exception IndexOutOfBoundsException if the {@code index}
4961      * argument is less than 1 or greater than {@link
4962      * CharSequence#length() seq.length()}.
4963      * @since  1.5
4964      */
4965     public static int codePointBefore(CharSequence seq, int index) {
4966         char c2 = seq.charAt(--index);
4967         if (isLowSurrogate(c2) && index > 0) {
4968             char c1 = seq.charAt(--index);
4969             if (isHighSurrogate(c1)) {
4970                 return toCodePoint(c1, c2);
4971             }
4972         }
4973         return c2;
4974     }
4975 
4976     /**
4977      * Returns the code point preceding the given index of the
4978      * {@code char} array. If the {@code char} value at
4979      * {@code (index - 1)} in the {@code char} array is in
4980      * the low-surrogate range, {@code (index - 2)} is not
4981      * negative, and the {@code char} value at {@code (index - 2)}
4982      * in the {@code char} array is in the
4983      * high-surrogate range, then the supplementary code point
4984      * corresponding to this surrogate pair is returned. Otherwise,
4985      * the {@code char} value at {@code (index - 1)} is
4986      * returned.
4987      *
4988      * @param a the {@code char} array
4989      * @param index the index following the code point that should be returned
4990      * @return the Unicode code point value before the given index.
4991      * @exception NullPointerException if {@code a} is null.
4992      * @exception IndexOutOfBoundsException if the {@code index}
4993      * argument is less than 1 or greater than the length of the
     * {@code char} array.
4995      * @since  1.5
4996      */
4997     public static int codePointBefore(char[] a, int index) {
4998         return codePointBeforeImpl(a, index, 0);
4999     }
5000 
5001     /**
5002      * Returns the code point preceding the given index of the
5003      * {@code char} array, where only array elements with
5004      * {@code index} greater than or equal to {@code start}
5005      * can be used. If the {@code char} value at {@code (index - 1)}
5006      * in the {@code char} array is in the
5007      * low-surrogate range, {@code (index - 2)} is not less than
5008      * {@code start}, and the {@code char} value at
5009      * {@code (index - 2)} in the {@code char} array is in
5010      * the high-surrogate range, then the supplementary code point
5011      * corresponding to this surrogate pair is returned. Otherwise,
5012      * the {@code char} value at {@code (index - 1)} is
5013      * returned.
5014      *
5015      * @param a the {@code char} array
5016      * @param index the index following the code point that should be returned
5017      * @param start the index of the first array element in the
5018      * {@code char} array
5019      * @return the Unicode code point value before the given index.
5020      * @exception NullPointerException if {@code a} is null.
5021      * @exception IndexOutOfBoundsException if the {@code index}
5022      * argument is not greater than the {@code start} argument or
5023      * is greater than the length of the {@code char} array, or
5024      * if the {@code start} argument is negative or not less than
5025      * the length of the {@code char} array.
5026      * @since  1.5
5027      */
5028     public static int codePointBefore(char[] a, int index, int start) {
5029         if (index <= start || start < 0 || start >= a.length) {
5030             throw new IndexOutOfBoundsException();
5031         }
5032         return codePointBeforeImpl(a, index, start);
5033     }
5034 
5035     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
5036     static int codePointBeforeImpl(char[] a, int index, int start) {
5037         char c2 = a[--index];
5038         if (isLowSurrogate(c2) && index > start) {
5039             char c1 = a[--index];
5040             if (isHighSurrogate(c1)) {
5041                 return toCodePoint(c1, c2);
5042             }
5043         }
5044         return c2;
5045     }
5046 
5047     /**
5048      * Returns the leading surrogate (a
5049      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
5050      * high surrogate code unit</a>) of the
5051      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5052      * surrogate pair</a>
5053      * representing the specified supplementary character (Unicode
5054      * code point) in the UTF-16 encoding.  If the specified character
5055      * is not a
5056      * <a href="Character.html#supplementary">supplementary character</a>,
5057      * an unspecified {@code char} is returned.
5058      *
5059      * <p>If
5060      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5061      * is {@code true}, then
5062      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
5063      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
5064      * are also always {@code true}.
5065      *
5066      * @param   codePoint a supplementary character (Unicode code point)
5067      * @return  the leading surrogate code unit used to represent the
5068      *          character in the UTF-16 encoding
5069      * @since   1.7
5070      */
5071     public static char highSurrogate(int codePoint) {
5072         return (char) ((codePoint >>> 10)
5073             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
5074     }
5075 
5076     /**
5077      * Returns the trailing surrogate (a
5078      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
5079      * low surrogate code unit</a>) of the
5080      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5081      * surrogate pair</a>
5082      * representing the specified supplementary character (Unicode
5083      * code point) in the UTF-16 encoding.  If the specified character
5084      * is not a
5085      * <a href="Character.html#supplementary">supplementary character</a>,
5086      * an unspecified {@code char} is returned.
5087      *
5088      * <p>If
5089      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5090      * is {@code true}, then
5091      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
5092      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
5093      * are also always {@code true}.
5094      *
5095      * @param   codePoint a supplementary character (Unicode code point)
5096      * @return  the trailing surrogate code unit used to represent the
5097      *          character in the UTF-16 encoding
5098      * @since   1.7
5099      */
5100     public static char lowSurrogate(int codePoint) {
5101         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
5102     }
5103 
5104     /**
5105      * Converts the specified character (Unicode code point) to its
5106      * UTF-16 representation. If the specified code point is a BMP
5107      * (Basic Multilingual Plane or Plane 0) value, the same value is
5108      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
5109      * specified code point is a supplementary character, its
5110      * surrogate values are stored in {@code dst[dstIndex]}
5111      * (high-surrogate) and {@code dst[dstIndex+1]}
5112      * (low-surrogate), and 2 is returned.
5113      *
5114      * @param  codePoint the character (Unicode code point) to be converted.
5115      * @param  dst an array of {@code char} in which the
5116      * {@code codePoint}'s UTF-16 value is stored.
5117      * @param dstIndex the start index into the {@code dst}
5118      * array where the converted value is stored.
5119      * @return 1 if the code point is a BMP code point, 2 if the
5120      * code point is a supplementary code point.
5121      * @exception IllegalArgumentException if the specified
5122      * {@code codePoint} is not a valid Unicode code point.
5123      * @exception NullPointerException if the specified {@code dst} is null.
5124      * @exception IndexOutOfBoundsException if {@code dstIndex}
5125      * is negative or not less than {@code dst.length}, or if
5126      * {@code dst} at {@code dstIndex} doesn't have enough
5127      * array element(s) to store the resulting {@code char}
5128      * value(s). (If {@code dstIndex} is equal to
5129      * {@code dst.length-1} and the specified
5130      * {@code codePoint} is a supplementary character, the
5131      * high-surrogate value is not stored in
5132      * {@code dst[dstIndex]}.)
5133      * @since  1.5
5134      */
5135     public static int toChars(int codePoint, char[] dst, int dstIndex) {
5136         if (isBmpCodePoint(codePoint)) {
5137             dst[dstIndex] = (char) codePoint;
5138             return 1;
5139         } else if (isValidCodePoint(codePoint)) {
5140             toSurrogates(codePoint, dst, dstIndex);
5141             return 2;
5142         } else {
5143             throw new IllegalArgumentException();
5144         }
5145     }
5146 
5147     /**
5148      * Converts the specified character (Unicode code point) to its
5149      * UTF-16 representation stored in a {@code char} array. If
5150      * the specified code point is a BMP (Basic Multilingual Plane or
5151      * Plane 0) value, the resulting {@code char} array has
5152      * the same value as {@code codePoint}. If the specified code
5153      * point is a supplementary code point, the resulting
5154      * {@code char} array has the corresponding surrogate pair.
5155      *
5156      * @param  codePoint a Unicode code point
5157      * @return a {@code char} array having
5158      *         {@code codePoint}'s UTF-16 representation.
5159      * @exception IllegalArgumentException if the specified
5160      * {@code codePoint} is not a valid Unicode code point.
5161      * @since  1.5
5162      */
5163     public static char[] toChars(int codePoint) {
5164         if (isBmpCodePoint(codePoint)) {
5165             return new char[] { (char) codePoint };
5166         } else if (isValidCodePoint(codePoint)) {
5167             char[] result = new char[2];
5168             toSurrogates(codePoint, result, 0);
5169             return result;
5170         } else {
5171             throw new IllegalArgumentException();
5172         }
5173     }
5174 
5175     static void toSurrogates(int codePoint, char[] dst, int index) {
5176         // We write elements "backwards" to guarantee all-or-nothing
5177         dst[index+1] = lowSurrogate(codePoint);
5178         dst[index] = highSurrogate(codePoint);
5179     }
5180 
5181     /**
5182      * Returns the number of Unicode code points in the text range of
5183      * the specified char sequence. The text range begins at the
5184      * specified {@code beginIndex} and extends to the
5185      * {@code char} at index {@code endIndex - 1}. Thus the
5186      * length (in {@code char}s) of the text range is
5187      * {@code endIndex-beginIndex}. Unpaired surrogates within
5188      * the text range count as one code point each.
5189      *
5190      * @param seq the char sequence
5191      * @param beginIndex the index to the first {@code char} of
5192      * the text range.
5193      * @param endIndex the index after the last {@code char} of
5194      * the text range.
5195      * @return the number of Unicode code points in the specified text
5196      * range
5197      * @exception NullPointerException if {@code seq} is null.
5198      * @exception IndexOutOfBoundsException if the
5199      * {@code beginIndex} is negative, or {@code endIndex}
5200      * is larger than the length of the given sequence, or
5201      * {@code beginIndex} is larger than {@code endIndex}.
5202      * @since  1.5
5203      */
5204     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5205         int length = seq.length();
5206         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5207             throw new IndexOutOfBoundsException();
5208         }
5209         int n = endIndex - beginIndex;
5210         for (int i = beginIndex; i < endIndex; ) {
5211             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5212                 isLowSurrogate(seq.charAt(i))) {
5213                 n--;
5214                 i++;
5215             }
5216         }
5217         return n;
5218     }
5219 
5220     /**
5221      * Returns the number of Unicode code points in a subarray of the
5222      * {@code char} array argument. The {@code offset}
5223      * argument is the index of the first {@code char} of the
5224      * subarray and the {@code count} argument specifies the
5225      * length of the subarray in {@code char}s. Unpaired
5226      * surrogates within the subarray count as one code point each.
5227      *
5228      * @param a the {@code char} array
5229      * @param offset the index of the first {@code char} in the
5230      * given {@code char} array
5231      * @param count the length of the subarray in {@code char}s
5232      * @return the number of Unicode code points in the specified subarray
5233      * @exception NullPointerException if {@code a} is null.
5234      * @exception IndexOutOfBoundsException if {@code offset} or
5235      * {@code count} is negative, or if {@code offset +
5236      * count} is larger than the length of the given array.
5237      * @since  1.5
5238      */
5239     public static int codePointCount(char[] a, int offset, int count) {
5240         if (count > a.length - offset || offset < 0 || count < 0) {
5241             throw new IndexOutOfBoundsException();
5242         }
5243         return codePointCountImpl(a, offset, count);
5244     }
5245 
5246     static int codePointCountImpl(char[] a, int offset, int count) {
5247         int endIndex = offset + count;
5248         int n = count;
5249         for (int i = offset; i < endIndex; ) {
5250             if (isHighSurrogate(a[i++]) && i < endIndex &&
5251                 isLowSurrogate(a[i])) {
5252                 n--;
5253                 i++;
5254             }
5255         }
5256         return n;
5257     }
5258 
5259     /**
5260      * Returns the index within the given char sequence that is offset
5261      * from the given {@code index} by {@code codePointOffset}
5262      * code points. Unpaired surrogates within the text range given by
5263      * {@code index} and {@code codePointOffset} count as
5264      * one code point each.
5265      *
5266      * @param seq the char sequence
5267      * @param index the index to be offset
5268      * @param codePointOffset the offset in code points
5269      * @return the index within the char sequence
5270      * @exception NullPointerException if {@code seq} is null.
5271      * @exception IndexOutOfBoundsException if {@code index}
     *   is negative or larger than the length of the char sequence,
5273      *   or if {@code codePointOffset} is positive and the
5274      *   subsequence starting with {@code index} has fewer than
5275      *   {@code codePointOffset} code points, or if
5276      *   {@code codePointOffset} is negative and the subsequence
5277      *   before {@code index} has fewer than the absolute value
5278      *   of {@code codePointOffset} code points.
5279      * @since 1.5
5280      */
5281     public static int offsetByCodePoints(CharSequence seq, int index,
5282                                          int codePointOffset) {
5283         int length = seq.length();
5284         if (index < 0 || index > length) {
5285             throw new IndexOutOfBoundsException();
5286         }
5287 
5288         int x = index;
5289         if (codePointOffset >= 0) {
5290             int i;
5291             for (i = 0; x < length && i < codePointOffset; i++) {
5292                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5293                     isLowSurrogate(seq.charAt(x))) {
5294                     x++;
5295                 }
5296             }
5297             if (i < codePointOffset) {
5298                 throw new IndexOutOfBoundsException();
5299             }
5300         } else {
5301             int i;
5302             for (i = codePointOffset; x > 0 && i < 0; i++) {
5303                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5304                     isHighSurrogate(seq.charAt(x-1))) {
5305                     x--;
5306                 }
5307             }
5308             if (i < 0) {
5309                 throw new IndexOutOfBoundsException();
5310             }
5311         }
5312         return x;
5313     }
5314 
5315     /**
5316      * Returns the index within the given {@code char} subarray
5317      * that is offset from the given {@code index} by
5318      * {@code codePointOffset} code points. The
5319      * {@code start} and {@code count} arguments specify a
5320      * subarray of the {@code char} array. Unpaired surrogates
5321      * within the text range given by {@code index} and
5322      * {@code codePointOffset} count as one code point each.
5323      *
5324      * @param a the {@code char} array
5325      * @param start the index of the first {@code char} of the
5326      * subarray
5327      * @param count the length of the subarray in {@code char}s
5328      * @param index the index to be offset
5329      * @param codePointOffset the offset in code points
5330      * @return the index within the subarray
5331      * @exception NullPointerException if {@code a} is null.
5332      * @exception IndexOutOfBoundsException
5333      *   if {@code start} or {@code count} is negative,
5334      *   or if {@code start + count} is larger than the length of
5335      *   the given array,
5336      *   or if {@code index} is less than {@code start} or
     *   larger than {@code start + count},
5338      *   or if {@code codePointOffset} is positive and the text range
5339      *   starting with {@code index} and ending with {@code start + count - 1}
5340      *   has fewer than {@code codePointOffset} code
5341      *   points,
5342      *   or if {@code codePointOffset} is negative and the text range
5343      *   starting with {@code start} and ending with {@code index - 1}
5344      *   has fewer than the absolute value of
5345      *   {@code codePointOffset} code points.
5346      * @since 1.5
5347      */
5348     public static int offsetByCodePoints(char[] a, int start, int count,
5349                                          int index, int codePointOffset) {
5350         if (count > a.length-start || start < 0 || count < 0
5351             || index < start || index > start+count) {
5352             throw new IndexOutOfBoundsException();
5353         }
5354         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5355     }
5356 
5357     static int offsetByCodePointsImpl(char[]a, int start, int count,
5358                                       int index, int codePointOffset) {
5359         int x = index;
5360         if (codePointOffset >= 0) {
5361             int limit = start + count;
5362             int i;
5363             for (i = 0; x < limit && i < codePointOffset; i++) {
5364                 if (isHighSurrogate(a[x++]) && x < limit &&
5365                     isLowSurrogate(a[x])) {
5366                     x++;
5367                 }
5368             }
5369             if (i < codePointOffset) {
5370                 throw new IndexOutOfBoundsException();
5371             }
5372         } else {
5373             int i;
5374             for (i = codePointOffset; x > start && i < 0; i++) {
5375                 if (isLowSurrogate(a[--x]) && x > start &&
5376                     isHighSurrogate(a[x-1])) {
5377                     x--;
5378                 }
5379             }
5380             if (i < 0) {
5381                 throw new IndexOutOfBoundsException();
5382             }
5383         }
5384         return x;
5385     }
5386 
5387     /**
5388      * Determines if the specified character is a lowercase character.
5389      * <p>
5390      * A character is lowercase if its general category type, provided
5391      * by {@code Character.getType(ch)}, is
5392      * {@code LOWERCASE_LETTER}, or it has contributory property
5393      * Other_Lowercase as defined by the Unicode Standard.
5394      * <p>
5395      * The following are examples of lowercase characters:
5396      * <blockquote><pre>
5397      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5398      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
5399      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
5400      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
5401      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
5402      * </pre></blockquote>
5403      * <p> Many other Unicode characters are lowercase too.
5404      *
5405      * <p><b>Note:</b> This method cannot handle <a
5406      * href="#supplementary"> supplementary characters</a>. To support
5407      * all Unicode characters, including supplementary characters, use
5408      * the {@link #isLowerCase(int)} method.
5409      *
5410      * @param   ch   the character to be tested.
5411      * @return  {@code true} if the character is lowercase;
5412      *          {@code false} otherwise.
     * @see     Character#isUpperCase(char)
5414      * @see     Character#isTitleCase(char)
5415      * @see     Character#toLowerCase(char)
5416      * @see     Character#getType(char)
5417      */
5418     public static boolean isLowerCase(char ch) {
5419         return isLowerCase((int)ch);
5420     }
5421 
5422     /**
5423      * Determines if the specified character (Unicode code point) is a
5424      * lowercase character.
5425      * <p>
5426      * A character is lowercase if its general category type, provided
5427      * by {@link Character#getType getType(codePoint)}, is
5428      * {@code LOWERCASE_LETTER}, or it has contributory property
5429      * Other_Lowercase as defined by the Unicode Standard.
5430      * <p>
5431      * The following are examples of lowercase characters:
5432      * <blockquote><pre>
5433      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5434      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
5435      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
5436      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
5437      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
5438      * </pre></blockquote>
5439      * <p> Many other Unicode characters are lowercase too.
5440      *
5441      * @param   codePoint the character (Unicode code point) to be tested.
5442      * @return  {@code true} if the character is lowercase;
5443      *          {@code false} otherwise.
     * @see     Character#isUpperCase(int)
5445      * @see     Character#isTitleCase(int)
5446      * @see     Character#toLowerCase(int)
5447      * @see     Character#getType(int)
5448      * @since   1.5
5449      */
5450     public static boolean isLowerCase(int codePoint) {
5451         return getType(codePoint) == Character.LOWERCASE_LETTER ||
5452                CharacterData.of(codePoint).isOtherLowercase(codePoint);
5453     }
5454 
5455     /**
5456      * Determines if the specified character is an uppercase character.
5457      * <p>
5458      * A character is uppercase if its general category type, provided by
     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
5460      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5461      * <p>
5462      * The following are examples of uppercase characters:
5463      * <blockquote><pre>
5464      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5465      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
5466      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
5467      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
5468      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
5469      * </pre></blockquote>
5470      * <p> Many other Unicode characters are uppercase too.
5471      *
5472      * <p><b>Note:</b> This method cannot handle <a
5473      * href="#supplementary"> supplementary characters</a>. To support
5474      * all Unicode characters, including supplementary characters, use
5475      * the {@link #isUpperCase(int)} method.
5476      *
5477      * @param   ch   the character to be tested.
5478      * @return  {@code true} if the character is uppercase;
5479      *          {@code false} otherwise.
5480      * @see     Character#isLowerCase(char)
5481      * @see     Character#isTitleCase(char)
5482      * @see     Character#toUpperCase(char)
5483      * @see     Character#getType(char)
5484      * @since   1.0
5485      */
5486     public static boolean isUpperCase(char ch) {
5487         return isUpperCase((int)ch);
5488     }
5489 
5490     /**
5491      * Determines if the specified character (Unicode code point) is an uppercase character.
5492      * <p>
5493      * A character is uppercase if its general category type, provided by
5494      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5495      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5496      * <p>
5497      * The following are examples of uppercase characters:
5498      * <blockquote><pre>
5499      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5500      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
5501      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
5502      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
5503      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
5504      * </pre></blockquote>
5505      * <p> Many other Unicode characters are uppercase too.
5506      *
5507      * @param   codePoint the character (Unicode code point) to be tested.
5508      * @return  {@code true} if the character is uppercase;
5509      *          {@code false} otherwise.
5510      * @see     Character#isLowerCase(int)
5511      * @see     Character#isTitleCase(int)
5512      * @see     Character#toUpperCase(int)
5513      * @see     Character#getType(int)
5514      * @since   1.5
5515      */
5516     public static boolean isUpperCase(int codePoint) {
5517         return getType(codePoint) == Character.UPPERCASE_LETTER ||
5518                CharacterData.of(codePoint).isOtherUppercase(codePoint);
5519     }
5520 
5521     /**
5522      * Determines if the specified character is a titlecase character.
5523      * <p>
5524      * A character is a titlecase character if its general
5525      * category type, provided by {@code Character.getType(ch)},
5526      * is {@code TITLECASE_LETTER}.
5527      * <p>
5528      * Some characters look like pairs of Latin letters. For example, there
5529      * is an uppercase letter that looks like "LJ" and has a corresponding
5530      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5531      * is the appropriate form to use when rendering a word in lowercase
5532      * with initial capitals, as for a book title.
5533      * <p>
5534      * These are some of the Unicode characters for which this method returns
5535      * {@code true}:
5536      * <ul>
5537      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5538      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5539      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5540      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5541      * </ul>
5542      * <p> Many other Unicode characters are titlecase too.
5543      *
5544      * <p><b>Note:</b> This method cannot handle <a
5545      * href="#supplementary"> supplementary characters</a>. To support
5546      * all Unicode characters, including supplementary characters, use
5547      * the {@link #isTitleCase(int)} method.
5548      *
5549      * @param   ch   the character to be tested.
5550      * @return  {@code true} if the character is titlecase;
5551      *          {@code false} otherwise.
5552      * @see     Character#isLowerCase(char)
5553      * @see     Character#isUpperCase(char)
5554      * @see     Character#toTitleCase(char)
5555      * @see     Character#getType(char)
5556      * @since   1.0.2
5557      */
5558     public static boolean isTitleCase(char ch) {
5559         return isTitleCase((int)ch);
5560     }
5561 
5562     /**
5563      * Determines if the specified character (Unicode code point) is a titlecase character.
5564      * <p>
5565      * A character is a titlecase character if its general
5566      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5567      * is {@code TITLECASE_LETTER}.
5568      * <p>
5569      * Some characters look like pairs of Latin letters. For example, there
5570      * is an uppercase letter that looks like "LJ" and has a corresponding
5571      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5572      * is the appropriate form to use when rendering a word in lowercase
5573      * with initial capitals, as for a book title.
5574      * <p>
5575      * These are some of the Unicode characters for which this method returns
5576      * {@code true}:
5577      * <ul>
5578      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5579      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5580      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5581      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5582      * </ul>
5583      * <p> Many other Unicode characters are titlecase too.
5584      *
5585      * @param   codePoint the character (Unicode code point) to be tested.
5586      * @return  {@code true} if the character is titlecase;
5587      *          {@code false} otherwise.
5588      * @see     Character#isLowerCase(int)
5589      * @see     Character#isUpperCase(int)
5590      * @see     Character#toTitleCase(int)
5591      * @see     Character#getType(int)
5592      * @since   1.5
5593      */
5594     public static boolean isTitleCase(int codePoint) {
5595         return getType(codePoint) == Character.TITLECASE_LETTER;
5596     }
5597 
5598     /**
5599      * Determines if the specified character is a digit.
5600      * <p>
5601      * A character is a digit if its general category type, provided
5602      * by {@code Character.getType(ch)}, is
5603      * {@code DECIMAL_DIGIT_NUMBER}.
5604      * <p>
5605      * Some Unicode character ranges that contain digits:
5606      * <ul>
5607      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5608      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5609      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5610      *     Arabic-Indic digits
5611      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5612      *     Extended Arabic-Indic digits
5613      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5614      *     Devanagari digits
5615      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5616      *     Fullwidth digits
5617      * </ul>
5618      *
5619      * Many other character ranges contain digits as well.
5620      *
5621      * <p><b>Note:</b> This method cannot handle <a
5622      * href="#supplementary"> supplementary characters</a>. To support
5623      * all Unicode characters, including supplementary characters, use
5624      * the {@link #isDigit(int)} method.
5625      *
5626      * @param   ch   the character to be tested.
5627      * @return  {@code true} if the character is a digit;
5628      *          {@code false} otherwise.
5629      * @see     Character#digit(char, int)
5630      * @see     Character#forDigit(int, int)
5631      * @see     Character#getType(char)
5632      */
5633     public static boolean isDigit(char ch) {
5634         return isDigit((int)ch);
5635     }
5636 
5637     /**
5638      * Determines if the specified character (Unicode code point) is a digit.
5639      * <p>
5640      * A character is a digit if its general category type, provided
5641      * by {@link Character#getType(int) getType(codePoint)}, is
5642      * {@code DECIMAL_DIGIT_NUMBER}.
5643      * <p>
5644      * Some Unicode character ranges that contain digits:
5645      * <ul>
5646      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5647      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5648      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5649      *     Arabic-Indic digits
5650      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5651      *     Extended Arabic-Indic digits
5652      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5653      *     Devanagari digits
5654      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5655      *     Fullwidth digits
5656      * </ul>
5657      *
5658      * Many other character ranges contain digits as well.
5659      *
5660      * @param   codePoint the character (Unicode code point) to be tested.
5661      * @return  {@code true} if the character is a digit;
5662      *          {@code false} otherwise.
5663      * @see     Character#forDigit(int, int)
5664      * @see     Character#getType(int)
5665      * @since   1.5
5666      */
5667     public static boolean isDigit(int codePoint) {
5668         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
5669     }
5670 
5671     /**
5672      * Determines if a character is defined in Unicode.
5673      * <p>
5674      * A character is defined if at least one of the following is true:
5675      * <ul>
5676      * <li>It has an entry in the UnicodeData file.
5677      * <li>It has a value in a range defined by the UnicodeData file.
5678      * </ul>
5679      *
5680      * <p><b>Note:</b> This method cannot handle <a
5681      * href="#supplementary"> supplementary characters</a>. To support
5682      * all Unicode characters, including supplementary characters, use
5683      * the {@link #isDefined(int)} method.
5684      *
5685      * @param   ch   the character to be tested
5686      * @return  {@code true} if the character has a defined meaning
5687      *          in Unicode; {@code false} otherwise.
5688      * @see     Character#isDigit(char)
5689      * @see     Character#isLetter(char)
5690      * @see     Character#isLetterOrDigit(char)
5691      * @see     Character#isLowerCase(char)
5692      * @see     Character#isTitleCase(char)
5693      * @see     Character#isUpperCase(char)
5694      * @since   1.0.2
5695      */
5696     public static boolean isDefined(char ch) {
5697         return isDefined((int)ch);
5698     }
5699 
5700     /**
5701      * Determines if a character (Unicode code point) is defined in Unicode.
5702      * <p>
5703      * A character is defined if at least one of the following is true:
5704      * <ul>
5705      * <li>It has an entry in the UnicodeData file.
5706      * <li>It has a value in a range defined by the UnicodeData file.
5707      * </ul>
5708      *
5709      * @param   codePoint the character (Unicode code point) to be tested.
5710      * @return  {@code true} if the character has a defined meaning
5711      *          in Unicode; {@code false} otherwise.
5712      * @see     Character#isDigit(int)
5713      * @see     Character#isLetter(int)
5714      * @see     Character#isLetterOrDigit(int)
5715      * @see     Character#isLowerCase(int)
5716      * @see     Character#isTitleCase(int)
5717      * @see     Character#isUpperCase(int)
5718      * @since   1.5
5719      */
5720     public static boolean isDefined(int codePoint) {
5721         return getType(codePoint) != Character.UNASSIGNED;
5722     }
5723 
5724     /**
5725      * Determines if the specified character is a letter.
5726      * <p>
5727      * A character is considered to be a letter if its general
5728      * category type, provided by {@code Character.getType(ch)},
5729      * is any of the following:
5730      * <ul>
5731      * <li> {@code UPPERCASE_LETTER}
5732      * <li> {@code LOWERCASE_LETTER}
5733      * <li> {@code TITLECASE_LETTER}
5734      * <li> {@code MODIFIER_LETTER}
5735      * <li> {@code OTHER_LETTER}
5736      * </ul>
5737      *
5738      * Not all letters have case. Many characters are
5739      * letters but are neither uppercase nor lowercase nor titlecase.
5740      *
5741      * <p><b>Note:</b> This method cannot handle <a
5742      * href="#supplementary"> supplementary characters</a>. To support
5743      * all Unicode characters, including supplementary characters, use
5744      * the {@link #isLetter(int)} method.
5745      *
5746      * @param   ch   the character to be tested.
5747      * @return  {@code true} if the character is a letter;
5748      *          {@code false} otherwise.
5749      * @see     Character#isDigit(char)
5750      * @see     Character#isJavaIdentifierStart(char)
5751      * @see     Character#isJavaLetter(char)
5752      * @see     Character#isJavaLetterOrDigit(char)
5753      * @see     Character#isLetterOrDigit(char)
5754      * @see     Character#isLowerCase(char)
5755      * @see     Character#isTitleCase(char)
5756      * @see     Character#isUnicodeIdentifierStart(char)
5757      * @see     Character#isUpperCase(char)
5758      */
5759     public static boolean isLetter(char ch) {
5760         return isLetter((int)ch);
5761     }
5762 
5763     /**
5764      * Determines if the specified character (Unicode code point) is a letter.
5765      * <p>
5766      * A character is considered to be a letter if its general
5767      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5768      * is any of the following:
5769      * <ul>
5770      * <li> {@code UPPERCASE_LETTER}
5771      * <li> {@code LOWERCASE_LETTER}
5772      * <li> {@code TITLECASE_LETTER}
5773      * <li> {@code MODIFIER_LETTER}
5774      * <li> {@code OTHER_LETTER}
5775      * </ul>
5776      *
5777      * Not all letters have case. Many characters are
5778      * letters but are neither uppercase nor lowercase nor titlecase.
5779      *
5780      * @param   codePoint the character (Unicode code point) to be tested.
5781      * @return  {@code true} if the character is a letter;
5782      *          {@code false} otherwise.
5783      * @see     Character#isDigit(int)
5784      * @see     Character#isJavaIdentifierStart(int)
5785      * @see     Character#isLetterOrDigit(int)
5786      * @see     Character#isLowerCase(int)
5787      * @see     Character#isTitleCase(int)
5788      * @see     Character#isUnicodeIdentifierStart(int)
5789      * @see     Character#isUpperCase(int)
5790      * @since   1.5
5791      */
5792     public static boolean isLetter(int codePoint) {
5793         return ((((1 << Character.UPPERCASE_LETTER) |
5794             (1 << Character.LOWERCASE_LETTER) |
5795             (1 << Character.TITLECASE_LETTER) |
5796             (1 << Character.MODIFIER_LETTER) |
5797             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
5798             != 0;
5799     }
5800 
5801     /**
5802      * Determines if the specified character is a letter or digit.
5803      * <p>
5804      * A character is considered to be a letter or digit if either
5805      * {@code Character.isLetter(char ch)} or
5806      * {@code Character.isDigit(char ch)} returns
5807      * {@code true} for the character.
5808      *
5809      * <p><b>Note:</b> This method cannot handle <a
5810      * href="#supplementary"> supplementary characters</a>. To support
5811      * all Unicode characters, including supplementary characters, use
5812      * the {@link #isLetterOrDigit(int)} method.
5813      *
5814      * @param   ch   the character to be tested.
5815      * @return  {@code true} if the character is a letter or digit;
5816      *          {@code false} otherwise.
5817      * @see     Character#isDigit(char)
5818      * @see     Character#isJavaIdentifierPart(char)
5819      * @see     Character#isJavaLetter(char)
5820      * @see     Character#isJavaLetterOrDigit(char)
5821      * @see     Character#isLetter(char)
5822      * @see     Character#isUnicodeIdentifierPart(char)
5823      * @since   1.0.2
5824      */
5825     public static boolean isLetterOrDigit(char ch) {
5826         return isLetterOrDigit((int)ch);
5827     }
5828 
5829     /**
5830      * Determines if the specified character (Unicode code point) is a letter or digit.
5831      * <p>
5832      * A character is considered to be a letter or digit if either
5833      * {@link #isLetter(int) isLetter(codePoint)} or
5834      * {@link #isDigit(int) isDigit(codePoint)} returns
5835      * {@code true} for the character.
5836      *
5837      * @param   codePoint the character (Unicode code point) to be tested.
5838      * @return  {@code true} if the character is a letter or digit;
5839      *          {@code false} otherwise.
5840      * @see     Character#isDigit(int)
5841      * @see     Character#isJavaIdentifierPart(int)
5842      * @see     Character#isLetter(int)
5843      * @see     Character#isUnicodeIdentifierPart(int)
5844      * @since   1.5
5845      */
5846     public static boolean isLetterOrDigit(int codePoint) {
5847         return ((((1 << Character.UPPERCASE_LETTER) |
5848             (1 << Character.LOWERCASE_LETTER) |
5849             (1 << Character.TITLECASE_LETTER) |
5850             (1 << Character.MODIFIER_LETTER) |
5851             (1 << Character.OTHER_LETTER) |
5852             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
5853             != 0;
5854     }
5855 
5856     /**
5857      * Determines if the specified character is permissible as the first
5858      * character in a Java identifier.
5859      * <p>
5860      * A character may start a Java identifier if and only if
5861      * one of the following is true:
5862      * <ul>
5863      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5864      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5865      * <li> {@code ch} is a currency symbol (such as {@code '$'})
5866      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5867      * </ul>
5868      *
5869      * @param   ch the character to be tested.
5870      * @return  {@code true} if the character may start a Java
5871      *          identifier; {@code false} otherwise.
5872      * @see     Character#isJavaLetterOrDigit(char)
5873      * @see     Character#isJavaIdentifierStart(char)
5874      * @see     Character#isJavaIdentifierPart(char)
5875      * @see     Character#isLetter(char)
5876      * @see     Character#isLetterOrDigit(char)
5877      * @see     Character#isUnicodeIdentifierStart(char)
5878      * @since   1.0.2
5879      * @deprecated Replaced by isJavaIdentifierStart(char).
5880      */
5881     @Deprecated
5882     public static boolean isJavaLetter(char ch) {
5883         return isJavaIdentifierStart(ch);
5884     }
5885 
5886     /**
5887      * Determines if the specified character may be part of a Java
5888      * identifier as other than the first character.
5889      * <p>
5890      * A character may be part of a Java identifier if and only if any
5891      * of the following are true:
5892      * <ul>
5893      * <li>  it is a letter
5894      * <li>  it is a currency symbol (such as {@code '$'})
5895      * <li>  it is a connecting punctuation character (such as {@code '_'})
5896      * <li>  it is a digit
5897      * <li>  it is a numeric letter (such as a Roman numeral character)
5898      * <li>  it is a combining mark
5899      * <li>  it is a non-spacing mark
5900      * <li> {@code isIdentifierIgnorable} returns
5901      * {@code true} for the character.
5902      * </ul>
5903      *
5904      * @param   ch the character to be tested.
5905      * @return  {@code true} if the character may be part of a
5906      *          Java identifier; {@code false} otherwise.
5907      * @see     Character#isJavaLetter(char)
5908      * @see     Character#isJavaIdentifierStart(char)
5909      * @see     Character#isJavaIdentifierPart(char)
5910      * @see     Character#isLetter(char)
5911      * @see     Character#isLetterOrDigit(char)
5912      * @see     Character#isUnicodeIdentifierPart(char)
5913      * @see     Character#isIdentifierIgnorable(char)
5914      * @since   1.0.2
5915      * @deprecated Replaced by isJavaIdentifierPart(char).
5916      */
5917     @Deprecated
5918     public static boolean isJavaLetterOrDigit(char ch) {
5919         return isJavaIdentifierPart(ch);
5920     }
5921 
5922     /**
5923      * Determines if the specified character (Unicode code point) is alphabetic.
5924      * <p>
5925      * A character is considered to be alphabetic if its general category type,
5926      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
5927      * the following:
5928      * <ul>
5929      * <li> <code>UPPERCASE_LETTER</code>
5930      * <li> <code>LOWERCASE_LETTER</code>
5931      * <li> <code>TITLECASE_LETTER</code>
5932      * <li> <code>MODIFIER_LETTER</code>
5933      * <li> <code>OTHER_LETTER</code>
5934      * <li> <code>LETTER_NUMBER</code>
5935      * </ul>
5936      * or it has contributory property Other_Alphabetic as defined by the
5937      * Unicode Standard.
5938      *
5939      * @param   codePoint the character (Unicode code point) to be tested.
5940      * @return  <code>true</code> if the character is a Unicode alphabet
5941      *          character, <code>false</code> otherwise.
5942      * @since   1.7
5943      */
5944     public static boolean isAlphabetic(int codePoint) {
5945         return (((((1 << Character.UPPERCASE_LETTER) |
5946             (1 << Character.LOWERCASE_LETTER) |
5947             (1 << Character.TITLECASE_LETTER) |
5948             (1 << Character.MODIFIER_LETTER) |
5949             (1 << Character.OTHER_LETTER) |
5950             (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
5951             CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
5952     }
5953 
5954     /**
5955      * Determines if the specified character (Unicode code point) is a CJKV
5956      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
5957      * the Unicode Standard.
5958      *
5959      * @param   codePoint the character (Unicode code point) to be tested.
5960      * @return  <code>true</code> if the character is a Unicode ideograph
5961      *          character, <code>false</code> otherwise.
5962      * @since   1.7
5963      */
5964     public static boolean isIdeographic(int codePoint) {
5965         return CharacterData.of(codePoint).isIdeographic(codePoint);
5966     }
5967 
5968     /**
5969      * Determines if the specified character is
5970      * permissible as the first character in a Java identifier.
5971      * <p>
5972      * A character may start a Java identifier if and only if
5973      * one of the following conditions is true:
5974      * <ul>
5975      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5976      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5977      * <li> {@code ch} is a currency symbol (such as {@code '$'})
5978      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5979      * </ul>
5980      *
5981      * <p><b>Note:</b> This method cannot handle <a
5982      * href="#supplementary"> supplementary characters</a>. To support
5983      * all Unicode characters, including supplementary characters, use
5984      * the {@link #isJavaIdentifierStart(int)} method.
5985      *
5986      * @param   ch the character to be tested.
5987      * @return  {@code true} if the character may start a Java identifier;
5988      *          {@code false} otherwise.
5989      * @see     Character#isJavaIdentifierPart(char)
5990      * @see     Character#isLetter(char)
5991      * @see     Character#isUnicodeIdentifierStart(char)
5992      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5993      * @since   1.1
5994      */
5995     public static boolean isJavaIdentifierStart(char ch) {
5996         return isJavaIdentifierStart((int)ch);
5997     }
5998 
5999     /**
6000      * Determines if the character (Unicode code point) is
6001      * permissible as the first character in a Java identifier.
6002      * <p>
6003      * A character may start a Java identifier if and only if
6004      * one of the following conditions is true:
6005      * <ul>
6006      * <li> {@link #isLetter(int) isLetter(codePoint)}
6007      *      returns {@code true}
6008      * <li> {@link #getType(int) getType(codePoint)}
6009      *      returns {@code LETTER_NUMBER}
6010      * <li> the referenced character is a currency symbol (such as {@code '$'})
6011      * <li> the referenced character is a connecting punctuation character
6012      *      (such as {@code '_'}).
6013      * </ul>
6014      *
6015      * @param   codePoint the character (Unicode code point) to be tested.
6016      * @return  {@code true} if the character may start a Java identifier;
6017      *          {@code false} otherwise.
6018      * @see     Character#isJavaIdentifierPart(int)
6019      * @see     Character#isLetter(int)
6020      * @see     Character#isUnicodeIdentifierStart(int)
6021      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6022      * @since   1.5
6023      */
6024     public static boolean isJavaIdentifierStart(int codePoint) {
6025         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
6026     }
6027 
6028     /**
6029      * Determines if the specified character may be part of a Java
6030      * identifier as other than the first character.
6031      * <p>
6032      * A character may be part of a Java identifier if any of the following
6033      * are true:
6034      * <ul>
6035      * <li>  it is a letter
6036      * <li>  it is a currency symbol (such as {@code '$'})
6037      * <li>  it is a connecting punctuation character (such as {@code '_'})
6038      * <li>  it is a digit
6039      * <li>  it is a numeric letter (such as a Roman numeral character)
6040      * <li>  it is a combining mark
6041      * <li>  it is a non-spacing mark
6042      * <li> {@code isIdentifierIgnorable} returns
6043      * {@code true} for the character
6044      * </ul>
6045      *
6046      * <p><b>Note:</b> This method cannot handle <a
6047      * href="#supplementary"> supplementary characters</a>. To support
6048      * all Unicode characters, including supplementary characters, use
6049      * the {@link #isJavaIdentifierPart(int)} method.
6050      *
6051      * @param   ch      the character to be tested.
6052      * @return {@code true} if the character may be part of a
6053      *          Java identifier; {@code false} otherwise.
6054      * @see     Character#isIdentifierIgnorable(char)
6055      * @see     Character#isJavaIdentifierStart(char)
6056      * @see     Character#isLetterOrDigit(char)
6057      * @see     Character#isUnicodeIdentifierPart(char)
6058      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6059      * @since   1.1
6060      */
6061     public static boolean isJavaIdentifierPart(char ch) {
6062         return isJavaIdentifierPart((int)ch);
6063     }
6064 
6065     /**
6066      * Determines if the character (Unicode code point) may be part of a Java
6067      * identifier as other than the first character.
6068      * <p>
6069      * A character may be part of a Java identifier if any of the following
6070      * are true:
6071      * <ul>
6072      * <li>  it is a letter
6073      * <li>  it is a currency symbol (such as {@code '$'})
6074      * <li>  it is a connecting punctuation character (such as {@code '_'})
6075      * <li>  it is a digit
6076      * <li>  it is a numeric letter (such as a Roman numeral character)
6077      * <li>  it is a combining mark
6078      * <li>  it is a non-spacing mark
6079      * <li> {@link #isIdentifierIgnorable(int)
6080      * isIdentifierIgnorable(codePoint)} returns {@code true} for
6081      * the character
6082      * </ul>
6083      *
6084      * @param   codePoint the character (Unicode code point) to be tested.
6085      * @return {@code true} if the character may be part of a
6086      *          Java identifier; {@code false} otherwise.
6087      * @see     Character#isIdentifierIgnorable(int)
6088      * @see     Character#isJavaIdentifierStart(int)
6089      * @see     Character#isLetterOrDigit(int)
6090      * @see     Character#isUnicodeIdentifierPart(int)
6091      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6092      * @since   1.5
6093      */
6094     public static boolean isJavaIdentifierPart(int codePoint) {
6095         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
6096     }
6097 
6098     /**
6099      * Determines if the specified character is permissible as the
6100      * first character in a Unicode identifier.
6101      * <p>
6102      * A character may start a Unicode identifier if and only if
6103      * one of the following conditions is true:
6104      * <ul>
6105      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6106      * <li> {@link #getType(char) getType(ch)} returns
6107      *      {@code LETTER_NUMBER}.
6108      * </ul>
6109      *
6110      * <p><b>Note:</b> This method cannot handle <a
6111      * href="#supplementary"> supplementary characters</a>. To support
6112      * all Unicode characters, including supplementary characters, use
6113      * the {@link #isUnicodeIdentifierStart(int)} method.
6114      *
6115      * @param   ch      the character to be tested.
6116      * @return  {@code true} if the character may start a Unicode
6117      *          identifier; {@code false} otherwise.
6118      * @see     Character#isJavaIdentifierStart(char)
6119      * @see     Character#isLetter(char)
6120      * @see     Character#isUnicodeIdentifierPart(char)
6121      * @since   1.1
6122      */
6123     public static boolean isUnicodeIdentifierStart(char ch) {
6124         return isUnicodeIdentifierStart((int)ch);
6125     }
6126 
6127     /**
6128      * Determines if the specified character (Unicode code point) is permissible as the
6129      * first character in a Unicode identifier.
6130      * <p>
6131      * A character may start a Unicode identifier if and only if
6132      * one of the following conditions is true:
6133      * <ul>
6134      * <li> {@link #isLetter(int) isLetter(codePoint)}
6135      *      returns {@code true}
6136      * <li> {@link #getType(int) getType(codePoint)}
6137      *      returns {@code LETTER_NUMBER}.
6138      * </ul>
6139      * @param   codePoint the character (Unicode code point) to be tested.
6140      * @return  {@code true} if the character may start a Unicode
6141      *          identifier; {@code false} otherwise.
6142      * @see     Character#isJavaIdentifierStart(int)
6143      * @see     Character#isLetter(int)
6144      * @see     Character#isUnicodeIdentifierPart(int)
6145      * @since   1.5
6146      */
6147     public static boolean isUnicodeIdentifierStart(int codePoint) {
6148         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
6149     }
6150 
6151     /**
6152      * Determines if the specified character may be part of a Unicode
6153      * identifier as other than the first character.
6154      * <p>
6155      * A character may be part of a Unicode identifier if and only if
6156      * one of the following statements is true:
6157      * <ul>
6158      * <li>  it is a letter
6159      * <li>  it is a connecting punctuation character (such as {@code '_'})
6160      * <li>  it is a digit
6161      * <li>  it is a numeric letter (such as a Roman numeral character)
6162      * <li>  it is a combining mark
6163      * <li>  it is a non-spacing mark
6164      * <li> {@code isIdentifierIgnorable} returns
6165      * {@code true} for this character.
6166      * </ul>
6167      *
6168      * <p><b>Note:</b> This method cannot handle <a
6169      * href="#supplementary"> supplementary characters</a>. To support
6170      * all Unicode characters, including supplementary characters, use
6171      * the {@link #isUnicodeIdentifierPart(int)} method.
6172      *
6173      * @param   ch      the character to be tested.
6174      * @return  {@code true} if the character may be part of a
6175      *          Unicode identifier; {@code false} otherwise.
6176      * @see     Character#isIdentifierIgnorable(char)
6177      * @see     Character#isJavaIdentifierPart(char)
6178      * @see     Character#isLetterOrDigit(char)
6179      * @see     Character#isUnicodeIdentifierStart(char)
6180      * @since   1.1
6181      */
6182     public static boolean isUnicodeIdentifierPart(char ch) {
6183         return isUnicodeIdentifierPart((int)ch);
6184     }
6185 
6186     /**
6187      * Determines if the specified character (Unicode code point) may be part of a Unicode
6188      * identifier as other than the first character.
6189      * <p>
6190      * A character may be part of a Unicode identifier if and only if
6191      * one of the following statements is true:
6192      * <ul>
6193      * <li>  it is a letter
6194      * <li>  it is a connecting punctuation character (such as {@code '_'})
6195      * <li>  it is a digit
6196      * <li>  it is a numeric letter (such as a Roman numeral character)
6197      * <li>  it is a combining mark
6198      * <li>  it is a non-spacing mark
6199      * <li> {@code isIdentifierIgnorable} returns
6200      * {@code true} for this character.
6201      * </ul>
6202      * @param   codePoint the character (Unicode code point) to be tested.
6203      * @return  {@code true} if the character may be part of a
6204      *          Unicode identifier; {@code false} otherwise.
6205      * @see     Character#isIdentifierIgnorable(int)
6206      * @see     Character#isJavaIdentifierPart(int)
6207      * @see     Character#isLetterOrDigit(int)
6208      * @see     Character#isUnicodeIdentifierStart(int)
6209      * @since   1.5
6210      */
6211     public static boolean isUnicodeIdentifierPart(int codePoint) {
6212         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
6213     }
6214 
6215     /**
6216      * Determines if the specified character should be regarded as
6217      * an ignorable character in a Java identifier or a Unicode identifier.
6218      * <p>
6219      * The following Unicode characters are ignorable in a Java identifier
6220      * or a Unicode identifier:
6221      * <ul>
6222      * <li>ISO control characters that are not whitespace
6223      * <ul>
6224      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6225      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6226      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6227      * </ul>
6228      *
6229      * <li>all characters that have the {@code FORMAT} general
6230      * category value
6231      * </ul>
6232      *
6233      * <p><b>Note:</b> This method cannot handle <a
6234      * href="#supplementary"> supplementary characters</a>. To support
6235      * all Unicode characters, including supplementary characters, use
6236      * the {@link #isIdentifierIgnorable(int)} method.
6237      *
6238      * @param   ch      the character to be tested.
6239      * @return  {@code true} if the character is an ignorable control
6240      *          character that may be part of a Java or Unicode identifier;
6241      *           {@code false} otherwise.
6242      * @see     Character#isJavaIdentifierPart(char)
6243      * @see     Character#isUnicodeIdentifierPart(char)
6244      * @since   1.1
6245      */
6246     public static boolean isIdentifierIgnorable(char ch) {
6247         return isIdentifierIgnorable((int)ch);
6248     }
6249 
6250     /**
6251      * Determines if the specified character (Unicode code point) should be regarded as
6252      * an ignorable character in a Java identifier or a Unicode identifier.
6253      * <p>
6254      * The following Unicode characters are ignorable in a Java identifier
6255      * or a Unicode identifier:
6256      * <ul>
6257      * <li>ISO control characters that are not whitespace
6258      * <ul>
6259      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6260      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6261      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6262      * </ul>
6263      *
6264      * <li>all characters that have the {@code FORMAT} general
6265      * category value
6266      * </ul>
6267      *
6268      * @param   codePoint the character (Unicode code point) to be tested.
6269      * @return  {@code true} if the character is an ignorable control
6270      *          character that may be part of a Java or Unicode identifier;
6271      *          {@code false} otherwise.
6272      * @see     Character#isJavaIdentifierPart(int)
6273      * @see     Character#isUnicodeIdentifierPart(int)
6274      * @since   1.5
6275      */
6276     public static boolean isIdentifierIgnorable(int codePoint) {
6277         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
6278     }
6279 
6280     /**
6281      * Converts the character argument to lowercase using case
6282      * mapping information from the UnicodeData file.
6283      * <p>
6284      * Note that
6285      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
6286      * does not always return {@code true} for some ranges of
6287      * characters, particularly those that are symbols or ideographs.
6288      *
6289      * <p>In general, {@link String#toLowerCase()} should be used to map
6290      * characters to lowercase. {@code String} case mapping methods
6291      * have several benefits over {@code Character} case mapping methods.
6292      * {@code String} case mapping methods can perform locale-sensitive
6293      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6294      * the {@code Character} case mapping methods cannot.
6295      *
6296      * <p><b>Note:</b> This method cannot handle <a
6297      * href="#supplementary"> supplementary characters</a>. To support
6298      * all Unicode characters, including supplementary characters, use
6299      * the {@link #toLowerCase(int)} method.
6300      *
6301      * @param   ch   the character to be converted.
6302      * @return  the lowercase equivalent of the character, if any;
6303      *          otherwise, the character itself.
6304      * @see     Character#isLowerCase(char)
6305      * @see     String#toLowerCase()
6306      */
6307     public static char toLowerCase(char ch) {
6308         return (char)toLowerCase((int)ch);
6309     }
6310 
6311     /**
6312      * Converts the character (Unicode code point) argument to
6313      * lowercase using case mapping information from the UnicodeData
6314      * file.
6315      *
6316      * <p> Note that
6317      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
6318      * does not always return {@code true} for some ranges of
6319      * characters, particularly those that are symbols or ideographs.
6320      *
6321      * <p>In general, {@link String#toLowerCase()} should be used to map
6322      * characters to lowercase. {@code String} case mapping methods
6323      * have several benefits over {@code Character} case mapping methods.
6324      * {@code String} case mapping methods can perform locale-sensitive
6325      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6326      * the {@code Character} case mapping methods cannot.
6327      *
6328      * @param   codePoint   the character (Unicode code point) to be converted.
6329      * @return  the lowercase equivalent of the character (Unicode code
6330      *          point), if any; otherwise, the character itself.
6331      * @see     Character#isLowerCase(int)
6332      * @see     String#toLowerCase()
6333      *
6334      * @since   1.5
6335      */
6336     public static int toLowerCase(int codePoint) {
6337         return CharacterData.of(codePoint).toLowerCase(codePoint);
6338     }
6339 
6340     /**
6341      * Converts the character argument to uppercase using case mapping
6342      * information from the UnicodeData file.
6343      * <p>
6344      * Note that
6345      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
6346      * does not always return {@code true} for some ranges of
6347      * characters, particularly those that are symbols or ideographs.
6348      *
6349      * <p>In general, {@link String#toUpperCase()} should be used to map
6350      * characters to uppercase. {@code String} case mapping methods
6351      * have several benefits over {@code Character} case mapping methods.
6352      * {@code String} case mapping methods can perform locale-sensitive
6353      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6354      * the {@code Character} case mapping methods cannot.
6355      *
6356      * <p><b>Note:</b> This method cannot handle <a
6357      * href="#supplementary"> supplementary characters</a>. To support
6358      * all Unicode characters, including supplementary characters, use
6359      * the {@link #toUpperCase(int)} method.
6360      *
6361      * @param   ch   the character to be converted.
6362      * @return  the uppercase equivalent of the character, if any;
6363      *          otherwise, the character itself.
6364      * @see     Character#isUpperCase(char)
6365      * @see     String#toUpperCase()
6366      */
6367     public static char toUpperCase(char ch) {
6368         return (char)toUpperCase((int)ch);
6369     }
6370 
6371     /**
6372      * Converts the character (Unicode code point) argument to
6373      * uppercase using case mapping information from the UnicodeData
6374      * file.
6375      *
6376      * <p>Note that
6377      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
6378      * does not always return {@code true} for some ranges of
6379      * characters, particularly those that are symbols or ideographs.
6380      *
6381      * <p>In general, {@link String#toUpperCase()} should be used to map
6382      * characters to uppercase. {@code String} case mapping methods
6383      * have several benefits over {@code Character} case mapping methods.
6384      * {@code String} case mapping methods can perform locale-sensitive
6385      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6386      * the {@code Character} case mapping methods cannot.
6387      *
6388      * @param   codePoint   the character (Unicode code point) to be converted.
6389      * @return  the uppercase equivalent of the character, if any;
6390      *          otherwise, the character itself.
6391      * @see     Character#isUpperCase(int)
6392      * @see     String#toUpperCase()
6393      *
6394      * @since   1.5
6395      */
6396     public static int toUpperCase(int codePoint) {
6397         return CharacterData.of(codePoint).toUpperCase(codePoint);
6398     }
6399 
6400     /**
6401      * Converts the character argument to titlecase using case mapping
6402      * information from the UnicodeData file. If a character has no
6403      * explicit titlecase mapping and is not itself a titlecase char
6404      * according to UnicodeData, then the uppercase mapping is
6405      * returned as an equivalent titlecase mapping. If the
6406      * {@code char} argument is already a titlecase
6407      * {@code char}, the same {@code char} value will be
6408      * returned.
6409      * <p>
6410      * Note that
6411      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
6412      * does not always return {@code true} for some ranges of
6413      * characters.
6414      *
6415      * <p><b>Note:</b> This method cannot handle <a
6416      * href="#supplementary"> supplementary characters</a>. To support
6417      * all Unicode characters, including supplementary characters, use
6418      * the {@link #toTitleCase(int)} method.
6419      *
6420      * @param   ch   the character to be converted.
6421      * @return  the titlecase equivalent of the character, if any;
6422      *          otherwise, the character itself.
6423      * @see     Character#isTitleCase(char)
6424      * @see     Character#toLowerCase(char)
6425      * @see     Character#toUpperCase(char)
6426      * @since   1.0.2
6427      */
6428     public static char toTitleCase(char ch) {
6429         return (char)toTitleCase((int)ch);
6430     }
6431 
6432     /**
6433      * Converts the character (Unicode code point) argument to titlecase using case mapping
6434      * information from the UnicodeData file. If a character has no
6435      * explicit titlecase mapping and is not itself a titlecase char
6436      * according to UnicodeData, then the uppercase mapping is
6437      * returned as an equivalent titlecase mapping. If the
6438      * character argument is already a titlecase
6439      * character, the same character value will be
6440      * returned.
6441      *
6442      * <p>Note that
6443      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
6444      * does not always return {@code true} for some ranges of
6445      * characters.
6446      *
6447      * @param   codePoint   the character (Unicode code point) to be converted.
6448      * @return  the titlecase equivalent of the character, if any;
6449      *          otherwise, the character itself.
6450      * @see     Character#isTitleCase(int)
6451      * @see     Character#toLowerCase(int)
6452      * @see     Character#toUpperCase(int)
6453      * @since   1.5
6454      */
6455     public static int toTitleCase(int codePoint) {
6456         return CharacterData.of(codePoint).toTitleCase(codePoint);
6457     }
6458 
6459     /**
6460      * Returns the numeric value of the character {@code ch} in the
6461      * specified radix.
6462      * <p>
6463      * If the radix is not in the range {@code MIN_RADIX} &le;
6464      * {@code radix} &le; {@code MAX_RADIX} or if the
6465      * value of {@code ch} is not a valid digit in the specified
6466      * radix, {@code -1} is returned. A character is a valid digit
6467      * if at least one of the following is true:
6468      * <ul>
6469      * <li>The method {@code isDigit} is {@code true} of the character
6470      *     and the Unicode decimal digit value of the character (or its
6471      *     single-character decomposition) is less than the specified radix.
6472      *     In this case the decimal digit value is returned.
6473      * <li>The character is one of the uppercase Latin letters
6474      *     {@code 'A'} through {@code 'Z'} and its code is less than
6475      *     {@code radix + 'A' - 10}.
6476      *     In this case, {@code ch - 'A' + 10}
6477      *     is returned.
6478      * <li>The character is one of the lowercase Latin letters
6479      *     {@code 'a'} through {@code 'z'} and its code is less than
6480      *     {@code radix + 'a' - 10}.
6481      *     In this case, {@code ch - 'a' + 10}
6482      *     is returned.
6483      * <li>The character is one of the fullwidth uppercase Latin letters A
6484      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6485      *     and its code is less than
6486      *     {@code radix + '\u005CuFF21' - 10}.
6487      *     In this case, {@code ch - '\u005CuFF21' + 10}
6488      *     is returned.
6489      * <li>The character is one of the fullwidth lowercase Latin letters a
6490      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6491      *     and its code is less than
6492      *     {@code radix + '\u005CuFF41' - 10}.
6493      *     In this case, {@code ch - '\u005CuFF41' + 10}
6494      *     is returned.
6495      * </ul>
6496      *
6497      * <p><b>Note:</b> This method cannot handle <a
6498      * href="#supplementary"> supplementary characters</a>. To support
6499      * all Unicode characters, including supplementary characters, use
6500      * the {@link #digit(int, int)} method.
6501      *
6502      * @param   ch      the character to be converted.
6503      * @param   radix   the radix.
6504      * @return  the numeric value represented by the character in the
6505      *          specified radix.
6506      * @see     Character#forDigit(int, int)
6507      * @see     Character#isDigit(char)
6508      */
6509     public static int digit(char ch, int radix) {
6510         return digit((int)ch, radix);
6511     }
6512 
6513     /**
6514      * Returns the numeric value of the specified character (Unicode
6515      * code point) in the specified radix.
6516      *
6517      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
6518      * {@code radix} &le; {@code MAX_RADIX} or if the
6519      * character is not a valid digit in the specified
6520      * radix, {@code -1} is returned. A character is a valid digit
6521      * if at least one of the following is true:
6522      * <ul>
6523      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
6524      *     and the Unicode decimal digit value of the character (or its
6525      *     single-character decomposition) is less than the specified radix.
6526      *     In this case the decimal digit value is returned.
6527      * <li>The character is one of the uppercase Latin letters
6528      *     {@code 'A'} through {@code 'Z'} and its code is less than
6529      *     {@code radix + 'A' - 10}.
6530      *     In this case, {@code codePoint - 'A' + 10}
6531      *     is returned.
6532      * <li>The character is one of the lowercase Latin letters
6533      *     {@code 'a'} through {@code 'z'} and its code is less than
6534      *     {@code radix + 'a' - 10}.
6535      *     In this case, {@code codePoint - 'a' + 10}
6536      *     is returned.
6537      * <li>The character is one of the fullwidth uppercase Latin letters A
6538      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6539      *     and its code is less than
6540      *     {@code radix + '\u005CuFF21' - 10}.
6541      *     In this case,
6542      *     {@code codePoint - '\u005CuFF21' + 10}
6543      *     is returned.
6544      * <li>The character is one of the fullwidth lowercase Latin letters a
6545      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6546      *     and its code is less than
     *     {@code radix + '\u005CuFF41' - 10}.
6548      *     In this case,
6549      *     {@code codePoint - '\u005CuFF41' + 10}
6550      *     is returned.
6551      * </ul>
6552      *
6553      * @param   codePoint the character (Unicode code point) to be converted.
6554      * @param   radix   the radix.
6555      * @return  the numeric value represented by the character in the
6556      *          specified radix.
6557      * @see     Character#forDigit(int, int)
6558      * @see     Character#isDigit(int)
6559      * @since   1.5
6560      */
6561     public static int digit(int codePoint, int radix) {
6562         return CharacterData.of(codePoint).digit(codePoint, radix);
6563     }
6564 
6565     /**
6566      * Returns the {@code int} value that the specified Unicode
6567      * character represents. For example, the character
     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
     * an {@code int} with a value of 50.
6570      * <p>
6571      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6572      * {@code '\u005Cu005A'}), lowercase
6573      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6574      * full width variant ({@code '\u005CuFF21'} through
6575      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6576      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6577      * through 35. This is independent of the Unicode specification,
6578      * which does not assign numeric values to these {@code char}
6579      * values.
6580      * <p>
6581      * If the character does not have a numeric value, then -1 is returned.
6582      * If the character has a numeric value that cannot be represented as a
6583      * nonnegative integer (for example, a fractional value), then -2
6584      * is returned.
6585      *
6586      * <p><b>Note:</b> This method cannot handle <a
6587      * href="#supplementary"> supplementary characters</a>. To support
6588      * all Unicode characters, including supplementary characters, use
6589      * the {@link #getNumericValue(int)} method.
6590      *
6591      * @param   ch      the character to be converted.
6592      * @return  the numeric value of the character, as a nonnegative {@code int}
6593      *           value; -2 if the character has a numeric value that is not a
6594      *          nonnegative integer; -1 if the character has no numeric value.
6595      * @see     Character#forDigit(int, int)
6596      * @see     Character#isDigit(char)
6597      * @since   1.1
6598      */
6599     public static int getNumericValue(char ch) {
6600         return getNumericValue((int)ch);
6601     }
6602 
6603     /**
6604      * Returns the {@code int} value that the specified
6605      * character (Unicode code point) represents. For example, the character
6606      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6607      * an {@code int} with a value of 50.
6608      * <p>
6609      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6610      * {@code '\u005Cu005A'}), lowercase
6611      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6612      * full width variant ({@code '\u005CuFF21'} through
6613      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6614      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6615      * through 35. This is independent of the Unicode specification,
6616      * which does not assign numeric values to these {@code char}
6617      * values.
6618      * <p>
6619      * If the character does not have a numeric value, then -1 is returned.
6620      * If the character has a numeric value that cannot be represented as a
6621      * nonnegative integer (for example, a fractional value), then -2
6622      * is returned.
6623      *
6624      * @param   codePoint the character (Unicode code point) to be converted.
6625      * @return  the numeric value of the character, as a nonnegative {@code int}
6626      *          value; -2 if the character has a numeric value that is not a
6627      *          nonnegative integer; -1 if the character has no numeric value.
6628      * @see     Character#forDigit(int, int)
6629      * @see     Character#isDigit(int)
6630      * @since   1.5
6631      */
6632     public static int getNumericValue(int codePoint) {
6633         return CharacterData.of(codePoint).getNumericValue(codePoint);
6634     }
6635 
6636     /**
6637      * Determines if the specified character is ISO-LATIN-1 white space.
6638      * This method returns {@code true} for the following five
6639      * characters only:
6640      * <table summary="truechars">
6641      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
6642      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
6643      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
6644      *     <td>{@code NEW LINE}</td></tr>
6645      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
6646      *     <td>{@code FORM FEED}</td></tr>
6647      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
6648      *     <td>{@code CARRIAGE RETURN}</td></tr>
     * <tr><td>{@code ' '}</td>  <td>{@code U+0020}</td>
6650      *     <td>{@code SPACE}</td></tr>
6651      * </table>
6652      *
6653      * @param      ch   the character to be tested.
6654      * @return     {@code true} if the character is ISO-LATIN-1 white
6655      *             space; {@code false} otherwise.
6656      * @see        Character#isSpaceChar(char)
6657      * @see        Character#isWhitespace(char)
6658      * @deprecated Replaced by isWhitespace(char).
6659      */
6660     @Deprecated
6661     public static boolean isSpace(char ch) {
6662         return (ch <= 0x0020) &&
6663             (((((1L << 0x0009) |
6664             (1L << 0x000A) |
6665             (1L << 0x000C) |
6666             (1L << 0x000D) |
6667             (1L << 0x0020)) >> ch) & 1L) != 0);
6668     }
6669 
6670 
6671     /**
6672      * Determines if the specified character is a Unicode space character.
6673      * A character is considered to be a space character if and only if
6674      * it is specified to be a space character by the Unicode Standard. This
6675      * method returns true if the character's general category type is any of
6676      * the following:
6677      * <ul>
6678      * <li> {@code SPACE_SEPARATOR}
6679      * <li> {@code LINE_SEPARATOR}
6680      * <li> {@code PARAGRAPH_SEPARATOR}
6681      * </ul>
6682      *
6683      * <p><b>Note:</b> This method cannot handle <a
6684      * href="#supplementary"> supplementary characters</a>. To support
6685      * all Unicode characters, including supplementary characters, use
6686      * the {@link #isSpaceChar(int)} method.
6687      *
6688      * @param   ch      the character to be tested.
6689      * @return  {@code true} if the character is a space character;
6690      *          {@code false} otherwise.
6691      * @see     Character#isWhitespace(char)
6692      * @since   1.1
6693      */
6694     public static boolean isSpaceChar(char ch) {
6695         return isSpaceChar((int)ch);
6696     }
6697 
6698     /**
6699      * Determines if the specified character (Unicode code point) is a
6700      * Unicode space character.  A character is considered to be a
6701      * space character if and only if it is specified to be a space
6702      * character by the Unicode Standard. This method returns true if
6703      * the character's general category type is any of the following:
6704      *
6705      * <ul>
6706      * <li> {@link #SPACE_SEPARATOR}
6707      * <li> {@link #LINE_SEPARATOR}
6708      * <li> {@link #PARAGRAPH_SEPARATOR}
6709      * </ul>
6710      *
6711      * @param   codePoint the character (Unicode code point) to be tested.
6712      * @return  {@code true} if the character is a space character;
6713      *          {@code false} otherwise.
6714      * @see     Character#isWhitespace(int)
6715      * @since   1.5
6716      */
6717     public static boolean isSpaceChar(int codePoint) {
6718         return ((((1 << Character.SPACE_SEPARATOR) |
6719                   (1 << Character.LINE_SEPARATOR) |
6720                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
6721             != 0;
6722     }
6723 
6724     /**
6725      * Determines if the specified character is white space according to Java.
6726      * A character is a Java whitespace character if and only if it satisfies
6727      * one of the following criteria:
6728      * <ul>
6729      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
6730      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
6731      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6732      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6733      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6734      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6735      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6736      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6737      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6738      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6739      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6740      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6741      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6742      * </ul>
6743      *
6744      * <p><b>Note:</b> This method cannot handle <a
6745      * href="#supplementary"> supplementary characters</a>. To support
6746      * all Unicode characters, including supplementary characters, use
6747      * the {@link #isWhitespace(int)} method.
6748      *
6749      * @param   ch the character to be tested.
6750      * @return  {@code true} if the character is a Java whitespace
6751      *          character; {@code false} otherwise.
6752      * @see     Character#isSpaceChar(char)
6753      * @since   1.1
6754      */
6755     public static boolean isWhitespace(char ch) {
6756         return isWhitespace((int)ch);
6757     }
6758 
6759     /**
6760      * Determines if the specified character (Unicode code point) is
6761      * white space according to Java.  A character is a Java
6762      * whitespace character if and only if it satisfies one of the
6763      * following criteria:
6764      * <ul>
6765      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6766      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6767      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6768      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6769      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6770      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6771      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6772      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6773      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6774      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6775      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6776      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6777      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6778      * </ul>
6779      *
6780      * @param   codePoint the character (Unicode code point) to be tested.
6781      * @return  {@code true} if the character is a Java whitespace
6782      *          character; {@code false} otherwise.
6783      * @see     Character#isSpaceChar(int)
6784      * @since   1.5
6785      */
6786     public static boolean isWhitespace(int codePoint) {
6787         return CharacterData.of(codePoint).isWhitespace(codePoint);
6788     }
6789 
6790     /**
6791      * Determines if the specified character is an ISO control
6792      * character.  A character is considered to be an ISO control
6793      * character if its code is in the range {@code '\u005Cu0000'}
6794      * through {@code '\u005Cu001F'} or in the range
6795      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6796      *
6797      * <p><b>Note:</b> This method cannot handle <a
6798      * href="#supplementary"> supplementary characters</a>. To support
6799      * all Unicode characters, including supplementary characters, use
6800      * the {@link #isISOControl(int)} method.
6801      *
6802      * @param   ch      the character to be tested.
6803      * @return  {@code true} if the character is an ISO control character;
6804      *          {@code false} otherwise.
6805      *
6806      * @see     Character#isSpaceChar(char)
6807      * @see     Character#isWhitespace(char)
6808      * @since   1.1
6809      */
6810     public static boolean isISOControl(char ch) {
6811         return isISOControl((int)ch);
6812     }
6813 
6814     /**
     * Determines if the specified character (Unicode code point) is an ISO control
6816      * character.  A character is considered to be an ISO control
6817      * character if its code is in the range {@code '\u005Cu0000'}
6818      * through {@code '\u005Cu001F'} or in the range
6819      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6820      *
6821      * @param   codePoint the character (Unicode code point) to be tested.
6822      * @return  {@code true} if the character is an ISO control character;
6823      *          {@code false} otherwise.
6824      * @see     Character#isSpaceChar(int)
6825      * @see     Character#isWhitespace(int)
6826      * @since   1.5
6827      */
6828     public static boolean isISOControl(int codePoint) {
6829         // Optimized form of:
6830         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
6831         //     (codePoint >= 0x7F && codePoint <= 0x9F);
6832         return codePoint <= 0x9F &&
6833             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
6834     }
6835 
6836     /**
6837      * Returns a value indicating a character's general category.
6838      *
6839      * <p><b>Note:</b> This method cannot handle <a
6840      * href="#supplementary"> supplementary characters</a>. To support
6841      * all Unicode characters, including supplementary characters, use
6842      * the {@link #getType(int)} method.
6843      *
6844      * @param   ch      the character to be tested.
6845      * @return  a value of type {@code int} representing the
6846      *          character's general category.
6847      * @see     Character#COMBINING_SPACING_MARK
6848      * @see     Character#CONNECTOR_PUNCTUATION
6849      * @see     Character#CONTROL
6850      * @see     Character#CURRENCY_SYMBOL
6851      * @see     Character#DASH_PUNCTUATION
6852      * @see     Character#DECIMAL_DIGIT_NUMBER
6853      * @see     Character#ENCLOSING_MARK
6854      * @see     Character#END_PUNCTUATION
6855      * @see     Character#FINAL_QUOTE_PUNCTUATION
6856      * @see     Character#FORMAT
6857      * @see     Character#INITIAL_QUOTE_PUNCTUATION
6858      * @see     Character#LETTER_NUMBER
6859      * @see     Character#LINE_SEPARATOR
6860      * @see     Character#LOWERCASE_LETTER
6861      * @see     Character#MATH_SYMBOL
6862      * @see     Character#MODIFIER_LETTER
6863      * @see     Character#MODIFIER_SYMBOL
6864      * @see     Character#NON_SPACING_MARK
6865      * @see     Character#OTHER_LETTER
6866      * @see     Character#OTHER_NUMBER
6867      * @see     Character#OTHER_PUNCTUATION
6868      * @see     Character#OTHER_SYMBOL
6869      * @see     Character#PARAGRAPH_SEPARATOR
6870      * @see     Character#PRIVATE_USE
6871      * @see     Character#SPACE_SEPARATOR
6872      * @see     Character#START_PUNCTUATION
6873      * @see     Character#SURROGATE
6874      * @see     Character#TITLECASE_LETTER
6875      * @see     Character#UNASSIGNED
6876      * @see     Character#UPPERCASE_LETTER
6877      * @since   1.1
6878      */
6879     public static int getType(char ch) {
6880         return getType((int)ch);
6881     }
6882 
6883     /**
6884      * Returns a value indicating a character's general category.
6885      *
6886      * @param   codePoint the character (Unicode code point) to be tested.
6887      * @return  a value of type {@code int} representing the
6888      *          character's general category.
6889      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
6890      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
6891      * @see     Character#CONTROL CONTROL
6892      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
6893      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
6894      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
6895      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
6896      * @see     Character#END_PUNCTUATION END_PUNCTUATION
6897      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
6898      * @see     Character#FORMAT FORMAT
6899      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
6900      * @see     Character#LETTER_NUMBER LETTER_NUMBER
6901      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
6902      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
6903      * @see     Character#MATH_SYMBOL MATH_SYMBOL
6904      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
6905      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
6906      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
6907      * @see     Character#OTHER_LETTER OTHER_LETTER
6908      * @see     Character#OTHER_NUMBER OTHER_NUMBER
6909      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
6910      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
6911      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
6912      * @see     Character#PRIVATE_USE PRIVATE_USE
6913      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
6914      * @see     Character#START_PUNCTUATION START_PUNCTUATION
6915      * @see     Character#SURROGATE SURROGATE
6916      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
6917      * @see     Character#UNASSIGNED UNASSIGNED
6918      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
6919      * @since   1.5
6920      */
6921     public static int getType(int codePoint) {
6922         return CharacterData.of(codePoint).getType(codePoint);
6923     }
6924 
6925     /**
6926      * Determines the character representation for a specific digit in
6927      * the specified radix. If the value of {@code radix} is not a
6928      * valid radix, or the value of {@code digit} is not a valid
6929      * digit in the specified radix, the null character
6930      * ({@code '\u005Cu0000'}) is returned.
6931      * <p>
6932      * The {@code radix} argument is valid if it is greater than or
6933      * equal to {@code MIN_RADIX} and less than or equal to
6934      * {@code MAX_RADIX}. The {@code digit} argument is valid if
6935      * {@code 0 <= digit < radix}.
6936      * <p>
6937      * If the digit is less than 10, then
6938      * {@code '0' + digit} is returned. Otherwise, the value
6939      * {@code 'a' + digit - 10} is returned.
6940      *
6941      * @param   digit   the number to convert to a character.
6942      * @param   radix   the radix.
6943      * @return  the {@code char} representation of the specified digit
6944      *          in the specified radix.
6945      * @see     Character#MIN_RADIX
6946      * @see     Character#MAX_RADIX
6947      * @see     Character#digit(char, int)
6948      */
6949     public static char forDigit(int digit, int radix) {
6950         if ((digit >= radix) || (digit < 0)) {
6951             return '\0';
6952         }
6953         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
6954             return '\0';
6955         }
6956         if (digit < 10) {
6957             return (char)('0' + digit);
6958         }
6959         return (char)('a' - 10 + digit);
6960     }
6961 
6962     /**
6963      * Returns the Unicode directionality property for the given
6964      * character.  Character directionality is used to calculate the
6965      * visual ordering of text. The directionality value of undefined
6966      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
6967      *
6968      * <p><b>Note:</b> This method cannot handle <a
6969      * href="#supplementary"> supplementary characters</a>. To support
6970      * all Unicode characters, including supplementary characters, use
6971      * the {@link #getDirectionality(int)} method.
6972      *
6973      * @param  ch {@code char} for which the directionality property
6974      *            is requested.
6975      * @return the directionality property of the {@code char} value.
6976      *
6977      * @see Character#DIRECTIONALITY_UNDEFINED
6978      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
6979      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
6980      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
6981      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
6982      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
6983      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
6984      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
6985      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
6986      * @see Character#DIRECTIONALITY_NONSPACING_MARK
6987      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
6988      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
6989      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
6990      * @see Character#DIRECTIONALITY_WHITESPACE
6991      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
6992      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
6993      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
6994      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
6995      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
6996      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
6997      * @since 1.4
6998      */
6999     public static byte getDirectionality(char ch) {
7000         return getDirectionality((int)ch);
7001     }
7002 
7003     /**
7004      * Returns the Unicode directionality property for the given
7005      * character (Unicode code point).  Character directionality is
     * used to calculate the visual ordering of text. The
     * directionality value of an undefined character is {@link
     * #DIRECTIONALITY_UNDEFINED}.
7009      *
7010      * @param   codePoint the character (Unicode code point) for which
7011      *          the directionality property is requested.
7012      * @return the directionality property of the character.
7013      *
7014      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
7015      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
7016      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
7017      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7018      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
7019      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7020      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7021      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
7022      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7023      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
7024      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
7025      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
7026      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
7027      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
7028      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
7029      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7030      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7031      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7032      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7033      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7034      * @since    1.5
7035      */
7036     public static byte getDirectionality(int codePoint) {
7037         return CharacterData.of(codePoint).getDirectionality(codePoint);
7038     }
7039 
7040     /**
7041      * Determines whether the character is mirrored according to the
7042      * Unicode specification.  Mirrored characters should have their
7043      * glyphs horizontally mirrored when displayed in text that is
7044      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
7045      * PARENTHESIS is semantically defined to be an <i>opening
7046      * parenthesis</i>.  This will appear as a "(" in text that is
7047      * left-to-right but as a ")" in text that is right-to-left.
7048      *
7049      * <p><b>Note:</b> This method cannot handle <a
7050      * href="#supplementary"> supplementary characters</a>. To support
7051      * all Unicode characters, including supplementary characters, use
7052      * the {@link #isMirrored(int)} method.
7053      *
7054      * @param  ch {@code char} for which the mirrored property is requested
7055      * @return {@code true} if the char is mirrored, {@code false}
7056      *         if the {@code char} is not mirrored or is not defined.
7057      * @since 1.4
7058      */
7059     public static boolean isMirrored(char ch) {
7060         return isMirrored((int)ch);
7061     }
7062 
7063     /**
7064      * Determines whether the specified character (Unicode code point)
7065      * is mirrored according to the Unicode specification.  Mirrored
7066      * characters should have their glyphs horizontally mirrored when
7067      * displayed in text that is right-to-left.  For example,
7068      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
7069      * defined to be an <i>opening parenthesis</i>.  This will appear
7070      * as a "(" in text that is left-to-right but as a ")" in text
7071      * that is right-to-left.
7072      *
7073      * @param   codePoint the character (Unicode code point) to be tested.
7074      * @return  {@code true} if the character is mirrored, {@code false}
7075      *          if the character is not mirrored or is not defined.
7076      * @since   1.5
7077      */
7078     public static boolean isMirrored(int codePoint) {
7079         return CharacterData.of(codePoint).isMirrored(codePoint);
7080     }
7081 
7082     /**
7083      * Compares two {@code Character} objects numerically.
7084      *
7085      * @param   anotherCharacter   the {@code Character} to be compared.
     *
7087      * @return  the value {@code 0} if the argument {@code Character}
7088      *          is equal to this {@code Character}; a value less than
7089      *          {@code 0} if this {@code Character} is numerically less
7090      *          than the {@code Character} argument; and a value greater than
7091      *          {@code 0} if this {@code Character} is numerically greater
7092      *          than the {@code Character} argument (unsigned comparison).
7093      *          Note that this is strictly a numerical comparison; it is not
7094      *          locale-dependent.
7095      * @since   1.2
7096      */
7097     public int compareTo(Character anotherCharacter) {
7098         return compare(this.value, anotherCharacter.value);
7099     }
7100 
7101     /**
7102      * Compares two {@code char} values numerically.
7103      * The value returned is identical to what would be returned by:
7104      * <pre>
7105      *    Character.valueOf(x).compareTo(Character.valueOf(y))
7106      * </pre>
7107      *
7108      * @param  x the first {@code char} to compare
7109      * @param  y the second {@code char} to compare
7110      * @return the value {@code 0} if {@code x == y};
7111      *         a value less than {@code 0} if {@code x < y}; and
7112      *         a value greater than {@code 0} if {@code x > y}
7113      * @since 1.7
7114      */
7115     public static int compare(char x, char y) {
7116         return x - y;
7117     }
7118 
7119     /**
7120      * Converts the character (Unicode code point) argument to uppercase using
7121      * information from the UnicodeData file.
7122      *
7123      * @param   codePoint   the character (Unicode code point) to be converted.
7124      * @return  either the uppercase equivalent of the character, if
7125      *          any, or an error flag ({@code Character.ERROR})
7126      *          that indicates that a 1:M {@code char} mapping exists.
7127      * @see     Character#isLowerCase(char)
7128      * @see     Character#isUpperCase(char)
7129      * @see     Character#toLowerCase(char)
7130      * @see     Character#toTitleCase(char)
7131      * @since 1.4
7132      */
7133     static int toUpperCaseEx(int codePoint) {
7134         assert isValidCodePoint(codePoint);
7135         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
7136     }
7137 
7138     /**
7139      * Converts the character (Unicode code point) argument to uppercase using case
7140      * mapping information from the SpecialCasing file in the Unicode
7141      * specification. If a character has no explicit uppercase
7142      * mapping, then the {@code char} itself is returned in the
7143      * {@code char[]}.
7144      *
7145      * @param   codePoint   the character (Unicode code point) to be converted.
7146      * @return a {@code char[]} with the uppercased character.
7147      * @since 1.4
7148      */
7149     static char[] toUpperCaseCharArray(int codePoint) {
7150         // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
7151         assert isBmpCodePoint(codePoint);
7152         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
7153     }
7154 
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
7162 
    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form. The value is derived as {@link #SIZE} divided by
     * {@link Byte#SIZE} rather than hard-coded, so it always stays
     * consistent with {@code SIZE}.
     *
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
7170 
7171     /**
     * Returns the value obtained by reversing the order of the bytes in the
     * specified {@code char} value.
     *
     * @param ch The {@code char} of which to reverse the byte order.
     * @return the value obtained by reversing (or, equivalently, swapping)
     *     the bytes in the specified {@code char} value.
7178      * @since 1.5
7179      */
7180     public static char reverseBytes(char ch) {
7181         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
7182     }
7183 
7184     /**
7185      * Returns the Unicode name of the specified character
7186      * {@code codePoint}, or null if the code point is
7187      * {@link #UNASSIGNED unassigned}.
7188      * <p>
     * Note: if the specified character is not assigned a name by
     * the <i>UnicodeData</i> file (part of the Unicode Character
     * Database maintained by the Unicode Consortium), the returned
     * name is the same as the result of the expression:
7193      *
7194      * <blockquote>{@code
7195      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
7196      *     + " "
7197      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7198      *
7199      * }</blockquote>
7200      *
7201      * @param  codePoint the character (Unicode code point)
7202      *
7203      * @return the Unicode name of the specified character, or null if
7204      *         the code point is unassigned.
7205      *
7206      * @exception IllegalArgumentException if the specified
7207      *            {@code codePoint} is not a valid Unicode
7208      *            code point.
7209      *
7210      * @since 1.7
7211      */
7212     public static String getName(int codePoint) {
7213         if (!isValidCodePoint(codePoint)) {
7214             throw new IllegalArgumentException();
7215         }
7216         String name = CharacterName.get(codePoint);
7217         if (name != null)
7218             return name;
7219         if (getType(codePoint) == UNASSIGNED)
7220             return null;
7221         UnicodeBlock block = UnicodeBlock.of(codePoint);
7222         if (block != null)
7223             return block.toString().replace('_', ' ') + " "
7224                    + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7225         // should never come here
7226         return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7227     }
7228 }