1 /*
   2  * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Map;
  30 import java.util.HashMap;
  31 import java.util.Locale;
  32 
  33 /**
  34  * The {@code Character} class wraps a value of the primitive
  35  * type {@code char} in an object. An object of type
  36  * {@code Character} contains a single field whose type is
  37  * {@code char}.
  38  * <p>
  39  * In addition, this class provides several methods for determining
  40  * a character's category (lowercase letter, digit, etc.) and for converting
  41  * characters from uppercase to lowercase and vice versa.
  42  * <p>
  43  * Character information is based on the Unicode Standard, version 6.2.0.
  44  * <p>
  45  * The methods and data of class {@code Character} are defined by
  46  * the information in the <i>UnicodeData</i> file that is part of the
  47  * Unicode Character Database maintained by the Unicode
  48  * Consortium. This file specifies various properties including name
  49  * and general category for every defined Unicode code point or
  50  * character range.
  51  * <p>
  52  * The file and its description are available from the Unicode Consortium at:
  53  * <ul>
  54  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
  55  * </ul>
  56  *
  57  * <h3><a name="unicode">Unicode Character Representations</a></h3>
  58  *
  59  * <p>The {@code char} data type (and therefore the value that a
 * {@code Character} object encapsulates) is based on the
  61  * original Unicode specification, which defined characters as
  62  * fixed-width 16-bit entities. The Unicode Standard has since been
  63  * changed to allow for characters whose representation requires more
  64  * than 16 bits.  The range of legal <em>code point</em>s is now
  65  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
  66  * (Refer to the <a
  67  * href="http://www.unicode.org/reports/tr27/#notation"><i>
  68  * definition</i></a> of the U+<i>n</i> notation in the Unicode
  69  * Standard.)
  70  *
  71  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
  72  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
  73  * <a name="supplementary">Characters</a> whose code points are greater
  74  * than U+FFFF are called <em>supplementary character</em>s.  The Java
  75  * platform uses the UTF-16 representation in {@code char} arrays and
  76  * in the {@code String} and {@code StringBuffer} classes. In
  77  * this representation, supplementary characters are represented as a pair
  78  * of {@code char} values, the first from the <em>high-surrogates</em>
  79  * range, (\uD800-\uDBFF), the second from the
  80  * <em>low-surrogates</em> range (\uDC00-\uDFFF).
  81  *
  82  * <p>A {@code char} value, therefore, represents Basic
  83  * Multilingual Plane (BMP) code points, including the surrogate
  84  * code points, or code units of the UTF-16 encoding. An
  85  * {@code int} value represents all Unicode code points,
  86  * including supplementary code points. The lower (least significant)
  87  * 21 bits of {@code int} are used to represent Unicode code
  88  * points and the upper (most significant) 11 bits must be zero.
  89  * Unless otherwise specified, the behavior with respect to
  90  * supplementary characters and surrogate {@code char} values is
  91  * as follows:
  92  *
  93  * <ul>
  94  * <li>The methods that only accept a {@code char} value cannot support
  95  * supplementary characters. They treat {@code char} values from the
  96  * surrogate ranges as undefined characters. For example,
  97  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
  98  * this specific value if followed by any low-surrogate value in a string
  99  * would represent a letter.
 100  *
 101  * <li>The methods that accept an {@code int} value support all
 102  * Unicode characters, including supplementary characters. For
 103  * example, {@code Character.isLetter(0x2F81A)} returns
 104  * {@code true} because the code point value represents a letter
 105  * (a CJK ideograph).
 106  * </ul>
 107  *
 108  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
 109  * used for character values in the range between U+0000 and U+10FFFF,
 110  * and <em>Unicode code unit</em> is used for 16-bit
 111  * {@code char} values that are code units of the <em>UTF-16</em>
 112  * encoding. For more information on Unicode terminology, refer to the
 113  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
 114  *
 115  * @author  Lee Boynton
 116  * @author  Guy Steele
 117  * @author  Akira Tanaka
 118  * @author  Martin Buchholz
 119  * @author  Ulf Zibis
 120  * @since   1.0
 121  */
 122 public final
 123 class Character implements java.io.Serializable, Comparable<Character> {
 124     /**
 125      * The minimum radix available for conversion to and from strings.
 126      * The constant value of this field is the smallest value permitted
 127      * for the radix argument in radix-conversion methods such as the
 128      * {@code digit} method, the {@code forDigit} method, and the
 129      * {@code toString} method of class {@code Integer}.
 130      *
 131      * @see     Character#digit(char, int)
 132      * @see     Character#forDigit(int, int)
 133      * @see     Integer#toString(int, int)
 134      * @see     Integer#valueOf(String)
 135      */
 136     public static final int MIN_RADIX = 2;
 137 
 138     /**
 139      * The maximum radix available for conversion to and from strings.
 140      * The constant value of this field is the largest value permitted
 141      * for the radix argument in radix-conversion methods such as the
 142      * {@code digit} method, the {@code forDigit} method, and the
 143      * {@code toString} method of class {@code Integer}.
 144      *
 145      * @see     Character#digit(char, int)
 146      * @see     Character#forDigit(int, int)
 147      * @see     Integer#toString(int, int)
 148      * @see     Integer#valueOf(String)
 149      */
 150     public static final int MAX_RADIX = 36;
 151 
 152     /**
 153      * The constant value of this field is the smallest value of type
 154      * {@code char}, {@code '\u005Cu0000'}.
 155      *
 156      * @since   1.0.2
 157      */
 158     public static final char MIN_VALUE = '\u0000';
 159 
 160     /**
 161      * The constant value of this field is the largest value of type
 162      * {@code char}, {@code '\u005CuFFFF'}.
 163      *
 164      * @since   1.0.2
 165      */
 166     public static final char MAX_VALUE = '\uFFFF';
 167 
 168     /**
 169      * The {@code Class} instance representing the primitive type
 170      * {@code char}.
 171      *
 172      * @since   1.1
 173      */
 174     @SuppressWarnings("unchecked")
 175     public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
 176 
 177     /*
 178      * Normative general types
 179      */
 180 
 181     /*
 182      * General character types
 183      */
 184 
 185     /**
 186      * General category "Cn" in the Unicode specification.
 187      * @since   1.1
 188      */
 189     public static final byte UNASSIGNED = 0;
 190 
 191     /**
 192      * General category "Lu" in the Unicode specification.
 193      * @since   1.1
 194      */
 195     public static final byte UPPERCASE_LETTER = 1;
 196 
 197     /**
 198      * General category "Ll" in the Unicode specification.
 199      * @since   1.1
 200      */
 201     public static final byte LOWERCASE_LETTER = 2;
 202 
 203     /**
 204      * General category "Lt" in the Unicode specification.
 205      * @since   1.1
 206      */
 207     public static final byte TITLECASE_LETTER = 3;
 208 
 209     /**
 210      * General category "Lm" in the Unicode specification.
 211      * @since   1.1
 212      */
 213     public static final byte MODIFIER_LETTER = 4;
 214 
 215     /**
 216      * General category "Lo" in the Unicode specification.
 217      * @since   1.1
 218      */
 219     public static final byte OTHER_LETTER = 5;
 220 
 221     /**
 222      * General category "Mn" in the Unicode specification.
 223      * @since   1.1
 224      */
 225     public static final byte NON_SPACING_MARK = 6;
 226 
 227     /**
 228      * General category "Me" in the Unicode specification.
 229      * @since   1.1
 230      */
 231     public static final byte ENCLOSING_MARK = 7;
 232 
 233     /**
 234      * General category "Mc" in the Unicode specification.
 235      * @since   1.1
 236      */
 237     public static final byte COMBINING_SPACING_MARK = 8;
 238 
 239     /**
 240      * General category "Nd" in the Unicode specification.
 241      * @since   1.1
 242      */
 243     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
 244 
 245     /**
 246      * General category "Nl" in the Unicode specification.
 247      * @since   1.1
 248      */
 249     public static final byte LETTER_NUMBER = 10;
 250 
 251     /**
 252      * General category "No" in the Unicode specification.
 253      * @since   1.1
 254      */
 255     public static final byte OTHER_NUMBER = 11;
 256 
 257     /**
 258      * General category "Zs" in the Unicode specification.
 259      * @since   1.1
 260      */
 261     public static final byte SPACE_SEPARATOR = 12;
 262 
 263     /**
 264      * General category "Zl" in the Unicode specification.
 265      * @since   1.1
 266      */
 267     public static final byte LINE_SEPARATOR = 13;
 268 
 269     /**
 270      * General category "Zp" in the Unicode specification.
 271      * @since   1.1
 272      */
 273     public static final byte PARAGRAPH_SEPARATOR = 14;
 274 
 275     /**
 276      * General category "Cc" in the Unicode specification.
 277      * @since   1.1
 278      */
 279     public static final byte CONTROL = 15;
 280 
 281     /**
 282      * General category "Cf" in the Unicode specification.
 283      * @since   1.1
 284      */
 285     public static final byte FORMAT = 16;
 286 
 287     /**
 288      * General category "Co" in the Unicode specification.
 289      * @since   1.1
 290      */
 291     public static final byte PRIVATE_USE = 18;
 292 
 293     /**
 294      * General category "Cs" in the Unicode specification.
 295      * @since   1.1
 296      */
 297     public static final byte SURROGATE = 19;
 298 
 299     /**
 300      * General category "Pd" in the Unicode specification.
 301      * @since   1.1
 302      */
 303     public static final byte DASH_PUNCTUATION = 20;
 304 
 305     /**
 306      * General category "Ps" in the Unicode specification.
 307      * @since   1.1
 308      */
 309     public static final byte START_PUNCTUATION = 21;
 310 
 311     /**
 312      * General category "Pe" in the Unicode specification.
 313      * @since   1.1
 314      */
 315     public static final byte END_PUNCTUATION = 22;
 316 
 317     /**
 318      * General category "Pc" in the Unicode specification.
 319      * @since   1.1
 320      */
 321     public static final byte CONNECTOR_PUNCTUATION = 23;
 322 
 323     /**
 324      * General category "Po" in the Unicode specification.
 325      * @since   1.1
 326      */
 327     public static final byte OTHER_PUNCTUATION = 24;
 328 
 329     /**
 330      * General category "Sm" in the Unicode specification.
 331      * @since   1.1
 332      */
 333     public static final byte MATH_SYMBOL = 25;
 334 
 335     /**
 336      * General category "Sc" in the Unicode specification.
 337      * @since   1.1
 338      */
 339     public static final byte CURRENCY_SYMBOL = 26;
 340 
 341     /**
 342      * General category "Sk" in the Unicode specification.
 343      * @since   1.1
 344      */
 345     public static final byte MODIFIER_SYMBOL = 27;
 346 
 347     /**
 348      * General category "So" in the Unicode specification.
 349      * @since   1.1
 350      */
 351     public static final byte OTHER_SYMBOL = 28;
 352 
 353     /**
 354      * General category "Pi" in the Unicode specification.
 355      * @since   1.4
 356      */
 357     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
 358 
 359     /**
 360      * General category "Pf" in the Unicode specification.
 361      * @since   1.4
 362      */
 363     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
 364 
 365     /**
 366      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
 367      */
 368     static final int ERROR = 0xFFFFFFFF;
 369 
 370 
 371     /**
 372      * Undefined bidirectional character type. Undefined {@code char}
 373      * values have undefined directionality in the Unicode specification.
 374      * @since 1.4
 375      */
 376     public static final byte DIRECTIONALITY_UNDEFINED = -1;
 377 
 378     /**
 379      * Strong bidirectional character type "L" in the Unicode specification.
 380      * @since 1.4
 381      */
 382     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
 383 
 384     /**
 385      * Strong bidirectional character type "R" in the Unicode specification.
 386      * @since 1.4
 387      */
 388     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
 389 
 390     /**
 391     * Strong bidirectional character type "AL" in the Unicode specification.
 392      * @since 1.4
 393      */
 394     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
 395 
 396     /**
 397      * Weak bidirectional character type "EN" in the Unicode specification.
 398      * @since 1.4
 399      */
 400     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
 401 
 402     /**
 403      * Weak bidirectional character type "ES" in the Unicode specification.
 404      * @since 1.4
 405      */
 406     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
 407 
 408     /**
 409      * Weak bidirectional character type "ET" in the Unicode specification.
 410      * @since 1.4
 411      */
 412     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
 413 
 414     /**
 415      * Weak bidirectional character type "AN" in the Unicode specification.
 416      * @since 1.4
 417      */
 418     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
 419 
 420     /**
 421      * Weak bidirectional character type "CS" in the Unicode specification.
 422      * @since 1.4
 423      */
 424     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
 425 
 426     /**
 427      * Weak bidirectional character type "NSM" in the Unicode specification.
 428      * @since 1.4
 429      */
 430     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
 431 
 432     /**
 433      * Weak bidirectional character type "BN" in the Unicode specification.
 434      * @since 1.4
 435      */
 436     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
 437 
 438     /**
 439      * Neutral bidirectional character type "B" in the Unicode specification.
 440      * @since 1.4
 441      */
 442     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
 443 
 444     /**
 445      * Neutral bidirectional character type "S" in the Unicode specification.
 446      * @since 1.4
 447      */
 448     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
 449 
 450     /**
 451      * Neutral bidirectional character type "WS" in the Unicode specification.
 452      * @since 1.4
 453      */
 454     public static final byte DIRECTIONALITY_WHITESPACE = 12;
 455 
 456     /**
 457      * Neutral bidirectional character type "ON" in the Unicode specification.
 458      * @since 1.4
 459      */
 460     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
 461 
 462     /**
 463      * Strong bidirectional character type "LRE" in the Unicode specification.
 464      * @since 1.4
 465      */
 466     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
 467 
 468     /**
 469      * Strong bidirectional character type "LRO" in the Unicode specification.
 470      * @since 1.4
 471      */
 472     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
 473 
 474     /**
 475      * Strong bidirectional character type "RLE" in the Unicode specification.
 476      * @since 1.4
 477      */
 478     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
 479 
 480     /**
 481      * Strong bidirectional character type "RLO" in the Unicode specification.
 482      * @since 1.4
 483      */
 484     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
 485 
 486     /**
 487      * Weak bidirectional character type "PDF" in the Unicode specification.
 488      * @since 1.4
 489      */
 490     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 491 
 492     /**
 493      * The minimum value of a
 494      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 495      * Unicode high-surrogate code unit</a>
 496      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
 497      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 498      *
 499      * @since 1.5
 500      */
 501     public static final char MIN_HIGH_SURROGATE = '\uD800';
 502 
 503     /**
 504      * The maximum value of a
 505      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 506      * Unicode high-surrogate code unit</a>
 507      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
 508      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 509      *
 510      * @since 1.5
 511      */
 512     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
 513 
 514     /**
 515      * The minimum value of a
 516      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 517      * Unicode low-surrogate code unit</a>
 518      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
 519      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 520      *
 521      * @since 1.5
 522      */
 523     public static final char MIN_LOW_SURROGATE  = '\uDC00';
 524 
 525     /**
 526      * The maximum value of a
 527      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 528      * Unicode low-surrogate code unit</a>
 529      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 530      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 531      *
 532      * @since 1.5
 533      */
 534     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
 535 
 536     /**
 537      * The minimum value of a Unicode surrogate code unit in the
 538      * UTF-16 encoding, constant {@code '\u005CuD800'}.
 539      *
 540      * @since 1.5
 541      */
 542     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
 543 
 544     /**
 545      * The maximum value of a Unicode surrogate code unit in the
 546      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 547      *
 548      * @since 1.5
 549      */
 550     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
 551 
 552     /**
 553      * The minimum value of a
 554      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
 555      * Unicode supplementary code point</a>, constant {@code U+10000}.
 556      *
 557      * @since 1.5
 558      */
 559     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
 560 
 561     /**
 562      * The minimum value of a
 563      * <a href="http://www.unicode.org/glossary/#code_point">
 564      * Unicode code point</a>, constant {@code U+0000}.
 565      *
 566      * @since 1.5
 567      */
 568     public static final int MIN_CODE_POINT = 0x000000;
 569 
 570     /**
 571      * The maximum value of a
 572      * <a href="http://www.unicode.org/glossary/#code_point">
 573      * Unicode code point</a>, constant {@code U+10FFFF}.
 574      *
 575      * @since 1.5
 576      */
 577     public static final int MAX_CODE_POINT = 0X10FFFF;
 578 
 579 
 580     /**
 581      * Instances of this class represent particular subsets of the Unicode
 582      * character set.  The only family of subsets defined in the
 583      * {@code Character} class is {@link Character.UnicodeBlock}.
 584      * Other portions of the Java API may define other subsets for their
 585      * own purposes.
 586      *
 587      * @since 1.2
 588      */
 589     public static class Subset  {
 590 
 591         private String name;
 592 
 593         /**
 594          * Constructs a new {@code Subset} instance.
 595          *
 596          * @param  name  The name of this subset
 597          * @exception NullPointerException if name is {@code null}
 598          */
 599         protected Subset(String name) {
 600             if (name == null) {
 601                 throw new NullPointerException("name");
 602             }
 603             this.name = name;
 604         }
 605 
 606         /**
 607          * Compares two {@code Subset} objects for equality.
 608          * This method returns {@code true} if and only if
 609          * {@code this} and the argument refer to the same
 610          * object; since this method is {@code final}, this
 611          * guarantee holds for all subclasses.
 612          */
 613         public final boolean equals(Object obj) {
 614             return (this == obj);
 615         }
 616 
 617         /**
 618          * Returns the standard hash code as defined by the
 619          * {@link Object#hashCode} method.  This method
 620          * is {@code final} in order to ensure that the
 621          * {@code equals} and {@code hashCode} methods will
 622          * be consistent in all subclasses.
 623          */
 624         public final int hashCode() {
 625             return super.hashCode();
 626         }
 627 
 628         /**
 629          * Returns the name of this subset.
 630          */
 631         public final String toString() {
 632             return name;
 633         }
 634     }
 635 
 636     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
 637     // for the latest specification of Unicode Blocks.
 638 
 639     /**
 640      * A family of character subsets representing the character blocks in the
 641      * Unicode specification. Character blocks generally define characters
 642      * used for a specific script or purpose. A character is contained by
 643      * at most one Unicode block.
 644      *
 645      * @since 1.2
 646      */
 647     public static final class UnicodeBlock extends Subset {
 648 
        // Lookup table from block identifier names and alias names to the
        // block instance; each UnicodeBlock constructor below adds its
        // entries here. Static initializers run in textual order, so this
        // field has to stay declared ahead of the block constants whose
        // constructors write to it.
        private static Map<String, UnicodeBlock> map = new HashMap<>(256);
 650 
        /**
         * Creates a UnicodeBlock with the given identifier name.
         * This name must be the same as the block identifier.
         * <p>
         * The name is handed to the {@code Subset} constructor, which
         * rejects {@code null}, and the new block is then stored in
         * {@code map} under that same name.
         *
         * @param idName the identifier name of the block; also the key
         *               under which the block is registered
         */
        private UnicodeBlock(String idName) {
            super(idName);
            // Register the instance while it is being constructed so that it
            // can be found by its identifier name.
            map.put(idName, this);
        }
 659 
        /**
         * Creates a UnicodeBlock with the given identifier name and
         * alias name.
         * <p>
         * Delegates to the single-name constructor, which registers the
         * block under {@code idName}, and then registers the same instance
         * in {@code map} under {@code alias} as well.
         *
         * @param idName the identifier name of the block
         * @param alias  an additional name that maps to this block
         */
        private UnicodeBlock(String idName, String alias) {
            this(idName);
            map.put(alias, this);
        }
 668 
 669         /**
 670          * Creates a UnicodeBlock with the given identifier name and
 671          * alias names.
 672          */
 673         private UnicodeBlock(String idName, String... aliases) {
 674             this(idName);
 675             for (String alias : aliases)
 676                 map.put(alias, this);
 677         }
 678 
 679         /**
 680          * Constant for the "Basic Latin" Unicode character block.
 681          * @since 1.2
 682          */
 683         public static final UnicodeBlock  BASIC_LATIN =
 684             new UnicodeBlock("BASIC_LATIN",
 685                              "BASIC LATIN",
 686                              "BASICLATIN");
 687 
 688         /**
 689          * Constant for the "Latin-1 Supplement" Unicode character block.
 690          * @since 1.2
 691          */
 692         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
 693             new UnicodeBlock("LATIN_1_SUPPLEMENT",
 694                              "LATIN-1 SUPPLEMENT",
 695                              "LATIN-1SUPPLEMENT");
 696 
 697         /**
 698          * Constant for the "Latin Extended-A" Unicode character block.
 699          * @since 1.2
 700          */
 701         public static final UnicodeBlock LATIN_EXTENDED_A =
 702             new UnicodeBlock("LATIN_EXTENDED_A",
 703                              "LATIN EXTENDED-A",
 704                              "LATINEXTENDED-A");
 705 
 706         /**
 707          * Constant for the "Latin Extended-B" Unicode character block.
 708          * @since 1.2
 709          */
 710         public static final UnicodeBlock LATIN_EXTENDED_B =
 711             new UnicodeBlock("LATIN_EXTENDED_B",
 712                              "LATIN EXTENDED-B",
 713                              "LATINEXTENDED-B");
 714 
 715         /**
 716          * Constant for the "IPA Extensions" Unicode character block.
 717          * @since 1.2
 718          */
 719         public static final UnicodeBlock IPA_EXTENSIONS =
 720             new UnicodeBlock("IPA_EXTENSIONS",
 721                              "IPA EXTENSIONS",
 722                              "IPAEXTENSIONS");
 723 
 724         /**
 725          * Constant for the "Spacing Modifier Letters" Unicode character block.
 726          * @since 1.2
 727          */
 728         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
 729             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
 730                              "SPACING MODIFIER LETTERS",
 731                              "SPACINGMODIFIERLETTERS");
 732 
 733         /**
 734          * Constant for the "Combining Diacritical Marks" Unicode character block.
 735          * @since 1.2
 736          */
 737         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
 738             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
 739                              "COMBINING DIACRITICAL MARKS",
 740                              "COMBININGDIACRITICALMARKS");
 741 
 742         /**
 743          * Constant for the "Greek and Coptic" Unicode character block.
 744          * <p>
 745          * This block was previously known as the "Greek" block.
 746          *
 747          * @since 1.2
 748          */
 749         public static final UnicodeBlock GREEK =
 750             new UnicodeBlock("GREEK",
 751                              "GREEK AND COPTIC",
 752                              "GREEKANDCOPTIC");
 753 
 754         /**
 755          * Constant for the "Cyrillic" Unicode character block.
 756          * @since 1.2
 757          */
 758         public static final UnicodeBlock CYRILLIC =
 759             new UnicodeBlock("CYRILLIC");
 760 
 761         /**
 762          * Constant for the "Armenian" Unicode character block.
 763          * @since 1.2
 764          */
 765         public static final UnicodeBlock ARMENIAN =
 766             new UnicodeBlock("ARMENIAN");
 767 
 768         /**
 769          * Constant for the "Hebrew" Unicode character block.
 770          * @since 1.2
 771          */
 772         public static final UnicodeBlock HEBREW =
 773             new UnicodeBlock("HEBREW");
 774 
 775         /**
 776          * Constant for the "Arabic" Unicode character block.
 777          * @since 1.2
 778          */
 779         public static final UnicodeBlock ARABIC =
 780             new UnicodeBlock("ARABIC");
 781 
 782         /**
 783          * Constant for the "Devanagari" Unicode character block.
 784          * @since 1.2
 785          */
 786         public static final UnicodeBlock DEVANAGARI =
 787             new UnicodeBlock("DEVANAGARI");
 788 
 789         /**
 790          * Constant for the "Bengali" Unicode character block.
 791          * @since 1.2
 792          */
 793         public static final UnicodeBlock BENGALI =
 794             new UnicodeBlock("BENGALI");
 795 
 796         /**
 797          * Constant for the "Gurmukhi" Unicode character block.
 798          * @since 1.2
 799          */
 800         public static final UnicodeBlock GURMUKHI =
 801             new UnicodeBlock("GURMUKHI");
 802 
 803         /**
 804          * Constant for the "Gujarati" Unicode character block.
 805          * @since 1.2
 806          */
 807         public static final UnicodeBlock GUJARATI =
 808             new UnicodeBlock("GUJARATI");
 809 
 810         /**
 811          * Constant for the "Oriya" Unicode character block.
 812          * @since 1.2
 813          */
 814         public static final UnicodeBlock ORIYA =
 815             new UnicodeBlock("ORIYA");
 816 
 817         /**
 818          * Constant for the "Tamil" Unicode character block.
 819          * @since 1.2
 820          */
 821         public static final UnicodeBlock TAMIL =
 822             new UnicodeBlock("TAMIL");
 823 
 824         /**
 825          * Constant for the "Telugu" Unicode character block.
 826          * @since 1.2
 827          */
 828         public static final UnicodeBlock TELUGU =
 829             new UnicodeBlock("TELUGU");
 830 
 831         /**
 832          * Constant for the "Kannada" Unicode character block.
 833          * @since 1.2
 834          */
 835         public static final UnicodeBlock KANNADA =
 836             new UnicodeBlock("KANNADA");
 837 
 838         /**
 839          * Constant for the "Malayalam" Unicode character block.
 840          * @since 1.2
 841          */
 842         public static final UnicodeBlock MALAYALAM =
 843             new UnicodeBlock("MALAYALAM");
 844 
 845         /**
 846          * Constant for the "Thai" Unicode character block.
 847          * @since 1.2
 848          */
 849         public static final UnicodeBlock THAI =
 850             new UnicodeBlock("THAI");
 851 
 852         /**
 853          * Constant for the "Lao" Unicode character block.
 854          * @since 1.2
 855          */
 856         public static final UnicodeBlock LAO =
 857             new UnicodeBlock("LAO");
 858 
 859         /**
 860          * Constant for the "Tibetan" Unicode character block.
 861          * @since 1.2
 862          */
 863         public static final UnicodeBlock TIBETAN =
 864             new UnicodeBlock("TIBETAN");
 865 
 866         /**
 867          * Constant for the "Georgian" Unicode character block.
 868          * @since 1.2
 869          */
 870         public static final UnicodeBlock GEORGIAN =
 871             new UnicodeBlock("GEORGIAN");
 872 
 873         /**
 874          * Constant for the "Hangul Jamo" Unicode character block.
 875          * @since 1.2
 876          */
 877         public static final UnicodeBlock HANGUL_JAMO =
 878             new UnicodeBlock("HANGUL_JAMO",
 879                              "HANGUL JAMO",
 880                              "HANGULJAMO");
 881 
 882         /**
 883          * Constant for the "Latin Extended Additional" Unicode character block.
 884          * @since 1.2
 885          */
 886         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
 887             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
 888                              "LATIN EXTENDED ADDITIONAL",
 889                              "LATINEXTENDEDADDITIONAL");
 890 
 891         /**
 892          * Constant for the "Greek Extended" Unicode character block.
 893          * @since 1.2
 894          */
 895         public static final UnicodeBlock GREEK_EXTENDED =
 896             new UnicodeBlock("GREEK_EXTENDED",
 897                              "GREEK EXTENDED",
 898                              "GREEKEXTENDED");
 899 
 900         /**
 901          * Constant for the "General Punctuation" Unicode character block.
 902          * @since 1.2
 903          */
 904         public static final UnicodeBlock GENERAL_PUNCTUATION =
 905             new UnicodeBlock("GENERAL_PUNCTUATION",
 906                              "GENERAL PUNCTUATION",
 907                              "GENERALPUNCTUATION");
 908 
 909         /**
 910          * Constant for the "Superscripts and Subscripts" Unicode character
 911          * block.
 912          * @since 1.2
 913          */
 914         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
 915             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
 916                              "SUPERSCRIPTS AND SUBSCRIPTS",
 917                              "SUPERSCRIPTSANDSUBSCRIPTS");
 918 
 919         /**
 920          * Constant for the "Currency Symbols" Unicode character block.
 921          * @since 1.2
 922          */
 923         public static final UnicodeBlock CURRENCY_SYMBOLS =
 924             new UnicodeBlock("CURRENCY_SYMBOLS",
 925                              "CURRENCY SYMBOLS",
 926                              "CURRENCYSYMBOLS");
 927 
 928         /**
 929          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
 930          * character block.
 931          * <p>
 932          * This block was previously known as "Combining Marks for Symbols".
              * The identifier of this constant still follows that earlier name.
 933          * @since 1.2
 934          */
 935         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
 936             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
                                  // Current block name, with and without spaces.
 937                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
 938                              "COMBININGDIACRITICALMARKSFORSYMBOLS",
                                  // Earlier block name, with and without spaces.
 939                              "COMBINING MARKS FOR SYMBOLS",
 940                              "COMBININGMARKSFORSYMBOLS");
 941 
 942         /**
 943          * Constant for the "Letterlike Symbols" Unicode character block.
 944          * @since 1.2
 945          */
 946         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
 947             new UnicodeBlock("LETTERLIKE_SYMBOLS",
 948                              "LETTERLIKE SYMBOLS",
 949                              "LETTERLIKESYMBOLS");
 950 
 951         /**
 952          * Constant for the "Number Forms" Unicode character block.
 953          * @since 1.2
 954          */
 955         public static final UnicodeBlock NUMBER_FORMS =
 956             new UnicodeBlock("NUMBER_FORMS",
 957                              "NUMBER FORMS",
 958                              "NUMBERFORMS");
 959 
 960         /**
 961          * Constant for the "Arrows" Unicode character block.
 962          * @since 1.2
 963          */
 964         public static final UnicodeBlock ARROWS =
 965             new UnicodeBlock("ARROWS");
 966 
 967         /**
 968          * Constant for the "Mathematical Operators" Unicode character block.
 969          * @since 1.2
 970          */
 971         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
 972             new UnicodeBlock("MATHEMATICAL_OPERATORS",
 973                              "MATHEMATICAL OPERATORS",
 974                              "MATHEMATICALOPERATORS");
 975 
 976         /**
 977          * Constant for the "Miscellaneous Technical" Unicode character block.
 978          * @since 1.2
 979          */
 980         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
 981             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
 982                              "MISCELLANEOUS TECHNICAL",
 983                              "MISCELLANEOUSTECHNICAL");
 984 
 985         /**
 986          * Constant for the "Control Pictures" Unicode character block.
 987          * @since 1.2
 988          */
 989         public static final UnicodeBlock CONTROL_PICTURES =
 990             new UnicodeBlock("CONTROL_PICTURES",
 991                              "CONTROL PICTURES",
 992                              "CONTROLPICTURES");
 993 
 994         /**
 995          * Constant for the "Optical Character Recognition" Unicode character block.
 996          * @since 1.2
 997          */
 998         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
 999             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1000                              "OPTICAL CHARACTER RECOGNITION",
1001                              "OPTICALCHARACTERRECOGNITION");
1002 
1003         /**
1004          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1005          * @since 1.2
1006          */
1007         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1008             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1009                              "ENCLOSED ALPHANUMERICS",
1010                              "ENCLOSEDALPHANUMERICS");
1011 
1012         /**
1013          * Constant for the "Box Drawing" Unicode character block.
1014          * @since 1.2
1015          */
1016         public static final UnicodeBlock BOX_DRAWING =
1017             new UnicodeBlock("BOX_DRAWING",
1018                              "BOX DRAWING",
1019                              "BOXDRAWING");
1020 
1021         /**
1022          * Constant for the "Block Elements" Unicode character block.
1023          * @since 1.2
1024          */
1025         public static final UnicodeBlock BLOCK_ELEMENTS =
1026             new UnicodeBlock("BLOCK_ELEMENTS",
1027                              "BLOCK ELEMENTS",
1028                              "BLOCKELEMENTS");
1029 
1030         /**
1031          * Constant for the "Geometric Shapes" Unicode character block.
1032          * @since 1.2
1033          */
1034         public static final UnicodeBlock GEOMETRIC_SHAPES =
1035             new UnicodeBlock("GEOMETRIC_SHAPES",
1036                              "GEOMETRIC SHAPES",
1037                              "GEOMETRICSHAPES");
1038 
1039         /**
1040          * Constant for the "Miscellaneous Symbols" Unicode character block.
1041          * @since 1.2
1042          */
1043         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1044             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1045                              "MISCELLANEOUS SYMBOLS",
1046                              "MISCELLANEOUSSYMBOLS");
1047 
1048         /**
1049          * Constant for the "Dingbats" Unicode character block.
1050          * @since 1.2
1051          */
1052         public static final UnicodeBlock DINGBATS =
1053             new UnicodeBlock("DINGBATS");
1054 
1055         /**
1056          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1057          * @since 1.2
1058          */
1059         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1060             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1061                              "CJK SYMBOLS AND PUNCTUATION",
1062                              "CJKSYMBOLSANDPUNCTUATION");
1063 
1064         /**
1065          * Constant for the "Hiragana" Unicode character block.
1066          * @since 1.2
1067          */
1068         public static final UnicodeBlock HIRAGANA =
1069             new UnicodeBlock("HIRAGANA");
1070 
1071         /**
1072          * Constant for the "Katakana" Unicode character block.
1073          * @since 1.2
1074          */
1075         public static final UnicodeBlock KATAKANA =
1076             new UnicodeBlock("KATAKANA");
1077 
1078         /**
1079          * Constant for the "Bopomofo" Unicode character block.
1080          * @since 1.2
1081          */
1082         public static final UnicodeBlock BOPOMOFO =
1083             new UnicodeBlock("BOPOMOFO");
1084 
1085         /**
1086          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1087          * @since 1.2
1088          */
1089         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1090             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1091                              "HANGUL COMPATIBILITY JAMO",
1092                              "HANGULCOMPATIBILITYJAMO");
1093 
1094         /**
1095          * Constant for the "Kanbun" Unicode character block.
1096          * @since 1.2
1097          */
1098         public static final UnicodeBlock KANBUN =
1099             new UnicodeBlock("KANBUN");
1100 
1101         /**
1102          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1103          * @since 1.2
1104          */
1105         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1106             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1107                              "ENCLOSED CJK LETTERS AND MONTHS",
1108                              "ENCLOSEDCJKLETTERSANDMONTHS");
1109 
1110         /**
1111          * Constant for the "CJK Compatibility" Unicode character block.
1112          * @since 1.2
1113          */
1114         public static final UnicodeBlock CJK_COMPATIBILITY =
1115             new UnicodeBlock("CJK_COMPATIBILITY",
1116                              "CJK COMPATIBILITY",
1117                              "CJKCOMPATIBILITY");
1118 
1119         /**
1120          * Constant for the "CJK Unified Ideographs" Unicode character block.
1121          * @since 1.2
1122          */
1123         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1124             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1125                              "CJK UNIFIED IDEOGRAPHS",
1126                              "CJKUNIFIEDIDEOGRAPHS");
1127 
1128         /**
1129          * Constant for the "Hangul Syllables" Unicode character block.
1130          * @since 1.2
1131          */
1132         public static final UnicodeBlock HANGUL_SYLLABLES =
1133             new UnicodeBlock("HANGUL_SYLLABLES",
1134                              "HANGUL SYLLABLES",
1135                              "HANGULSYLLABLES");
1136 
1137         /**
1138          * Constant for the "Private Use Area" Unicode character block.
1139          * @since 1.2
1140          */
1141         public static final UnicodeBlock PRIVATE_USE_AREA =
1142             new UnicodeBlock("PRIVATE_USE_AREA",
1143                              "PRIVATE USE AREA",
1144                              "PRIVATEUSEAREA");
1145 
1146         /**
1147          * Constant for the "CJK Compatibility Ideographs" Unicode character
1148          * block.
1149          * @since 1.2
1150          */
1151         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1152             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1153                              "CJK COMPATIBILITY IDEOGRAPHS",
1154                              "CJKCOMPATIBILITYIDEOGRAPHS");
1155 
1156         /**
1157          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1158          * @since 1.2
1159          */
1160         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1161             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1162                              "ALPHABETIC PRESENTATION FORMS",
1163                              "ALPHABETICPRESENTATIONFORMS");
1164 
1165         /**
1166          * Constant for the "Arabic Presentation Forms-A" Unicode character
1167          * block.
1168          * @since 1.2
1169          */
1170         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1171             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1172                              "ARABIC PRESENTATION FORMS-A",
                                  // The space-free form drops only the spaces; the
                                  // hyphen before the suffix is kept on purpose.
1173                              "ARABICPRESENTATIONFORMS-A");
1174 
1175         /**
1176          * Constant for the "Combining Half Marks" Unicode character block.
1177          * @since 1.2
1178          */
1179         public static final UnicodeBlock COMBINING_HALF_MARKS =
1180             new UnicodeBlock("COMBINING_HALF_MARKS",
1181                              "COMBINING HALF MARKS",
1182                              "COMBININGHALFMARKS");
1183 
1184         /**
1185          * Constant for the "CJK Compatibility Forms" Unicode character block.
1186          * @since 1.2
1187          */
1188         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1189             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1190                              "CJK COMPATIBILITY FORMS",
1191                              "CJKCOMPATIBILITYFORMS");
1192 
1193         /**
1194          * Constant for the "Small Form Variants" Unicode character block.
1195          * @since 1.2
1196          */
1197         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1198             new UnicodeBlock("SMALL_FORM_VARIANTS",
1199                              "SMALL FORM VARIANTS",
1200                              "SMALLFORMVARIANTS");
1201 
1202         /**
1203          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1204          * @since 1.2
1205          */
1206         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1207             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1208                              "ARABIC PRESENTATION FORMS-B",
                                  // The space-free form drops only the spaces; the
                                  // hyphen before the suffix is kept on purpose.
1209                              "ARABICPRESENTATIONFORMS-B");
1210 
1211         /**
1212          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1213          * block.
1214          * @since 1.2
1215          */
1216         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1217             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1218                              "HALFWIDTH AND FULLWIDTH FORMS",
1219                              "HALFWIDTHANDFULLWIDTHFORMS");
1220 
1221         /**
1222          * Constant for the "Specials" Unicode character block.
1223          * @since 1.2
1224          */
1225         public static final UnicodeBlock SPECIALS =
1226             new UnicodeBlock("SPECIALS");
1227 
1228         /**
              * Deprecated constant for the surrogates area. It is never
              * returned by {@link #of(char)} or {@link #of(int)}; see the
              * deprecation note for the constants that replace it.
              *
1229          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1230          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1231          *             {@link #LOW_SURROGATES}. These new constants match
1232          *             the block definitions of the Unicode Standard.
1233          *             The {@link #of(char)} and {@link #of(int)} methods
1234          *             return the new constants, not SURROGATES_AREA.
1235          */
1236         @Deprecated
1237         public static final UnicodeBlock SURROGATES_AREA =
1238             new UnicodeBlock("SURROGATES_AREA");
1239 
1240         /**
1241          * Constant for the "Syriac" Unicode character block.
1242          * @since 1.4
1243          */
1244         public static final UnicodeBlock SYRIAC =
1245             new UnicodeBlock("SYRIAC");
1246 
1247         /**
1248          * Constant for the "Thaana" Unicode character block.
1249          * @since 1.4
1250          */
1251         public static final UnicodeBlock THAANA =
1252             new UnicodeBlock("THAANA");
1253 
1254         /**
1255          * Constant for the "Sinhala" Unicode character block.
1256          * @since 1.4
1257          */
1258         public static final UnicodeBlock SINHALA =
1259             new UnicodeBlock("SINHALA");
1260 
1261         /**
1262          * Constant for the "Myanmar" Unicode character block.
1263          * @since 1.4
1264          */
1265         public static final UnicodeBlock MYANMAR =
1266             new UnicodeBlock("MYANMAR");
1267 
1268         /**
1269          * Constant for the "Ethiopic" Unicode character block.
1270          * @since 1.4
1271          */
1272         public static final UnicodeBlock ETHIOPIC =
1273             new UnicodeBlock("ETHIOPIC");
1274 
1275         /**
1276          * Constant for the "Cherokee" Unicode character block.
1277          * @since 1.4
1278          */
1279         public static final UnicodeBlock CHEROKEE =
1280             new UnicodeBlock("CHEROKEE");
1281 
1282         /**
1283          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1284          * @since 1.4
1285          */
1286         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1287             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1288                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1289                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
1290 
1291         /**
1292          * Constant for the "Ogham" Unicode character block.
1293          * @since 1.4
1294          */
1295         public static final UnicodeBlock OGHAM =
1296             new UnicodeBlock("OGHAM");
1297 
1298         /**
1299          * Constant for the "Runic" Unicode character block.
1300          * @since 1.4
1301          */
1302         public static final UnicodeBlock RUNIC =
1303             new UnicodeBlock("RUNIC");
1304 
1305         /**
1306          * Constant for the "Khmer" Unicode character block.
1307          * @since 1.4
1308          */
1309         public static final UnicodeBlock KHMER =
1310             new UnicodeBlock("KHMER");
1311 
1312         /**
1313          * Constant for the "Mongolian" Unicode character block.
1314          * @since 1.4
1315          */
1316         public static final UnicodeBlock MONGOLIAN =
1317             new UnicodeBlock("MONGOLIAN");
1318 
1319         /**
1320          * Constant for the "Braille Patterns" Unicode character block.
1321          * @since 1.4
1322          */
1323         public static final UnicodeBlock BRAILLE_PATTERNS =
1324             new UnicodeBlock("BRAILLE_PATTERNS",
1325                              "BRAILLE PATTERNS",
1326                              "BRAILLEPATTERNS");
1327 
1328         /**
1329          * Constant for the "CJK Radicals Supplement" Unicode character block.
1330          * @since 1.4
1331          */
1332         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1333             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1334                              "CJK RADICALS SUPPLEMENT",
1335                              "CJKRADICALSSUPPLEMENT");
1336 
1337         /**
1338          * Constant for the "Kangxi Radicals" Unicode character block.
1339          * @since 1.4
1340          */
1341         public static final UnicodeBlock KANGXI_RADICALS =
1342             new UnicodeBlock("KANGXI_RADICALS",
1343                              "KANGXI RADICALS",
1344                              "KANGXIRADICALS");
1345 
1346         /**
1347          * Constant for the "Ideographic Description Characters" Unicode character block.
1348          * @since 1.4
1349          */
1350         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1351             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1352                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1353                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1354 
1355         /**
1356          * Constant for the "Bopomofo Extended" Unicode character block.
1357          * @since 1.4
1358          */
1359         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1360             new UnicodeBlock("BOPOMOFO_EXTENDED",
1361                              "BOPOMOFO EXTENDED",
1362                              "BOPOMOFOEXTENDED");
1363 
1364         /**
1365          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1366          * @since 1.4
1367          */
1368         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1369             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1370                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1371                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1372 
1373         /**
1374          * Constant for the "Yi Syllables" Unicode character block.
1375          * @since 1.4
1376          */
1377         public static final UnicodeBlock YI_SYLLABLES =
1378             new UnicodeBlock("YI_SYLLABLES",
1379                              "YI SYLLABLES",
1380                              "YISYLLABLES");
1381 
1382         /**
1383          * Constant for the "Yi Radicals" Unicode character block.
1384          * @since 1.4
1385          */
1386         public static final UnicodeBlock YI_RADICALS =
1387             new UnicodeBlock("YI_RADICALS",
1388                              "YI RADICALS",
1389                              "YIRADICALS");
1390 
1391         /**
1392          * Constant for the "Cyrillic Supplementary" Unicode character block.
              * <p>
              * The names "CYRILLIC SUPPLEMENT" and "CYRILLICSUPPLEMENT" are also
              * supplied for this block when the constant is constructed.
1393          * @since 1.5
1394          */
1395         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1396             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1397                              "CYRILLIC SUPPLEMENTARY",
1398                              "CYRILLICSUPPLEMENTARY",
                                  // NOTE(review): presumably the block's newer name in
                                  // the Unicode Standard ("Cyrillic Supplement") -- confirm.
1399                              "CYRILLIC SUPPLEMENT",
1400                              "CYRILLICSUPPLEMENT");
1401 
1402         /**
1403          * Constant for the "Tagalog" Unicode character block.
1404          * @since 1.5
1405          */
1406         public static final UnicodeBlock TAGALOG =
1407             new UnicodeBlock("TAGALOG");
1408 
1409         /**
1410          * Constant for the "Hanunoo" Unicode character block.
1411          * @since 1.5
1412          */
1413         public static final UnicodeBlock HANUNOO =
1414             new UnicodeBlock("HANUNOO");
1415 
1416         /**
1417          * Constant for the "Buhid" Unicode character block.
1418          * @since 1.5
1419          */
1420         public static final UnicodeBlock BUHID =
1421             new UnicodeBlock("BUHID");
1422 
1423         /**
1424          * Constant for the "Tagbanwa" Unicode character block.
1425          * @since 1.5
1426          */
1427         public static final UnicodeBlock TAGBANWA =
1428             new UnicodeBlock("TAGBANWA");
1429 
1430         /**
1431          * Constant for the "Limbu" Unicode character block.
1432          * @since 1.5
1433          */
1434         public static final UnicodeBlock LIMBU =
1435             new UnicodeBlock("LIMBU");
1436 
1437         /**
1438          * Constant for the "Tai Le" Unicode character block.
1439          * @since 1.5
1440          */
1441         public static final UnicodeBlock TAI_LE =
1442             new UnicodeBlock("TAI_LE",
1443                              "TAI LE",
1444                              "TAILE");
1445 
1446         /**
1447          * Constant for the "Khmer Symbols" Unicode character block.
1448          * @since 1.5
1449          */
1450         public static final UnicodeBlock KHMER_SYMBOLS =
1451             new UnicodeBlock("KHMER_SYMBOLS",
1452                              "KHMER SYMBOLS",
1453                              "KHMERSYMBOLS");
1454 
1455         /**
1456          * Constant for the "Phonetic Extensions" Unicode character block.
1457          * @since 1.5
1458          */
1459         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1460             new UnicodeBlock("PHONETIC_EXTENSIONS",
1461                              "PHONETIC EXTENSIONS",
1462                              "PHONETICEXTENSIONS");
1463 
1464         /**
1465          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1466          * @since 1.5
1467          */
1468         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1469             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1470                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
                                  // The space-free form drops only the spaces; the
                                  // hyphen before the suffix is kept on purpose.
1471                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1472 
1473         /**
1474          * Constant for the "Supplemental Arrows-A" Unicode character block.
1475          * @since 1.5
1476          */
1477         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1478             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1479                              "SUPPLEMENTAL ARROWS-A",
                                  // The space-free form drops only the spaces; the
                                  // hyphen before the suffix is kept on purpose.
1480                              "SUPPLEMENTALARROWS-A");
1481 
1482         /**
1483          * Constant for the "Supplemental Arrows-B" Unicode character block.
1484          * @since 1.5
1485          */
1486         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1487             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1488                              "SUPPLEMENTAL ARROWS-B",
                                  // The space-free form drops only the spaces; the
                                  // hyphen before the suffix is kept on purpose.
1489                              "SUPPLEMENTALARROWS-B");
1490 
1491         /**
1492          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1493          * character block.
1494          * @since 1.5
1495          */
1496         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1497             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1498                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
                                  // The space-free form drops only the spaces; the
                                  // hyphen before the suffix is kept on purpose.
1499                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1500 
1501         /**
1502          * Constant for the "Supplemental Mathematical Operators" Unicode
1503          * character block.
1504          * @since 1.5
1505          */
1506         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1507             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1508                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1509                              "SUPPLEMENTALMATHEMATICALOPERATORS");
1510 
1511         /**
1512          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1513          * block.
1514          * @since 1.5
1515          */
1516         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1517             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1518                              "MISCELLANEOUS SYMBOLS AND ARROWS",
1519                              "MISCELLANEOUSSYMBOLSANDARROWS");
1520 
1521         /**
1522          * Constant for the "Katakana Phonetic Extensions" Unicode character
1523          * block.
1524          * @since 1.5
1525          */
1526         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1527             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1528                              "KATAKANA PHONETIC EXTENSIONS",
1529                              "KATAKANAPHONETICEXTENSIONS");
1530 
1531         /**
1532          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1533          * @since 1.5
1534          */
1535         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1536             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1537                              "YIJING HEXAGRAM SYMBOLS",
1538                              "YIJINGHEXAGRAMSYMBOLS");
1539 
1540         /**
1541          * Constant for the "Variation Selectors" Unicode character block.
1542          * @since 1.5
1543          */
1544         public static final UnicodeBlock VARIATION_SELECTORS =
1545             new UnicodeBlock("VARIATION_SELECTORS",
1546                              "VARIATION SELECTORS",
1547                              "VARIATIONSELECTORS");
1548 
1549         /**
1550          * Constant for the "Linear B Syllabary" Unicode character block.
1551          * @since 1.5
1552          */
1553         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1554             new UnicodeBlock("LINEAR_B_SYLLABARY",
1555                              "LINEAR B SYLLABARY",
1556                              "LINEARBSYLLABARY");
1557 
1558         /**
1559          * Constant for the "Linear B Ideograms" Unicode character block.
1560          * @since 1.5
1561          */
1562         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1563             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1564                              "LINEAR B IDEOGRAMS",
1565                              "LINEARBIDEOGRAMS");
1566 
1567         /**
1568          * Constant for the "Aegean Numbers" Unicode character block.
1569          * @since 1.5
1570          */
1571         public static final UnicodeBlock AEGEAN_NUMBERS =
1572             new UnicodeBlock("AEGEAN_NUMBERS",
1573                              "AEGEAN NUMBERS",
1574                              "AEGEANNUMBERS");
1575 
1576         /**
1577          * Constant for the "Old Italic" Unicode character block.
1578          * @since 1.5
1579          */
1580         public static final UnicodeBlock OLD_ITALIC =
1581             new UnicodeBlock("OLD_ITALIC",
1582                              "OLD ITALIC",
1583                              "OLDITALIC");
1584 
1585         /**
1586          * Constant for the "Gothic" Unicode character block.
1587          * @since 1.5
1588          */
1589         public static final UnicodeBlock GOTHIC =
1590             new UnicodeBlock("GOTHIC");
1591 
1592         /**
1593          * Constant for the "Ugaritic" Unicode character block.
1594          * @since 1.5
1595          */
1596         public static final UnicodeBlock UGARITIC =
1597             new UnicodeBlock("UGARITIC");
1598 
1599         /**
1600          * Constant for the "Deseret" Unicode character block.
1601          * @since 1.5
1602          */
1603         public static final UnicodeBlock DESERET =
1604             new UnicodeBlock("DESERET");
1605 
1606         /**
1607          * Constant for the "Shavian" Unicode character block.
1608          * @since 1.5
1609          */
1610         public static final UnicodeBlock SHAVIAN =
1611             new UnicodeBlock("SHAVIAN");
1612 
1613         /**
1614          * Constant for the "Osmanya" Unicode character block.
1615          * @since 1.5
1616          */
1617         public static final UnicodeBlock OSMANYA =
1618             new UnicodeBlock("OSMANYA");
1619 
1620         /**
1621          * Constant for the "Cypriot Syllabary" Unicode character block.
1622          * @since 1.5
1623          */
1624         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1625             new UnicodeBlock("CYPRIOT_SYLLABARY",
1626                              "CYPRIOT SYLLABARY",
1627                              "CYPRIOTSYLLABARY");
1628 
1629         /**
1630          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1631          * @since 1.5
1632          */
1633         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1634             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1635                              "BYZANTINE MUSICAL SYMBOLS",
1636                              "BYZANTINEMUSICALSYMBOLS");
1637 
1638         /**
1639          * Constant for the "Musical Symbols" Unicode character block.
1640          * @since 1.5
1641          */
1642         public static final UnicodeBlock MUSICAL_SYMBOLS =
1643             new UnicodeBlock("MUSICAL_SYMBOLS",
1644                              "MUSICAL SYMBOLS",
1645                              "MUSICALSYMBOLS");
1646 
1647         /**
1648          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1649          * @since 1.5
1650          */
1651         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1652             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1653                              "TAI XUAN JING SYMBOLS",
1654                              "TAIXUANJINGSYMBOLS");
1655 
1656         /**
1657          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1658          * character block.
1659          * @since 1.5
1660          */
1661         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1662             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1663                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1664                              "MATHEMATICALALPHANUMERICSYMBOLS");
1665 
1666         /**
1667          * Constant for the "CJK Unified Ideographs Extension B" Unicode
1668          * character block.
1669          * @since 1.5
1670          */
1671         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1672             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1673                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1674                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1675 
1676         /**
1677          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1678          * @since 1.5
1679          */
1680         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1681             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1682                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1683                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1684 
1685         /**
1686          * Constant for the "Tags" Unicode character block.
1687          * @since 1.5
1688          */
1689         public static final UnicodeBlock TAGS =
1690             new UnicodeBlock("TAGS");
1691 
1692         /**
1693          * Constant for the "Variation Selectors Supplement" Unicode character
1694          * block.
1695          * @since 1.5
1696          */
1697         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1698             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1699                              "VARIATION SELECTORS SUPPLEMENT",
1700                              "VARIATIONSELECTORSSUPPLEMENT");
1701 
1702         /**
1703          * Constant for the "Supplementary Private Use Area-A" Unicode character
1704          * block.
1705          * @since 1.5
1706          */
1707         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1708             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1709                              "SUPPLEMENTARY PRIVATE USE AREA-A",
1710                              "SUPPLEMENTARYPRIVATEUSEAREA-A");
1711 
1712         /**
1713          * Constant for the "Supplementary Private Use Area-B" Unicode character
1714          * block.
1715          * @since 1.5
1716          */
1717         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1718             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1719                              "SUPPLEMENTARY PRIVATE USE AREA-B",
1720                              "SUPPLEMENTARYPRIVATEUSEAREA-B");
1721 
1722         /**
1723          * Constant for the "High Surrogates" Unicode character block.
1724          * This block represents code point values in the high surrogate
1725          * range: U+D800 through U+DB7F.
1726          *
1727          * @since 1.5
1728          */
1729         public static final UnicodeBlock HIGH_SURROGATES =
1730             new UnicodeBlock("HIGH_SURROGATES",
1731                              "HIGH SURROGATES",
1732                              "HIGHSURROGATES");
1733 
1734         /**
1735          * Constant for the "High Private Use Surrogates" Unicode character
1736          * block.
1737          * This block represents code point values in the private use high
1738          * surrogate range: U+DB80 through U+DBFF.
1739          *
1740          * @since 1.5
1741          */
1742         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1743             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1744                              "HIGH PRIVATE USE SURROGATES",
1745                              "HIGHPRIVATEUSESURROGATES");
1746 
1747         /**
1748          * Constant for the "Low Surrogates" Unicode character block.
1749          * This block represents code point values in the low surrogate
1750          * range: U+DC00 through U+DFFF.
1751          *
1752          * @since 1.5
1753          */
1754         public static final UnicodeBlock LOW_SURROGATES =
1755             new UnicodeBlock("LOW_SURROGATES",
1756                              "LOW SURROGATES",
1757                              "LOWSURROGATES");
1758 
1759         /**
1760          * Constant for the "Arabic Supplement" Unicode character block.
1761          * @since 1.7
1762          */
1763         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1764             new UnicodeBlock("ARABIC_SUPPLEMENT",
1765                              "ARABIC SUPPLEMENT",
1766                              "ARABICSUPPLEMENT");
1767 
1768         /**
1769          * Constant for the "NKo" Unicode character block.
1770          * @since 1.7
1771          */
1772         public static final UnicodeBlock NKO =
1773             new UnicodeBlock("NKO");
1774 
1775         /**
1776          * Constant for the "Samaritan" Unicode character block.
1777          * @since 1.7
1778          */
1779         public static final UnicodeBlock SAMARITAN =
1780             new UnicodeBlock("SAMARITAN");
1781 
1782         /**
1783          * Constant for the "Mandaic" Unicode character block.
1784          * @since 1.7
1785          */
1786         public static final UnicodeBlock MANDAIC =
1787             new UnicodeBlock("MANDAIC");
1788 
1789         /**
1790          * Constant for the "Ethiopic Supplement" Unicode character block.
1791          * @since 1.7
1792          */
1793         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1794             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1795                              "ETHIOPIC SUPPLEMENT",
1796                              "ETHIOPICSUPPLEMENT");
1797 
1798         /**
1799          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1800          * Unicode character block.
1801          * @since 1.7
1802          */
1803         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1804             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1805                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1806                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1807 
1808         /**
1809          * Constant for the "New Tai Lue" Unicode character block.
1810          * @since 1.7
1811          */
1812         public static final UnicodeBlock NEW_TAI_LUE =
1813             new UnicodeBlock("NEW_TAI_LUE",
1814                              "NEW TAI LUE",
1815                              "NEWTAILUE");
1816 
1817         /**
1818          * Constant for the "Buginese" Unicode character block.
1819          * @since 1.7
1820          */
1821         public static final UnicodeBlock BUGINESE =
1822             new UnicodeBlock("BUGINESE");
1823 
1824         /**
1825          * Constant for the "Tai Tham" Unicode character block.
1826          * @since 1.7
1827          */
1828         public static final UnicodeBlock TAI_THAM =
1829             new UnicodeBlock("TAI_THAM",
1830                              "TAI THAM",
1831                              "TAITHAM");
1832 
1833         /**
1834          * Constant for the "Balinese" Unicode character block.
1835          * @since 1.7
1836          */
1837         public static final UnicodeBlock BALINESE =
1838             new UnicodeBlock("BALINESE");
1839 
1840         /**
1841          * Constant for the "Sundanese" Unicode character block.
1842          * @since 1.7
1843          */
1844         public static final UnicodeBlock SUNDANESE =
1845             new UnicodeBlock("SUNDANESE");
1846 
1847         /**
1848          * Constant for the "Batak" Unicode character block.
1849          * @since 1.7
1850          */
1851         public static final UnicodeBlock BATAK =
1852             new UnicodeBlock("BATAK");
1853 
1854         /**
1855          * Constant for the "Lepcha" Unicode character block.
1856          * @since 1.7
1857          */
1858         public static final UnicodeBlock LEPCHA =
1859             new UnicodeBlock("LEPCHA");
1860 
1861         /**
1862          * Constant for the "Ol Chiki" Unicode character block.
1863          * @since 1.7
1864          */
1865         public static final UnicodeBlock OL_CHIKI =
1866             new UnicodeBlock("OL_CHIKI",
1867                              "OL CHIKI",
1868                              "OLCHIKI");
1869 
1870         /**
1871          * Constant for the "Vedic Extensions" Unicode character block.
1872          * @since 1.7
1873          */
1874         public static final UnicodeBlock VEDIC_EXTENSIONS =
1875             new UnicodeBlock("VEDIC_EXTENSIONS",
1876                              "VEDIC EXTENSIONS",
1877                              "VEDICEXTENSIONS");
1878 
1879         /**
1880          * Constant for the "Phonetic Extensions Supplement" Unicode character
1881          * block.
1882          * @since 1.7
1883          */
1884         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1885             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1886                              "PHONETIC EXTENSIONS SUPPLEMENT",
1887                              "PHONETICEXTENSIONSSUPPLEMENT");
1888 
1889         /**
1890          * Constant for the "Combining Diacritical Marks Supplement" Unicode
1891          * character block.
1892          * @since 1.7
1893          */
1894         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1895             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1896                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1897                              "COMBININGDIACRITICALMARKSSUPPLEMENT");
1898 
1899         /**
1900          * Constant for the "Glagolitic" Unicode character block.
1901          * @since 1.7
1902          */
1903         public static final UnicodeBlock GLAGOLITIC =
1904             new UnicodeBlock("GLAGOLITIC");
1905 
1906         /**
1907          * Constant for the "Latin Extended-C" Unicode character block.
1908          * @since 1.7
1909          */
1910         public static final UnicodeBlock LATIN_EXTENDED_C =
1911             new UnicodeBlock("LATIN_EXTENDED_C",
1912                              "LATIN EXTENDED-C",
1913                              "LATINEXTENDED-C");
1914 
1915         /**
1916          * Constant for the "Coptic" Unicode character block.
1917          * @since 1.7
1918          */
1919         public static final UnicodeBlock COPTIC =
1920             new UnicodeBlock("COPTIC");
1921 
1922         /**
1923          * Constant for the "Georgian Supplement" Unicode character block.
1924          * @since 1.7
1925          */
1926         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1927             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1928                              "GEORGIAN SUPPLEMENT",
1929                              "GEORGIANSUPPLEMENT");
1930 
1931         /**
1932          * Constant for the "Tifinagh" Unicode character block.
1933          * @since 1.7
1934          */
1935         public static final UnicodeBlock TIFINAGH =
1936             new UnicodeBlock("TIFINAGH");
1937 
1938         /**
1939          * Constant for the "Ethiopic Extended" Unicode character block.
1940          * @since 1.7
1941          */
1942         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1943             new UnicodeBlock("ETHIOPIC_EXTENDED",
1944                              "ETHIOPIC EXTENDED",
1945                              "ETHIOPICEXTENDED");
1946 
1947         /**
1948          * Constant for the "Cyrillic Extended-A" Unicode character block.
1949          * @since 1.7
1950          */
1951         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1952             new UnicodeBlock("CYRILLIC_EXTENDED_A",
1953                              "CYRILLIC EXTENDED-A",
1954                              "CYRILLICEXTENDED-A");
1955 
1956         /**
1957          * Constant for the "Supplemental Punctuation" Unicode character block.
1958          * @since 1.7
1959          */
1960         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1961             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1962                              "SUPPLEMENTAL PUNCTUATION",
1963                              "SUPPLEMENTALPUNCTUATION");
1964 
1965         /**
1966          * Constant for the "CJK Strokes" Unicode character block.
1967          * @since 1.7
1968          */
1969         public static final UnicodeBlock CJK_STROKES =
1970             new UnicodeBlock("CJK_STROKES",
1971                              "CJK STROKES",
1972                              "CJKSTROKES");
1973 
1974         /**
1975          * Constant for the "Lisu" Unicode character block.
1976          * @since 1.7
1977          */
1978         public static final UnicodeBlock LISU =
1979             new UnicodeBlock("LISU");
1980 
1981         /**
1982          * Constant for the "Vai" Unicode character block.
1983          * @since 1.7
1984          */
1985         public static final UnicodeBlock VAI =
1986             new UnicodeBlock("VAI");
1987 
1988         /**
1989          * Constant for the "Cyrillic Extended-B" Unicode character block.
1990          * @since 1.7
1991          */
1992         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
1993             new UnicodeBlock("CYRILLIC_EXTENDED_B",
1994                              "CYRILLIC EXTENDED-B",
1995                              "CYRILLICEXTENDED-B");
1996 
1997         /**
1998          * Constant for the "Bamum" Unicode character block.
1999          * @since 1.7
2000          */
2001         public static final UnicodeBlock BAMUM =
2002             new UnicodeBlock("BAMUM");
2003 
2004         /**
2005          * Constant for the "Modifier Tone Letters" Unicode character block.
2006          * @since 1.7
2007          */
2008         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2009             new UnicodeBlock("MODIFIER_TONE_LETTERS",
2010                              "MODIFIER TONE LETTERS",
2011                              "MODIFIERTONELETTERS");
2012 
2013         /**
2014          * Constant for the "Latin Extended-D" Unicode character block.
2015          * @since 1.7
2016          */
2017         public static final UnicodeBlock LATIN_EXTENDED_D =
2018             new UnicodeBlock("LATIN_EXTENDED_D",
2019                              "LATIN EXTENDED-D",
2020                              "LATINEXTENDED-D");
2021 
2022         /**
2023          * Constant for the "Syloti Nagri" Unicode character block.
2024          * @since 1.7
2025          */
2026         public static final UnicodeBlock SYLOTI_NAGRI =
2027             new UnicodeBlock("SYLOTI_NAGRI",
2028                              "SYLOTI NAGRI",
2029                              "SYLOTINAGRI");
2030 
2031         /**
2032          * Constant for the "Common Indic Number Forms" Unicode character block.
2033          * @since 1.7
2034          */
2035         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2036             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2037                              "COMMON INDIC NUMBER FORMS",
2038                              "COMMONINDICNUMBERFORMS");
2039 
2040         /**
2041          * Constant for the "Phags-pa" Unicode character block.
2042          * @since 1.7
2043          */
2044         public static final UnicodeBlock PHAGS_PA =
2045             new UnicodeBlock("PHAGS_PA",
2046                              "PHAGS-PA");
2047 
2048         /**
2049          * Constant for the "Saurashtra" Unicode character block.
2050          * @since 1.7
2051          */
2052         public static final UnicodeBlock SAURASHTRA =
2053             new UnicodeBlock("SAURASHTRA");
2054 
2055         /**
2056          * Constant for the "Devanagari Extended" Unicode character block.
2057          * @since 1.7
2058          */
2059         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2060             new UnicodeBlock("DEVANAGARI_EXTENDED",
2061                              "DEVANAGARI EXTENDED",
2062                              "DEVANAGARIEXTENDED");
2063 
2064         /**
2065          * Constant for the "Kayah Li" Unicode character block.
2066          * @since 1.7
2067          */
2068         public static final UnicodeBlock KAYAH_LI =
2069             new UnicodeBlock("KAYAH_LI",
2070                              "KAYAH LI",
2071                              "KAYAHLI");
2072 
2073         /**
2074          * Constant for the "Rejang" Unicode character block.
2075          * @since 1.7
2076          */
2077         public static final UnicodeBlock REJANG =
2078             new UnicodeBlock("REJANG");
2079 
2080         /**
2081          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2082          * @since 1.7
2083          */
2084         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2085             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2086                              "HANGUL JAMO EXTENDED-A",
2087                              "HANGULJAMOEXTENDED-A");
2088 
2089         /**
2090          * Constant for the "Javanese" Unicode character block.
2091          * @since 1.7
2092          */
2093         public static final UnicodeBlock JAVANESE =
2094             new UnicodeBlock("JAVANESE");
2095 
2096         /**
2097          * Constant for the "Cham" Unicode character block.
2098          * @since 1.7
2099          */
2100         public static final UnicodeBlock CHAM =
2101             new UnicodeBlock("CHAM");
2102 
2103         /**
2104          * Constant for the "Myanmar Extended-A" Unicode character block.
2105          * @since 1.7
2106          */
2107         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2108             new UnicodeBlock("MYANMAR_EXTENDED_A",
2109                              "MYANMAR EXTENDED-A",
2110                              "MYANMAREXTENDED-A");
2111 
2112         /**
2113          * Constant for the "Tai Viet" Unicode character block.
2114          * @since 1.7
2115          */
2116         public static final UnicodeBlock TAI_VIET =
2117             new UnicodeBlock("TAI_VIET",
2118                              "TAI VIET",
2119                              "TAIVIET");
2120 
2121         /**
2122          * Constant for the "Ethiopic Extended-A" Unicode character block.
2123          * @since 1.7
2124          */
2125         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2126             new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2127                              "ETHIOPIC EXTENDED-A",
2128                              "ETHIOPICEXTENDED-A");
2129 
2130         /**
2131          * Constant for the "Meetei Mayek" Unicode character block.
2132          * @since 1.7
2133          */
2134         public static final UnicodeBlock MEETEI_MAYEK =
2135             new UnicodeBlock("MEETEI_MAYEK",
2136                              "MEETEI MAYEK",
2137                              "MEETEIMAYEK");
2138 
2139         /**
2140          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2141          * @since 1.7
2142          */
2143         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2144             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2145                              "HANGUL JAMO EXTENDED-B",
2146                              "HANGULJAMOEXTENDED-B");
2147 
2148         /**
2149          * Constant for the "Vertical Forms" Unicode character block.
2150          * @since 1.7
2151          */
2152         public static final UnicodeBlock VERTICAL_FORMS =
2153             new UnicodeBlock("VERTICAL_FORMS",
2154                              "VERTICAL FORMS",
2155                              "VERTICALFORMS");
2156 
2157         /**
2158          * Constant for the "Ancient Greek Numbers" Unicode character block.
2159          * @since 1.7
2160          */
2161         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2162             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2163                              "ANCIENT GREEK NUMBERS",
2164                              "ANCIENTGREEKNUMBERS");
2165 
2166         /**
2167          * Constant for the "Ancient Symbols" Unicode character block.
2168          * @since 1.7
2169          */
2170         public static final UnicodeBlock ANCIENT_SYMBOLS =
2171             new UnicodeBlock("ANCIENT_SYMBOLS",
2172                              "ANCIENT SYMBOLS",
2173                              "ANCIENTSYMBOLS");
2174 
2175         /**
2176          * Constant for the "Phaistos Disc" Unicode character block.
2177          * @since 1.7
2178          */
2179         public static final UnicodeBlock PHAISTOS_DISC =
2180             new UnicodeBlock("PHAISTOS_DISC",
2181                              "PHAISTOS DISC",
2182                              "PHAISTOSDISC");
2183 
2184         /**
2185          * Constant for the "Lycian" Unicode character block.
2186          * @since 1.7
2187          */
2188         public static final UnicodeBlock LYCIAN =
2189             new UnicodeBlock("LYCIAN");
2190 
2191         /**
2192          * Constant for the "Carian" Unicode character block.
2193          * @since 1.7
2194          */
2195         public static final UnicodeBlock CARIAN =
2196             new UnicodeBlock("CARIAN");
2197 
2198         /**
2199          * Constant for the "Old Persian" Unicode character block.
2200          * @since 1.7
2201          */
2202         public static final UnicodeBlock OLD_PERSIAN =
2203             new UnicodeBlock("OLD_PERSIAN",
2204                              "OLD PERSIAN",
2205                              "OLDPERSIAN");
2206 
2207         /**
2208          * Constant for the "Imperial Aramaic" Unicode character block.
2209          * @since 1.7
2210          */
2211         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2212             new UnicodeBlock("IMPERIAL_ARAMAIC",
2213                              "IMPERIAL ARAMAIC",
2214                              "IMPERIALARAMAIC");
2215 
2216         /**
2217          * Constant for the "Phoenician" Unicode character block.
2218          * @since 1.7
2219          */
2220         public static final UnicodeBlock PHOENICIAN =
2221             new UnicodeBlock("PHOENICIAN");
2222 
2223         /**
2224          * Constant for the "Lydian" Unicode character block.
2225          * @since 1.7
2226          */
2227         public static final UnicodeBlock LYDIAN =
2228             new UnicodeBlock("LYDIAN");
2229 
2230         /**
2231          * Constant for the "Kharoshthi" Unicode character block.
2232          * @since 1.7
2233          */
2234         public static final UnicodeBlock KHAROSHTHI =
2235             new UnicodeBlock("KHAROSHTHI");
2236 
2237         /**
2238          * Constant for the "Old South Arabian" Unicode character block.
2239          * @since 1.7
2240          */
2241         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2242             new UnicodeBlock("OLD_SOUTH_ARABIAN",
2243                              "OLD SOUTH ARABIAN",
2244                              "OLDSOUTHARABIAN");
2245 
2246         /**
2247          * Constant for the "Avestan" Unicode character block.
2248          * @since 1.7
2249          */
2250         public static final UnicodeBlock AVESTAN =
2251             new UnicodeBlock("AVESTAN");
2252 
2253         /**
2254          * Constant for the "Inscriptional Parthian" Unicode character block.
2255          * @since 1.7
2256          */
2257         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2258             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2259                              "INSCRIPTIONAL PARTHIAN",
2260                              "INSCRIPTIONALPARTHIAN");
2261 
2262         /**
2263          * Constant for the "Inscriptional Pahlavi" Unicode character block.
2264          * @since 1.7
2265          */
2266         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2267             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2268                              "INSCRIPTIONAL PAHLAVI",
2269                              "INSCRIPTIONALPAHLAVI");
2270 
2271         /**
2272          * Constant for the "Old Turkic" Unicode character block.
2273          * @since 1.7
2274          */
2275         public static final UnicodeBlock OLD_TURKIC =
2276             new UnicodeBlock("OLD_TURKIC",
2277                              "OLD TURKIC",
2278                              "OLDTURKIC");
2279 
2280         /**
2281          * Constant for the "Rumi Numeral Symbols" Unicode character block.
2282          * @since 1.7
2283          */
2284         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2285             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2286                              "RUMI NUMERAL SYMBOLS",
2287                              "RUMINUMERALSYMBOLS");
2288 
2289         /**
2290          * Constant for the "Brahmi" Unicode character block.
2291          * @since 1.7
2292          */
2293         public static final UnicodeBlock BRAHMI =
2294             new UnicodeBlock("BRAHMI");
2295 
2296         /**
2297          * Constant for the "Kaithi" Unicode character block.
2298          * @since 1.7
2299          */
2300         public static final UnicodeBlock KAITHI =
2301             new UnicodeBlock("KAITHI");
2302 
2303         /**
2304          * Constant for the "Cuneiform" Unicode character block.
2305          * @since 1.7
2306          */
2307         public static final UnicodeBlock CUNEIFORM =
2308             new UnicodeBlock("CUNEIFORM");
2309 
2310         /**
2311          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2312          * character block.
2313          * @since 1.7
2314          */
2315         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2316             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2317                              "CUNEIFORM NUMBERS AND PUNCTUATION",
2318                              "CUNEIFORMNUMBERSANDPUNCTUATION");
2319 
2320         /**
2321          * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2322          * @since 1.7
2323          */
2324         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2325             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2326                              "EGYPTIAN HIEROGLYPHS",
2327                              "EGYPTIANHIEROGLYPHS");
2328 
2329         /**
2330          * Constant for the "Bamum Supplement" Unicode character block.
2331          * @since 1.7
2332          */
2333         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2334             new UnicodeBlock("BAMUM_SUPPLEMENT",
2335                              "BAMUM SUPPLEMENT",
2336                              "BAMUMSUPPLEMENT");
2337 
2338         /**
2339          * Constant for the "Kana Supplement" Unicode character block.
2340          * @since 1.7
2341          */
2342         public static final UnicodeBlock KANA_SUPPLEMENT =
2343             new UnicodeBlock("KANA_SUPPLEMENT",
2344                              "KANA SUPPLEMENT",
2345                              "KANASUPPLEMENT");
2346 
2347         /**
2348          * Constant for the "Ancient Greek Musical Notation" Unicode character
2349          * block.
2350          * @since 1.7
2351          */
2352         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2353             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2354                              "ANCIENT GREEK MUSICAL NOTATION",
2355                              "ANCIENTGREEKMUSICALNOTATION");
2356 
2357         /**
2358          * Constant for the "Counting Rod Numerals" Unicode character block.
2359          * @since 1.7
2360          */
2361         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2362             new UnicodeBlock("COUNTING_ROD_NUMERALS",
2363                              "COUNTING ROD NUMERALS",
2364                              "COUNTINGRODNUMERALS");
2365 
2366         /**
2367          * Constant for the "Mahjong Tiles" Unicode character block.
2368          * @since 1.7
2369          */
2370         public static final UnicodeBlock MAHJONG_TILES =
2371             new UnicodeBlock("MAHJONG_TILES",
2372                              "MAHJONG TILES",
2373                              "MAHJONGTILES");
2374 
2375         /**
2376          * Constant for the "Domino Tiles" Unicode character block.
2377          * @since 1.7
2378          */
2379         public static final UnicodeBlock DOMINO_TILES =
2380             new UnicodeBlock("DOMINO_TILES",
2381                              "DOMINO TILES",
2382                              "DOMINOTILES");
2383 
2384         /**
2385          * Constant for the "Playing Cards" Unicode character block.
2386          * @since 1.7
2387          */
2388         public static final UnicodeBlock PLAYING_CARDS =
2389             new UnicodeBlock("PLAYING_CARDS",
2390                              "PLAYING CARDS",
2391                              "PLAYINGCARDS");
2392 
2393         /**
2394          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2395          * block.
2396          * @since 1.7
2397          */
2398         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2399             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2400                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2401                              "ENCLOSEDALPHANUMERICSUPPLEMENT");
2402 
2403         /**
2404          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2405          * block.
2406          * @since 1.7
2407          */
2408         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2409             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2410                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2411                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2412 
2413         /**
2414          * Constant for the "Miscellaneous Symbols and Pictographs" Unicode
2415          * character block.
2416          * @since 1.7
2417          */
2418         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2419             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2420                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2421                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2422 
2423         /**
2424          * Constant for the "Emoticons" Unicode character block.
2425          * @since 1.7
2426          */
2427         public static final UnicodeBlock EMOTICONS =
2428             new UnicodeBlock("EMOTICONS");
2429 
2430         /**
2431          * Constant for the "Transport And Map Symbols" Unicode character block (U+1F680..U+1F6FF).
2432          * @since 1.7
2433          */
2434         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2435             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2436                              "TRANSPORT AND MAP SYMBOLS",
2437                              "TRANSPORTANDMAPSYMBOLS");
2438 
2439         /**
2440          * Constant for the "Alchemical Symbols" Unicode character block (U+1F700..U+1F77F).
2441          * @since 1.7
2442          */
2443         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2444             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2445                              "ALCHEMICAL SYMBOLS",
2446                              "ALCHEMICALSYMBOLS");
2447 
2448         /**
2449          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2450          * character block (U+2A700..U+2B73F).
2451          * @since 1.7
2452          */
2453         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2454             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2455                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2456                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2457 
2458         /**
2459          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2460          * character block (U+2B740..U+2B81F).
2461          * @since 1.7
2462          */
2463         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2464             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2465                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2466                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2467 
2468         /**
2469          * Constant for the "Arabic Extended-A" Unicode character block (U+08A0..U+08FF).
2470          * @since 1.8
2471          */
2472         public static final UnicodeBlock ARABIC_EXTENDED_A =
2473             new UnicodeBlock("ARABIC_EXTENDED_A",
2474                              "ARABIC EXTENDED-A",
2475                              "ARABICEXTENDED-A");
2476 
2477         /**
2478          * Constant for the "Sundanese Supplement" Unicode character block (U+1CC0..U+1CCF).
2479          * @since 1.8
2480          */
2481         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2482             new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2483                              "SUNDANESE SUPPLEMENT",
2484                              "SUNDANESESUPPLEMENT");
2485 
2486         /**
2487          * Constant for the "Meetei Mayek Extensions" Unicode character block (U+AAE0..U+AAFF).
2488          * @since 1.8
2489          */
2490         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2491             new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2492                              "MEETEI MAYEK EXTENSIONS",
2493                              "MEETEIMAYEKEXTENSIONS");
2494 
2495         /**
2496          * Constant for the "Meroitic Hieroglyphs" Unicode character block (U+10980..U+1099F).
2497          * @since 1.8
2498          */
2499         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2500             new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2501                              "MEROITIC HIEROGLYPHS",
2502                              "MEROITICHIEROGLYPHS");
2503 
2504         /**
2505          * Constant for the "Meroitic Cursive" Unicode character block (U+109A0..U+109FF).
2506          * @since 1.8
2507          */
2508         public static final UnicodeBlock MEROITIC_CURSIVE =
2509             new UnicodeBlock("MEROITIC_CURSIVE",
2510                              "MEROITIC CURSIVE",
2511                              "MEROITICCURSIVE");
2512 
2513         /**
2514          * Constant for the "Sora Sompeng" Unicode character block (U+110D0..U+110FF).
2515          * @since 1.8
2516          */
2517         public static final UnicodeBlock SORA_SOMPENG =
2518             new UnicodeBlock("SORA_SOMPENG",
2519                              "SORA SOMPENG",
2520                              "SORASOMPENG");
2521 
2522         /**
2523          * Constant for the "Chakma" Unicode character block (U+11100..U+1114F).
2524          * @since 1.8
2525          */
2526         public static final UnicodeBlock CHAKMA =
2527             new UnicodeBlock("CHAKMA");
2528 
2529         /**
2530          * Constant for the "Sharada" Unicode character block (U+11180..U+111DF).
2531          * @since 1.8
2532          */
2533         public static final UnicodeBlock SHARADA =
2534             new UnicodeBlock("SHARADA");
2535 
2536         /**
2537          * Constant for the "Takri" Unicode character block (U+11680..U+116CF).
2538          * @since 1.8
2539          */
2540         public static final UnicodeBlock TAKRI =
2541             new UnicodeBlock("TAKRI");
2542 
2543         /**
2544          * Constant for the "Miao" Unicode character block (U+16F00..U+16F9F).
2545          * @since 1.8
2546          */
2547         public static final UnicodeBlock MIAO =
2548             new UnicodeBlock("MIAO");
2549 
2550         /**
2551          * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2552          * character block (U+1EE00..U+1EEFF).
2553          * @since 1.8
2554          */
2555         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2556             new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2557                              "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2558                              "ARABICMATHEMATICALALPHABETICSYMBOLS");
2559 
2560         private static final int blockStarts[] = {   // first code point of each range, ascending; entry i pairs with blocks[i]
2561             0x0000,   // 0000..007F; Basic Latin
2562             0x0080,   // 0080..00FF; Latin-1 Supplement
2563             0x0100,   // 0100..017F; Latin Extended-A
2564             0x0180,   // 0180..024F; Latin Extended-B
2565             0x0250,   // 0250..02AF; IPA Extensions
2566             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2567             0x0300,   // 0300..036F; Combining Diacritical Marks
2568             0x0370,   // 0370..03FF; Greek and Coptic
2569             0x0400,   // 0400..04FF; Cyrillic
2570             0x0500,   // 0500..052F; Cyrillic Supplement
2571             0x0530,   // 0530..058F; Armenian
2572             0x0590,   // 0590..05FF; Hebrew
2573             0x0600,   // 0600..06FF; Arabic
2574             0x0700,   // 0700..074F; Syriac
2575             0x0750,   // 0750..077F; Arabic Supplement
2576             0x0780,   // 0780..07BF; Thaana
2577             0x07C0,   // 07C0..07FF; NKo
2578             0x0800,   // 0800..083F; Samaritan
2579             0x0840,   // 0840..085F; Mandaic
2580             0x0860,   //             unassigned
2581             0x08A0,   // 08A0..08FF; Arabic Extended-A
2582             0x0900,   // 0900..097F; Devanagari
2583             0x0980,   // 0980..09FF; Bengali
2584             0x0A00,   // 0A00..0A7F; Gurmukhi
2585             0x0A80,   // 0A80..0AFF; Gujarati
2586             0x0B00,   // 0B00..0B7F; Oriya
2587             0x0B80,   // 0B80..0BFF; Tamil
2588             0x0C00,   // 0C00..0C7F; Telugu
2589             0x0C80,   // 0C80..0CFF; Kannada
2590             0x0D00,   // 0D00..0D7F; Malayalam
2591             0x0D80,   // 0D80..0DFF; Sinhala
2592             0x0E00,   // 0E00..0E7F; Thai
2593             0x0E80,   // 0E80..0EFF; Lao
2594             0x0F00,   // 0F00..0FFF; Tibetan
2595             0x1000,   // 1000..109F; Myanmar
2596             0x10A0,   // 10A0..10FF; Georgian
2597             0x1100,   // 1100..11FF; Hangul Jamo
2598             0x1200,   // 1200..137F; Ethiopic
2599             0x1380,   // 1380..139F; Ethiopic Supplement
2600             0x13A0,   // 13A0..13FF; Cherokee
2601             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2602             0x1680,   // 1680..169F; Ogham
2603             0x16A0,   // 16A0..16FF; Runic
2604             0x1700,   // 1700..171F; Tagalog
2605             0x1720,   // 1720..173F; Hanunoo
2606             0x1740,   // 1740..175F; Buhid
2607             0x1760,   // 1760..177F; Tagbanwa
2608             0x1780,   // 1780..17FF; Khmer
2609             0x1800,   // 1800..18AF; Mongolian
2610             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2611             0x1900,   // 1900..194F; Limbu
2612             0x1950,   // 1950..197F; Tai Le
2613             0x1980,   // 1980..19DF; New Tai Lue
2614             0x19E0,   // 19E0..19FF; Khmer Symbols
2615             0x1A00,   // 1A00..1A1F; Buginese
2616             0x1A20,   // 1A20..1AAF; Tai Tham
2617             0x1AB0,   //             unassigned
2618             0x1B00,   // 1B00..1B7F; Balinese
2619             0x1B80,   // 1B80..1BBF; Sundanese
2620             0x1BC0,   // 1BC0..1BFF; Batak
2621             0x1C00,   // 1C00..1C4F; Lepcha
2622             0x1C50,   // 1C50..1C7F; Ol Chiki
2623             0x1C80,   //             unassigned
2624             0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
2625             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
2626             0x1D00,   // 1D00..1D7F; Phonetic Extensions
2627             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2628             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2629             0x1E00,   // 1E00..1EFF; Latin Extended Additional
2630             0x1F00,   // 1F00..1FFF; Greek Extended
2631             0x2000,   // 2000..206F; General Punctuation
2632             0x2070,   // 2070..209F; Superscripts and Subscripts
2633             0x20A0,   // 20A0..20CF; Currency Symbols
2634             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2635             0x2100,   // 2100..214F; Letterlike Symbols
2636             0x2150,   // 2150..218F; Number Forms
2637             0x2190,   // 2190..21FF; Arrows
2638             0x2200,   // 2200..22FF; Mathematical Operators
2639             0x2300,   // 2300..23FF; Miscellaneous Technical
2640             0x2400,   // 2400..243F; Control Pictures
2641             0x2440,   // 2440..245F; Optical Character Recognition
2642             0x2460,   // 2460..24FF; Enclosed Alphanumerics
2643             0x2500,   // 2500..257F; Box Drawing
2644             0x2580,   // 2580..259F; Block Elements
2645             0x25A0,   // 25A0..25FF; Geometric Shapes
2646             0x2600,   // 2600..26FF; Miscellaneous Symbols
2647             0x2700,   // 2700..27BF; Dingbats
2648             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2649             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2650             0x2800,   // 2800..28FF; Braille Patterns
2651             0x2900,   // 2900..297F; Supplemental Arrows-B
2652             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2653             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2654             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2655             0x2C00,   // 2C00..2C5F; Glagolitic
2656             0x2C60,   // 2C60..2C7F; Latin Extended-C
2657             0x2C80,   // 2C80..2CFF; Coptic
2658             0x2D00,   // 2D00..2D2F; Georgian Supplement
2659             0x2D30,   // 2D30..2D7F; Tifinagh
2660             0x2D80,   // 2D80..2DDF; Ethiopic Extended
2661             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2662             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2663             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2664             0x2F00,   // 2F00..2FDF; Kangxi Radicals
2665             0x2FE0,   //             unassigned
2666             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2667             0x3000,   // 3000..303F; CJK Symbols and Punctuation
2668             0x3040,   // 3040..309F; Hiragana
2669             0x30A0,   // 30A0..30FF; Katakana
2670             0x3100,   // 3100..312F; Bopomofo
2671             0x3130,   // 3130..318F; Hangul Compatibility Jamo
2672             0x3190,   // 3190..319F; Kanbun
2673             0x31A0,   // 31A0..31BF; Bopomofo Extended
2674             0x31C0,   // 31C0..31EF; CJK Strokes
2675             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2676             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2677             0x3300,   // 3300..33FF; CJK Compatibility
2678             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2679             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2680             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2681             0xA000,   // A000..A48F; Yi Syllables
2682             0xA490,   // A490..A4CF; Yi Radicals
2683             0xA4D0,   // A4D0..A4FF; Lisu
2684             0xA500,   // A500..A63F; Vai
2685             0xA640,   // A640..A69F; Cyrillic Extended-B
2686             0xA6A0,   // A6A0..A6FF; Bamum
2687             0xA700,   // A700..A71F; Modifier Tone Letters
2688             0xA720,   // A720..A7FF; Latin Extended-D
2689             0xA800,   // A800..A82F; Syloti Nagri
2690             0xA830,   // A830..A83F; Common Indic Number Forms
2691             0xA840,   // A840..A87F; Phags-pa
2692             0xA880,   // A880..A8DF; Saurashtra
2693             0xA8E0,   // A8E0..A8FF; Devanagari Extended
2694             0xA900,   // A900..A92F; Kayah Li
2695             0xA930,   // A930..A95F; Rejang
2696             0xA960,   // A960..A97F; Hangul Jamo Extended-A
2697             0xA980,   // A980..A9DF; Javanese
2698             0xA9E0,   //             unassigned
2699             0xAA00,   // AA00..AA5F; Cham
2700             0xAA60,   // AA60..AA7F; Myanmar Extended-A
2701             0xAA80,   // AA80..AADF; Tai Viet
2702             0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
2703             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
2704             0xAB30,   //             unassigned
2705             0xABC0,   // ABC0..ABFF; Meetei Mayek
2706             0xAC00,   // AC00..D7AF; Hangul Syllables
2707             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
2708             0xD800,   // D800..DB7F; High Surrogates
2709             0xDB80,   // DB80..DBFF; High Private Use Surrogates
2710             0xDC00,   // DC00..DFFF; Low Surrogates
2711             0xE000,   // E000..F8FF; Private Use Area
2712             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2713             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2714             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2715             0xFE00,   // FE00..FE0F; Variation Selectors
2716             0xFE10,   // FE10..FE1F; Vertical Forms
2717             0xFE20,   // FE20..FE2F; Combining Half Marks
2718             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2719             0xFE50,   // FE50..FE6F; Small Form Variants
2720             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2721             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2722             0xFFF0,   // FFF0..FFFF; Specials
2723             0x10000,  // 10000..1007F; Linear B Syllabary
2724             0x10080,  // 10080..100FF; Linear B Ideograms
2725             0x10100,  // 10100..1013F; Aegean Numbers
2726             0x10140,  // 10140..1018F; Ancient Greek Numbers
2727             0x10190,  // 10190..101CF; Ancient Symbols
2728             0x101D0,  // 101D0..101FF; Phaistos Disc
2729             0x10200,  //               unassigned
2730             0x10280,  // 10280..1029F; Lycian
2731             0x102A0,  // 102A0..102DF; Carian
2732             0x102E0,  //               unassigned
2733             0x10300,  // 10300..1032F; Old Italic
2734             0x10330,  // 10330..1034F; Gothic
2735             0x10350,  //               unassigned
2736             0x10380,  // 10380..1039F; Ugaritic
2737             0x103A0,  // 103A0..103DF; Old Persian
2738             0x103E0,  //               unassigned
2739             0x10400,  // 10400..1044F; Deseret
2740             0x10450,  // 10450..1047F; Shavian
2741             0x10480,  // 10480..104AF; Osmanya
2742             0x104B0,  //               unassigned
2743             0x10800,  // 10800..1083F; Cypriot Syllabary
2744             0x10840,  // 10840..1085F; Imperial Aramaic
2745             0x10860,  //               unassigned
2746             0x10900,  // 10900..1091F; Phoenician
2747             0x10920,  // 10920..1093F; Lydian
2748             0x10940,  //               unassigned
2749             0x10980,  // 10980..1099F; Meroitic Hieroglyphs
2750             0x109A0,  // 109A0..109FF; Meroitic Cursive
2751             0x10A00,  // 10A00..10A5F; Kharoshthi
2752             0x10A60,  // 10A60..10A7F; Old South Arabian
2753             0x10A80,  //               unassigned
2754             0x10B00,  // 10B00..10B3F; Avestan
2755             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
2756             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
2757             0x10B80,  //               unassigned
2758             0x10C00,  // 10C00..10C4F; Old Turkic
2759             0x10C50,  //               unassigned
2760             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
2761             0x10E80,  //               unassigned
2762             0x11000,  // 11000..1107F; Brahmi
2763             0x11080,  // 11080..110CF; Kaithi
2764             0x110D0,  // 110D0..110FF; Sora Sompeng
2765             0x11100,  // 11100..1114F; Chakma
2766             0x11150,  //               unassigned
2767             0x11180,  // 11180..111DF; Sharada
2768             0x111E0,  //               unassigned
2769             0x11680,  // 11680..116CF; Takri
2770             0x116D0,  //               unassigned
2771             0x12000,  // 12000..123FF; Cuneiform
2772             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2773             0x12480,  //               unassigned
2774             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
2775             0x13430,  //               unassigned
2776             0x16800,  // 16800..16A3F; Bamum Supplement
2777             0x16A40,  //               unassigned
2778             0x16F00,  // 16F00..16F9F; Miao
2779             0x16FA0,  //               unassigned
2780             0x1B000,  // 1B000..1B0FF; Kana Supplement
2781             0x1B100,  //               unassigned
2782             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2783             0x1D100,  // 1D100..1D1FF; Musical Symbols
2784             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2785             0x1D250,  //               unassigned
2786             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2787             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2788             0x1D380,  //               unassigned
2789             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2790             0x1D800,  //               unassigned
2791             0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
2792             0x1EF00,  //               unassigned
2793             0x1F000,  // 1F000..1F02F; Mahjong Tiles
2794             0x1F030,  // 1F030..1F09F; Domino Tiles
2795             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
2796             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2797             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
2798             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2799             0x1F600,  // 1F600..1F64F; Emoticons
2800             0x1F650,  //               unassigned
2801             0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
2802             0x1F700,  // 1F700..1F77F; Alchemical Symbols
2803             0x1F780,  //               unassigned
2804             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2805             0x2A6E0,  //               unassigned
2806             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
2807             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
2808             0x2B820,  //               unassigned
2809             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2810             0x2FA20,  //               unassigned
2811             0xE0000,  // E0000..E007F; Tags
2812             0xE0080,  //               unassigned
2813             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2814             0xE01F0,  //               unassigned
2815             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2816             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
2817         };   // searched by of(int); keep the entry count and order in step with blocks[]
2818 
2819         private static final UnicodeBlock[] blocks = {   // blocks[i] is the block for the range starting at blockStarts[i]; null marks an unassigned range
2820             BASIC_LATIN,
2821             LATIN_1_SUPPLEMENT,
2822             LATIN_EXTENDED_A,
2823             LATIN_EXTENDED_B,
2824             IPA_EXTENSIONS,
2825             SPACING_MODIFIER_LETTERS,
2826             COMBINING_DIACRITICAL_MARKS,
2827             GREEK,
2828             CYRILLIC,
2829             CYRILLIC_SUPPLEMENTARY,
2830             ARMENIAN,
2831             HEBREW,
2832             ARABIC,
2833             SYRIAC,
2834             ARABIC_SUPPLEMENT,
2835             THAANA,
2836             NKO,
2837             SAMARITAN,
2838             MANDAIC,
2839             null,
2840             ARABIC_EXTENDED_A,
2841             DEVANAGARI,
2842             BENGALI,
2843             GURMUKHI,
2844             GUJARATI,
2845             ORIYA,
2846             TAMIL,
2847             TELUGU,
2848             KANNADA,
2849             MALAYALAM,
2850             SINHALA,
2851             THAI,
2852             LAO,
2853             TIBETAN,
2854             MYANMAR,
2855             GEORGIAN,
2856             HANGUL_JAMO,
2857             ETHIOPIC,
2858             ETHIOPIC_SUPPLEMENT,
2859             CHEROKEE,
2860             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2861             OGHAM,
2862             RUNIC,
2863             TAGALOG,
2864             HANUNOO,
2865             BUHID,
2866             TAGBANWA,
2867             KHMER,
2868             MONGOLIAN,
2869             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
2870             LIMBU,
2871             TAI_LE,
2872             NEW_TAI_LUE,
2873             KHMER_SYMBOLS,
2874             BUGINESE,
2875             TAI_THAM,
2876             null,
2877             BALINESE,
2878             SUNDANESE,
2879             BATAK,
2880             LEPCHA,
2881             OL_CHIKI,
2882             null,
2883             SUNDANESE_SUPPLEMENT,
2884             VEDIC_EXTENSIONS,
2885             PHONETIC_EXTENSIONS,
2886             PHONETIC_EXTENSIONS_SUPPLEMENT,
2887             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2888             LATIN_EXTENDED_ADDITIONAL,
2889             GREEK_EXTENDED,
2890             GENERAL_PUNCTUATION,
2891             SUPERSCRIPTS_AND_SUBSCRIPTS,
2892             CURRENCY_SYMBOLS,
2893             COMBINING_MARKS_FOR_SYMBOLS,
2894             LETTERLIKE_SYMBOLS,
2895             NUMBER_FORMS,
2896             ARROWS,
2897             MATHEMATICAL_OPERATORS,
2898             MISCELLANEOUS_TECHNICAL,
2899             CONTROL_PICTURES,
2900             OPTICAL_CHARACTER_RECOGNITION,
2901             ENCLOSED_ALPHANUMERICS,
2902             BOX_DRAWING,
2903             BLOCK_ELEMENTS,
2904             GEOMETRIC_SHAPES,
2905             MISCELLANEOUS_SYMBOLS,
2906             DINGBATS,
2907             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
2908             SUPPLEMENTAL_ARROWS_A,
2909             BRAILLE_PATTERNS,
2910             SUPPLEMENTAL_ARROWS_B,
2911             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
2912             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
2913             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
2914             GLAGOLITIC,
2915             LATIN_EXTENDED_C,
2916             COPTIC,
2917             GEORGIAN_SUPPLEMENT,
2918             TIFINAGH,
2919             ETHIOPIC_EXTENDED,
2920             CYRILLIC_EXTENDED_A,
2921             SUPPLEMENTAL_PUNCTUATION,
2922             CJK_RADICALS_SUPPLEMENT,
2923             KANGXI_RADICALS,
2924             null,
2925             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
2926             CJK_SYMBOLS_AND_PUNCTUATION,
2927             HIRAGANA,
2928             KATAKANA,
2929             BOPOMOFO,
2930             HANGUL_COMPATIBILITY_JAMO,
2931             KANBUN,
2932             BOPOMOFO_EXTENDED,
2933             CJK_STROKES,
2934             KATAKANA_PHONETIC_EXTENSIONS,
2935             ENCLOSED_CJK_LETTERS_AND_MONTHS,
2936             CJK_COMPATIBILITY,
2937             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2938             YIJING_HEXAGRAM_SYMBOLS,
2939             CJK_UNIFIED_IDEOGRAPHS,
2940             YI_SYLLABLES,
2941             YI_RADICALS,
2942             LISU,
2943             VAI,
2944             CYRILLIC_EXTENDED_B,
2945             BAMUM,
2946             MODIFIER_TONE_LETTERS,
2947             LATIN_EXTENDED_D,
2948             SYLOTI_NAGRI,
2949             COMMON_INDIC_NUMBER_FORMS,
2950             PHAGS_PA,
2951             SAURASHTRA,
2952             DEVANAGARI_EXTENDED,
2953             KAYAH_LI,
2954             REJANG,
2955             HANGUL_JAMO_EXTENDED_A,
2956             JAVANESE,
2957             null,
2958             CHAM,
2959             MYANMAR_EXTENDED_A,
2960             TAI_VIET,
2961             MEETEI_MAYEK_EXTENSIONS,
2962             ETHIOPIC_EXTENDED_A,
2963             null,
2964             MEETEI_MAYEK,
2965             HANGUL_SYLLABLES,
2966             HANGUL_JAMO_EXTENDED_B,
2967             HIGH_SURROGATES,
2968             HIGH_PRIVATE_USE_SURROGATES,
2969             LOW_SURROGATES,
2970             PRIVATE_USE_AREA,
2971             CJK_COMPATIBILITY_IDEOGRAPHS,
2972             ALPHABETIC_PRESENTATION_FORMS,
2973             ARABIC_PRESENTATION_FORMS_A,
2974             VARIATION_SELECTORS,
2975             VERTICAL_FORMS,
2976             COMBINING_HALF_MARKS,
2977             CJK_COMPATIBILITY_FORMS,
2978             SMALL_FORM_VARIANTS,
2979             ARABIC_PRESENTATION_FORMS_B,
2980             HALFWIDTH_AND_FULLWIDTH_FORMS,
2981             SPECIALS,
2982             LINEAR_B_SYLLABARY,
2983             LINEAR_B_IDEOGRAMS,
2984             AEGEAN_NUMBERS,
2985             ANCIENT_GREEK_NUMBERS,
2986             ANCIENT_SYMBOLS,
2987             PHAISTOS_DISC,
2988             null,
2989             LYCIAN,
2990             CARIAN,
2991             null,
2992             OLD_ITALIC,
2993             GOTHIC,
2994             null,
2995             UGARITIC,
2996             OLD_PERSIAN,
2997             null,
2998             DESERET,
2999             SHAVIAN,
3000             OSMANYA,
3001             null,
3002             CYPRIOT_SYLLABARY,
3003             IMPERIAL_ARAMAIC,
3004             null,
3005             PHOENICIAN,
3006             LYDIAN,
3007             null,
3008             MEROITIC_HIEROGLYPHS,
3009             MEROITIC_CURSIVE,
3010             KHAROSHTHI,
3011             OLD_SOUTH_ARABIAN,
3012             null,
3013             AVESTAN,
3014             INSCRIPTIONAL_PARTHIAN,
3015             INSCRIPTIONAL_PAHLAVI,
3016             null,
3017             OLD_TURKIC,
3018             null,
3019             RUMI_NUMERAL_SYMBOLS,
3020             null,
3021             BRAHMI,
3022             KAITHI,
3023             SORA_SOMPENG,
3024             CHAKMA,
3025             null,
3026             SHARADA,
3027             null,
3028             TAKRI,
3029             null,
3030             CUNEIFORM,
3031             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
3032             null,
3033             EGYPTIAN_HIEROGLYPHS,
3034             null,
3035             BAMUM_SUPPLEMENT,
3036             null,
3037             MIAO,
3038             null,
3039             KANA_SUPPLEMENT,
3040             null,
3041             BYZANTINE_MUSICAL_SYMBOLS,
3042             MUSICAL_SYMBOLS,
3043             ANCIENT_GREEK_MUSICAL_NOTATION,
3044             null,
3045             TAI_XUAN_JING_SYMBOLS,
3046             COUNTING_ROD_NUMERALS,
3047             null,
3048             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
3049             null,
3050             ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
3051             null,
3052             MAHJONG_TILES,
3053             DOMINO_TILES,
3054             PLAYING_CARDS,
3055             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
3056             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
3057             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
3058             EMOTICONS,
3059             null,
3060             TRANSPORT_AND_MAP_SYMBOLS,
3061             ALCHEMICAL_SYMBOLS,
3062             null,
3063             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
3064             null,
3065             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
3066             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
3067             null,
3068             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
3069             null,
3070             TAGS,
3071             null,
3072             VARIATION_SELECTORS_SUPPLEMENT,
3073             null,
3074             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
3075             SUPPLEMENTARY_PRIVATE_USE_AREA_B
3076         };   // returned by index from of(int); keep the entry count and order in step with blockStarts[]
3077 
3078 
3079         /**
3080          * Looks up the Unicode block to which the given {@code char} value
3081          * belongs. The result is {@code null} when the character does not
3082          * fall inside any defined block.
3083          *
3084          * <p><b>Note:</b> This method cannot handle
3085          * <a href="Character.html#supplementary"> supplementary
3086          * characters</a>.  Use {@link #of(int)} when every Unicode character,
3087          * supplementary characters included, must be supported.
3088          *
3089          * @param   c  The character in question
3090          * @return  The {@code UnicodeBlock} instance for the block that
3091          *          contains {@code c}, or {@code null} if {@code c} is not
3092          *          a member of any Unicode block
3093          */
3094         public static UnicodeBlock of(char c) {
3095             final int codePoint = c;   // widen so the int overload is selected
3096             return of(codePoint);
3097         }
3098 
3099         /**
3100          * Returns the object representing the Unicode block
3101          * containing the given character (Unicode code point), or
3102          * {@code null} if the character is not a member of a
3103          * defined block.
3104          *
3105          * @param   codePoint the character (Unicode code point) in question.
3106          * @return  The {@code UnicodeBlock} instance representing the
3107          *          Unicode block of which this character is a member, or
3108          *          {@code null} if the character is not a member of any
3109          *          Unicode block
3110          * @exception IllegalArgumentException if the specified
3111          * {@code codePoint} is an invalid Unicode code point.
3112          * @see Character#isValidCodePoint(int)
3113          * @since   1.5
3114          */
3115         public static UnicodeBlock of(int codePoint) {
3116             if (!isValidCodePoint(codePoint)) {
3117                 throw new IllegalArgumentException();
3118             }
3119 
3120             int top, bottom, current;
3121             bottom = 0;
3122             top = blockStarts.length;
3123             current = top/2;
3124 
3125             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3126             while (top - bottom > 1) {
3127                 if (codePoint >= blockStarts[current]) {
3128                     bottom = current;
3129                 } else {
3130                     top = current;
3131                 }
3132                 current = (top + bottom) / 2;
3133             }
3134             return blocks[current];
3135         }
3136 
3137         /**
3138          * Returns the UnicodeBlock with the given name. Block
3139          * names are determined by The Unicode Standard. The file
3140          * Blocks-&lt;version&gt;.txt defines blocks for a particular
3141          * version of the standard. The {@link Character} class specifies
3142          * the version of the standard that it supports.
3143          * <p>
3144          * This method accepts block names in the following forms:
3145          * <ol>
3146          * <li> Canonical block names as defined by the Unicode Standard.
3147          * For example, the standard defines a "Basic Latin" block. Therefore, this
3148          * method accepts "Basic Latin" as a valid block name. The documentation of
3149          * each UnicodeBlock provides the canonical name.
3150          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3151          * is a valid block name for the "Basic Latin" block.
3152          * <li>The text representation of each constant UnicodeBlock identifier.
3153          * For example, this method will return the {@link #BASIC_LATIN} block if
3154          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3155          * hyphens in the canonical name with underscores.
3156          * </ol>
3157          * Finally, character case is ignored for all of the valid block name forms.
3158          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3159          * The en_US locale's case mapping rules are used to provide case-insensitive
3160          * string comparisons for block name validation.
3161          * <p>
3162          * If the Unicode Standard changes block names, both the previous and
3163          * current names will be accepted.
3164          *
3165          * @param blockName A {@code UnicodeBlock} name.
3166          * @return The {@code UnicodeBlock} instance identified
3167          *         by {@code blockName}
3168          * @throws IllegalArgumentException if {@code blockName} is an
3169          *         invalid name
3170          * @throws NullPointerException if {@code blockName} is null
3171          * @since 1.5
3172          */
3173         public static final UnicodeBlock forName(String blockName) {
3174             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3175             if (block == null) {
3176                 throw new IllegalArgumentException();
3177             }
3178             return block;
3179         }
3180     }
3181 
3182 
3183     /**
3184      * A family of character subsets representing the character scripts
3185      * defined in the <a href="http://www.unicode.org/reports/tr24/">
3186      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3187      * character is assigned to a single Unicode script, either a specific
3188      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3189      * one of the following three special values,
3190      * {@link Character.UnicodeScript#INHERITED Inherited},
3191      * {@link Character.UnicodeScript#COMMON Common} or
3192      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3193      *
3194      * @since 1.7
3195      */
3196     public static enum UnicodeScript {
3197         /**
3198          * Unicode script "Common".
3199          */
3200         COMMON,
3201 
3202         /**
3203          * Unicode script "Latin".
3204          */
3205         LATIN,
3206 
3207         /**
3208          * Unicode script "Greek".
3209          */
3210         GREEK,
3211 
3212         /**
3213          * Unicode script "Cyrillic".
3214          */
3215         CYRILLIC,
3216 
3217         /**
3218          * Unicode script "Armenian".
3219          */
3220         ARMENIAN,
3221 
3222         /**
3223          * Unicode script "Hebrew".
3224          */
3225         HEBREW,
3226 
3227         /**
3228          * Unicode script "Arabic".
3229          */
3230         ARABIC,
3231 
3232         /**
3233          * Unicode script "Syriac".
3234          */
3235         SYRIAC,
3236 
3237         /**
3238          * Unicode script "Thaana".
3239          */
3240         THAANA,
3241 
3242         /**
3243          * Unicode script "Devanagari".
3244          */
3245         DEVANAGARI,
3246 
3247         /**
3248          * Unicode script "Bengali".
3249          */
3250         BENGALI,
3251 
3252         /**
3253          * Unicode script "Gurmukhi".
3254          */
3255         GURMUKHI,
3256 
3257         /**
3258          * Unicode script "Gujarati".
3259          */
3260         GUJARATI,
3261 
3262         /**
3263          * Unicode script "Oriya".
3264          */
3265         ORIYA,
3266 
3267         /**
3268          * Unicode script "Tamil".
3269          */
3270         TAMIL,
3271 
3272         /**
3273          * Unicode script "Telugu".
3274          */
3275         TELUGU,
3276 
3277         /**
3278          * Unicode script "Kannada".
3279          */
3280         KANNADA,
3281 
3282         /**
3283          * Unicode script "Malayalam".
3284          */
3285         MALAYALAM,
3286 
3287         /**
3288          * Unicode script "Sinhala".
3289          */
3290         SINHALA,
3291 
3292         /**
3293          * Unicode script "Thai".
3294          */
3295         THAI,
3296 
3297         /**
3298          * Unicode script "Lao".
3299          */
3300         LAO,
3301 
3302         /**
3303          * Unicode script "Tibetan".
3304          */
3305         TIBETAN,
3306 
3307         /**
3308          * Unicode script "Myanmar".
3309          */
3310         MYANMAR,
3311 
3312         /**
3313          * Unicode script "Georgian".
3314          */
3315         GEORGIAN,
3316 
3317         /**
3318          * Unicode script "Hangul".
3319          */
3320         HANGUL,
3321 
3322         /**
3323          * Unicode script "Ethiopic".
3324          */
3325         ETHIOPIC,
3326 
3327         /**
3328          * Unicode script "Cherokee".
3329          */
3330         CHEROKEE,
3331 
3332         /**
3333          * Unicode script "Canadian_Aboriginal".
3334          */
3335         CANADIAN_ABORIGINAL,
3336 
3337         /**
3338          * Unicode script "Ogham".
3339          */
3340         OGHAM,
3341 
3342         /**
3343          * Unicode script "Runic".
3344          */
3345         RUNIC,
3346 
3347         /**
3348          * Unicode script "Khmer".
3349          */
3350         KHMER,
3351 
3352         /**
3353          * Unicode script "Mongolian".
3354          */
3355         MONGOLIAN,
3356 
3357         /**
3358          * Unicode script "Hiragana".
3359          */
3360         HIRAGANA,
3361 
3362         /**
3363          * Unicode script "Katakana".
3364          */
3365         KATAKANA,
3366 
3367         /**
3368          * Unicode script "Bopomofo".
3369          */
3370         BOPOMOFO,
3371 
3372         /**
3373          * Unicode script "Han".
3374          */
3375         HAN,
3376 
3377         /**
3378          * Unicode script "Yi".
3379          */
3380         YI,
3381 
3382         /**
3383          * Unicode script "Old_Italic".
3384          */
3385         OLD_ITALIC,
3386 
3387         /**
3388          * Unicode script "Gothic".
3389          */
3390         GOTHIC,
3391 
3392         /**
3393          * Unicode script "Deseret".
3394          */
3395         DESERET,
3396 
3397         /**
3398          * Unicode script "Inherited".
3399          */
3400         INHERITED,
3401 
3402         /**
3403          * Unicode script "Tagalog".
3404          */
3405         TAGALOG,
3406 
3407         /**
3408          * Unicode script "Hanunoo".
3409          */
3410         HANUNOO,
3411 
3412         /**
3413          * Unicode script "Buhid".
3414          */
3415         BUHID,
3416 
3417         /**
3418          * Unicode script "Tagbanwa".
3419          */
3420         TAGBANWA,
3421 
3422         /**
3423          * Unicode script "Limbu".
3424          */
3425         LIMBU,
3426 
3427         /**
3428          * Unicode script "Tai_Le".
3429          */
3430         TAI_LE,
3431 
3432         /**
3433          * Unicode script "Linear_B".
3434          */
3435         LINEAR_B,
3436 
3437         /**
3438          * Unicode script "Ugaritic".
3439          */
3440         UGARITIC,
3441 
3442         /**
3443          * Unicode script "Shavian".
3444          */
3445         SHAVIAN,
3446 
3447         /**
3448          * Unicode script "Osmanya".
3449          */
3450         OSMANYA,
3451 
3452         /**
3453          * Unicode script "Cypriot".
3454          */
3455         CYPRIOT,
3456 
3457         /**
3458          * Unicode script "Braille".
3459          */
3460         BRAILLE,
3461 
3462         /**
3463          * Unicode script "Buginese".
3464          */
3465         BUGINESE,
3466 
3467         /**
3468          * Unicode script "Coptic".
3469          */
3470         COPTIC,
3471 
3472         /**
3473          * Unicode script "New_Tai_Lue".
3474          */
3475         NEW_TAI_LUE,
3476 
3477         /**
3478          * Unicode script "Glagolitic".
3479          */
3480         GLAGOLITIC,
3481 
3482         /**
3483          * Unicode script "Tifinagh".
3484          */
3485         TIFINAGH,
3486 
3487         /**
3488          * Unicode script "Syloti_Nagri".
3489          */
3490         SYLOTI_NAGRI,
3491 
3492         /**
3493          * Unicode script "Old_Persian".
3494          */
3495         OLD_PERSIAN,
3496 
3497         /**
3498          * Unicode script "Kharoshthi".
3499          */
3500         KHAROSHTHI,
3501 
3502         /**
3503          * Unicode script "Balinese".
3504          */
3505         BALINESE,
3506 
3507         /**
3508          * Unicode script "Cuneiform".
3509          */
3510         CUNEIFORM,
3511 
3512         /**
3513          * Unicode script "Phoenician".
3514          */
3515         PHOENICIAN,
3516 
3517         /**
3518          * Unicode script "Phags_Pa".
3519          */
3520         PHAGS_PA,
3521 
3522         /**
3523          * Unicode script "Nko".
3524          */
3525         NKO,
3526 
3527         /**
3528          * Unicode script "Sundanese".
3529          */
3530         SUNDANESE,
3531 
3532         /**
3533          * Unicode script "Batak".
3534          */
3535         BATAK,
3536 
3537         /**
3538          * Unicode script "Lepcha".
3539          */
3540         LEPCHA,
3541 
3542         /**
3543          * Unicode script "Ol_Chiki".
3544          */
3545         OL_CHIKI,
3546 
3547         /**
3548          * Unicode script "Vai".
3549          */
3550         VAI,
3551 
3552         /**
3553          * Unicode script "Saurashtra".
3554          */
3555         SAURASHTRA,
3556 
3557         /**
3558          * Unicode script "Kayah_Li".
3559          */
3560         KAYAH_LI,
3561 
3562         /**
3563          * Unicode script "Rejang".
3564          */
3565         REJANG,
3566 
3567         /**
3568          * Unicode script "Lycian".
3569          */
3570         LYCIAN,
3571 
3572         /**
3573          * Unicode script "Carian".
3574          */
3575         CARIAN,
3576 
3577         /**
3578          * Unicode script "Lydian".
3579          */
3580         LYDIAN,
3581 
3582         /**
3583          * Unicode script "Cham".
3584          */
3585         CHAM,
3586 
3587         /**
3588          * Unicode script "Tai_Tham".
3589          */
3590         TAI_THAM,
3591 
3592         /**
3593          * Unicode script "Tai_Viet".
3594          */
3595         TAI_VIET,
3596 
3597         /**
3598          * Unicode script "Avestan".
3599          */
3600         AVESTAN,
3601 
3602         /**
3603          * Unicode script "Egyptian_Hieroglyphs".
3604          */
3605         EGYPTIAN_HIEROGLYPHS,
3606 
3607         /**
3608          * Unicode script "Samaritan".
3609          */
3610         SAMARITAN,
3611 
3612         /**
3613          * Unicode script "Mandaic".
3614          */
3615         MANDAIC,
3616 
3617         /**
3618          * Unicode script "Lisu".
3619          */
3620         LISU,
3621 
3622         /**
3623          * Unicode script "Bamum".
3624          */
3625         BAMUM,
3626 
3627         /**
3628          * Unicode script "Javanese".
3629          */
3630         JAVANESE,
3631 
3632         /**
3633          * Unicode script "Meetei_Mayek".
3634          */
3635         MEETEI_MAYEK,
3636 
3637         /**
3638          * Unicode script "Imperial_Aramaic".
3639          */
3640         IMPERIAL_ARAMAIC,
3641 
3642         /**
3643          * Unicode script "Old_South_Arabian".
3644          */
3645         OLD_SOUTH_ARABIAN,
3646 
3647         /**
3648          * Unicode script "Inscriptional_Parthian".
3649          */
3650         INSCRIPTIONAL_PARTHIAN,
3651 
3652         /**
3653          * Unicode script "Inscriptional_Pahlavi".
3654          */
3655         INSCRIPTIONAL_PAHLAVI,
3656 
3657         /**
3658          * Unicode script "Old_Turkic".
3659          */
3660         OLD_TURKIC,
3661 
3662         /**
3663          * Unicode script "Brahmi".
3664          */
3665         BRAHMI,
3666 
3667         /**
3668          * Unicode script "Kaithi".
3669          */
3670         KAITHI,
3671 
3672         /**
         * Unicode script "Meroitic_Hieroglyphs".
3674          */
3675         MEROITIC_HIEROGLYPHS,
3676 
3677         /**
         * Unicode script "Meroitic_Cursive".
3679          */
3680         MEROITIC_CURSIVE,
3681 
3682         /**
         * Unicode script "Sora_Sompeng".
3684          */
3685         SORA_SOMPENG,
3686 
3687         /**
3688          * Unicode script "Chakma".
3689          */
3690         CHAKMA,
3691 
3692         /**
3693          * Unicode script "Sharada".
3694          */
3695         SHARADA,
3696 
3697         /**
3698          * Unicode script "Takri".
3699          */
3700         TAKRI,
3701 
3702         /**
3703          * Unicode script "Miao".
3704          */
3705         MIAO,
3706 
3707         /**
3708          * Unicode script "Unknown".
3709          */
3710         UNKNOWN;
3711 
3712         private static final int[] scriptStarts = {
3713             0x0000,   // 0000..0040; COMMON
3714             0x0041,   // 0041..005A; LATIN
3715             0x005B,   // 005B..0060; COMMON
3716             0x0061,   // 0061..007A; LATIN
3717             0x007B,   // 007B..00A9; COMMON
3718             0x00AA,   // 00AA..00AA; LATIN
3719             0x00AB,   // 00AB..00B9; COMMON
3720             0x00BA,   // 00BA..00BA; LATIN
3721             0x00BB,   // 00BB..00BF; COMMON
3722             0x00C0,   // 00C0..00D6; LATIN
3723             0x00D7,   // 00D7..00D7; COMMON
3724             0x00D8,   // 00D8..00F6; LATIN
3725             0x00F7,   // 00F7..00F7; COMMON
3726             0x00F8,   // 00F8..02B8; LATIN
3727             0x02B9,   // 02B9..02DF; COMMON
3728             0x02E0,   // 02E0..02E4; LATIN
3729             0x02E5,   // 02E5..02E9; COMMON
3730             0x02EA,   // 02EA..02EB; BOPOMOFO
3731             0x02EC,   // 02EC..02FF; COMMON
3732             0x0300,   // 0300..036F; INHERITED
3733             0x0370,   // 0370..0373; GREEK
3734             0x0374,   // 0374..0374; COMMON
3735             0x0375,   // 0375..037D; GREEK
3736             0x037E,   // 037E..0383; COMMON
3737             0x0384,   // 0384..0384; GREEK
3738             0x0385,   // 0385..0385; COMMON
3739             0x0386,   // 0386..0386; GREEK
3740             0x0387,   // 0387..0387; COMMON
3741             0x0388,   // 0388..03E1; GREEK
3742             0x03E2,   // 03E2..03EF; COPTIC
3743             0x03F0,   // 03F0..03FF; GREEK
3744             0x0400,   // 0400..0484; CYRILLIC
3745             0x0485,   // 0485..0486; INHERITED
3746             0x0487,   // 0487..0530; CYRILLIC
3747             0x0531,   // 0531..0588; ARMENIAN
3748             0x0589,   // 0589..0589; COMMON
3749             0x058A,   // 058A..0590; ARMENIAN
3750             0x0591,   // 0591..05FF; HEBREW
3751             0x0600,   // 0600..060B; ARABIC
3752             0x060C,   // 060C..060C; COMMON
3753             0x060D,   // 060D..061A; ARABIC
3754             0x061B,   // 061B..061D; COMMON
3755             0x061E,   // 061E..061E; ARABIC
3756             0x061F,   // 061F..061F; COMMON
3757             0x0620,   // 0620..063F; ARABIC
3758             0x0640,   // 0640..0640; COMMON
3759             0x0641,   // 0641..064A; ARABIC
3760             0x064B,   // 064B..0655; INHERITED
3761             0x0656,   // 0656..065F; ARABIC
3762             0x0660,   // 0660..0669; COMMON
3763             0x066A,   // 066A..066F; ARABIC
3764             0x0670,   // 0670..0670; INHERITED
3765             0x0671,   // 0671..06DC; ARABIC
3766             0x06DD,   // 06DD..06DD; COMMON
3767             0x06DE,   // 06DE..06FF; ARABIC
3768             0x0700,   // 0700..074F; SYRIAC
3769             0x0750,   // 0750..077F; ARABIC
3770             0x0780,   // 0780..07BF; THAANA
3771             0x07C0,   // 07C0..07FF; NKO
3772             0x0800,   // 0800..083F; SAMARITAN
3773             0x0840,   // 0840..089F; MANDAIC
3774             0x08A0,   // 08A0..08FF; ARABIC
3775             0x0900,   // 0900..0950; DEVANAGARI
3776             0x0951,   // 0951..0952; INHERITED
3777             0x0953,   // 0953..0963; DEVANAGARI
3778             0x0964,   // 0964..0965; COMMON
3779             0x0966,   // 0966..0980; DEVANAGARI
3780             0x0981,   // 0981..0A00; BENGALI
3781             0x0A01,   // 0A01..0A80; GURMUKHI
3782             0x0A81,   // 0A81..0B00; GUJARATI
3783             0x0B01,   // 0B01..0B81; ORIYA
3784             0x0B82,   // 0B82..0C00; TAMIL
3785             0x0C01,   // 0C01..0C81; TELUGU
3786             0x0C82,   // 0C82..0CF0; KANNADA
3787             0x0D02,   // 0D02..0D81; MALAYALAM
3788             0x0D82,   // 0D82..0E00; SINHALA
3789             0x0E01,   // 0E01..0E3E; THAI
3790             0x0E3F,   // 0E3F..0E3F; COMMON
3791             0x0E40,   // 0E40..0E80; THAI
3792             0x0E81,   // 0E81..0EFF; LAO
3793             0x0F00,   // 0F00..0FD4; TIBETAN
3794             0x0FD5,   // 0FD5..0FD8; COMMON
3795             0x0FD9,   // 0FD9..0FFF; TIBETAN
3796             0x1000,   // 1000..109F; MYANMAR
3797             0x10A0,   // 10A0..10FA; GEORGIAN
3798             0x10FB,   // 10FB..10FB; COMMON
3799             0x10FC,   // 10FC..10FF; GEORGIAN
3800             0x1100,   // 1100..11FF; HANGUL
3801             0x1200,   // 1200..139F; ETHIOPIC
3802             0x13A0,   // 13A0..13FF; CHEROKEE
3803             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
3804             0x1680,   // 1680..169F; OGHAM
3805             0x16A0,   // 16A0..16EA; RUNIC
3806             0x16EB,   // 16EB..16ED; COMMON
3807             0x16EE,   // 16EE..16FF; RUNIC
3808             0x1700,   // 1700..171F; TAGALOG
3809             0x1720,   // 1720..1734; HANUNOO
3810             0x1735,   // 1735..173F; COMMON
3811             0x1740,   // 1740..175F; BUHID
3812             0x1760,   // 1760..177F; TAGBANWA
3813             0x1780,   // 1780..17FF; KHMER
3814             0x1800,   // 1800..1801; MONGOLIAN
3815             0x1802,   // 1802..1803; COMMON
3816             0x1804,   // 1804..1804; MONGOLIAN
3817             0x1805,   // 1805..1805; COMMON
3818             0x1806,   // 1806..18AF; MONGOLIAN
3819             0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
3820             0x1900,   // 1900..194F; LIMBU
3821             0x1950,   // 1950..197F; TAI_LE
3822             0x1980,   // 1980..19DF; NEW_TAI_LUE
3823             0x19E0,   // 19E0..19FF; KHMER
3824             0x1A00,   // 1A00..1A1F; BUGINESE
3825             0x1A20,   // 1A20..1AFF; TAI_THAM
3826             0x1B00,   // 1B00..1B7F; BALINESE
3827             0x1B80,   // 1B80..1BBF; SUNDANESE
3828             0x1BC0,   // 1BC0..1BFF; BATAK
3829             0x1C00,   // 1C00..1C4F; LEPCHA
3830             0x1C50,   // 1C50..1CBF; OL_CHIKI
3831             0x1CC0,   // 1CC0..1CCF; SUNDANESE
3832             0x1CD0,   // 1CD0..1CD2; INHERITED
3833             0x1CD3,   // 1CD3..1CD3; COMMON
3834             0x1CD4,   // 1CD4..1CE0; INHERITED
3835             0x1CE1,   // 1CE1..1CE1; COMMON
3836             0x1CE2,   // 1CE2..1CE8; INHERITED
3837             0x1CE9,   // 1CE9..1CEC; COMMON
3838             0x1CED,   // 1CED..1CED; INHERITED
3839             0x1CEE,   // 1CEE..1CF3; COMMON
3840             0x1CF4,   // 1CF4..1CF4; INHERITED
3841             0x1CF5,   // 1CF5..1CFF; COMMON
3842             0x1D00,   // 1D00..1D25; LATIN
3843             0x1D26,   // 1D26..1D2A; GREEK
3844             0x1D2B,   // 1D2B..1D2B; CYRILLIC
3845             0x1D2C,   // 1D2C..1D5C; LATIN
3846             0x1D5D,   // 1D5D..1D61; GREEK
3847             0x1D62,   // 1D62..1D65; LATIN
3848             0x1D66,   // 1D66..1D6A; GREEK
3849             0x1D6B,   // 1D6B..1D77; LATIN
3850             0x1D78,   // 1D78..1D78; CYRILLIC
3851             0x1D79,   // 1D79..1DBE; LATIN
3852             0x1DBF,   // 1DBF..1DBF; GREEK
3853             0x1DC0,   // 1DC0..1DFF; INHERITED
3854             0x1E00,   // 1E00..1EFF; LATIN
3855             0x1F00,   // 1F00..1FFF; GREEK
3856             0x2000,   // 2000..200B; COMMON
3857             0x200C,   // 200C..200D; INHERITED
3858             0x200E,   // 200E..2070; COMMON
3859             0x2071,   // 2071..2073; LATIN
3860             0x2074,   // 2074..207E; COMMON
3861             0x207F,   // 207F..207F; LATIN
3862             0x2080,   // 2080..208F; COMMON
3863             0x2090,   // 2090..209F; LATIN
3864             0x20A0,   // 20A0..20CF; COMMON
3865             0x20D0,   // 20D0..20FF; INHERITED
3866             0x2100,   // 2100..2125; COMMON
3867             0x2126,   // 2126..2126; GREEK
3868             0x2127,   // 2127..2129; COMMON
3869             0x212A,   // 212A..212B; LATIN
3870             0x212C,   // 212C..2131; COMMON
3871             0x2132,   // 2132..2132; LATIN
3872             0x2133,   // 2133..214D; COMMON
3873             0x214E,   // 214E..214E; LATIN
3874             0x214F,   // 214F..215F; COMMON
3875             0x2160,   // 2160..2188; LATIN
3876             0x2189,   // 2189..27FF; COMMON
3877             0x2800,   // 2800..28FF; BRAILLE
3878             0x2900,   // 2900..2BFF; COMMON
3879             0x2C00,   // 2C00..2C5F; GLAGOLITIC
3880             0x2C60,   // 2C60..2C7F; LATIN
3881             0x2C80,   // 2C80..2CFF; COPTIC
3882             0x2D00,   // 2D00..2D2F; GEORGIAN
3883             0x2D30,   // 2D30..2D7F; TIFINAGH
3884             0x2D80,   // 2D80..2DDF; ETHIOPIC
3885             0x2DE0,   // 2DE0..2DFF; CYRILLIC
3886             0x2E00,   // 2E00..2E7F; COMMON
3887             0x2E80,   // 2E80..2FEF; HAN
3888             0x2FF0,   // 2FF0..3004; COMMON
3889             0x3005,   // 3005..3005; HAN
3890             0x3006,   // 3006..3006; COMMON
3891             0x3007,   // 3007..3007; HAN
3892             0x3008,   // 3008..3020; COMMON
3893             0x3021,   // 3021..3029; HAN
3894             0x302A,   // 302A..302D; INHERITED
3895             0x302E,   // 302E..302F; HANGUL
3896             0x3030,   // 3030..3037; COMMON
3897             0x3038,   // 3038..303B; HAN
3898             0x303C,   // 303C..3040; COMMON
3899             0x3041,   // 3041..3098; HIRAGANA
3900             0x3099,   // 3099..309A; INHERITED
3901             0x309B,   // 309B..309C; COMMON
3902             0x309D,   // 309D..309F; HIRAGANA
3903             0x30A0,   // 30A0..30A0; COMMON
3904             0x30A1,   // 30A1..30FA; KATAKANA
3905             0x30FB,   // 30FB..30FC; COMMON
3906             0x30FD,   // 30FD..3104; KATAKANA
3907             0x3105,   // 3105..3130; BOPOMOFO
3908             0x3131,   // 3131..318F; HANGUL
3909             0x3190,   // 3190..319F; COMMON
3910             0x31A0,   // 31A0..31BF; BOPOMOFO
3911             0x31C0,   // 31C0..31EF; COMMON
3912             0x31F0,   // 31F0..31FF; KATAKANA
3913             0x3200,   // 3200..321F; HANGUL
3914             0x3220,   // 3220..325F; COMMON
3915             0x3260,   // 3260..327E; HANGUL
3916             0x327F,   // 327F..32CF; COMMON
3917             0x32D0,   // 32D0..3357; KATAKANA
3918             0x3358,   // 3358..33FF; COMMON
3919             0x3400,   // 3400..4DBF; HAN
3920             0x4DC0,   // 4DC0..4DFF; COMMON
3921             0x4E00,   // 4E00..9FFF; HAN
3922             0xA000,   // A000..A4CF; YI
3923             0xA4D0,   // A4D0..A4FF; LISU
3924             0xA500,   // A500..A63F; VAI
3925             0xA640,   // A640..A69F; CYRILLIC
3926             0xA6A0,   // A6A0..A6FF; BAMUM
3927             0xA700,   // A700..A721; COMMON
3928             0xA722,   // A722..A787; LATIN
3929             0xA788,   // A788..A78A; COMMON
3930             0xA78B,   // A78B..A7FF; LATIN
3931             0xA800,   // A800..A82F; SYLOTI_NAGRI
3932             0xA830,   // A830..A83F; COMMON
3933             0xA840,   // A840..A87F; PHAGS_PA
3934             0xA880,   // A880..A8DF; SAURASHTRA
3935             0xA8E0,   // A8E0..A8FF; DEVANAGARI
3936             0xA900,   // A900..A92F; KAYAH_LI
3937             0xA930,   // A930..A95F; REJANG
3938             0xA960,   // A960..A97F; HANGUL
3939             0xA980,   // A980..A9FF; JAVANESE
3940             0xAA00,   // AA00..AA5F; CHAM
3941             0xAA60,   // AA60..AA7F; MYANMAR
3942             0xAA80,   // AA80..AADF; TAI_VIET
3943             0xAAE0,   // AAE0..AB00; MEETEI_MAYEK
3944             0xAB01,   // AB01..ABBF; ETHIOPIC
3945             0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
3946             0xAC00,   // AC00..D7FB; HANGUL
3947             0xD7FC,   // D7FC..F8FF; UNKNOWN
3948             0xF900,   // F900..FAFF; HAN
3949             0xFB00,   // FB00..FB12; LATIN
3950             0xFB13,   // FB13..FB1C; ARMENIAN
3951             0xFB1D,   // FB1D..FB4F; HEBREW
3952             0xFB50,   // FB50..FD3D; ARABIC
3953             0xFD3E,   // FD3E..FD4F; COMMON
3954             0xFD50,   // FD50..FDFC; ARABIC
3955             0xFDFD,   // FDFD..FDFF; COMMON
3956             0xFE00,   // FE00..FE0F; INHERITED
3957             0xFE10,   // FE10..FE1F; COMMON
3958             0xFE20,   // FE20..FE2F; INHERITED
3959             0xFE30,   // FE30..FE6F; COMMON
3960             0xFE70,   // FE70..FEFE; ARABIC
3961             0xFEFF,   // FEFF..FF20; COMMON
3962             0xFF21,   // FF21..FF3A; LATIN
3963             0xFF3B,   // FF3B..FF40; COMMON
3964             0xFF41,   // FF41..FF5A; LATIN
3965             0xFF5B,   // FF5B..FF65; COMMON
3966             0xFF66,   // FF66..FF6F; KATAKANA
3967             0xFF70,   // FF70..FF70; COMMON
3968             0xFF71,   // FF71..FF9D; KATAKANA
3969             0xFF9E,   // FF9E..FF9F; COMMON
3970             0xFFA0,   // FFA0..FFDF; HANGUL
3971             0xFFE0,   // FFE0..FFFF; COMMON
3972             0x10000,  // 10000..100FF; LINEAR_B
3973             0x10100,  // 10100..1013F; COMMON
3974             0x10140,  // 10140..1018F; GREEK
3975             0x10190,  // 10190..101FC; COMMON
3976             0x101FD,  // 101FD..1027F; INHERITED
3977             0x10280,  // 10280..1029F; LYCIAN
3978             0x102A0,  // 102A0..102FF; CARIAN
3979             0x10300,  // 10300..1032F; OLD_ITALIC
3980             0x10330,  // 10330..1037F; GOTHIC
3981             0x10380,  // 10380..1039F; UGARITIC
3982             0x103A0,  // 103A0..103FF; OLD_PERSIAN
3983             0x10400,  // 10400..1044F; DESERET
3984             0x10450,  // 10450..1047F; SHAVIAN
3985             0x10480,  // 10480..107FF; OSMANYA
3986             0x10800,  // 10800..1083F; CYPRIOT
3987             0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
3988             0x10900,  // 10900..1091F; PHOENICIAN
3989             0x10920,  // 10920..1097F; LYDIAN
3990             0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
3991             0x109A0,  // 109A0..109FF; MEROITIC_CURSIVE
3992             0x10A00,  // 10A00..10A5F; KHAROSHTHI
3993             0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
3994             0x10B00,  // 10B00..10B3F; AVESTAN
3995             0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
3996             0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
3997             0x10C00,  // 10C00..10E5F; OLD_TURKIC
3998             0x10E60,  // 10E60..10FFF; ARABIC
3999             0x11000,  // 11000..1107F; BRAHMI
4000             0x11080,  // 11080..110CF; KAITHI
4001             0x110D0,  // 110D0..110FF; SORA_SOMPENG
4002             0x11100,  // 11100..1117F; CHAKMA
4003             0x11180,  // 11180..1167F; SHARADA
4004             0x11680,  // 11680..116CF; TAKRI
4005             0x12000,  // 12000..12FFF; CUNEIFORM
4006             0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
4007             0x16800,  // 16800..16A38; BAMUM
4008             0x16F00,  // 16F00..16F9F; MIAO
4009             0x1B000,  // 1B000..1B000; KATAKANA
4010             0x1B001,  // 1B001..1CFFF; HIRAGANA
4011             0x1D000,  // 1D000..1D166; COMMON
4012             0x1D167,  // 1D167..1D169; INHERITED
4013             0x1D16A,  // 1D16A..1D17A; COMMON
4014             0x1D17B,  // 1D17B..1D182; INHERITED
4015             0x1D183,  // 1D183..1D184; COMMON
4016             0x1D185,  // 1D185..1D18B; INHERITED
4017             0x1D18C,  // 1D18C..1D1A9; COMMON
4018             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
4019             0x1D1AE,  // 1D1AE..1D1FF; COMMON
4020             0x1D200,  // 1D200..1D2FF; GREEK
4021             0x1D300,  // 1D300..1EDFF; COMMON
4022             0x1EE00,  // 1EE00..1EFFF; ARABIC
4023             0x1F000,  // 1F000..1F1FF; COMMON
4024             0x1F200,  // 1F200..1F200; HIRAGANA
            0x1F201,  // 1F201..1FFFF; COMMON
4026             0x20000,  // 20000..E0000; HAN
4027             0xE0001,  // E0001..E00FF; COMMON
4028             0xE0100,  // E0100..E01EF; INHERITED
4029             0xE01F0   // E01F0..10FFFF; UNKNOWN
4030 
4031         };
4032 
        // Script assigned to each range listed in scriptStarts: of(int) binary
        // searches scriptStarts for the range containing a code point and uses
        // the resulting position as an index into this array. The two tables
        // must therefore stay aligned entry for entry; the final UNKNOWN entry
        // pairs with the last range start (E01F0..10FFFF).
        private static final UnicodeScript[] scripts = {
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            BOPOMOFO,
            COMMON,
            INHERITED,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COPTIC,
            GREEK,
            CYRILLIC,
            INHERITED,
            CYRILLIC,
            ARMENIAN,
            COMMON,
            ARMENIAN,
            HEBREW,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            INHERITED,
            ARABIC,
            COMMON,
            ARABIC,
            INHERITED,
            ARABIC,
            COMMON,
            ARABIC,
            SYRIAC,
            ARABIC,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            ARABIC,
            DEVANAGARI,
            INHERITED,
            DEVANAGARI,
            COMMON,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            COMMON,
            THAI,
            LAO,
            TIBETAN,
            COMMON,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            COMMON,
            GEORGIAN,
            HANGUL,
            ETHIOPIC,
            CHEROKEE,
            CANADIAN_ABORIGINAL,
            OGHAM,
            RUNIC,
            COMMON,
            RUNIC,
            TAGALOG,
            HANUNOO,
            COMMON,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            COMMON,
            MONGOLIAN,
            COMMON,
            MONGOLIAN,
            CANADIAN_ABORIGINAL,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER,
            BUGINESE,
            TAI_THAM,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            SUNDANESE,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            LATIN,
            GREEK,
            CYRILLIC,
            LATIN,
            GREEK,
            LATIN,
            GREEK,
            LATIN,
            CYRILLIC,
            LATIN,
            GREEK,
            INHERITED,
            LATIN,
            GREEK,
            COMMON,
            INHERITED,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            INHERITED,
            COMMON,
            GREEK,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            BRAILLE,
            COMMON,
            GLAGOLITIC,
            LATIN,
            COPTIC,
            GEORGIAN,
            TIFINAGH,
            ETHIOPIC,
            CYRILLIC,
            COMMON,
            HAN,
            COMMON,
            HAN,
            COMMON,
            HAN,
            COMMON,
            HAN,
            INHERITED,
            HANGUL,
            COMMON,
            HAN,
            COMMON,
            HIRAGANA,
            INHERITED,
            COMMON,
            HIRAGANA,
            COMMON,
            KATAKANA,
            COMMON,
            KATAKANA,
            BOPOMOFO,
            HANGUL,
            COMMON,
            BOPOMOFO,
            COMMON,
            KATAKANA,
            HANGUL,
            COMMON,
            HANGUL,
            COMMON,
            KATAKANA,
            COMMON,
            HAN,
            COMMON,
            HAN,
            YI,
            LISU,
            VAI,
            CYRILLIC,
            BAMUM,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            SYLOTI_NAGRI,
            COMMON,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI,
            KAYAH_LI,
            REJANG,
            HANGUL,
            JAVANESE,
            CHAM,
            MYANMAR,
            TAI_VIET,
            MEETEI_MAYEK,
            ETHIOPIC,
            MEETEI_MAYEK,
            HANGUL,
            UNKNOWN     ,
            HAN,
            LATIN,
            ARMENIAN,
            HEBREW,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            ARABIC,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            KATAKANA,
            COMMON,
            KATAKANA,
            COMMON,
            HANGUL,
            COMMON,
            LINEAR_B,
            COMMON,
            GREEK,
            COMMON,
            INHERITED,
            LYCIAN,
            CARIAN,
            OLD_ITALIC,
            GOTHIC,
            UGARITIC,
            OLD_PERSIAN,
            DESERET,
            SHAVIAN,
            OSMANYA,
            CYPRIOT,
            IMPERIAL_ARAMAIC,
            PHOENICIAN,
            LYDIAN,
            MEROITIC_HIEROGLYPHS,
            MEROITIC_CURSIVE,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            OLD_TURKIC,
            ARABIC,
            BRAHMI,
            KAITHI,
            SORA_SOMPENG,
            CHAKMA,
            SHARADA,
            TAKRI,
            CUNEIFORM,
            EGYPTIAN_HIEROGLYPHS,
            BAMUM,
            MIAO,
            KATAKANA,
            HIRAGANA,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            GREEK,
            COMMON,
            ARABIC,
            COMMON,
            HIRAGANA,
            COMMON,
            HAN,
            COMMON,
            INHERITED,
            UNKNOWN
        };
4352 
4353         private static HashMap<String, Character.UnicodeScript> aliases;
4354         static {
4355             aliases = new HashMap<>(128);
4356             aliases.put("ARAB", ARABIC);
4357             aliases.put("ARMI", IMPERIAL_ARAMAIC);
4358             aliases.put("ARMN", ARMENIAN);
4359             aliases.put("AVST", AVESTAN);
4360             aliases.put("BALI", BALINESE);
4361             aliases.put("BAMU", BAMUM);
4362             aliases.put("BATK", BATAK);
4363             aliases.put("BENG", BENGALI);
4364             aliases.put("BOPO", BOPOMOFO);
4365             aliases.put("BRAI", BRAILLE);
4366             aliases.put("BRAH", BRAHMI);
4367             aliases.put("BUGI", BUGINESE);
4368             aliases.put("BUHD", BUHID);
4369             aliases.put("CAKM", CHAKMA);
4370             aliases.put("CANS", CANADIAN_ABORIGINAL);
4371             aliases.put("CARI", CARIAN);
4372             aliases.put("CHAM", CHAM);
4373             aliases.put("CHER", CHEROKEE);
4374             aliases.put("COPT", COPTIC);
4375             aliases.put("CPRT", CYPRIOT);
4376             aliases.put("CYRL", CYRILLIC);
4377             aliases.put("DEVA", DEVANAGARI);
4378             aliases.put("DSRT", DESERET);
4379             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4380             aliases.put("ETHI", ETHIOPIC);
4381             aliases.put("GEOR", GEORGIAN);
4382             aliases.put("GLAG", GLAGOLITIC);
4383             aliases.put("GOTH", GOTHIC);
4384             aliases.put("GREK", GREEK);
4385             aliases.put("GUJR", GUJARATI);
4386             aliases.put("GURU", GURMUKHI);
4387             aliases.put("HANG", HANGUL);
4388             aliases.put("HANI", HAN);
4389             aliases.put("HANO", HANUNOO);
4390             aliases.put("HEBR", HEBREW);
4391             aliases.put("HIRA", HIRAGANA);
4392             // it appears we don't have the KATAKANA_OR_HIRAGANA
4393             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4394             aliases.put("ITAL", OLD_ITALIC);
4395             aliases.put("JAVA", JAVANESE);
4396             aliases.put("KALI", KAYAH_LI);
4397             aliases.put("KANA", KATAKANA);
4398             aliases.put("KHAR", KHAROSHTHI);
4399             aliases.put("KHMR", KHMER);
4400             aliases.put("KNDA", KANNADA);
4401             aliases.put("KTHI", KAITHI);
4402             aliases.put("LANA", TAI_THAM);
4403             aliases.put("LAOO", LAO);
4404             aliases.put("LATN", LATIN);
4405             aliases.put("LEPC", LEPCHA);
4406             aliases.put("LIMB", LIMBU);
4407             aliases.put("LINB", LINEAR_B);
4408             aliases.put("LISU", LISU);
4409             aliases.put("LYCI", LYCIAN);
4410             aliases.put("LYDI", LYDIAN);
4411             aliases.put("MAND", MANDAIC);
4412             aliases.put("MERC", MEROITIC_CURSIVE);
4413             aliases.put("MERO", MEROITIC_HIEROGLYPHS);
4414             aliases.put("MLYM", MALAYALAM);
4415             aliases.put("MONG", MONGOLIAN);
4416             aliases.put("MTEI", MEETEI_MAYEK);
4417             aliases.put("MYMR", MYANMAR);
4418             aliases.put("NKOO", NKO);
4419             aliases.put("OGAM", OGHAM);
4420             aliases.put("OLCK", OL_CHIKI);
4421             aliases.put("ORKH", OLD_TURKIC);
4422             aliases.put("ORYA", ORIYA);
4423             aliases.put("OSMA", OSMANYA);
4424             aliases.put("PHAG", PHAGS_PA);
4425             aliases.put("PLRD", MIAO);
4426             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4427             aliases.put("PHNX", PHOENICIAN);
4428             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4429             aliases.put("RJNG", REJANG);
4430             aliases.put("RUNR", RUNIC);
4431             aliases.put("SAMR", SAMARITAN);
4432             aliases.put("SARB", OLD_SOUTH_ARABIAN);
4433             aliases.put("SAUR", SAURASHTRA);
4434             aliases.put("SHAW", SHAVIAN);
4435             aliases.put("SHRD", SHARADA);
4436             aliases.put("SINH", SINHALA);
4437             aliases.put("SORA", SORA_SOMPENG);
4438             aliases.put("SUND", SUNDANESE);
4439             aliases.put("SYLO", SYLOTI_NAGRI);
4440             aliases.put("SYRC", SYRIAC);
4441             aliases.put("TAGB", TAGBANWA);
4442             aliases.put("TALE", TAI_LE);
4443             aliases.put("TAKR", TAKRI);
4444             aliases.put("TALU", NEW_TAI_LUE);
4445             aliases.put("TAML", TAMIL);
4446             aliases.put("TAVT", TAI_VIET);
4447             aliases.put("TELU", TELUGU);
4448             aliases.put("TFNG", TIFINAGH);
4449             aliases.put("TGLG", TAGALOG);
4450             aliases.put("THAA", THAANA);
4451             aliases.put("THAI", THAI);
4452             aliases.put("TIBT", TIBETAN);
4453             aliases.put("UGAR", UGARITIC);
4454             aliases.put("VAII", VAI);
4455             aliases.put("XPEO", OLD_PERSIAN);
4456             aliases.put("XSUX", CUNEIFORM);
4457             aliases.put("YIII", YI);
4458             aliases.put("ZINH", INHERITED);
4459             aliases.put("ZYYY", COMMON);
4460             aliases.put("ZZZZ", UNKNOWN);
4461         }
4462 
4463         /**
4464          * Returns the enum constant representing the Unicode script of which
4465          * the given character (Unicode code point) is assigned to.
4466          *
4467          * @param   codePoint the character (Unicode code point) in question.
4468          * @return  The {@code UnicodeScript} constant representing the
4469          *          Unicode script of which this character is assigned to.
4470          *
4471          * @exception IllegalArgumentException if the specified
4472          * {@code codePoint} is an invalid Unicode code point.
4473          * @see Character#isValidCodePoint(int)
4474          *
4475          */
4476         public static UnicodeScript of(int codePoint) {
4477             if (!isValidCodePoint(codePoint))
4478                 throw new IllegalArgumentException();
4479             int type = getType(codePoint);
4480             // leave SURROGATE and PRIVATE_USE for table lookup
4481             if (type == UNASSIGNED)
4482                 return UNKNOWN;
4483             int index = Arrays.binarySearch(scriptStarts, codePoint);
4484             if (index < 0)
4485                 index = -index - 2;
4486             return scripts[index];
4487         }
4488 
4489         /**
4490          * Returns the UnicodeScript constant with the given Unicode script
4491          * name or the script name alias. Script names and their aliases are
4492          * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4493          * and PropertyValueAliases&lt;version&gt;.txt define script names
4494          * and the script name aliases for a particular version of the
4495          * standard. The {@link Character} class specifies the version of
4496          * the standard that it supports.
4497          * <p>
4498          * Character case is ignored for all of the valid script names.
4499          * The en_US locale's case mapping rules are used to provide
4500          * case-insensitive string comparisons for script name validation.
4501          * <p>
4502          *
4503          * @param scriptName A {@code UnicodeScript} name.
4504          * @return The {@code UnicodeScript} constant identified
4505          *         by {@code scriptName}
4506          * @throws IllegalArgumentException if {@code scriptName} is an
4507          *         invalid name
4508          * @throws NullPointerException if {@code scriptName} is null
4509          */
4510         public static final UnicodeScript forName(String scriptName) {
4511             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4512                                  //.replace(' ', '_'));
4513             UnicodeScript sc = aliases.get(scriptName);
4514             if (sc != null)
4515                 return sc;
4516             return valueOf(scriptName);
4517         }
4518     }
4519 
    /**
     * The primitive {@code char} wrapped by this {@code Character}.
     * It is final and therefore never changes after construction.
     *
     * @serial
     */
    private final char value;

    /** use serialVersionUID from JDK 1.0.2 for interoperability */
    private static final long serialVersionUID = 3786198910865385080L;
4529 
    /**
     * Constructs a newly allocated {@code Character} object that
     * represents the specified {@code char} value. A new object is
     * created on every call; no caching takes place here.
     *
     * @param  value   the value to be represented by the
     *                  {@code Character} object.
     */
    public Character(char value) {
        this.value = value;
    }
4540 
4541     private static class CharacterCache {
4542         private CharacterCache(){}
4543 
4544         static final Character cache[] = new Character[127 + 1];
4545 
4546         static {
4547             for (int i = 0; i < cache.length; i++)
4548                 cache[i] = new Character((char)i);
4549         }
4550     }
4551 
4552     /**
4553      * Returns a <tt>Character</tt> instance representing the specified
4554      * <tt>char</tt> value.
4555      * If a new <tt>Character</tt> instance is not required, this method
4556      * should generally be used in preference to the constructor
4557      * {@link #Character(char)}, as this method is likely to yield
4558      * significantly better space and time performance by caching
4559      * frequently requested values.
4560      *
4561      * This method will always cache values in the range {@code
4562      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
4563      * cache other values outside of this range.
4564      *
4565      * @param  c a char value.
4566      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
4567      * @since  1.5
4568      */
4569     public static Character valueOf(char c) {
4570         if (c <= 127) { // must cache
4571             return CharacterCache.cache[(int)c];
4572         }
4573         return new Character(c);
4574     }
4575 
4576     /**
4577      * Returns the value of this {@code Character} object.
4578      * @return  the primitive {@code char} value represented by
4579      *          this object.
4580      */
4581     public char charValue() {
4582         return value;
4583     }
4584 
    /**
     * Returns a hash code for this {@code Character}; equal to the result
     * of invoking {@code charValue()}.
     *
     * @return a hash code value for this {@code Character}
     */
    @Override
    public int hashCode() {
        // Delegates to the static variant so both always agree.
        return Character.hashCode(value);
    }
4595 
4596     /**
4597      * Returns a hash code for a {@code char} value; compatible with
4598      * {@code Character.hashCode()}.
4599      *
4600      * @since 1.8
4601      *
4602      * @param value The {@code char} for which to return a hash code.
4603      * @return a hash code value for a {@code char} value.
4604      */
4605     public static int hashCode(char value) {
4606         return (int)value;
4607     }
4608 
4609     /**
4610      * Compares this object against the specified object.
4611      * The result is {@code true} if and only if the argument is not
4612      * {@code null} and is a {@code Character} object that
4613      * represents the same {@code char} value as this object.
4614      *
4615      * @param   obj   the object to compare with.
4616      * @return  {@code true} if the objects are the same;
4617      *          {@code false} otherwise.
4618      */
4619     public boolean equals(Object obj) {
4620         if (obj instanceof Character) {
4621             return value == ((Character)obj).charValue();
4622         }
4623         return false;
4624     }
4625 
4626     /**
4627      * Returns a {@code String} object representing this
4628      * {@code Character}'s value.  The result is a string of
4629      * length 1 whose sole component is the primitive
4630      * {@code char} value represented by this
4631      * {@code Character} object.
4632      *
4633      * @return  a string representation of this object.
4634      */
4635     public String toString() {
4636         char buf[] = {value};
4637         return String.valueOf(buf);
4638     }
4639 
    /**
     * Returns a {@code String} object representing the
     * specified {@code char}.  The result is a string of length
     * 1 consisting solely of the specified {@code char}.
     * This is a static convenience that delegates to
     * {@link String#valueOf(char)}.
     *
     * @param c the {@code char} to be converted
     * @return the string representation of the specified {@code char}
     * @since 1.4
     */
    public static String toString(char c) {
        return String.valueOf(c);
    }
4652 
4653     /**
4654      * Determines whether the specified code point is a valid
4655      * <a href="http://www.unicode.org/glossary/#code_point">
4656      * Unicode code point value</a>.
4657      *
4658      * @param  codePoint the Unicode code point to be tested
4659      * @return {@code true} if the specified code point value is between
4660      *         {@link #MIN_CODE_POINT} and
4661      *         {@link #MAX_CODE_POINT} inclusive;
4662      *         {@code false} otherwise.
4663      * @since  1.5
4664      */
4665     public static boolean isValidCodePoint(int codePoint) {
4666         // Optimized form of:
4667         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4668         int plane = codePoint >>> 16;
4669         return plane < ((MAX_CODE_POINT + 1) >>> 16);
4670     }
4671 
4672     /**
4673      * Determines whether the specified character (Unicode code point)
4674      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4675      * Such code points can be represented using a single {@code char}.
4676      *
4677      * @param  codePoint the character (Unicode code point) to be tested
4678      * @return {@code true} if the specified code point is between
4679      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4680      *         {@code false} otherwise.
4681      * @since  1.7
4682      */
4683     public static boolean isBmpCodePoint(int codePoint) {
4684         return codePoint >>> 16 == 0;
4685         // Optimized form of:
4686         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4687         // We consistently use logical shift (>>>) to facilitate
4688         // additional runtime optimizations.
4689     }
4690 
4691     /**
4692      * Determines whether the specified character (Unicode code point)
4693      * is in the <a href="#supplementary">supplementary character</a> range.
4694      *
4695      * @param  codePoint the character (Unicode code point) to be tested
4696      * @return {@code true} if the specified code point is between
4697      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4698      *         {@link #MAX_CODE_POINT} inclusive;
4699      *         {@code false} otherwise.
4700      * @since  1.5
4701      */
4702     public static boolean isSupplementaryCodePoint(int codePoint) {
4703         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4704             && codePoint <  MAX_CODE_POINT + 1;
4705     }
4706 
4707     /**
4708      * Determines if the given {@code char} value is a
4709      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4710      * Unicode high-surrogate code unit</a>
4711      * (also known as <i>leading-surrogate code unit</i>).
4712      *
4713      * <p>Such values do not represent characters by themselves,
4714      * but are used in the representation of
4715      * <a href="#supplementary">supplementary characters</a>
4716      * in the UTF-16 encoding.
4717      *
4718      * @param  ch the {@code char} value to be tested.
4719      * @return {@code true} if the {@code char} value is between
4720      *         {@link #MIN_HIGH_SURROGATE} and
4721      *         {@link #MAX_HIGH_SURROGATE} inclusive;
4722      *         {@code false} otherwise.
4723      * @see    Character#isLowSurrogate(char)
4724      * @see    Character.UnicodeBlock#of(int)
4725      * @since  1.5
4726      */
4727     public static boolean isHighSurrogate(char ch) {
4728         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4729         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4730     }
4731 
4732     /**
4733      * Determines if the given {@code char} value is a
4734      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4735      * Unicode low-surrogate code unit</a>
4736      * (also known as <i>trailing-surrogate code unit</i>).
4737      *
4738      * <p>Such values do not represent characters by themselves,
4739      * but are used in the representation of
4740      * <a href="#supplementary">supplementary characters</a>
4741      * in the UTF-16 encoding.
4742      *
4743      * @param  ch the {@code char} value to be tested.
4744      * @return {@code true} if the {@code char} value is between
4745      *         {@link #MIN_LOW_SURROGATE} and
4746      *         {@link #MAX_LOW_SURROGATE} inclusive;
4747      *         {@code false} otherwise.
4748      * @see    Character#isHighSurrogate(char)
4749      * @since  1.5
4750      */
4751     public static boolean isLowSurrogate(char ch) {
4752         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4753     }
4754 
4755     /**
4756      * Determines if the given {@code char} value is a Unicode
4757      * <i>surrogate code unit</i>.
4758      *
4759      * <p>Such values do not represent characters by themselves,
4760      * but are used in the representation of
4761      * <a href="#supplementary">supplementary characters</a>
4762      * in the UTF-16 encoding.
4763      *
4764      * <p>A char value is a surrogate code unit if and only if it is either
4765      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4766      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4767      *
4768      * @param  ch the {@code char} value to be tested.
4769      * @return {@code true} if the {@code char} value is between
4770      *         {@link #MIN_SURROGATE} and
4771      *         {@link #MAX_SURROGATE} inclusive;
4772      *         {@code false} otherwise.
4773      * @since  1.7
4774      */
4775     public static boolean isSurrogate(char ch) {
4776         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4777     }
4778 
4779     /**
4780      * Determines whether the specified pair of {@code char}
4781      * values is a valid
4782      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4783      * Unicode surrogate pair</a>.
4784 
4785      * <p>This method is equivalent to the expression:
4786      * <blockquote><pre>{@code
4787      * isHighSurrogate(high) && isLowSurrogate(low)
4788      * }</pre></blockquote>
4789      *
4790      * @param  high the high-surrogate code value to be tested
4791      * @param  low the low-surrogate code value to be tested
4792      * @return {@code true} if the specified high and
4793      * low-surrogate code values represent a valid surrogate pair;
4794      * {@code false} otherwise.
4795      * @since  1.5
4796      */
4797     public static boolean isSurrogatePair(char high, char low) {
4798         return isHighSurrogate(high) && isLowSurrogate(low);
4799     }
4800 
4801     /**
4802      * Determines the number of {@code char} values needed to
4803      * represent the specified character (Unicode code point). If the
4804      * specified character is equal to or greater than 0x10000, then
4805      * the method returns 2. Otherwise, the method returns 1.
4806      *
4807      * <p>This method doesn't validate the specified character to be a
4808      * valid Unicode code point. The caller must validate the
4809      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
4810      * if necessary.
4811      *
4812      * @param   codePoint the character (Unicode code point) to be tested.
4813      * @return  2 if the character is a valid supplementary character; 1 otherwise.
4814      * @see     Character#isSupplementaryCodePoint(int)
4815      * @since   1.5
4816      */
4817     public static int charCount(int codePoint) {
4818         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4819     }
4820 
4821     /**
4822      * Converts the specified surrogate pair to its supplementary code
4823      * point value. This method does not validate the specified
4824      * surrogate pair. The caller must validate it using {@link
4825      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4826      *
4827      * @param  high the high-surrogate code unit
4828      * @param  low the low-surrogate code unit
4829      * @return the supplementary code point composed from the
4830      *         specified surrogate pair.
4831      * @since  1.5
4832      */
4833     public static int toCodePoint(char high, char low) {
4834         // Optimized form of:
4835         // return ((high - MIN_HIGH_SURROGATE) << 10)
4836         //         + (low - MIN_LOW_SURROGATE)
4837         //         + MIN_SUPPLEMENTARY_CODE_POINT;
4838         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4839                                        - (MIN_HIGH_SURROGATE << 10)
4840                                        - MIN_LOW_SURROGATE);
4841     }
4842 
4843     /**
4844      * Returns the code point at the given index of the
4845      * {@code CharSequence}. If the {@code char} value at
4846      * the given index in the {@code CharSequence} is in the
4847      * high-surrogate range, the following index is less than the
4848      * length of the {@code CharSequence}, and the
4849      * {@code char} value at the following index is in the
4850      * low-surrogate range, then the supplementary code point
4851      * corresponding to this surrogate pair is returned. Otherwise,
4852      * the {@code char} value at the given index is returned.
4853      *
4854      * @param seq a sequence of {@code char} values (Unicode code
4855      * units)
4856      * @param index the index to the {@code char} values (Unicode
4857      * code units) in {@code seq} to be converted
4858      * @return the Unicode code point at the given index
4859      * @exception NullPointerException if {@code seq} is null.
4860      * @exception IndexOutOfBoundsException if the value
4861      * {@code index} is negative or not less than
4862      * {@link CharSequence#length() seq.length()}.
4863      * @since  1.5
4864      */
4865     public static int codePointAt(CharSequence seq, int index) {
4866         char c1 = seq.charAt(index);
4867         if (isHighSurrogate(c1) && ++index < seq.length()) {
4868             char c2 = seq.charAt(index);
4869             if (isLowSurrogate(c2)) {
4870                 return toCodePoint(c1, c2);
4871             }
4872         }
4873         return c1;
4874     }
4875 
4876     /**
4877      * Returns the code point at the given index of the
4878      * {@code char} array. If the {@code char} value at
4879      * the given index in the {@code char} array is in the
4880      * high-surrogate range, the following index is less than the
4881      * length of the {@code char} array, and the
4882      * {@code char} value at the following index is in the
4883      * low-surrogate range, then the supplementary code point
4884      * corresponding to this surrogate pair is returned. Otherwise,
4885      * the {@code char} value at the given index is returned.
4886      *
4887      * @param a the {@code char} array
4888      * @param index the index to the {@code char} values (Unicode
4889      * code units) in the {@code char} array to be converted
4890      * @return the Unicode code point at the given index
4891      * @exception NullPointerException if {@code a} is null.
4892      * @exception IndexOutOfBoundsException if the value
4893      * {@code index} is negative or not less than
4894      * the length of the {@code char} array.
4895      * @since  1.5
4896      */
4897     public static int codePointAt(char[] a, int index) {
4898         return codePointAtImpl(a, index, a.length);
4899     }
4900 
4901     /**
4902      * Returns the code point at the given index of the
4903      * {@code char} array, where only array elements with
4904      * {@code index} less than {@code limit} can be used. If
4905      * the {@code char} value at the given index in the
4906      * {@code char} array is in the high-surrogate range, the
4907      * following index is less than the {@code limit}, and the
4908      * {@code char} value at the following index is in the
4909      * low-surrogate range, then the supplementary code point
4910      * corresponding to this surrogate pair is returned. Otherwise,
4911      * the {@code char} value at the given index is returned.
4912      *
4913      * @param a the {@code char} array
4914      * @param index the index to the {@code char} values (Unicode
4915      * code units) in the {@code char} array to be converted
4916      * @param limit the index after the last array element that
4917      * can be used in the {@code char} array
4918      * @return the Unicode code point at the given index
4919      * @exception NullPointerException if {@code a} is null.
4920      * @exception IndexOutOfBoundsException if the {@code index}
4921      * argument is negative or not less than the {@code limit}
4922      * argument, or if the {@code limit} argument is negative or
4923      * greater than the length of the {@code char} array.
4924      * @since  1.5
4925      */
4926     public static int codePointAt(char[] a, int index, int limit) {
4927         if (index >= limit || limit < 0 || limit > a.length) {
4928             throw new IndexOutOfBoundsException();
4929         }
4930         return codePointAtImpl(a, index, limit);
4931     }
4932 
4933     // throws ArrayIndexOutOfBoundsException if index out of bounds
4934     static int codePointAtImpl(char[] a, int index, int limit) {
4935         char c1 = a[index];
4936         if (isHighSurrogate(c1) && ++index < limit) {
4937             char c2 = a[index];
4938             if (isLowSurrogate(c2)) {
4939                 return toCodePoint(c1, c2);
4940             }
4941         }
4942         return c1;
4943     }
4944 
4945     /**
4946      * Returns the code point preceding the given index of the
4947      * {@code CharSequence}. If the {@code char} value at
4948      * {@code (index - 1)} in the {@code CharSequence} is in
4949      * the low-surrogate range, {@code (index - 2)} is not
4950      * negative, and the {@code char} value at {@code (index - 2)}
4951      * in the {@code CharSequence} is in the
4952      * high-surrogate range, then the supplementary code point
4953      * corresponding to this surrogate pair is returned. Otherwise,
4954      * the {@code char} value at {@code (index - 1)} is
4955      * returned.
4956      *
4957      * @param seq the {@code CharSequence} instance
4958      * @param index the index following the code point that should be returned
4959      * @return the Unicode code point value before the given index.
4960      * @exception NullPointerException if {@code seq} is null.
4961      * @exception IndexOutOfBoundsException if the {@code index}
4962      * argument is less than 1 or greater than {@link
4963      * CharSequence#length() seq.length()}.
4964      * @since  1.5
4965      */
4966     public static int codePointBefore(CharSequence seq, int index) {
4967         char c2 = seq.charAt(--index);
4968         if (isLowSurrogate(c2) && index > 0) {
4969             char c1 = seq.charAt(--index);
4970             if (isHighSurrogate(c1)) {
4971                 return toCodePoint(c1, c2);
4972             }
4973         }
4974         return c2;
4975     }
4976 
4977     /**
4978      * Returns the code point preceding the given index of the
4979      * {@code char} array. If the {@code char} value at
4980      * {@code (index - 1)} in the {@code char} array is in
4981      * the low-surrogate range, {@code (index - 2)} is not
4982      * negative, and the {@code char} value at {@code (index - 2)}
4983      * in the {@code char} array is in the
4984      * high-surrogate range, then the supplementary code point
4985      * corresponding to this surrogate pair is returned. Otherwise,
4986      * the {@code char} value at {@code (index - 1)} is
4987      * returned.
4988      *
4989      * @param a the {@code char} array
4990      * @param index the index following the code point that should be returned
4991      * @return the Unicode code point value before the given index.
4992      * @exception NullPointerException if {@code a} is null.
4993      * @exception IndexOutOfBoundsException if the {@code index}
4994      * argument is less than 1 or greater than the length of the
4995      * {@code char} array
4996      * @since  1.5
4997      */
4998     public static int codePointBefore(char[] a, int index) {
4999         return codePointBeforeImpl(a, index, 0);
5000     }
5001 
5002     /**
5003      * Returns the code point preceding the given index of the
5004      * {@code char} array, where only array elements with
5005      * {@code index} greater than or equal to {@code start}
5006      * can be used. If the {@code char} value at {@code (index - 1)}
5007      * in the {@code char} array is in the
5008      * low-surrogate range, {@code (index - 2)} is not less than
5009      * {@code start}, and the {@code char} value at
5010      * {@code (index - 2)} in the {@code char} array is in
5011      * the high-surrogate range, then the supplementary code point
5012      * corresponding to this surrogate pair is returned. Otherwise,
5013      * the {@code char} value at {@code (index - 1)} is
5014      * returned.
5015      *
5016      * @param a the {@code char} array
5017      * @param index the index following the code point that should be returned
5018      * @param start the index of the first array element in the
5019      * {@code char} array
5020      * @return the Unicode code point value before the given index.
5021      * @exception NullPointerException if {@code a} is null.
5022      * @exception IndexOutOfBoundsException if the {@code index}
5023      * argument is not greater than the {@code start} argument or
5024      * is greater than the length of the {@code char} array, or
5025      * if the {@code start} argument is negative or not less than
5026      * the length of the {@code char} array.
5027      * @since  1.5
5028      */
5029     public static int codePointBefore(char[] a, int index, int start) {
5030         if (index <= start || start < 0 || start >= a.length) {
5031             throw new IndexOutOfBoundsException();
5032         }
5033         return codePointBeforeImpl(a, index, start);
5034     }
5035 
5036     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
5037     static int codePointBeforeImpl(char[] a, int index, int start) {
5038         char c2 = a[--index];
5039         if (isLowSurrogate(c2) && index > start) {
5040             char c1 = a[--index];
5041             if (isHighSurrogate(c1)) {
5042                 return toCodePoint(c1, c2);
5043             }
5044         }
5045         return c2;
5046     }
5047 
5048     /**
5049      * Returns the leading surrogate (a
5050      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
5051      * high surrogate code unit</a>) of the
5052      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5053      * surrogate pair</a>
5054      * representing the specified supplementary character (Unicode
5055      * code point) in the UTF-16 encoding.  If the specified character
5056      * is not a
5057      * <a href="Character.html#supplementary">supplementary character</a>,
5058      * an unspecified {@code char} is returned.
5059      *
5060      * <p>If
5061      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5062      * is {@code true}, then
5063      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
5064      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
5065      * are also always {@code true}.
5066      *
5067      * @param   codePoint a supplementary character (Unicode code point)
5068      * @return  the leading surrogate code unit used to represent the
5069      *          character in the UTF-16 encoding
5070      * @since   1.7
5071      */
5072     public static char highSurrogate(int codePoint) {
5073         return (char) ((codePoint >>> 10)
5074             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
5075     }
5076 
5077     /**
5078      * Returns the trailing surrogate (a
5079      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
5080      * low surrogate code unit</a>) of the
5081      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5082      * surrogate pair</a>
5083      * representing the specified supplementary character (Unicode
5084      * code point) in the UTF-16 encoding.  If the specified character
5085      * is not a
5086      * <a href="Character.html#supplementary">supplementary character</a>,
5087      * an unspecified {@code char} is returned.
5088      *
5089      * <p>If
5090      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5091      * is {@code true}, then
5092      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
5093      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
5094      * are also always {@code true}.
5095      *
5096      * @param   codePoint a supplementary character (Unicode code point)
5097      * @return  the trailing surrogate code unit used to represent the
5098      *          character in the UTF-16 encoding
5099      * @since   1.7
5100      */
5101     public static char lowSurrogate(int codePoint) {
5102         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
5103     }
5104 
5105     /**
5106      * Converts the specified character (Unicode code point) to its
5107      * UTF-16 representation. If the specified code point is a BMP
5108      * (Basic Multilingual Plane or Plane 0) value, the same value is
5109      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
5110      * specified code point is a supplementary character, its
5111      * surrogate values are stored in {@code dst[dstIndex]}
5112      * (high-surrogate) and {@code dst[dstIndex+1]}
5113      * (low-surrogate), and 2 is returned.
5114      *
5115      * @param  codePoint the character (Unicode code point) to be converted.
5116      * @param  dst an array of {@code char} in which the
5117      * {@code codePoint}'s UTF-16 value is stored.
5118      * @param dstIndex the start index into the {@code dst}
5119      * array where the converted value is stored.
5120      * @return 1 if the code point is a BMP code point, 2 if the
5121      * code point is a supplementary code point.
5122      * @exception IllegalArgumentException if the specified
5123      * {@code codePoint} is not a valid Unicode code point.
5124      * @exception NullPointerException if the specified {@code dst} is null.
5125      * @exception IndexOutOfBoundsException if {@code dstIndex}
5126      * is negative or not less than {@code dst.length}, or if
5127      * {@code dst} at {@code dstIndex} doesn't have enough
5128      * array element(s) to store the resulting {@code char}
5129      * value(s). (If {@code dstIndex} is equal to
5130      * {@code dst.length-1} and the specified
5131      * {@code codePoint} is a supplementary character, the
5132      * high-surrogate value is not stored in
5133      * {@code dst[dstIndex]}.)
5134      * @since  1.5
5135      */
5136     public static int toChars(int codePoint, char[] dst, int dstIndex) {
5137         if (isBmpCodePoint(codePoint)) {
5138             dst[dstIndex] = (char) codePoint;
5139             return 1;
5140         } else if (isValidCodePoint(codePoint)) {
5141             toSurrogates(codePoint, dst, dstIndex);
5142             return 2;
5143         } else {
5144             throw new IllegalArgumentException();
5145         }
5146     }
5147 
5148     /**
5149      * Converts the specified character (Unicode code point) to its
5150      * UTF-16 representation stored in a {@code char} array. If
5151      * the specified code point is a BMP (Basic Multilingual Plane or
5152      * Plane 0) value, the resulting {@code char} array has
5153      * the same value as {@code codePoint}. If the specified code
5154      * point is a supplementary code point, the resulting
5155      * {@code char} array has the corresponding surrogate pair.
5156      *
5157      * @param  codePoint a Unicode code point
5158      * @return a {@code char} array having
5159      *         {@code codePoint}'s UTF-16 representation.
5160      * @exception IllegalArgumentException if the specified
5161      * {@code codePoint} is not a valid Unicode code point.
5162      * @since  1.5
5163      */
5164     public static char[] toChars(int codePoint) {
5165         if (isBmpCodePoint(codePoint)) {
5166             return new char[] { (char) codePoint };
5167         } else if (isValidCodePoint(codePoint)) {
5168             char[] result = new char[2];
5169             toSurrogates(codePoint, result, 0);
5170             return result;
5171         } else {
5172             throw new IllegalArgumentException();
5173         }
5174     }
5175 
5176     static void toSurrogates(int codePoint, char[] dst, int index) {
5177         // We write elements "backwards" to guarantee all-or-nothing
5178         dst[index+1] = lowSurrogate(codePoint);
5179         dst[index] = highSurrogate(codePoint);
5180     }
5181 
5182     /**
5183      * Returns the number of Unicode code points in the text range of
5184      * the specified char sequence. The text range begins at the
5185      * specified {@code beginIndex} and extends to the
5186      * {@code char} at index {@code endIndex - 1}. Thus the
5187      * length (in {@code char}s) of the text range is
5188      * {@code endIndex-beginIndex}. Unpaired surrogates within
5189      * the text range count as one code point each.
5190      *
5191      * @param seq the char sequence
5192      * @param beginIndex the index to the first {@code char} of
5193      * the text range.
5194      * @param endIndex the index after the last {@code char} of
5195      * the text range.
5196      * @return the number of Unicode code points in the specified text
5197      * range
5198      * @exception NullPointerException if {@code seq} is null.
5199      * @exception IndexOutOfBoundsException if the
5200      * {@code beginIndex} is negative, or {@code endIndex}
5201      * is larger than the length of the given sequence, or
5202      * {@code beginIndex} is larger than {@code endIndex}.
5203      * @since  1.5
5204      */
5205     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5206         int length = seq.length();
5207         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5208             throw new IndexOutOfBoundsException();
5209         }
5210         int n = endIndex - beginIndex;
5211         for (int i = beginIndex; i < endIndex; ) {
5212             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5213                 isLowSurrogate(seq.charAt(i))) {
5214                 n--;
5215                 i++;
5216             }
5217         }
5218         return n;
5219     }
5220 
5221     /**
5222      * Returns the number of Unicode code points in a subarray of the
5223      * {@code char} array argument. The {@code offset}
5224      * argument is the index of the first {@code char} of the
5225      * subarray and the {@code count} argument specifies the
5226      * length of the subarray in {@code char}s. Unpaired
5227      * surrogates within the subarray count as one code point each.
5228      *
5229      * @param a the {@code char} array
5230      * @param offset the index of the first {@code char} in the
5231      * given {@code char} array
5232      * @param count the length of the subarray in {@code char}s
5233      * @return the number of Unicode code points in the specified subarray
5234      * @exception NullPointerException if {@code a} is null.
5235      * @exception IndexOutOfBoundsException if {@code offset} or
5236      * {@code count} is negative, or if {@code offset +
5237      * count} is larger than the length of the given array.
5238      * @since  1.5
5239      */
5240     public static int codePointCount(char[] a, int offset, int count) {
5241         if (count > a.length - offset || offset < 0 || count < 0) {
5242             throw new IndexOutOfBoundsException();
5243         }
5244         return codePointCountImpl(a, offset, count);
5245     }
5246 
5247     static int codePointCountImpl(char[] a, int offset, int count) {
5248         int endIndex = offset + count;
5249         int n = count;
5250         for (int i = offset; i < endIndex; ) {
5251             if (isHighSurrogate(a[i++]) && i < endIndex &&
5252                 isLowSurrogate(a[i])) {
5253                 n--;
5254                 i++;
5255             }
5256         }
5257         return n;
5258     }
5259 
5260     /**
5261      * Returns the index within the given char sequence that is offset
5262      * from the given {@code index} by {@code codePointOffset}
5263      * code points. Unpaired surrogates within the text range given by
5264      * {@code index} and {@code codePointOffset} count as
5265      * one code point each.
5266      *
5267      * @param seq the char sequence
5268      * @param index the index to be offset
5269      * @param codePointOffset the offset in code points
5270      * @return the index within the char sequence
5271      * @exception NullPointerException if {@code seq} is null.
5272      * @exception IndexOutOfBoundsException if {@code index}
5273      *   is negative or larger than the length of the char sequence,
5274      *   or if {@code codePointOffset} is positive and the
5275      *   subsequence starting with {@code index} has fewer than
5276      *   {@code codePointOffset} code points, or if
5277      *   {@code codePointOffset} is negative and the subsequence
5278      *   before {@code index} has fewer than the absolute value
5279      *   of {@code codePointOffset} code points.
5280      * @since 1.5
5281      */
5282     public static int offsetByCodePoints(CharSequence seq, int index,
5283                                          int codePointOffset) {
5284         int length = seq.length();
5285         if (index < 0 || index > length) {
5286             throw new IndexOutOfBoundsException();
5287         }
5288 
5289         int x = index;
5290         if (codePointOffset >= 0) {
5291             int i;
5292             for (i = 0; x < length && i < codePointOffset; i++) {
5293                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5294                     isLowSurrogate(seq.charAt(x))) {
5295                     x++;
5296                 }
5297             }
5298             if (i < codePointOffset) {
5299                 throw new IndexOutOfBoundsException();
5300             }
5301         } else {
5302             int i;
5303             for (i = codePointOffset; x > 0 && i < 0; i++) {
5304                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5305                     isHighSurrogate(seq.charAt(x-1))) {
5306                     x--;
5307                 }
5308             }
5309             if (i < 0) {
5310                 throw new IndexOutOfBoundsException();
5311             }
5312         }
5313         return x;
5314     }
5315 
5316     /**
5317      * Returns the index within the given {@code char} subarray
5318      * that is offset from the given {@code index} by
5319      * {@code codePointOffset} code points. The
5320      * {@code start} and {@code count} arguments specify a
5321      * subarray of the {@code char} array. Unpaired surrogates
5322      * within the text range given by {@code index} and
5323      * {@code codePointOffset} count as one code point each.
5324      *
5325      * @param a the {@code char} array
5326      * @param start the index of the first {@code char} of the
5327      * subarray
5328      * @param count the length of the subarray in {@code char}s
5329      * @param index the index to be offset
5330      * @param codePointOffset the offset in code points
5331      * @return the index within the subarray
5332      * @exception NullPointerException if {@code a} is null.
5333      * @exception IndexOutOfBoundsException
5334      *   if {@code start} or {@code count} is negative,
5335      *   or if {@code start + count} is larger than the length of
5336      *   the given array,
5337      *   or if {@code index} is less than {@code start} or
5338      *   larger than {@code start + count},
5339      *   or if {@code codePointOffset} is positive and the text range
5340      *   starting with {@code index} and ending with {@code start + count - 1}
5341      *   has fewer than {@code codePointOffset} code
5342      *   points,
5343      *   or if {@code codePointOffset} is negative and the text range
5344      *   starting with {@code start} and ending with {@code index - 1}
5345      *   has fewer than the absolute value of
5346      *   {@code codePointOffset} code points.
5347      * @since 1.5
5348      */
5349     public static int offsetByCodePoints(char[] a, int start, int count,
5350                                          int index, int codePointOffset) {
5351         if (count > a.length-start || start < 0 || count < 0
5352             || index < start || index > start+count) {
5353             throw new IndexOutOfBoundsException();
5354         }
5355         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5356     }
5357 
5358     static int offsetByCodePointsImpl(char[]a, int start, int count,
5359                                       int index, int codePointOffset) {
5360         int x = index;
5361         if (codePointOffset >= 0) {
5362             int limit = start + count;
5363             int i;
5364             for (i = 0; x < limit && i < codePointOffset; i++) {
5365                 if (isHighSurrogate(a[x++]) && x < limit &&
5366                     isLowSurrogate(a[x])) {
5367                     x++;
5368                 }
5369             }
5370             if (i < codePointOffset) {
5371                 throw new IndexOutOfBoundsException();
5372             }
5373         } else {
5374             int i;
5375             for (i = codePointOffset; x > start && i < 0; i++) {
5376                 if (isLowSurrogate(a[--x]) && x > start &&
5377                     isHighSurrogate(a[x-1])) {
5378                     x--;
5379                 }
5380             }
5381             if (i < 0) {
5382                 throw new IndexOutOfBoundsException();
5383             }
5384         }
5385         return x;
5386     }
5387 
5388     /**
5389      * Determines if the specified character is a lowercase character.
5390      * <p>
5391      * A character is lowercase if its general category type, provided
5392      * by {@code Character.getType(ch)}, is
5393      * {@code LOWERCASE_LETTER}, or it has contributory property
5394      * Other_Lowercase as defined by the Unicode Standard.
5395      * <p>
5396      * The following are examples of lowercase characters:
5397      * <blockquote><pre>
5398      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5399      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
5400      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
5401      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
5402      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
5403      * </pre></blockquote>
5404      * <p> Many other Unicode characters are lowercase too.
5405      *
5406      * <p><b>Note:</b> This method cannot handle <a
5407      * href="#supplementary"> supplementary characters</a>. To support
5408      * all Unicode characters, including supplementary characters, use
5409      * the {@link #isLowerCase(int)} method.
5410      *
5411      * @param   ch   the character to be tested.
5412      * @return  {@code true} if the character is lowercase;
5413      *          {@code false} otherwise.
5414      * @see     Character#isUpperCase(char)
5415      * @see     Character#isTitleCase(char)
5416      * @see     Character#toLowerCase(char)
5417      * @see     Character#getType(char)
5418      */
5419     public static boolean isLowerCase(char ch) {
5420         return isLowerCase((int)ch);
5421     }
5422 
5423     /**
5424      * Determines if the specified character (Unicode code point) is a
5425      * lowercase character.
5426      * <p>
5427      * A character is lowercase if its general category type, provided
5428      * by {@link Character#getType getType(codePoint)}, is
5429      * {@code LOWERCASE_LETTER}, or it has contributory property
5430      * Other_Lowercase as defined by the Unicode Standard.
5431      * <p>
5432      * The following are examples of lowercase characters:
5433      * <blockquote><pre>
5434      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5435      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
5436      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
5437      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
5438      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
5439      * </pre></blockquote>
5440      * <p> Many other Unicode characters are lowercase too.
5441      *
5442      * @param   codePoint the character (Unicode code point) to be tested.
5443      * @return  {@code true} if the character is lowercase;
5444      *          {@code false} otherwise.
5445      * @see     Character#isUpperCase(int)
5446      * @see     Character#isTitleCase(int)
5447      * @see     Character#toLowerCase(int)
5448      * @see     Character#getType(int)
5449      * @since   1.5
5450      */
5451     public static boolean isLowerCase(int codePoint) {
5452         return getType(codePoint) == Character.LOWERCASE_LETTER ||
5453                CharacterData.of(codePoint).isOtherLowercase(codePoint);
5454     }
5455 
5456     /**
5457      * Determines if the specified character is an uppercase character.
5458      * <p>
5459      * A character is uppercase if its general category type, provided by
5460      * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
5461      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5462      * <p>
5463      * The following are examples of uppercase characters:
5464      * <blockquote><pre>
5465      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5466      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
5467      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
5468      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
5469      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
5470      * </pre></blockquote>
5471      * <p> Many other Unicode characters are uppercase too.
5472      *
5473      * <p><b>Note:</b> This method cannot handle <a
5474      * href="#supplementary"> supplementary characters</a>. To support
5475      * all Unicode characters, including supplementary characters, use
5476      * the {@link #isUpperCase(int)} method.
5477      *
5478      * @param   ch   the character to be tested.
5479      * @return  {@code true} if the character is uppercase;
5480      *          {@code false} otherwise.
5481      * @see     Character#isLowerCase(char)
5482      * @see     Character#isTitleCase(char)
5483      * @see     Character#toUpperCase(char)
5484      * @see     Character#getType(char)
5485      * @since   1.0
5486      */
5487     public static boolean isUpperCase(char ch) {
5488         return isUpperCase((int)ch);
5489     }
5490 
5491     /**
5492      * Determines if the specified character (Unicode code point) is an uppercase character.
5493      * <p>
5494      * A character is uppercase if its general category type, provided by
5495      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5496      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5497      * <p>
5498      * The following are examples of uppercase characters:
5499      * <blockquote><pre>
5500      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5501      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
5502      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
5503      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
5504      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
5505      * </pre></blockquote>
5506      * <p> Many other Unicode characters are uppercase too.
5507      *
5508      * @param   codePoint the character (Unicode code point) to be tested.
5509      * @return  {@code true} if the character is uppercase;
5510      *          {@code false} otherwise.
5511      * @see     Character#isLowerCase(int)
5512      * @see     Character#isTitleCase(int)
5513      * @see     Character#toUpperCase(int)
5514      * @see     Character#getType(int)
5515      * @since   1.5
5516      */
5517     public static boolean isUpperCase(int codePoint) {
5518         return getType(codePoint) == Character.UPPERCASE_LETTER ||
5519                CharacterData.of(codePoint).isOtherUppercase(codePoint);
5520     }
5521 
5522     /**
5523      * Determines if the specified character is a titlecase character.
5524      * <p>
5525      * A character is a titlecase character if its general
5526      * category type, provided by {@code Character.getType(ch)},
5527      * is {@code TITLECASE_LETTER}.
5528      * <p>
5529      * Some characters look like pairs of Latin letters. For example, there
5530      * is an uppercase letter that looks like "LJ" and has a corresponding
5531      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5532      * is the appropriate form to use when rendering a word in lowercase
5533      * with initial capitals, as for a book title.
5534      * <p>
5535      * These are some of the Unicode characters for which this method returns
5536      * {@code true}:
5537      * <ul>
5538      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5539      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5540      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5541      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5542      * </ul>
5543      * <p> Many other Unicode characters are titlecase too.
5544      *
5545      * <p><b>Note:</b> This method cannot handle <a
5546      * href="#supplementary"> supplementary characters</a>. To support
5547      * all Unicode characters, including supplementary characters, use
5548      * the {@link #isTitleCase(int)} method.
5549      *
5550      * @param   ch   the character to be tested.
5551      * @return  {@code true} if the character is titlecase;
5552      *          {@code false} otherwise.
5553      * @see     Character#isLowerCase(char)
5554      * @see     Character#isUpperCase(char)
5555      * @see     Character#toTitleCase(char)
5556      * @see     Character#getType(char)
5557      * @since   1.0.2
5558      */
5559     public static boolean isTitleCase(char ch) {
5560         return isTitleCase((int)ch);
5561     }
5562 
5563     /**
5564      * Determines if the specified character (Unicode code point) is a titlecase character.
5565      * <p>
5566      * A character is a titlecase character if its general
5567      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5568      * is {@code TITLECASE_LETTER}.
5569      * <p>
5570      * Some characters look like pairs of Latin letters. For example, there
5571      * is an uppercase letter that looks like "LJ" and has a corresponding
5572      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5573      * is the appropriate form to use when rendering a word in lowercase
5574      * with initial capitals, as for a book title.
5575      * <p>
5576      * These are some of the Unicode characters for which this method returns
5577      * {@code true}:
5578      * <ul>
5579      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5580      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5581      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5582      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5583      * </ul>
5584      * <p> Many other Unicode characters are titlecase too.
5585      *
5586      * @param   codePoint the character (Unicode code point) to be tested.
5587      * @return  {@code true} if the character is titlecase;
5588      *          {@code false} otherwise.
5589      * @see     Character#isLowerCase(int)
5590      * @see     Character#isUpperCase(int)
5591      * @see     Character#toTitleCase(int)
5592      * @see     Character#getType(int)
5593      * @since   1.5
5594      */
5595     public static boolean isTitleCase(int codePoint) {
5596         return getType(codePoint) == Character.TITLECASE_LETTER;
5597     }
5598 
5599     /**
5600      * Determines if the specified character is a digit.
5601      * <p>
5602      * A character is a digit if its general category type, provided
5603      * by {@code Character.getType(ch)}, is
5604      * {@code DECIMAL_DIGIT_NUMBER}.
5605      * <p>
5606      * Some Unicode character ranges that contain digits:
5607      * <ul>
5608      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5609      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5610      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5611      *     Arabic-Indic digits
5612      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5613      *     Extended Arabic-Indic digits
5614      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5615      *     Devanagari digits
5616      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5617      *     Fullwidth digits
5618      * </ul>
5619      *
5620      * Many other character ranges contain digits as well.
5621      *
5622      * <p><b>Note:</b> This method cannot handle <a
5623      * href="#supplementary"> supplementary characters</a>. To support
5624      * all Unicode characters, including supplementary characters, use
5625      * the {@link #isDigit(int)} method.
5626      *
5627      * @param   ch   the character to be tested.
5628      * @return  {@code true} if the character is a digit;
5629      *          {@code false} otherwise.
5630      * @see     Character#digit(char, int)
5631      * @see     Character#forDigit(int, int)
5632      * @see     Character#getType(char)
5633      */
5634     public static boolean isDigit(char ch) {
5635         return isDigit((int)ch);
5636     }
5637 
5638     /**
5639      * Determines if the specified character (Unicode code point) is a digit.
5640      * <p>
5641      * A character is a digit if its general category type, provided
5642      * by {@link Character#getType(int) getType(codePoint)}, is
5643      * {@code DECIMAL_DIGIT_NUMBER}.
5644      * <p>
5645      * Some Unicode character ranges that contain digits:
5646      * <ul>
5647      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5648      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5649      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5650      *     Arabic-Indic digits
5651      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5652      *     Extended Arabic-Indic digits
5653      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5654      *     Devanagari digits
5655      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5656      *     Fullwidth digits
5657      * </ul>
5658      *
5659      * Many other character ranges contain digits as well.
5660      *
5661      * @param   codePoint the character (Unicode code point) to be tested.
5662      * @return  {@code true} if the character is a digit;
5663      *          {@code false} otherwise.
5664      * @see     Character#forDigit(int, int)
5665      * @see     Character#getType(int)
5666      * @since   1.5
5667      */
5668     public static boolean isDigit(int codePoint) {
5669         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
5670     }
5671 
5672     /**
5673      * Determines if a character is defined in Unicode.
5674      * <p>
5675      * A character is defined if at least one of the following is true:
5676      * <ul>
5677      * <li>It has an entry in the UnicodeData file.
5678      * <li>It has a value in a range defined by the UnicodeData file.
5679      * </ul>
5680      *
5681      * <p><b>Note:</b> This method cannot handle <a
5682      * href="#supplementary"> supplementary characters</a>. To support
5683      * all Unicode characters, including supplementary characters, use
5684      * the {@link #isDefined(int)} method.
5685      *
5686      * @param   ch   the character to be tested
5687      * @return  {@code true} if the character has a defined meaning
5688      *          in Unicode; {@code false} otherwise.
5689      * @see     Character#isDigit(char)
5690      * @see     Character#isLetter(char)
5691      * @see     Character#isLetterOrDigit(char)
5692      * @see     Character#isLowerCase(char)
5693      * @see     Character#isTitleCase(char)
5694      * @see     Character#isUpperCase(char)
5695      * @since   1.0.2
5696      */
5697     public static boolean isDefined(char ch) {
5698         return isDefined((int)ch);
5699     }
5700 
5701     /**
5702      * Determines if a character (Unicode code point) is defined in Unicode.
5703      * <p>
5704      * A character is defined if at least one of the following is true:
5705      * <ul>
5706      * <li>It has an entry in the UnicodeData file.
5707      * <li>It has a value in a range defined by the UnicodeData file.
5708      * </ul>
5709      *
5710      * @param   codePoint the character (Unicode code point) to be tested.
5711      * @return  {@code true} if the character has a defined meaning
5712      *          in Unicode; {@code false} otherwise.
5713      * @see     Character#isDigit(int)
5714      * @see     Character#isLetter(int)
5715      * @see     Character#isLetterOrDigit(int)
5716      * @see     Character#isLowerCase(int)
5717      * @see     Character#isTitleCase(int)
5718      * @see     Character#isUpperCase(int)
5719      * @since   1.5
5720      */
5721     public static boolean isDefined(int codePoint) {
5722         return getType(codePoint) != Character.UNASSIGNED;
5723     }
5724 
5725     /**
5726      * Determines if the specified character is a letter.
5727      * <p>
5728      * A character is considered to be a letter if its general
5729      * category type, provided by {@code Character.getType(ch)},
5730      * is any of the following:
5731      * <ul>
5732      * <li> {@code UPPERCASE_LETTER}
5733      * <li> {@code LOWERCASE_LETTER}
5734      * <li> {@code TITLECASE_LETTER}
5735      * <li> {@code MODIFIER_LETTER}
5736      * <li> {@code OTHER_LETTER}
5737      * </ul>
5738      *
5739      * Not all letters have case. Many characters are
5740      * letters but are neither uppercase nor lowercase nor titlecase.
5741      *
5742      * <p><b>Note:</b> This method cannot handle <a
5743      * href="#supplementary"> supplementary characters</a>. To support
5744      * all Unicode characters, including supplementary characters, use
5745      * the {@link #isLetter(int)} method.
5746      *
5747      * @param   ch   the character to be tested.
5748      * @return  {@code true} if the character is a letter;
5749      *          {@code false} otherwise.
5750      * @see     Character#isDigit(char)
5751      * @see     Character#isJavaIdentifierStart(char)
5752      * @see     Character#isJavaLetter(char)
5753      * @see     Character#isJavaLetterOrDigit(char)
5754      * @see     Character#isLetterOrDigit(char)
5755      * @see     Character#isLowerCase(char)
5756      * @see     Character#isTitleCase(char)
5757      * @see     Character#isUnicodeIdentifierStart(char)
5758      * @see     Character#isUpperCase(char)
5759      */
5760     public static boolean isLetter(char ch) {
5761         return isLetter((int)ch);
5762     }
5763 
5764     /**
5765      * Determines if the specified character (Unicode code point) is a letter.
5766      * <p>
5767      * A character is considered to be a letter if its general
5768      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5769      * is any of the following:
5770      * <ul>
5771      * <li> {@code UPPERCASE_LETTER}
5772      * <li> {@code LOWERCASE_LETTER}
5773      * <li> {@code TITLECASE_LETTER}
5774      * <li> {@code MODIFIER_LETTER}
5775      * <li> {@code OTHER_LETTER}
5776      * </ul>
5777      *
5778      * Not all letters have case. Many characters are
5779      * letters but are neither uppercase nor lowercase nor titlecase.
5780      *
5781      * @param   codePoint the character (Unicode code point) to be tested.
5782      * @return  {@code true} if the character is a letter;
5783      *          {@code false} otherwise.
5784      * @see     Character#isDigit(int)
5785      * @see     Character#isJavaIdentifierStart(int)
5786      * @see     Character#isLetterOrDigit(int)
5787      * @see     Character#isLowerCase(int)
5788      * @see     Character#isTitleCase(int)
5789      * @see     Character#isUnicodeIdentifierStart(int)
5790      * @see     Character#isUpperCase(int)
5791      * @since   1.5
5792      */
5793     public static boolean isLetter(int codePoint) {
5794         return ((((1 << Character.UPPERCASE_LETTER) |
5795             (1 << Character.LOWERCASE_LETTER) |
5796             (1 << Character.TITLECASE_LETTER) |
5797             (1 << Character.MODIFIER_LETTER) |
5798             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
5799             != 0;
5800     }
5801 
5802     /**
5803      * Determines if the specified character is a letter or digit.
5804      * <p>
5805      * A character is considered to be a letter or digit if either
5806      * {@code Character.isLetter(char ch)} or
5807      * {@code Character.isDigit(char ch)} returns
5808      * {@code true} for the character.
5809      *
5810      * <p><b>Note:</b> This method cannot handle <a
5811      * href="#supplementary"> supplementary characters</a>. To support
5812      * all Unicode characters, including supplementary characters, use
5813      * the {@link #isLetterOrDigit(int)} method.
5814      *
5815      * @param   ch   the character to be tested.
5816      * @return  {@code true} if the character is a letter or digit;
5817      *          {@code false} otherwise.
5818      * @see     Character#isDigit(char)
5819      * @see     Character#isJavaIdentifierPart(char)
5820      * @see     Character#isJavaLetter(char)
5821      * @see     Character#isJavaLetterOrDigit(char)
5822      * @see     Character#isLetter(char)
5823      * @see     Character#isUnicodeIdentifierPart(char)
5824      * @since   1.0.2
5825      */
5826     public static boolean isLetterOrDigit(char ch) {
5827         return isLetterOrDigit((int)ch);
5828     }
5829 
5830     /**
5831      * Determines if the specified character (Unicode code point) is a letter or digit.
5832      * <p>
5833      * A character is considered to be a letter or digit if either
5834      * {@link #isLetter(int) isLetter(codePoint)} or
5835      * {@link #isDigit(int) isDigit(codePoint)} returns
5836      * {@code true} for the character.
5837      *
5838      * @param   codePoint the character (Unicode code point) to be tested.
5839      * @return  {@code true} if the character is a letter or digit;
5840      *          {@code false} otherwise.
5841      * @see     Character#isDigit(int)
5842      * @see     Character#isJavaIdentifierPart(int)
5843      * @see     Character#isLetter(int)
5844      * @see     Character#isUnicodeIdentifierPart(int)
5845      * @since   1.5
5846      */
5847     public static boolean isLetterOrDigit(int codePoint) {
5848         return ((((1 << Character.UPPERCASE_LETTER) |
5849             (1 << Character.LOWERCASE_LETTER) |
5850             (1 << Character.TITLECASE_LETTER) |
5851             (1 << Character.MODIFIER_LETTER) |
5852             (1 << Character.OTHER_LETTER) |
5853             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
5854             != 0;
5855     }
5856 
5857     /**
5858      * Determines if the specified character is permissible as the first
5859      * character in a Java identifier.
5860      * <p>
5861      * A character may start a Java identifier if and only if
5862      * one of the following is true:
5863      * <ul>
5864      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5865      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5866      * <li> {@code ch} is a currency symbol (such as {@code '$'})
5867      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5868      * </ul>
5869      *
5870      * @param   ch the character to be tested.
5871      * @return  {@code true} if the character may start a Java
5872      *          identifier; {@code false} otherwise.
5873      * @see     Character#isJavaLetterOrDigit(char)
5874      * @see     Character#isJavaIdentifierStart(char)
5875      * @see     Character#isJavaIdentifierPart(char)
5876      * @see     Character#isLetter(char)
5877      * @see     Character#isLetterOrDigit(char)
5878      * @see     Character#isUnicodeIdentifierStart(char)
     * @since   1.0.2
5880      * @deprecated Replaced by isJavaIdentifierStart(char).
5881      */
5882     @Deprecated
5883     public static boolean isJavaLetter(char ch) {
5884         return isJavaIdentifierStart(ch);
5885     }
5886 
5887     /**
5888      * Determines if the specified character may be part of a Java
5889      * identifier as other than the first character.
5890      * <p>
5891      * A character may be part of a Java identifier if and only if any
5892      * of the following are true:
5893      * <ul>
5894      * <li>  it is a letter
5895      * <li>  it is a currency symbol (such as {@code '$'})
5896      * <li>  it is a connecting punctuation character (such as {@code '_'})
5897      * <li>  it is a digit
5898      * <li>  it is a numeric letter (such as a Roman numeral character)
5899      * <li>  it is a combining mark
5900      * <li>  it is a non-spacing mark
5901      * <li> {@code isIdentifierIgnorable} returns
5902      * {@code true} for the character.
5903      * </ul>
5904      *
5905      * @param   ch the character to be tested.
5906      * @return  {@code true} if the character may be part of a
5907      *          Java identifier; {@code false} otherwise.
5908      * @see     Character#isJavaLetter(char)
5909      * @see     Character#isJavaIdentifierStart(char)
5910      * @see     Character#isJavaIdentifierPart(char)
5911      * @see     Character#isLetter(char)
5912      * @see     Character#isLetterOrDigit(char)
5913      * @see     Character#isUnicodeIdentifierPart(char)
5914      * @see     Character#isIdentifierIgnorable(char)
     * @since   1.0.2
5916      * @deprecated Replaced by isJavaIdentifierPart(char).
5917      */
5918     @Deprecated
5919     public static boolean isJavaLetterOrDigit(char ch) {
5920         return isJavaIdentifierPart(ch);
5921     }
5922 
5923     /**
     * Determines if the specified character (Unicode code point) is alphabetic.
5925      * <p>
5926      * A character is considered to be alphabetic if its general category type,
5927      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
5928      * the following:
5929      * <ul>
5930      * <li> <code>UPPERCASE_LETTER</code>
5931      * <li> <code>LOWERCASE_LETTER</code>
5932      * <li> <code>TITLECASE_LETTER</code>
5933      * <li> <code>MODIFIER_LETTER</code>
5934      * <li> <code>OTHER_LETTER</code>
5935      * <li> <code>LETTER_NUMBER</code>
5936      * </ul>
5937      * or it has contributory property Other_Alphabetic as defined by the
5938      * Unicode Standard.
5939      *
5940      * @param   codePoint the character (Unicode code point) to be tested.
5941      * @return  <code>true</code> if the character is a Unicode alphabet
5942      *          character, <code>false</code> otherwise.
5943      * @since   1.7
5944      */
5945     public static boolean isAlphabetic(int codePoint) {
5946         return (((((1 << Character.UPPERCASE_LETTER) |
5947             (1 << Character.LOWERCASE_LETTER) |
5948             (1 << Character.TITLECASE_LETTER) |
5949             (1 << Character.MODIFIER_LETTER) |
5950             (1 << Character.OTHER_LETTER) |
5951             (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
5952             CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
5953     }
5954 
5955     /**
5956      * Determines if the specified character (Unicode code point) is a CJKV
5957      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
5958      * the Unicode Standard.
5959      *
5960      * @param   codePoint the character (Unicode code point) to be tested.
5961      * @return  <code>true</code> if the character is a Unicode ideograph
5962      *          character, <code>false</code> otherwise.
5963      * @since   1.7
5964      */
5965     public static boolean isIdeographic(int codePoint) {
5966         return CharacterData.of(codePoint).isIdeographic(codePoint);
5967     }
5968 
5969     /**
5970      * Determines if the specified character is
5971      * permissible as the first character in a Java identifier.
5972      * <p>
5973      * A character may start a Java identifier if and only if
5974      * one of the following conditions is true:
5975      * <ul>
5976      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5977      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5978      * <li> {@code ch} is a currency symbol (such as {@code '$'})
5979      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5980      * </ul>
5981      *
5982      * <p><b>Note:</b> This method cannot handle <a
5983      * href="#supplementary"> supplementary characters</a>. To support
5984      * all Unicode characters, including supplementary characters, use
5985      * the {@link #isJavaIdentifierStart(int)} method.
5986      *
5987      * @param   ch the character to be tested.
5988      * @return  {@code true} if the character may start a Java identifier;
5989      *          {@code false} otherwise.
5990      * @see     Character#isJavaIdentifierPart(char)
5991      * @see     Character#isLetter(char)
5992      * @see     Character#isUnicodeIdentifierStart(char)
5993      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5994      * @since   1.1
5995      */
5996     public static boolean isJavaIdentifierStart(char ch) {
5997         return isJavaIdentifierStart((int)ch);
5998     }
5999 
6000     /**
6001      * Determines if the character (Unicode code point) is
6002      * permissible as the first character in a Java identifier.
6003      * <p>
6004      * A character may start a Java identifier if and only if
6005      * one of the following conditions is true:
6006      * <ul>
6007      * <li> {@link #isLetter(int) isLetter(codePoint)}
6008      *      returns {@code true}
6009      * <li> {@link #getType(int) getType(codePoint)}
6010      *      returns {@code LETTER_NUMBER}
6011      * <li> the referenced character is a currency symbol (such as {@code '$'})
6012      * <li> the referenced character is a connecting punctuation character
6013      *      (such as {@code '_'}).
6014      * </ul>
6015      *
6016      * @param   codePoint the character (Unicode code point) to be tested.
6017      * @return  {@code true} if the character may start a Java identifier;
6018      *          {@code false} otherwise.
6019      * @see     Character#isJavaIdentifierPart(int)
6020      * @see     Character#isLetter(int)
6021      * @see     Character#isUnicodeIdentifierStart(int)
6022      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6023      * @since   1.5
6024      */
6025     public static boolean isJavaIdentifierStart(int codePoint) {
6026         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
6027     }
6028 
6029     /**
6030      * Determines if the specified character may be part of a Java
6031      * identifier as other than the first character.
6032      * <p>
6033      * A character may be part of a Java identifier if any of the following
6034      * are true:
6035      * <ul>
6036      * <li>  it is a letter
6037      * <li>  it is a currency symbol (such as {@code '$'})
6038      * <li>  it is a connecting punctuation character (such as {@code '_'})
6039      * <li>  it is a digit
6040      * <li>  it is a numeric letter (such as a Roman numeral character)
6041      * <li>  it is a combining mark
6042      * <li>  it is a non-spacing mark
6043      * <li> {@code isIdentifierIgnorable} returns
6044      * {@code true} for the character
6045      * </ul>
6046      *
6047      * <p><b>Note:</b> This method cannot handle <a
6048      * href="#supplementary"> supplementary characters</a>. To support
6049      * all Unicode characters, including supplementary characters, use
6050      * the {@link #isJavaIdentifierPart(int)} method.
6051      *
6052      * @param   ch      the character to be tested.
6053      * @return {@code true} if the character may be part of a
6054      *          Java identifier; {@code false} otherwise.
6055      * @see     Character#isIdentifierIgnorable(char)
6056      * @see     Character#isJavaIdentifierStart(char)
6057      * @see     Character#isLetterOrDigit(char)
6058      * @see     Character#isUnicodeIdentifierPart(char)
6059      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6060      * @since   1.1
6061      */
6062     public static boolean isJavaIdentifierPart(char ch) {
6063         return isJavaIdentifierPart((int)ch);
6064     }
6065 
6066     /**
6067      * Determines if the character (Unicode code point) may be part of a Java
6068      * identifier as other than the first character.
6069      * <p>
6070      * A character may be part of a Java identifier if any of the following
6071      * are true:
6072      * <ul>
6073      * <li>  it is a letter
6074      * <li>  it is a currency symbol (such as {@code '$'})
6075      * <li>  it is a connecting punctuation character (such as {@code '_'})
6076      * <li>  it is a digit
6077      * <li>  it is a numeric letter (such as a Roman numeral character)
6078      * <li>  it is a combining mark
6079      * <li>  it is a non-spacing mark
6080      * <li> {@link #isIdentifierIgnorable(int)
6081      * isIdentifierIgnorable(codePoint)} returns {@code true} for
6082      * the character
6083      * </ul>
6084      *
6085      * @param   codePoint the character (Unicode code point) to be tested.
6086      * @return {@code true} if the character may be part of a
6087      *          Java identifier; {@code false} otherwise.
6088      * @see     Character#isIdentifierIgnorable(int)
6089      * @see     Character#isJavaIdentifierStart(int)
6090      * @see     Character#isLetterOrDigit(int)
6091      * @see     Character#isUnicodeIdentifierPart(int)
6092      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6093      * @since   1.5
6094      */
6095     public static boolean isJavaIdentifierPart(int codePoint) {
6096         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
6097     }
6098 
6099     /**
6100      * Determines if the specified character is permissible as the
6101      * first character in a Unicode identifier.
6102      * <p>
6103      * A character may start a Unicode identifier if and only if
6104      * one of the following conditions is true:
6105      * <ul>
6106      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6107      * <li> {@link #getType(char) getType(ch)} returns
6108      *      {@code LETTER_NUMBER}.
6109      * </ul>
6110      *
6111      * <p><b>Note:</b> This method cannot handle <a
6112      * href="#supplementary"> supplementary characters</a>. To support
6113      * all Unicode characters, including supplementary characters, use
6114      * the {@link #isUnicodeIdentifierStart(int)} method.
6115      *
6116      * @param   ch      the character to be tested.
6117      * @return  {@code true} if the character may start a Unicode
6118      *          identifier; {@code false} otherwise.
6119      * @see     Character#isJavaIdentifierStart(char)
6120      * @see     Character#isLetter(char)
6121      * @see     Character#isUnicodeIdentifierPart(char)
6122      * @since   1.1
6123      */
6124     public static boolean isUnicodeIdentifierStart(char ch) {
6125         return isUnicodeIdentifierStart((int)ch);
6126     }
6127 
6128     /**
6129      * Determines if the specified character (Unicode code point) is permissible as the
6130      * first character in a Unicode identifier.
6131      * <p>
6132      * A character may start a Unicode identifier if and only if
6133      * one of the following conditions is true:
6134      * <ul>
6135      * <li> {@link #isLetter(int) isLetter(codePoint)}
6136      *      returns {@code true}
6137      * <li> {@link #getType(int) getType(codePoint)}
6138      *      returns {@code LETTER_NUMBER}.
6139      * </ul>
6140      * @param   codePoint the character (Unicode code point) to be tested.
6141      * @return  {@code true} if the character may start a Unicode
6142      *          identifier; {@code false} otherwise.
6143      * @see     Character#isJavaIdentifierStart(int)
6144      * @see     Character#isLetter(int)
6145      * @see     Character#isUnicodeIdentifierPart(int)
6146      * @since   1.5
6147      */
6148     public static boolean isUnicodeIdentifierStart(int codePoint) {
6149         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
6150     }
6151 
6152     /**
6153      * Determines if the specified character may be part of a Unicode
6154      * identifier as other than the first character.
6155      * <p>
6156      * A character may be part of a Unicode identifier if and only if
6157      * one of the following statements is true:
6158      * <ul>
6159      * <li>  it is a letter
6160      * <li>  it is a connecting punctuation character (such as {@code '_'})
6161      * <li>  it is a digit
6162      * <li>  it is a numeric letter (such as a Roman numeral character)
6163      * <li>  it is a combining mark
6164      * <li>  it is a non-spacing mark
6165      * <li> {@code isIdentifierIgnorable} returns
6166      * {@code true} for this character.
6167      * </ul>
6168      *
6169      * <p><b>Note:</b> This method cannot handle <a
6170      * href="#supplementary"> supplementary characters</a>. To support
6171      * all Unicode characters, including supplementary characters, use
6172      * the {@link #isUnicodeIdentifierPart(int)} method.
6173      *
6174      * @param   ch      the character to be tested.
6175      * @return  {@code true} if the character may be part of a
6176      *          Unicode identifier; {@code false} otherwise.
6177      * @see     Character#isIdentifierIgnorable(char)
6178      * @see     Character#isJavaIdentifierPart(char)
6179      * @see     Character#isLetterOrDigit(char)
6180      * @see     Character#isUnicodeIdentifierStart(char)
6181      * @since   1.1
6182      */
6183     public static boolean isUnicodeIdentifierPart(char ch) {
6184         return isUnicodeIdentifierPart((int)ch);
6185     }
6186 
6187     /**
6188      * Determines if the specified character (Unicode code point) may be part of a Unicode
6189      * identifier as other than the first character.
6190      * <p>
6191      * A character may be part of a Unicode identifier if and only if
6192      * one of the following statements is true:
6193      * <ul>
6194      * <li>  it is a letter
6195      * <li>  it is a connecting punctuation character (such as {@code '_'})
6196      * <li>  it is a digit
6197      * <li>  it is a numeric letter (such as a Roman numeral character)
6198      * <li>  it is a combining mark
6199      * <li>  it is a non-spacing mark
6200      * <li> {@code isIdentifierIgnorable} returns
6201      * {@code true} for this character.
6202      * </ul>
6203      * @param   codePoint the character (Unicode code point) to be tested.
6204      * @return  {@code true} if the character may be part of a
6205      *          Unicode identifier; {@code false} otherwise.
6206      * @see     Character#isIdentifierIgnorable(int)
6207      * @see     Character#isJavaIdentifierPart(int)
6208      * @see     Character#isLetterOrDigit(int)
6209      * @see     Character#isUnicodeIdentifierStart(int)
6210      * @since   1.5
6211      */
6212     public static boolean isUnicodeIdentifierPart(int codePoint) {
6213         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
6214     }
6215 
6216     /**
6217      * Determines if the specified character should be regarded as
6218      * an ignorable character in a Java identifier or a Unicode identifier.
6219      * <p>
6220      * The following Unicode characters are ignorable in a Java identifier
6221      * or a Unicode identifier:
6222      * <ul>
6223      * <li>ISO control characters that are not whitespace
6224      * <ul>
6225      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6226      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6227      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6228      * </ul>
6229      *
6230      * <li>all characters that have the {@code FORMAT} general
6231      * category value
6232      * </ul>
6233      *
6234      * <p><b>Note:</b> This method cannot handle <a
6235      * href="#supplementary"> supplementary characters</a>. To support
6236      * all Unicode characters, including supplementary characters, use
6237      * the {@link #isIdentifierIgnorable(int)} method.
6238      *
6239      * @param   ch      the character to be tested.
6240      * @return  {@code true} if the character is an ignorable control
6241      *          character that may be part of a Java or Unicode identifier;
6242      *           {@code false} otherwise.
6243      * @see     Character#isJavaIdentifierPart(char)
6244      * @see     Character#isUnicodeIdentifierPart(char)
6245      * @since   1.1
6246      */
6247     public static boolean isIdentifierIgnorable(char ch) {
6248         return isIdentifierIgnorable((int)ch);
6249     }
6250 
6251     /**
6252      * Determines if the specified character (Unicode code point) should be regarded as
6253      * an ignorable character in a Java identifier or a Unicode identifier.
6254      * <p>
6255      * The following Unicode characters are ignorable in a Java identifier
6256      * or a Unicode identifier:
6257      * <ul>
6258      * <li>ISO control characters that are not whitespace
6259      * <ul>
6260      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6261      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6262      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6263      * </ul>
6264      *
6265      * <li>all characters that have the {@code FORMAT} general
6266      * category value
6267      * </ul>
6268      *
6269      * @param   codePoint the character (Unicode code point) to be tested.
6270      * @return  {@code true} if the character is an ignorable control
6271      *          character that may be part of a Java or Unicode identifier;
6272      *          {@code false} otherwise.
6273      * @see     Character#isJavaIdentifierPart(int)
6274      * @see     Character#isUnicodeIdentifierPart(int)
6275      * @since   1.5
6276      */
6277     public static boolean isIdentifierIgnorable(int codePoint) {
6278         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
6279     }
6280 
6281     /**
6282      * Converts the character argument to lowercase using case
6283      * mapping information from the UnicodeData file.
6284      * <p>
6285      * Note that
6286      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
6287      * does not always return {@code true} for some ranges of
6288      * characters, particularly those that are symbols or ideographs.
6289      *
6290      * <p>In general, {@link String#toLowerCase()} should be used to map
6291      * characters to lowercase. {@code String} case mapping methods
6292      * have several benefits over {@code Character} case mapping methods.
6293      * {@code String} case mapping methods can perform locale-sensitive
6294      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6295      * the {@code Character} case mapping methods cannot.
6296      *
6297      * <p><b>Note:</b> This method cannot handle <a
6298      * href="#supplementary"> supplementary characters</a>. To support
6299      * all Unicode characters, including supplementary characters, use
6300      * the {@link #toLowerCase(int)} method.
6301      *
6302      * @param   ch   the character to be converted.
6303      * @return  the lowercase equivalent of the character, if any;
6304      *          otherwise, the character itself.
6305      * @see     Character#isLowerCase(char)
6306      * @see     String#toLowerCase()
6307      */
6308     public static char toLowerCase(char ch) {
6309         return (char)toLowerCase((int)ch);
6310     }
6311 
6312     /**
6313      * Converts the character (Unicode code point) argument to
6314      * lowercase using case mapping information from the UnicodeData
6315      * file.
6316      *
6317      * <p> Note that
6318      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
6319      * does not always return {@code true} for some ranges of
6320      * characters, particularly those that are symbols or ideographs.
6321      *
6322      * <p>In general, {@link String#toLowerCase()} should be used to map
6323      * characters to lowercase. {@code String} case mapping methods
6324      * have several benefits over {@code Character} case mapping methods.
6325      * {@code String} case mapping methods can perform locale-sensitive
6326      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6327      * the {@code Character} case mapping methods cannot.
6328      *
6329      * @param   codePoint   the character (Unicode code point) to be converted.
6330      * @return  the lowercase equivalent of the character (Unicode code
6331      *          point), if any; otherwise, the character itself.
6332      * @see     Character#isLowerCase(int)
6333      * @see     String#toLowerCase()
6334      *
6335      * @since   1.5
6336      */
6337     public static int toLowerCase(int codePoint) {
6338         return CharacterData.of(codePoint).toLowerCase(codePoint);
6339     }
6340 
6341     /**
6342      * Converts the character argument to uppercase using case mapping
6343      * information from the UnicodeData file.
6344      * <p>
6345      * Note that
6346      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
6347      * does not always return {@code true} for some ranges of
6348      * characters, particularly those that are symbols or ideographs.
6349      *
6350      * <p>In general, {@link String#toUpperCase()} should be used to map
6351      * characters to uppercase. {@code String} case mapping methods
6352      * have several benefits over {@code Character} case mapping methods.
6353      * {@code String} case mapping methods can perform locale-sensitive
6354      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6355      * the {@code Character} case mapping methods cannot.
6356      *
6357      * <p><b>Note:</b> This method cannot handle <a
6358      * href="#supplementary"> supplementary characters</a>. To support
6359      * all Unicode characters, including supplementary characters, use
6360      * the {@link #toUpperCase(int)} method.
6361      *
6362      * @param   ch   the character to be converted.
6363      * @return  the uppercase equivalent of the character, if any;
6364      *          otherwise, the character itself.
6365      * @see     Character#isUpperCase(char)
6366      * @see     String#toUpperCase()
6367      */
6368     public static char toUpperCase(char ch) {
6369         return (char)toUpperCase((int)ch);
6370     }
6371 
6372     /**
6373      * Converts the character (Unicode code point) argument to
6374      * uppercase using case mapping information from the UnicodeData
6375      * file.
6376      *
6377      * <p>Note that
6378      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
6379      * does not always return {@code true} for some ranges of
6380      * characters, particularly those that are symbols or ideographs.
6381      *
6382      * <p>In general, {@link String#toUpperCase()} should be used to map
6383      * characters to uppercase. {@code String} case mapping methods
6384      * have several benefits over {@code Character} case mapping methods.
6385      * {@code String} case mapping methods can perform locale-sensitive
6386      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6387      * the {@code Character} case mapping methods cannot.
6388      *
6389      * @param   codePoint   the character (Unicode code point) to be converted.
6390      * @return  the uppercase equivalent of the character, if any;
6391      *          otherwise, the character itself.
6392      * @see     Character#isUpperCase(int)
6393      * @see     String#toUpperCase()
6394      *
6395      * @since   1.5
6396      */
6397     public static int toUpperCase(int codePoint) {
6398         return CharacterData.of(codePoint).toUpperCase(codePoint);
6399     }
6400 
6401     /**
6402      * Converts the character argument to titlecase using case mapping
6403      * information from the UnicodeData file. If a character has no
6404      * explicit titlecase mapping and is not itself a titlecase char
6405      * according to UnicodeData, then the uppercase mapping is
6406      * returned as an equivalent titlecase mapping. If the
6407      * {@code char} argument is already a titlecase
6408      * {@code char}, the same {@code char} value will be
6409      * returned.
6410      * <p>
6411      * Note that
6412      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
6413      * does not always return {@code true} for some ranges of
6414      * characters.
6415      *
6416      * <p><b>Note:</b> This method cannot handle <a
6417      * href="#supplementary"> supplementary characters</a>. To support
6418      * all Unicode characters, including supplementary characters, use
6419      * the {@link #toTitleCase(int)} method.
6420      *
6421      * @param   ch   the character to be converted.
6422      * @return  the titlecase equivalent of the character, if any;
6423      *          otherwise, the character itself.
6424      * @see     Character#isTitleCase(char)
6425      * @see     Character#toLowerCase(char)
6426      * @see     Character#toUpperCase(char)
6427      * @since   1.0.2
6428      */
6429     public static char toTitleCase(char ch) {
6430         return (char)toTitleCase((int)ch);
6431     }
6432 
6433     /**
6434      * Converts the character (Unicode code point) argument to titlecase using case mapping
6435      * information from the UnicodeData file. If a character has no
6436      * explicit titlecase mapping and is not itself a titlecase char
6437      * according to UnicodeData, then the uppercase mapping is
6438      * returned as an equivalent titlecase mapping. If the
6439      * character argument is already a titlecase
6440      * character, the same character value will be
6441      * returned.
6442      *
6443      * <p>Note that
6444      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
6445      * does not always return {@code true} for some ranges of
6446      * characters.
6447      *
6448      * @param   codePoint   the character (Unicode code point) to be converted.
6449      * @return  the titlecase equivalent of the character, if any;
6450      *          otherwise, the character itself.
6451      * @see     Character#isTitleCase(int)
6452      * @see     Character#toLowerCase(int)
6453      * @see     Character#toUpperCase(int)
6454      * @since   1.5
6455      */
6456     public static int toTitleCase(int codePoint) {
6457         return CharacterData.of(codePoint).toTitleCase(codePoint);
6458     }
6459 
6460     /**
6461      * Returns the numeric value of the character {@code ch} in the
6462      * specified radix.
6463      * <p>
6464      * If the radix is not in the range {@code MIN_RADIX} &le;
6465      * {@code radix} &le; {@code MAX_RADIX} or if the
6466      * value of {@code ch} is not a valid digit in the specified
6467      * radix, {@code -1} is returned. A character is a valid digit
6468      * if at least one of the following is true:
6469      * <ul>
6470      * <li>The method {@code isDigit} is {@code true} of the character
6471      *     and the Unicode decimal digit value of the character (or its
6472      *     single-character decomposition) is less than the specified radix.
6473      *     In this case the decimal digit value is returned.
6474      * <li>The character is one of the uppercase Latin letters
6475      *     {@code 'A'} through {@code 'Z'} and its code is less than
6476      *     {@code radix + 'A' - 10}.
6477      *     In this case, {@code ch - 'A' + 10}
6478      *     is returned.
6479      * <li>The character is one of the lowercase Latin letters
6480      *     {@code 'a'} through {@code 'z'} and its code is less than
6481      *     {@code radix + 'a' - 10}.
6482      *     In this case, {@code ch - 'a' + 10}
6483      *     is returned.
6484      * <li>The character is one of the fullwidth uppercase Latin letters A
6485      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6486      *     and its code is less than
6487      *     {@code radix + '\u005CuFF21' - 10}.
6488      *     In this case, {@code ch - '\u005CuFF21' + 10}
6489      *     is returned.
6490      * <li>The character is one of the fullwidth lowercase Latin letters a
6491      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6492      *     and its code is less than
6493      *     {@code radix + '\u005CuFF41' - 10}.
6494      *     In this case, {@code ch - '\u005CuFF41' + 10}
6495      *     is returned.
6496      * </ul>
6497      *
6498      * <p><b>Note:</b> This method cannot handle <a
6499      * href="#supplementary"> supplementary characters</a>. To support
6500      * all Unicode characters, including supplementary characters, use
6501      * the {@link #digit(int, int)} method.
6502      *
6503      * @param   ch      the character to be converted.
6504      * @param   radix   the radix.
6505      * @return  the numeric value represented by the character in the
6506      *          specified radix.
6507      * @see     Character#forDigit(int, int)
6508      * @see     Character#isDigit(char)
6509      */
6510     public static int digit(char ch, int radix) {
6511         return digit((int)ch, radix);
6512     }
6513 
6514     /**
6515      * Returns the numeric value of the specified character (Unicode
6516      * code point) in the specified radix.
6517      *
6518      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
6519      * {@code radix} &le; {@code MAX_RADIX} or if the
6520      * character is not a valid digit in the specified
6521      * radix, {@code -1} is returned. A character is a valid digit
6522      * if at least one of the following is true:
6523      * <ul>
6524      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
6525      *     and the Unicode decimal digit value of the character (or its
6526      *     single-character decomposition) is less than the specified radix.
6527      *     In this case the decimal digit value is returned.
6528      * <li>The character is one of the uppercase Latin letters
6529      *     {@code 'A'} through {@code 'Z'} and its code is less than
6530      *     {@code radix + 'A' - 10}.
6531      *     In this case, {@code codePoint - 'A' + 10}
6532      *     is returned.
6533      * <li>The character is one of the lowercase Latin letters
6534      *     {@code 'a'} through {@code 'z'} and its code is less than
6535      *     {@code radix + 'a' - 10}.
6536      *     In this case, {@code codePoint - 'a' + 10}
6537      *     is returned.
6538      * <li>The character is one of the fullwidth uppercase Latin letters A
6539      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6540      *     and its code is less than
6541      *     {@code radix + '\u005CuFF21' - 10}.
6542      *     In this case,
6543      *     {@code codePoint - '\u005CuFF21' + 10}
6544      *     is returned.
6545      * <li>The character is one of the fullwidth lowercase Latin letters a
6546      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6547      *     and its code is less than
     *     {@code radix + '\u005CuFF41' - 10}.
6549      *     In this case,
6550      *     {@code codePoint - '\u005CuFF41' + 10}
6551      *     is returned.
6552      * </ul>
6553      *
6554      * @param   codePoint the character (Unicode code point) to be converted.
6555      * @param   radix   the radix.
6556      * @return  the numeric value represented by the character in the
6557      *          specified radix.
6558      * @see     Character#forDigit(int, int)
6559      * @see     Character#isDigit(int)
6560      * @since   1.5
6561      */
6562     public static int digit(int codePoint, int radix) {
6563         return CharacterData.of(codePoint).digit(codePoint, radix);
6564     }
6565 
6566     /**
6567      * Returns the {@code int} value that the specified Unicode
6568      * character represents. For example, the character
     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
     * an {@code int} with a value of 50.
6571      * <p>
6572      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6573      * {@code '\u005Cu005A'}), lowercase
6574      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6575      * full width variant ({@code '\u005CuFF21'} through
6576      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6577      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6578      * through 35. This is independent of the Unicode specification,
6579      * which does not assign numeric values to these {@code char}
6580      * values.
6581      * <p>
6582      * If the character does not have a numeric value, then -1 is returned.
6583      * If the character has a numeric value that cannot be represented as a
6584      * nonnegative integer (for example, a fractional value), then -2
6585      * is returned.
6586      *
6587      * <p><b>Note:</b> This method cannot handle <a
6588      * href="#supplementary"> supplementary characters</a>. To support
6589      * all Unicode characters, including supplementary characters, use
6590      * the {@link #getNumericValue(int)} method.
6591      *
6592      * @param   ch      the character to be converted.
6593      * @return  the numeric value of the character, as a nonnegative {@code int}
6594      *           value; -2 if the character has a numeric value that is not a
6595      *          nonnegative integer; -1 if the character has no numeric value.
6596      * @see     Character#forDigit(int, int)
6597      * @see     Character#isDigit(char)
6598      * @since   1.1
6599      */
6600     public static int getNumericValue(char ch) {
6601         return getNumericValue((int)ch);
6602     }
6603 
6604     /**
6605      * Returns the {@code int} value that the specified
6606      * character (Unicode code point) represents. For example, the character
6607      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6608      * an {@code int} with a value of 50.
6609      * <p>
6610      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6611      * {@code '\u005Cu005A'}), lowercase
6612      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6613      * full width variant ({@code '\u005CuFF21'} through
6614      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6615      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6616      * through 35. This is independent of the Unicode specification,
6617      * which does not assign numeric values to these {@code char}
6618      * values.
6619      * <p>
6620      * If the character does not have a numeric value, then -1 is returned.
6621      * If the character has a numeric value that cannot be represented as a
6622      * nonnegative integer (for example, a fractional value), then -2
6623      * is returned.
6624      *
6625      * @param   codePoint the character (Unicode code point) to be converted.
6626      * @return  the numeric value of the character, as a nonnegative {@code int}
6627      *          value; -2 if the character has a numeric value that is not a
6628      *          nonnegative integer; -1 if the character has no numeric value.
6629      * @see     Character#forDigit(int, int)
6630      * @see     Character#isDigit(int)
6631      * @since   1.5
6632      */
6633     public static int getNumericValue(int codePoint) {
6634         return CharacterData.of(codePoint).getNumericValue(codePoint);
6635     }
6636 
6637     /**
6638      * Determines if the specified character is ISO-LATIN-1 white space.
6639      * This method returns {@code true} for the following five
6640      * characters only:
6641      * <table summary="truechars">
6642      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
6643      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
6644      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
6645      *     <td>{@code NEW LINE}</td></tr>
6646      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
6647      *     <td>{@code FORM FEED}</td></tr>
6648      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
6649      *     <td>{@code CARRIAGE RETURN}</td></tr>
     * <tr><td>{@code ' '}</td>             <td>{@code U+0020}</td>
6651      *     <td>{@code SPACE}</td></tr>
6652      * </table>
6653      *
6654      * @param      ch   the character to be tested.
6655      * @return     {@code true} if the character is ISO-LATIN-1 white
6656      *             space; {@code false} otherwise.
6657      * @see        Character#isSpaceChar(char)
6658      * @see        Character#isWhitespace(char)
     * @deprecated Replaced by {@link #isWhitespace(char)}.
6660      */
6661     @Deprecated
6662     public static boolean isSpace(char ch) {
6663         return (ch <= 0x0020) &&
6664             (((((1L << 0x0009) |
6665             (1L << 0x000A) |
6666             (1L << 0x000C) |
6667             (1L << 0x000D) |
6668             (1L << 0x0020)) >> ch) & 1L) != 0);
6669     }
6670 
6671 
6672     /**
6673      * Determines if the specified character is a Unicode space character.
6674      * A character is considered to be a space character if and only if
6675      * it is specified to be a space character by the Unicode Standard. This
6676      * method returns true if the character's general category type is any of
6677      * the following:
6678      * <ul>
6679      * <li> {@code SPACE_SEPARATOR}
6680      * <li> {@code LINE_SEPARATOR}
6681      * <li> {@code PARAGRAPH_SEPARATOR}
6682      * </ul>
6683      *
6684      * <p><b>Note:</b> This method cannot handle <a
6685      * href="#supplementary"> supplementary characters</a>. To support
6686      * all Unicode characters, including supplementary characters, use
6687      * the {@link #isSpaceChar(int)} method.
6688      *
6689      * @param   ch      the character to be tested.
6690      * @return  {@code true} if the character is a space character;
6691      *          {@code false} otherwise.
6692      * @see     Character#isWhitespace(char)
6693      * @since   1.1
6694      */
6695     public static boolean isSpaceChar(char ch) {
6696         return isSpaceChar((int)ch);
6697     }
6698 
6699     /**
6700      * Determines if the specified character (Unicode code point) is a
6701      * Unicode space character.  A character is considered to be a
6702      * space character if and only if it is specified to be a space
6703      * character by the Unicode Standard. This method returns true if
6704      * the character's general category type is any of the following:
6705      *
6706      * <ul>
6707      * <li> {@link #SPACE_SEPARATOR}
6708      * <li> {@link #LINE_SEPARATOR}
6709      * <li> {@link #PARAGRAPH_SEPARATOR}
6710      * </ul>
6711      *
6712      * @param   codePoint the character (Unicode code point) to be tested.
6713      * @return  {@code true} if the character is a space character;
6714      *          {@code false} otherwise.
6715      * @see     Character#isWhitespace(int)
6716      * @since   1.5
6717      */
6718     public static boolean isSpaceChar(int codePoint) {
6719         return ((((1 << Character.SPACE_SEPARATOR) |
6720                   (1 << Character.LINE_SEPARATOR) |
6721                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
6722             != 0;
6723     }
6724 
6725     /**
6726      * Determines if the specified character is white space according to Java.
6727      * A character is a Java whitespace character if and only if it satisfies
6728      * one of the following criteria:
6729      * <ul>
6730      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
6731      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
6732      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6733      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6734      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6735      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6736      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6737      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6738      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6739      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6740      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6741      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6742      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6743      * </ul>
6744      *
6745      * <p><b>Note:</b> This method cannot handle <a
6746      * href="#supplementary"> supplementary characters</a>. To support
6747      * all Unicode characters, including supplementary characters, use
6748      * the {@link #isWhitespace(int)} method.
6749      *
6750      * @param   ch the character to be tested.
6751      * @return  {@code true} if the character is a Java whitespace
6752      *          character; {@code false} otherwise.
6753      * @see     Character#isSpaceChar(char)
6754      * @since   1.1
6755      */
6756     public static boolean isWhitespace(char ch) {
6757         return isWhitespace((int)ch);
6758     }
6759 
6760     /**
6761      * Determines if the specified character (Unicode code point) is
6762      * white space according to Java.  A character is a Java
6763      * whitespace character if and only if it satisfies one of the
6764      * following criteria:
6765      * <ul>
6766      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6767      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6768      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6769      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6770      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6771      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6772      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6773      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6774      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6775      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6776      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6777      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6778      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6779      * </ul>
     *
6782      * @param   codePoint the character (Unicode code point) to be tested.
6783      * @return  {@code true} if the character is a Java whitespace
6784      *          character; {@code false} otherwise.
6785      * @see     Character#isSpaceChar(int)
6786      * @since   1.5
6787      */
6788     public static boolean isWhitespace(int codePoint) {
6789         return CharacterData.of(codePoint).isWhitespace(codePoint);
6790     }
6791 
6792     /**
6793      * Determines if the specified character is an ISO control
6794      * character.  A character is considered to be an ISO control
6795      * character if its code is in the range {@code '\u005Cu0000'}
6796      * through {@code '\u005Cu001F'} or in the range
6797      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6798      *
6799      * <p><b>Note:</b> This method cannot handle <a
6800      * href="#supplementary"> supplementary characters</a>. To support
6801      * all Unicode characters, including supplementary characters, use
6802      * the {@link #isISOControl(int)} method.
6803      *
6804      * @param   ch      the character to be tested.
6805      * @return  {@code true} if the character is an ISO control character;
6806      *          {@code false} otherwise.
6807      *
6808      * @see     Character#isSpaceChar(char)
6809      * @see     Character#isWhitespace(char)
6810      * @since   1.1
6811      */
6812     public static boolean isISOControl(char ch) {
6813         return isISOControl((int)ch);
6814     }
6815 
6816     /**
     * Determines if the specified character (Unicode code point) is an ISO control
6818      * character.  A character is considered to be an ISO control
6819      * character if its code is in the range {@code '\u005Cu0000'}
6820      * through {@code '\u005Cu001F'} or in the range
6821      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6822      *
6823      * @param   codePoint the character (Unicode code point) to be tested.
6824      * @return  {@code true} if the character is an ISO control character;
6825      *          {@code false} otherwise.
6826      * @see     Character#isSpaceChar(int)
6827      * @see     Character#isWhitespace(int)
6828      * @since   1.5
6829      */
6830     public static boolean isISOControl(int codePoint) {
6831         // Optimized form of:
6832         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
6833         //     (codePoint >= 0x7F && codePoint <= 0x9F);
6834         return codePoint <= 0x9F &&
6835             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
6836     }
6837 
6838     /**
6839      * Returns a value indicating a character's general category.
6840      *
6841      * <p><b>Note:</b> This method cannot handle <a
6842      * href="#supplementary"> supplementary characters</a>. To support
6843      * all Unicode characters, including supplementary characters, use
6844      * the {@link #getType(int)} method.
6845      *
6846      * @param   ch      the character to be tested.
6847      * @return  a value of type {@code int} representing the
6848      *          character's general category.
6849      * @see     Character#COMBINING_SPACING_MARK
6850      * @see     Character#CONNECTOR_PUNCTUATION
6851      * @see     Character#CONTROL
6852      * @see     Character#CURRENCY_SYMBOL
6853      * @see     Character#DASH_PUNCTUATION
6854      * @see     Character#DECIMAL_DIGIT_NUMBER
6855      * @see     Character#ENCLOSING_MARK
6856      * @see     Character#END_PUNCTUATION
6857      * @see     Character#FINAL_QUOTE_PUNCTUATION
6858      * @see     Character#FORMAT
6859      * @see     Character#INITIAL_QUOTE_PUNCTUATION
6860      * @see     Character#LETTER_NUMBER
6861      * @see     Character#LINE_SEPARATOR
6862      * @see     Character#LOWERCASE_LETTER
6863      * @see     Character#MATH_SYMBOL
6864      * @see     Character#MODIFIER_LETTER
6865      * @see     Character#MODIFIER_SYMBOL
6866      * @see     Character#NON_SPACING_MARK
6867      * @see     Character#OTHER_LETTER
6868      * @see     Character#OTHER_NUMBER
6869      * @see     Character#OTHER_PUNCTUATION
6870      * @see     Character#OTHER_SYMBOL
6871      * @see     Character#PARAGRAPH_SEPARATOR
6872      * @see     Character#PRIVATE_USE
6873      * @see     Character#SPACE_SEPARATOR
6874      * @see     Character#START_PUNCTUATION
6875      * @see     Character#SURROGATE
6876      * @see     Character#TITLECASE_LETTER
6877      * @see     Character#UNASSIGNED
6878      * @see     Character#UPPERCASE_LETTER
6879      * @since   1.1
6880      */
6881     public static int getType(char ch) {
6882         return getType((int)ch);
6883     }
6884 
6885     /**
6886      * Returns a value indicating a character's general category.
6887      *
6888      * @param   codePoint the character (Unicode code point) to be tested.
6889      * @return  a value of type {@code int} representing the
6890      *          character's general category.
6891      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
6892      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
6893      * @see     Character#CONTROL CONTROL
6894      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
6895      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
6896      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
6897      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
6898      * @see     Character#END_PUNCTUATION END_PUNCTUATION
6899      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
6900      * @see     Character#FORMAT FORMAT
6901      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
6902      * @see     Character#LETTER_NUMBER LETTER_NUMBER
6903      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
6904      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
6905      * @see     Character#MATH_SYMBOL MATH_SYMBOL
6906      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
6907      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
6908      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
6909      * @see     Character#OTHER_LETTER OTHER_LETTER
6910      * @see     Character#OTHER_NUMBER OTHER_NUMBER
6911      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
6912      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
6913      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
6914      * @see     Character#PRIVATE_USE PRIVATE_USE
6915      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
6916      * @see     Character#START_PUNCTUATION START_PUNCTUATION
6917      * @see     Character#SURROGATE SURROGATE
6918      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
6919      * @see     Character#UNASSIGNED UNASSIGNED
6920      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
6921      * @since   1.5
6922      */
6923     public static int getType(int codePoint) {
6924         return CharacterData.of(codePoint).getType(codePoint);
6925     }
6926 
6927     /**
6928      * Determines the character representation for a specific digit in
6929      * the specified radix. If the value of {@code radix} is not a
6930      * valid radix, or the value of {@code digit} is not a valid
6931      * digit in the specified radix, the null character
6932      * ({@code '\u005Cu0000'}) is returned.
6933      * <p>
6934      * The {@code radix} argument is valid if it is greater than or
6935      * equal to {@code MIN_RADIX} and less than or equal to
6936      * {@code MAX_RADIX}. The {@code digit} argument is valid if
6937      * {@code 0 <= digit < radix}.
6938      * <p>
6939      * If the digit is less than 10, then
6940      * {@code '0' + digit} is returned. Otherwise, the value
6941      * {@code 'a' + digit - 10} is returned.
6942      *
6943      * @param   digit   the number to convert to a character.
6944      * @param   radix   the radix.
6945      * @return  the {@code char} representation of the specified digit
6946      *          in the specified radix.
6947      * @see     Character#MIN_RADIX
6948      * @see     Character#MAX_RADIX
6949      * @see     Character#digit(char, int)
6950      */
6951     public static char forDigit(int digit, int radix) {
6952         if ((digit >= radix) || (digit < 0)) {
6953             return '\0';
6954         }
6955         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
6956             return '\0';
6957         }
6958         if (digit < 10) {
6959             return (char)('0' + digit);
6960         }
6961         return (char)('a' - 10 + digit);
6962     }
6963 
6964     /**
6965      * Returns the Unicode directionality property for the given
6966      * character.  Character directionality is used to calculate the
6967      * visual ordering of text. The directionality value of undefined
6968      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
6969      *
6970      * <p><b>Note:</b> This method cannot handle <a
6971      * href="#supplementary"> supplementary characters</a>. To support
6972      * all Unicode characters, including supplementary characters, use
6973      * the {@link #getDirectionality(int)} method.
6974      *
6975      * @param  ch {@code char} for which the directionality property
6976      *            is requested.
6977      * @return the directionality property of the {@code char} value.
6978      *
6979      * @see Character#DIRECTIONALITY_UNDEFINED
6980      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
6981      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
6982      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
6983      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
6984      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
6985      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
6986      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
6987      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
6988      * @see Character#DIRECTIONALITY_NONSPACING_MARK
6989      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
6990      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
6991      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
6992      * @see Character#DIRECTIONALITY_WHITESPACE
6993      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
6994      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
6995      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
6996      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
6997      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
6998      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
6999      * @since 1.4
7000      */
7001     public static byte getDirectionality(char ch) {
7002         return getDirectionality((int)ch);
7003     }
7004 
7005     /**
7006      * Returns the Unicode directionality property for the given
7007      * character (Unicode code point).  Character directionality is
7008      * used to calculate the visual ordering of text. The
7009      * directionality value of undefined character is {@link
7010      * #DIRECTIONALITY_UNDEFINED}.
7011      *
7012      * @param   codePoint the character (Unicode code point) for which
7013      *          the directionality property is requested.
7014      * @return the directionality property of the character.
7015      *
7016      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
7017      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
7018      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
7019      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7020      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
7021      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7022      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7023      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
7024      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7025      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
7026      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
7027      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
7028      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
7029      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
7030      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
7031      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7032      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7033      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7034      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7035      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7036      * @since    1.5
7037      */
7038     public static byte getDirectionality(int codePoint) {
7039         return CharacterData.of(codePoint).getDirectionality(codePoint);
7040     }
7041 
7042     /**
7043      * Determines whether the character is mirrored according to the
7044      * Unicode specification.  Mirrored characters should have their
7045      * glyphs horizontally mirrored when displayed in text that is
7046      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
7047      * PARENTHESIS is semantically defined to be an <i>opening
7048      * parenthesis</i>.  This will appear as a "(" in text that is
7049      * left-to-right but as a ")" in text that is right-to-left.
7050      *
7051      * <p><b>Note:</b> This method cannot handle <a
7052      * href="#supplementary"> supplementary characters</a>. To support
7053      * all Unicode characters, including supplementary characters, use
7054      * the {@link #isMirrored(int)} method.
7055      *
7056      * @param  ch {@code char} for which the mirrored property is requested
7057      * @return {@code true} if the char is mirrored, {@code false}
7058      *         if the {@code char} is not mirrored or is not defined.
7059      * @since 1.4
7060      */
7061     public static boolean isMirrored(char ch) {
7062         return isMirrored((int)ch);
7063     }
7064 
7065     /**
7066      * Determines whether the specified character (Unicode code point)
7067      * is mirrored according to the Unicode specification.  Mirrored
7068      * characters should have their glyphs horizontally mirrored when
7069      * displayed in text that is right-to-left.  For example,
7070      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
7071      * defined to be an <i>opening parenthesis</i>.  This will appear
7072      * as a "(" in text that is left-to-right but as a ")" in text
7073      * that is right-to-left.
7074      *
7075      * @param   codePoint the character (Unicode code point) to be tested.
7076      * @return  {@code true} if the character is mirrored, {@code false}
7077      *          if the character is not mirrored or is not defined.
7078      * @since   1.5
7079      */
7080     public static boolean isMirrored(int codePoint) {
7081         return CharacterData.of(codePoint).isMirrored(codePoint);
7082     }
7083 
7084     /**
7085      * Compares two {@code Character} objects numerically.
7086      *
7087      * @param   anotherCharacter   the {@code Character} to be compared.
7088 
7089      * @return  the value {@code 0} if the argument {@code Character}
7090      *          is equal to this {@code Character}; a value less than
7091      *          {@code 0} if this {@code Character} is numerically less
7092      *          than the {@code Character} argument; and a value greater than
7093      *          {@code 0} if this {@code Character} is numerically greater
7094      *          than the {@code Character} argument (unsigned comparison).
7095      *          Note that this is strictly a numerical comparison; it is not
7096      *          locale-dependent.
7097      * @since   1.2
7098      */
7099     public int compareTo(Character anotherCharacter) {
7100         return compare(this.value, anotherCharacter.value);
7101     }
7102 
7103     /**
7104      * Compares two {@code char} values numerically.
7105      * The value returned is identical to what would be returned by:
7106      * <pre>
7107      *    Character.valueOf(x).compareTo(Character.valueOf(y))
7108      * </pre>
7109      *
7110      * @param  x the first {@code char} to compare
7111      * @param  y the second {@code char} to compare
7112      * @return the value {@code 0} if {@code x == y};
7113      *         a value less than {@code 0} if {@code x < y}; and
7114      *         a value greater than {@code 0} if {@code x > y}
7115      * @since 1.7
7116      */
7117     public static int compare(char x, char y) {
7118         return x - y;
7119     }
7120 
7121     /**
7122      * Converts the character (Unicode code point) argument to uppercase using
7123      * information from the UnicodeData file.
7124      * <p>
7125      *
7126      * @param   codePoint   the character (Unicode code point) to be converted.
7127      * @return  either the uppercase equivalent of the character, if
7128      *          any, or an error flag ({@code Character.ERROR})
7129      *          that indicates that a 1:M {@code char} mapping exists.
7130      * @see     Character#isLowerCase(char)
7131      * @see     Character#isUpperCase(char)
7132      * @see     Character#toLowerCase(char)
7133      * @see     Character#toTitleCase(char)
7134      * @since 1.4
7135      */
7136     static int toUpperCaseEx(int codePoint) {
7137         assert isValidCodePoint(codePoint);
7138         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
7139     }
7140 
7141     /**
7142      * Converts the character (Unicode code point) argument to uppercase using case
7143      * mapping information from the SpecialCasing file in the Unicode
7144      * specification. If a character has no explicit uppercase
7145      * mapping, then the {@code char} itself is returned in the
7146      * {@code char[]}.
7147      *
7148      * @param   codePoint   the character (Unicode code point) to be converted.
7149      * @return a {@code char[]} with the uppercased character.
7150      * @since 1.4
7151      */
7152     static char[] toUpperCaseCharArray(int codePoint) {
7153         // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
7154         assert isBmpCodePoint(codePoint);
7155         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
7156     }
7157 
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
7165 
    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form. The value is derived by dividing {@link #SIZE} by
     * {@link Byte#SIZE}.
     *
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
7173 
7174     /**
7175      * Returns the value obtained by reversing the order of the bytes in the
7176      * specified <tt>char</tt> value.
7177      *
7178      * @param ch The {@code char} of which to reverse the byte order.
7179      * @return the value obtained by reversing (or, equivalently, swapping)
7180      *     the bytes in the specified <tt>char</tt> value.
7181      * @since 1.5
7182      */
7183     public static char reverseBytes(char ch) {
7184         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
7185     }
7186 
7187     /**
7188      * Returns the Unicode name of the specified character
7189      * {@code codePoint}, or null if the code point is
7190      * {@link #UNASSIGNED unassigned}.
7191      * <p>
7192      * Note: if the specified character is not assigned a name by
7193      * the <i>UnicodeData</i> file (part of the Unicode Character
7194      * Database maintained by the Unicode Consortium), the returned
7195      * name is the same as the result of expression.
7196      *
7197      * <blockquote>{@code
7198      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
7199      *     + " "
7200      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7201      *
7202      * }</blockquote>
7203      *
7204      * @param  codePoint the character (Unicode code point)
7205      *
7206      * @return the Unicode name of the specified character, or null if
7207      *         the code point is unassigned.
7208      *
7209      * @exception IllegalArgumentException if the specified
7210      *            {@code codePoint} is not a valid Unicode
7211      *            code point.
7212      *
7213      * @since 1.7
7214      */
7215     public static String getName(int codePoint) {
7216         if (!isValidCodePoint(codePoint)) {
7217             throw new IllegalArgumentException();
7218         }
7219         String name = CharacterName.get(codePoint);
7220         if (name != null)
7221             return name;
7222         if (getType(codePoint) == UNASSIGNED)
7223             return null;
7224         UnicodeBlock block = UnicodeBlock.of(codePoint);
7225         if (block != null)
7226             return block.toString().replace('_', ' ') + " "
7227                    + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7228         // should never come here
7229         return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7230     }
7231 }