1 /*
   2  * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Map;
  30 import java.util.HashMap;
  31 import java.util.Locale;
  32 
  33 /**
  34  * The {@code Character} class wraps a value of the primitive
  35  * type {@code char} in an object. An object of type
  36  * {@code Character} contains a single field whose type is
  37  * {@code char}.
  38  * <p>
  39  * In addition, this class provides several methods for determining
  40  * a character's category (lowercase letter, digit, etc.) and for converting
  41  * characters from uppercase to lowercase and vice versa.
  42  * <p>
  43  * Character information is based on the Unicode Standard, version 6.2.0.
  44  * <p>
  45  * The methods and data of class {@code Character} are defined by
  46  * the information in the <i>UnicodeData</i> file that is part of the
  47  * Unicode Character Database maintained by the Unicode
  48  * Consortium. This file specifies various properties including name
  49  * and general category for every defined Unicode code point or
  50  * character range.
  51  * <p>
  52  * The file and its description are available from the Unicode Consortium at:
  53  * <ul>
  54  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
  55  * </ul>
  56  *
  57  * <h3><a name="unicode">Unicode Character Representations</a></h3>
  58  *
  59  * <p>The {@code char} data type (and therefore the value that a
  60  * {@code Character} object encapsulates) is based on the
  61  * original Unicode specification, which defined characters as
  62  * fixed-width 16-bit entities. The Unicode Standard has since been
  63  * changed to allow for characters whose representation requires more
  64  * than 16 bits.  The range of legal <em>code point</em>s is now
  65  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
  66  * (Refer to the <a
  67  * href="http://www.unicode.org/reports/tr27/#notation"><i>
  68  * definition</i></a> of the U+<i>n</i> notation in the Unicode
  69  * Standard.)
  70  *
  71  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
  72  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
  73  * <a name="supplementary">Characters</a> whose code points are greater
  74  * than U+FFFF are called <em>supplementary character</em>s.  The Java
  75  * platform uses the UTF-16 representation in {@code char} arrays and
  76  * in the {@code String} and {@code StringBuffer} classes. In
  77  * this representation, supplementary characters are represented as a pair
  78  * of {@code char} values, the first from the <em>high-surrogates</em>
  79  * range, (\uD800-\uDBFF), the second from the
  80  * <em>low-surrogates</em> range (\uDC00-\uDFFF).
  81  *
  82  * <p>A {@code char} value, therefore, represents Basic
  83  * Multilingual Plane (BMP) code points, including the surrogate
  84  * code points, or code units of the UTF-16 encoding. An
  85  * {@code int} value represents all Unicode code points,
  86  * including supplementary code points. The lower (least significant)
  87  * 21 bits of {@code int} are used to represent Unicode code
  88  * points and the upper (most significant) 11 bits must be zero.
  89  * Unless otherwise specified, the behavior with respect to
  90  * supplementary characters and surrogate {@code char} values is
  91  * as follows:
  92  *
  93  * <ul>
  94  * <li>The methods that only accept a {@code char} value cannot support
  95  * supplementary characters. They treat {@code char} values from the
  96  * surrogate ranges as undefined characters. For example,
  97  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
  98  * this specific value, if followed by any low-surrogate value in a string,
  99  * would represent a letter.
 100  *
 101  * <li>The methods that accept an {@code int} value support all
 102  * Unicode characters, including supplementary characters. For
 103  * example, {@code Character.isLetter(0x2F81A)} returns
 104  * {@code true} because the code point value represents a letter
 105  * (a CJK ideograph).
 106  * </ul>
 107  *
 108  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
 109  * used for character values in the range between U+0000 and U+10FFFF,
 110  * and <em>Unicode code unit</em> is used for 16-bit
 111  * {@code char} values that are code units of the <em>UTF-16</em>
 112  * encoding. For more information on Unicode terminology, refer to the
 113  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
 114  *
 115  * @author  Lee Boynton
 116  * @author  Guy Steele
 117  * @author  Akira Tanaka
 118  * @author  Martin Buchholz
 119  * @author  Ulf Zibis
 120  * @since   1.0
 121  */
 122 public final
 123 class Character implements java.io.Serializable, Comparable<Character> {
 124     /**
 125      * The minimum radix available for conversion to and from strings.
 126      * The constant value of this field is the smallest value permitted
 127      * for the radix argument in radix-conversion methods such as the
 128      * {@code digit} method, the {@code forDigit} method, and the
 129      * {@code toString} method of class {@code Integer}.
 130      *
 131      * @see     Character#digit(char, int)
 132      * @see     Character#forDigit(int, int)
 133      * @see     Integer#toString(int, int)
 134      * @see     Integer#valueOf(String)
 135      */
 136     public static final int MIN_RADIX = 2;
 137
 138     /**
 139      * The maximum radix available for conversion to and from strings.
 140      * The constant value of this field is the largest value permitted
 141      * for the radix argument in radix-conversion methods such as the
 142      * {@code digit} method, the {@code forDigit} method, and the
 143      * {@code toString} method of class {@code Integer}.
 144      *
 145      * @see     Character#digit(char, int)
 146      * @see     Character#forDigit(int, int)
 147      * @see     Integer#toString(int, int)
 148      * @see     Integer#valueOf(String)
 149      */
 150     public static final int MAX_RADIX = 36;
 151
         // In the two Javadoc comments below, the escape for U+005C stands in for
         // a literal backslash, so the documented text shows a backslash-u
         // sequence instead of the comment containing a real character escape.
 152     /**
 153      * The constant value of this field is the smallest value of type
 154      * {@code char}, {@code '\u005Cu0000'}.
 155      *
 156      * @since   1.0.2
 157      */
 158     public static final char MIN_VALUE = '\u0000';
 159
 160     /**
 161      * The constant value of this field is the largest value of type
 162      * {@code char}, {@code '\u005CuFFFF'}.
 163      *
 164      * @since   1.0.2
 165      */
 166     public static final char MAX_VALUE = '\uFFFF';
 167
 168     /**
 169      * The {@code Class} instance representing the primitive type
 170      * {@code char}.
 171      *
 172      * @since   1.1
 173      */
         // The initializer casts the result of Class.getPrimitiveClass("char") to
         // Class<Character>; presumably that cast is what the "unchecked"
         // suppression covers -- confirm against getPrimitiveClass's return type.
 174     @SuppressWarnings("unchecked")
 175     public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
 176 
 177     /*
 178      * Normative general types
 179      */
 180
 181     /*
 182      * General character types
      *
      * Each constant in this run names one Unicode general category; the
      * two-letter abbreviation is quoted in the constant's Javadoc.
      *
      * NOTE(review): no constant in this run has the value 17 (FORMAT is 16
      * and PRIVATE_USE is 18). Presumably the gap is intentional -- confirm
      * before renumbering or adding a constant.
 183      */
 184
 185     /**
 186      * General category "Cn" in the Unicode specification.
 187      * @since   1.1
 188      */
 189     public static final byte UNASSIGNED = 0;
 190
 191     /**
 192      * General category "Lu" in the Unicode specification.
 193      * @since   1.1
 194      */
 195     public static final byte UPPERCASE_LETTER = 1;
 196
 197     /**
 198      * General category "Ll" in the Unicode specification.
 199      * @since   1.1
 200      */
 201     public static final byte LOWERCASE_LETTER = 2;
 202
 203     /**
 204      * General category "Lt" in the Unicode specification.
 205      * @since   1.1
 206      */
 207     public static final byte TITLECASE_LETTER = 3;
 208
 209     /**
 210      * General category "Lm" in the Unicode specification.
 211      * @since   1.1
 212      */
 213     public static final byte MODIFIER_LETTER = 4;
 214
 215     /**
 216      * General category "Lo" in the Unicode specification.
 217      * @since   1.1
 218      */
 219     public static final byte OTHER_LETTER = 5;
 220
 221     /**
 222      * General category "Mn" in the Unicode specification.
 223      * @since   1.1
 224      */
 225     public static final byte NON_SPACING_MARK = 6;
 226
 227     /**
 228      * General category "Me" in the Unicode specification.
 229      * @since   1.1
 230      */
 231     public static final byte ENCLOSING_MARK = 7;
 232
 233     /**
 234      * General category "Mc" in the Unicode specification.
 235      * @since   1.1
 236      */
 237     public static final byte COMBINING_SPACING_MARK = 8;
 238
 239     /**
 240      * General category "Nd" in the Unicode specification.
 241      * @since   1.1
 242      */
 243     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
 244
 245     /**
 246      * General category "Nl" in the Unicode specification.
 247      * @since   1.1
 248      */
 249     public static final byte LETTER_NUMBER = 10;
 250
 251     /**
 252      * General category "No" in the Unicode specification.
 253      * @since   1.1
 254      */
 255     public static final byte OTHER_NUMBER = 11;
 256
 257     /**
 258      * General category "Zs" in the Unicode specification.
 259      * @since   1.1
 260      */
 261     public static final byte SPACE_SEPARATOR = 12;
 262
 263     /**
 264      * General category "Zl" in the Unicode specification.
 265      * @since   1.1
 266      */
 267     public static final byte LINE_SEPARATOR = 13;
 268
 269     /**
 270      * General category "Zp" in the Unicode specification.
 271      * @since   1.1
 272      */
 273     public static final byte PARAGRAPH_SEPARATOR = 14;
 274
 275     /**
 276      * General category "Cc" in the Unicode specification.
 277      * @since   1.1
 278      */
 279     public static final byte CONTROL = 15;
 280
 281     /**
 282      * General category "Cf" in the Unicode specification.
 283      * @since   1.1
 284      */
 285     public static final byte FORMAT = 16;
 286
 287     /**
 288      * General category "Co" in the Unicode specification.
 289      * @since   1.1
 290      */
 291     public static final byte PRIVATE_USE = 18;
 292
 293     /**
 294      * General category "Cs" in the Unicode specification.
 295      * @since   1.1
 296      */
 297     public static final byte SURROGATE = 19;
 298
 299     /**
 300      * General category "Pd" in the Unicode specification.
 301      * @since   1.1
 302      */
 303     public static final byte DASH_PUNCTUATION = 20;
 304
 305     /**
 306      * General category "Ps" in the Unicode specification.
 307      * @since   1.1
 308      */
 309     public static final byte START_PUNCTUATION = 21;
 310
 311     /**
 312      * General category "Pe" in the Unicode specification.
 313      * @since   1.1
 314      */
 315     public static final byte END_PUNCTUATION = 22;
 316
 317     /**
 318      * General category "Pc" in the Unicode specification.
 319      * @since   1.1
 320      */
 321     public static final byte CONNECTOR_PUNCTUATION = 23;
 322
 323     /**
 324      * General category "Po" in the Unicode specification.
 325      * @since   1.1
 326      */
 327     public static final byte OTHER_PUNCTUATION = 24;
 328
 329     /**
 330      * General category "Sm" in the Unicode specification.
 331      * @since   1.1
 332      */
 333     public static final byte MATH_SYMBOL = 25;
 334
 335     /**
 336      * General category "Sc" in the Unicode specification.
 337      * @since   1.1
 338      */
 339     public static final byte CURRENCY_SYMBOL = 26;
 340
 341     /**
 342      * General category "Sk" in the Unicode specification.
 343      * @since   1.1
 344      */
 345     public static final byte MODIFIER_SYMBOL = 27;
 346
 347     /**
 348      * General category "So" in the Unicode specification.
 349      * @since   1.1
 350      */
 351     public static final byte OTHER_SYMBOL = 28;
 352
 353     /**
 354      * General category "Pi" in the Unicode specification.
 355      * @since   1.4
 356      */
 357     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
 358
 359     /**
 360      * General category "Pf" in the Unicode specification.
 361      * @since   1.4
 362      */
 363     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
 364
 365     /**
 366      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
      * As an {@code int}, the literal {@code 0xFFFFFFFF} has the value -1,
      * which lies outside the range U+0000 to U+10FFFF.
 367      */
 368     static final int ERROR = 0xFFFFFFFF;
 369 
 370
         /*
          * Bidirectional character types
          *
          * The constants in this run take the values -1 (undefined) and then
          * 0 through 18 with no gaps; the abbreviation used by the Unicode
          * specification is quoted in each constant's Javadoc.
          */
 371     /**
 372      * Undefined bidirectional character type. Undefined {@code char}
 373      * values have undefined directionality in the Unicode specification.
 374      * @since 1.4
 375      */
 376     public static final byte DIRECTIONALITY_UNDEFINED = -1;
 377
 378     /**
 379      * Strong bidirectional character type "L" in the Unicode specification.
 380      * @since 1.4
 381      */
 382     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
 383
 384     /**
 385      * Strong bidirectional character type "R" in the Unicode specification.
 386      * @since 1.4
 387      */
 388     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
 389
 390     /**
 391      * Strong bidirectional character type "AL" in the Unicode specification.
 392      * @since 1.4
 393      */
 394     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
 395
 396     /**
 397      * Weak bidirectional character type "EN" in the Unicode specification.
 398      * @since 1.4
 399      */
 400     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
 401
 402     /**
 403      * Weak bidirectional character type "ES" in the Unicode specification.
 404      * @since 1.4
 405      */
 406     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
 407
 408     /**
 409      * Weak bidirectional character type "ET" in the Unicode specification.
 410      * @since 1.4
 411      */
 412     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
 413
 414     /**
 415      * Weak bidirectional character type "AN" in the Unicode specification.
 416      * @since 1.4
 417      */
 418     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
 419
 420     /**
 421      * Weak bidirectional character type "CS" in the Unicode specification.
 422      * @since 1.4
 423      */
 424     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
 425
 426     /**
 427      * Weak bidirectional character type "NSM" in the Unicode specification.
 428      * @since 1.4
 429      */
 430     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
 431
 432     /**
 433      * Weak bidirectional character type "BN" in the Unicode specification.
 434      * @since 1.4
 435      */
 436     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
 437
 438     /**
 439      * Neutral bidirectional character type "B" in the Unicode specification.
 440      * @since 1.4
 441      */
 442     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
 443
 444     /**
 445      * Neutral bidirectional character type "S" in the Unicode specification.
 446      * @since 1.4
 447      */
 448     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
 449
 450     /**
 451      * Neutral bidirectional character type "WS" in the Unicode specification.
 452      * @since 1.4
 453      */
 454     public static final byte DIRECTIONALITY_WHITESPACE = 12;
 455
 456     /**
 457      * Neutral bidirectional character type "ON" in the Unicode specification.
 458      * @since 1.4
 459      */
 460     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
 461
 462     /**
 463      * Strong bidirectional character type "LRE" in the Unicode specification.
 464      * @since 1.4
 465      */
 466     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
 467
 468     /**
 469      * Strong bidirectional character type "LRO" in the Unicode specification.
 470      * @since 1.4
 471      */
 472     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
 473
 474     /**
 475      * Strong bidirectional character type "RLE" in the Unicode specification.
 476      * @since 1.4
 477      */
 478     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
 479
 480     /**
 481      * Strong bidirectional character type "RLO" in the Unicode specification.
 482      * @since 1.4
 483      */
 484     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
 485
 486     /**
 487      * Weak bidirectional character type "PDF" in the Unicode specification.
 488      * @since 1.4
 489      */
 490     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 491 
 492     /**
 493      * The minimum value of a
 494      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 495      * Unicode high-surrogate code unit</a>
 496      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
 497      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 498      *
 499      * @since 1.5
 500      */
 501     public static final char MIN_HIGH_SURROGATE = '\uD800';
 502
 503     /**
 504      * The maximum value of a
 505      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 506      * Unicode high-surrogate code unit</a>
 507      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
 508      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 509      *
 510      * @since 1.5
 511      */
 512     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
 513
 514     /**
 515      * The minimum value of a
 516      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 517      * Unicode low-surrogate code unit</a>
 518      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
 519      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 520      *
 521      * @since 1.5
 522      */
 523     public static final char MIN_LOW_SURROGATE  = '\uDC00';
 524
 525     /**
 526      * The maximum value of a
 527      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 528      * Unicode low-surrogate code unit</a>
 529      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 530      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 531      *
 532      * @since 1.5
 533      */
 534     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
 535
 536     /**
 537      * The minimum value of a Unicode surrogate code unit in the
 538      * UTF-16 encoding, constant {@code '\u005CuD800'}.
      * Defined as an alias of {@link #MIN_HIGH_SURROGATE}, so the two
      * constants always hold the same value.
 539      *
 540      * @since 1.5
 541      */
 542     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
 543
 544     /**
 545      * The maximum value of a Unicode surrogate code unit in the
 546      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
      * Defined as an alias of {@link #MAX_LOW_SURROGATE}, so the two
      * constants always hold the same value.
 547      *
 548      * @since 1.5
 549      */
 550     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
 551
 552     /**
 553      * The minimum value of a
 554      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
 555      * Unicode supplementary code point</a>, constant {@code U+10000}.
 556      *
 557      * @since 1.5
 558      */
 559     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
 560
 561     /**
 562      * The minimum value of a
 563      * <a href="http://www.unicode.org/glossary/#code_point">
 564      * Unicode code point</a>, constant {@code U+0000}.
 565      *
 566      * @since 1.5
 567      */
 568     public static final int MIN_CODE_POINT = 0x000000;
 569
 570     /**
 571      * The maximum value of a
 572      * <a href="http://www.unicode.org/glossary/#code_point">
 573      * Unicode code point</a>, constant {@code U+10FFFF}.
 574      *
 575      * @since 1.5
 576      */
         // The literal below is written with an upper-case 0X prefix; it denotes
         // the same value as 0x10FFFF.
 577     public static final int MAX_CODE_POINT = 0X10FFFF;
 578 
 579 
 580     /**
 581      * Instances of this class represent particular subsets of the Unicode
 582      * character set.  The only family of subsets defined in the
 583      * {@code Character} class is {@link Character.UnicodeBlock}.
 584      * Other portions of the Java API may define other subsets for their
 585      * own purposes.
 586      *
 587      * @since 1.2
 588      */
 589     public static class Subset  {
 590 
 591         private String name;
 592 
 593         /**
 594          * Constructs a new {@code Subset} instance.
 595          *
 596          * @param  name  The name of this subset
 597          * @exception NullPointerException if name is {@code null}
 598          */
 599         protected Subset(String name) {
 600             if (name == null) {
 601                 throw new NullPointerException("name");
 602             }
 603             this.name = name;
 604         }
 605 
 606         /**
 607          * Compares two {@code Subset} objects for equality.
 608          * This method returns {@code true} if and only if
 609          * {@code this} and the argument refer to the same
 610          * object; since this method is {@code final}, this
 611          * guarantee holds for all subclasses.
 612          */
 613         public final boolean equals(Object obj) {
 614             return (this == obj);
 615         }
 616 
 617         /**
 618          * Returns the standard hash code as defined by the
 619          * {@link Object#hashCode} method.  This method
 620          * is {@code final} in order to ensure that the
 621          * {@code equals} and {@code hashCode} methods will
 622          * be consistent in all subclasses.
 623          */
 624         public final int hashCode() {
 625             return super.hashCode();
 626         }
 627 
 628         /**
 629          * Returns the name of this subset.
 630          */
 631         public final String toString() {
 632             return name;
 633         }
 634     }
 635 
 636     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
 637     // for the latest specification of Unicode Blocks.
 638 
 639     /**
 640      * A family of character subsets representing the character blocks in the
 641      * Unicode specification. Character blocks generally define characters
 642      * used for a specific script or purpose. A character is contained by
 643      * at most one Unicode block.
 644      *
 645      * @since 1.2
 646      */
 647     public static final class UnicodeBlock extends Subset {
 648         private static final int INITIAL_CAPACITY = (int)(510 / 0.75f); // evaluates to 680
             // NOTE(review): 510 is presumably the expected number of registered
             // names and 0.75 the HashMap load factor -- confirm against the full
             // list of block constants.
             //
             // Lookup table from name to block. The constructors of this class add
             // the identifier and every alias as keys for the instance being built,
             // so this field is declared ahead of the block constants.
             // NOTE(review): nothing visible here reassigns the field, so it looks
             // like it could be declared final -- confirm against the rest of the class.
 649         private static Map<String, UnicodeBlock> map =
 650                 new HashMap<>(INITIAL_CAPACITY);
 651 
 652         /**
 653          * Creates a UnicodeBlock with the given identifier name.
 654          * This name must be the same as the block identifier.
 655          */
 656         private UnicodeBlock(String idName) {
 657             super(idName);
 658             map.put(idName, this);
 659         }
 660 
 661         /**
 662          * Creates a UnicodeBlock with the given identifier name and
 663          * alias name.
 664          */
 665         private UnicodeBlock(String idName, String alias) {
 666             this(idName);
 667             map.put(alias, this);
 668         }
 669 
 670         /**
 671          * Creates a UnicodeBlock with the given identifier name and
 672          * alias names.
 673          */
 674         private UnicodeBlock(String idName, String... aliases) {
 675             this(idName);
 676             for (String alias : aliases)
 677                 map.put(alias, this);
 678         }
 679
             /*
              * Block constants.
              *
              * A constant built from three strings passes its identifier followed
              * by two aliases: the upper-case block name written with spaces, and
              * the same name with the spaces removed. A constant built from a
              * single string is registered under its identifier only.
              */
 680         /**
 681          * Constant for the "Basic Latin" Unicode character block.
 682          * @since 1.2
 683          */
 684         public static final UnicodeBlock  BASIC_LATIN =
 685             new UnicodeBlock("BASIC_LATIN",
 686                              "BASIC LATIN",
 687                              "BASICLATIN");
 688
 689         /**
 690          * Constant for the "Latin-1 Supplement" Unicode character block.
 691          * @since 1.2
 692          */
 693         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
 694             new UnicodeBlock("LATIN_1_SUPPLEMENT",
 695                              "LATIN-1 SUPPLEMENT",
 696                              "LATIN-1SUPPLEMENT");
 697
 698         /**
 699          * Constant for the "Latin Extended-A" Unicode character block.
 700          * @since 1.2
 701          */
 702         public static final UnicodeBlock LATIN_EXTENDED_A =
 703             new UnicodeBlock("LATIN_EXTENDED_A",
 704                              "LATIN EXTENDED-A",
 705                              "LATINEXTENDED-A");
 706
 707         /**
 708          * Constant for the "Latin Extended-B" Unicode character block.
 709          * @since 1.2
 710          */
 711         public static final UnicodeBlock LATIN_EXTENDED_B =
 712             new UnicodeBlock("LATIN_EXTENDED_B",
 713                              "LATIN EXTENDED-B",
 714                              "LATINEXTENDED-B");
 715
 716         /**
 717          * Constant for the "IPA Extensions" Unicode character block.
 718          * @since 1.2
 719          */
 720         public static final UnicodeBlock IPA_EXTENSIONS =
 721             new UnicodeBlock("IPA_EXTENSIONS",
 722                              "IPA EXTENSIONS",
 723                              "IPAEXTENSIONS");
 724
 725         /**
 726          * Constant for the "Spacing Modifier Letters" Unicode character block.
 727          * @since 1.2
 728          */
 729         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
 730             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
 731                              "SPACING MODIFIER LETTERS",
 732                              "SPACINGMODIFIERLETTERS");
 733
 734         /**
 735          * Constant for the "Combining Diacritical Marks" Unicode character block.
 736          * @since 1.2
 737          */
 738         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
 739             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
 740                              "COMBINING DIACRITICAL MARKS",
 741                              "COMBININGDIACRITICALMARKS");
 742
 743         /**
 744          * Constant for the "Greek and Coptic" Unicode character block.
 745          * <p>
 746          * This block was previously known as the "Greek" block.
          * The identifier keeps the earlier name, while both aliases are
          * derived from "Greek and Coptic".
 747          *
 748          * @since 1.2
 749          */
 750         public static final UnicodeBlock GREEK =
 751             new UnicodeBlock("GREEK",
 752                              "GREEK AND COPTIC",
 753                              "GREEKANDCOPTIC");
 754
 755         /**
 756          * Constant for the "Cyrillic" Unicode character block.
 757          * @since 1.2
 758          */
 759         public static final UnicodeBlock CYRILLIC =
 760             new UnicodeBlock("CYRILLIC");
 761
 762         /**
 763          * Constant for the "Armenian" Unicode character block.
 764          * @since 1.2
 765          */
 766         public static final UnicodeBlock ARMENIAN =
 767             new UnicodeBlock("ARMENIAN");
 768
 769         /**
 770          * Constant for the "Hebrew" Unicode character block.
 771          * @since 1.2
 772          */
 773         public static final UnicodeBlock HEBREW =
 774             new UnicodeBlock("HEBREW");
 775
 776         /**
 777          * Constant for the "Arabic" Unicode character block.
 778          * @since 1.2
 779          */
 780         public static final UnicodeBlock ARABIC =
 781             new UnicodeBlock("ARABIC");
 782
 783         /**
 784          * Constant for the "Devanagari" Unicode character block.
 785          * @since 1.2
 786          */
 787         public static final UnicodeBlock DEVANAGARI =
 788             new UnicodeBlock("DEVANAGARI");
 789
 790         /**
 791          * Constant for the "Bengali" Unicode character block.
 792          * @since 1.2
 793          */
 794         public static final UnicodeBlock BENGALI =
 795             new UnicodeBlock("BENGALI");
 796
 797         /**
 798          * Constant for the "Gurmukhi" Unicode character block.
 799          * @since 1.2
 800          */
 801         public static final UnicodeBlock GURMUKHI =
 802             new UnicodeBlock("GURMUKHI");
 803
 804         /**
 805          * Constant for the "Gujarati" Unicode character block.
 806          * @since 1.2
 807          */
 808         public static final UnicodeBlock GUJARATI =
 809             new UnicodeBlock("GUJARATI");
 810
 811         /**
 812          * Constant for the "Oriya" Unicode character block.
 813          * @since 1.2
 814          */
 815         public static final UnicodeBlock ORIYA =
 816             new UnicodeBlock("ORIYA");
 817
 818         /**
 819          * Constant for the "Tamil" Unicode character block.
 820          * @since 1.2
 821          */
 822         public static final UnicodeBlock TAMIL =
 823             new UnicodeBlock("TAMIL");
 824
 825         /**
 826          * Constant for the "Telugu" Unicode character block.
 827          * @since 1.2
 828          */
 829         public static final UnicodeBlock TELUGU =
 830             new UnicodeBlock("TELUGU");
 831
 832         /**
 833          * Constant for the "Kannada" Unicode character block.
 834          * @since 1.2
 835          */
 836         public static final UnicodeBlock KANNADA =
 837             new UnicodeBlock("KANNADA");
 838
 839         /**
 840          * Constant for the "Malayalam" Unicode character block.
 841          * @since 1.2
 842          */
 843         public static final UnicodeBlock MALAYALAM =
 844             new UnicodeBlock("MALAYALAM");
 845
 846         /**
 847          * Constant for the "Thai" Unicode character block.
 848          * @since 1.2
 849          */
 850         public static final UnicodeBlock THAI =
 851             new UnicodeBlock("THAI");
 852
 853         /**
 854          * Constant for the "Lao" Unicode character block.
 855          * @since 1.2
 856          */
 857         public static final UnicodeBlock LAO =
 858             new UnicodeBlock("LAO");
 859
 860         /**
 861          * Constant for the "Tibetan" Unicode character block.
 862          * @since 1.2
 863          */
 864         public static final UnicodeBlock TIBETAN =
 865             new UnicodeBlock("TIBETAN");
 866
 867         /**
 868          * Constant for the "Georgian" Unicode character block.
 869          * @since 1.2
 870          */
 871         public static final UnicodeBlock GEORGIAN =
 872             new UnicodeBlock("GEORGIAN");
 873
 874         /**
 875          * Constant for the "Hangul Jamo" Unicode character block.
 876          * @since 1.2
 877          */
 878         public static final UnicodeBlock HANGUL_JAMO =
 879             new UnicodeBlock("HANGUL_JAMO",
 880                              "HANGUL JAMO",
 881                              "HANGULJAMO");
 882
 883         /**
 884          * Constant for the "Latin Extended Additional" Unicode character block.
 885          * @since 1.2
 886          */
 887         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
 888             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
 889                              "LATIN EXTENDED ADDITIONAL",
 890                              "LATINEXTENDEDADDITIONAL");
 891
 892         /**
 893          * Constant for the "Greek Extended" Unicode character block.
 894          * @since 1.2
 895          */
 896         public static final UnicodeBlock GREEK_EXTENDED =
 897             new UnicodeBlock("GREEK_EXTENDED",
 898                              "GREEK EXTENDED",
 899                              "GREEKEXTENDED");
 900
 901         /**
 902          * Constant for the "General Punctuation" Unicode character block.
 903          * @since 1.2
 904          */
 905         public static final UnicodeBlock GENERAL_PUNCTUATION =
 906             new UnicodeBlock("GENERAL_PUNCTUATION",
 907                              "GENERAL PUNCTUATION",
 908                              "GENERALPUNCTUATION");
 909 
 910         /**
 911          * Constant for the "Superscripts and Subscripts" Unicode character
 912          * block.
 913          * @since 1.2
 914          */
 915         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
 916             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
 917                              "SUPERSCRIPTS AND SUBSCRIPTS",
 918                              "SUPERSCRIPTSANDSUBSCRIPTS");
 919 
 920         /**
 921          * Constant for the "Currency Symbols" Unicode character block.
 922          * @since 1.2
 923          */
 924         public static final UnicodeBlock CURRENCY_SYMBOLS =
 925             new UnicodeBlock("CURRENCY_SYMBOLS",
 926                              "CURRENCY SYMBOLS",
 927                              "CURRENCYSYMBOLS");
 928 
 929         /**
 930          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
 931          * character block.
 932          * <p>
 933          * This block was previously known as "Combining Marks for Symbols", the name this constant retains.
 934          * @since 1.2
 935          */
 936         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
 937             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
 938                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
 939                              "COMBININGDIACRITICALMARKSFORSYMBOLS",
 940                              "COMBINING MARKS FOR SYMBOLS",
 941                              "COMBININGMARKSFORSYMBOLS");
 942 
 943         /**
 944          * Constant for the "Letterlike Symbols" Unicode character block.
 945          * @since 1.2
 946          */
 947         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
 948             new UnicodeBlock("LETTERLIKE_SYMBOLS",
 949                              "LETTERLIKE SYMBOLS",
 950                              "LETTERLIKESYMBOLS");
 951 
 952         /**
 953          * Constant for the "Number Forms" Unicode character block.
 954          * @since 1.2
 955          */
 956         public static final UnicodeBlock NUMBER_FORMS =
 957             new UnicodeBlock("NUMBER_FORMS",
 958                              "NUMBER FORMS",
 959                              "NUMBERFORMS");
 960 
 961         /**
 962          * Constant for the "Arrows" Unicode character block.
 963          * @since 1.2
 964          */
 965         public static final UnicodeBlock ARROWS =
 966             new UnicodeBlock("ARROWS");
 967 
 968         /**
 969          * Constant for the "Mathematical Operators" Unicode character block.
 970          * @since 1.2
 971          */
 972         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
 973             new UnicodeBlock("MATHEMATICAL_OPERATORS",
 974                              "MATHEMATICAL OPERATORS",
 975                              "MATHEMATICALOPERATORS");
 976 
 977         /**
 978          * Constant for the "Miscellaneous Technical" Unicode character block.
 979          * @since 1.2
 980          */
 981         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
 982             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
 983                              "MISCELLANEOUS TECHNICAL",
 984                              "MISCELLANEOUSTECHNICAL");
 985 
 986         /**
 987          * Constant for the "Control Pictures" Unicode character block.
 988          * @since 1.2
 989          */
 990         public static final UnicodeBlock CONTROL_PICTURES =
 991             new UnicodeBlock("CONTROL_PICTURES",
 992                              "CONTROL PICTURES",
 993                              "CONTROLPICTURES");
 994 
 995         /**
 996          * Constant for the "Optical Character Recognition" Unicode character block.
 997          * @since 1.2
 998          */
 999         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1000             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1001                              "OPTICAL CHARACTER RECOGNITION",
1002                              "OPTICALCHARACTERRECOGNITION");
1003 
1004         /**
1005          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1006          * @since 1.2
1007          */
1008         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1009             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1010                              "ENCLOSED ALPHANUMERICS",
1011                              "ENCLOSEDALPHANUMERICS");
1012 
1013         /**
1014          * Constant for the "Box Drawing" Unicode character block.
1015          * @since 1.2
1016          */
1017         public static final UnicodeBlock BOX_DRAWING =
1018             new UnicodeBlock("BOX_DRAWING",
1019                              "BOX DRAWING",
1020                              "BOXDRAWING");
1021 
1022         /**
1023          * Constant for the "Block Elements" Unicode character block.
1024          * @since 1.2
1025          */
1026         public static final UnicodeBlock BLOCK_ELEMENTS =
1027             new UnicodeBlock("BLOCK_ELEMENTS",
1028                              "BLOCK ELEMENTS",
1029                              "BLOCKELEMENTS");
1030 
1031         /**
1032          * Constant for the "Geometric Shapes" Unicode character block.
1033          * @since 1.2
1034          */
1035         public static final UnicodeBlock GEOMETRIC_SHAPES =
1036             new UnicodeBlock("GEOMETRIC_SHAPES",
1037                              "GEOMETRIC SHAPES",
1038                              "GEOMETRICSHAPES");
1039 
1040         /**
1041          * Constant for the "Miscellaneous Symbols" Unicode character block.
1042          * @since 1.2
1043          */
1044         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1045             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1046                              "MISCELLANEOUS SYMBOLS",
1047                              "MISCELLANEOUSSYMBOLS");
1048 
1049         /**
1050          * Constant for the "Dingbats" Unicode character block.
1051          * @since 1.2
1052          */
1053         public static final UnicodeBlock DINGBATS =
1054             new UnicodeBlock("DINGBATS");
1055 
1056         /**
1057          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1058          * @since 1.2
1059          */
1060         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1061             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1062                              "CJK SYMBOLS AND PUNCTUATION",
1063                              "CJKSYMBOLSANDPUNCTUATION");
1064 
1065         /**
1066          * Constant for the "Hiragana" Unicode character block.
1067          * @since 1.2
1068          */
1069         public static final UnicodeBlock HIRAGANA =
1070             new UnicodeBlock("HIRAGANA");
1071 
1072         /**
1073          * Constant for the "Katakana" Unicode character block.
1074          * @since 1.2
1075          */
1076         public static final UnicodeBlock KATAKANA =
1077             new UnicodeBlock("KATAKANA");
1078 
1079         /**
1080          * Constant for the "Bopomofo" Unicode character block.
1081          * @since 1.2
1082          */
1083         public static final UnicodeBlock BOPOMOFO =
1084             new UnicodeBlock("BOPOMOFO");
1085 
1086         /**
1087          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1088          * @since 1.2
1089          */
1090         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1091             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1092                              "HANGUL COMPATIBILITY JAMO",
1093                              "HANGULCOMPATIBILITYJAMO");
1094 
1095         /**
1096          * Constant for the "Kanbun" Unicode character block.
1097          * @since 1.2
1098          */
1099         public static final UnicodeBlock KANBUN =
1100             new UnicodeBlock("KANBUN");
1101 
1102         /**
1103          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1104          * @since 1.2
1105          */
1106         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1107             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1108                              "ENCLOSED CJK LETTERS AND MONTHS",
1109                              "ENCLOSEDCJKLETTERSANDMONTHS");
1110 
1111         /**
1112          * Constant for the "CJK Compatibility" Unicode character block.
1113          * @since 1.2
1114          */
1115         public static final UnicodeBlock CJK_COMPATIBILITY =
1116             new UnicodeBlock("CJK_COMPATIBILITY",
1117                              "CJK COMPATIBILITY",
1118                              "CJKCOMPATIBILITY");
1119 
1120         /**
1121          * Constant for the "CJK Unified Ideographs" Unicode character block.
1122          * @since 1.2
1123          */
1124         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1125             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1126                              "CJK UNIFIED IDEOGRAPHS",
1127                              "CJKUNIFIEDIDEOGRAPHS");
1128 
1129         /**
1130          * Constant for the "Hangul Syllables" Unicode character block.
1131          * @since 1.2
1132          */
1133         public static final UnicodeBlock HANGUL_SYLLABLES =
1134             new UnicodeBlock("HANGUL_SYLLABLES",
1135                              "HANGUL SYLLABLES",
1136                              "HANGULSYLLABLES");
1137 
1138         /**
1139          * Constant for the "Private Use Area" Unicode character block.
1140          * @since 1.2
1141          */
1142         public static final UnicodeBlock PRIVATE_USE_AREA =
1143             new UnicodeBlock("PRIVATE_USE_AREA",
1144                              "PRIVATE USE AREA",
1145                              "PRIVATEUSEAREA");
1146 
1147         /**
1148          * Constant for the "CJK Compatibility Ideographs" Unicode character
1149          * block.
1150          * @since 1.2
1151          */
1152         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1153             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1154                              "CJK COMPATIBILITY IDEOGRAPHS",
1155                              "CJKCOMPATIBILITYIDEOGRAPHS");
1156 
1157         /**
1158          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1159          * @since 1.2
1160          */
1161         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1162             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1163                              "ALPHABETIC PRESENTATION FORMS",
1164                              "ALPHABETICPRESENTATIONFORMS");
1165 
1166         /**
1167          * Constant for the "Arabic Presentation Forms-A" Unicode character
1168          * block.
1169          * @since 1.2
1170          */
1171         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1172             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1173                              "ARABIC PRESENTATION FORMS-A",
1174                              "ARABICPRESENTATIONFORMS-A");
1175 
1176         /**
1177          * Constant for the "Combining Half Marks" Unicode character block.
1178          * @since 1.2
1179          */
1180         public static final UnicodeBlock COMBINING_HALF_MARKS =
1181             new UnicodeBlock("COMBINING_HALF_MARKS",
1182                              "COMBINING HALF MARKS",
1183                              "COMBININGHALFMARKS");
1184 
1185         /**
1186          * Constant for the "CJK Compatibility Forms" Unicode character block.
1187          * @since 1.2
1188          */
1189         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1190             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1191                              "CJK COMPATIBILITY FORMS",
1192                              "CJKCOMPATIBILITYFORMS");
1193 
1194         /**
1195          * Constant for the "Small Form Variants" Unicode character block.
1196          * @since 1.2
1197          */
1198         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1199             new UnicodeBlock("SMALL_FORM_VARIANTS",
1200                              "SMALL FORM VARIANTS",
1201                              "SMALLFORMVARIANTS");
1202 
1203         /**
1204          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1205          * @since 1.2
1206          */
1207         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1208             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1209                              "ARABIC PRESENTATION FORMS-B",
1210                              "ARABICPRESENTATIONFORMS-B");
1211 
1212         /**
1213          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1214          * block.
1215          * @since 1.2
1216          */
1217         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1218             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1219                              "HALFWIDTH AND FULLWIDTH FORMS",
1220                              "HALFWIDTHANDFULLWIDTHFORMS");
1221 
1222         /**
1223          * Constant for the "Specials" Unicode character block.
1224          * @since 1.2
1225          */
1226         public static final UnicodeBlock SPECIALS =
1227             new UnicodeBlock("SPECIALS");
1228 
1229         /**
1230          * Constant for the former "Surrogates Area" block, superseded by three separate surrogate blocks.
1231          *
1232          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1233          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES},
1234          *             which match the block definitions of the Unicode Standard.
1235          *             {@link #of(char)} and {@link #of(int)} return those constants, not SURROGATES_AREA.
1236          */
1237         @Deprecated
1238         public static final UnicodeBlock SURROGATES_AREA =
1239             new UnicodeBlock("SURROGATES_AREA");
1240 
1241         /**
1242          * Constant for the "Syriac" Unicode character block.
1243          * @since 1.4
1244          */
1245         public static final UnicodeBlock SYRIAC =
1246             new UnicodeBlock("SYRIAC");
1247 
1248         /**
1249          * Constant for the "Thaana" Unicode character block.
1250          * @since 1.4
1251          */
1252         public static final UnicodeBlock THAANA =
1253             new UnicodeBlock("THAANA");
1254 
1255         /**
1256          * Constant for the "Sinhala" Unicode character block.
1257          * @since 1.4
1258          */
1259         public static final UnicodeBlock SINHALA =
1260             new UnicodeBlock("SINHALA");
1261 
1262         /**
1263          * Constant for the "Myanmar" Unicode character block.
1264          * @since 1.4
1265          */
1266         public static final UnicodeBlock MYANMAR =
1267             new UnicodeBlock("MYANMAR");
1268 
1269         /**
1270          * Constant for the "Ethiopic" Unicode character block.
1271          * @since 1.4
1272          */
1273         public static final UnicodeBlock ETHIOPIC =
1274             new UnicodeBlock("ETHIOPIC");
1275 
1276         /**
1277          * Constant for the "Cherokee" Unicode character block.
1278          * @since 1.4
1279          */
1280         public static final UnicodeBlock CHEROKEE =
1281             new UnicodeBlock("CHEROKEE");
1282 
1283         /**
1284          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1285          * @since 1.4
1286          */
1287         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1288             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1289                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1290                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
1291 
1292         /**
1293          * Constant for the "Ogham" Unicode character block.
1294          * @since 1.4
1295          */
1296         public static final UnicodeBlock OGHAM =
1297             new UnicodeBlock("OGHAM");
1298 
1299         /**
1300          * Constant for the "Runic" Unicode character block.
1301          * @since 1.4
1302          */
1303         public static final UnicodeBlock RUNIC =
1304             new UnicodeBlock("RUNIC");
1305 
1306         /**
1307          * Constant for the "Khmer" Unicode character block.
1308          * @since 1.4
1309          */
1310         public static final UnicodeBlock KHMER =
1311             new UnicodeBlock("KHMER");
1312 
1313         /**
1314          * Constant for the "Mongolian" Unicode character block.
1315          * @since 1.4
1316          */
1317         public static final UnicodeBlock MONGOLIAN =
1318             new UnicodeBlock("MONGOLIAN");
1319 
1320         /**
1321          * Constant for the "Braille Patterns" Unicode character block.
1322          * @since 1.4
1323          */
1324         public static final UnicodeBlock BRAILLE_PATTERNS =
1325             new UnicodeBlock("BRAILLE_PATTERNS",
1326                              "BRAILLE PATTERNS",
1327                              "BRAILLEPATTERNS");
1328 
1329         /**
1330          * Constant for the "CJK Radicals Supplement" Unicode character block.
1331          * @since 1.4
1332          */
1333         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1334             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1335                              "CJK RADICALS SUPPLEMENT",
1336                              "CJKRADICALSSUPPLEMENT");
1337 
1338         /**
1339          * Constant for the "Kangxi Radicals" Unicode character block.
1340          * @since 1.4
1341          */
1342         public static final UnicodeBlock KANGXI_RADICALS =
1343             new UnicodeBlock("KANGXI_RADICALS",
1344                              "KANGXI RADICALS",
1345                              "KANGXIRADICALS");
1346 
1347         /**
1348          * Constant for the "Ideographic Description Characters" Unicode character block.
1349          * @since 1.4
1350          */
1351         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1352             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1353                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1354                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1355 
1356         /**
1357          * Constant for the "Bopomofo Extended" Unicode character block.
1358          * @since 1.4
1359          */
1360         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1361             new UnicodeBlock("BOPOMOFO_EXTENDED",
1362                              "BOPOMOFO EXTENDED",
1363                              "BOPOMOFOEXTENDED");
1364 
1365         /**
1366          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1367          * @since 1.4
1368          */
1369         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1370             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1371                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1372                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1373 
1374         /**
1375          * Constant for the "Yi Syllables" Unicode character block.
1376          * @since 1.4
1377          */
1378         public static final UnicodeBlock YI_SYLLABLES =
1379             new UnicodeBlock("YI_SYLLABLES",
1380                              "YI SYLLABLES",
1381                              "YISYLLABLES");
1382 
1383         /**
1384          * Constant for the "Yi Radicals" Unicode character block.
1385          * @since 1.4
1386          */
1387         public static final UnicodeBlock YI_RADICALS =
1388             new UnicodeBlock("YI_RADICALS",
1389                              "YI RADICALS",
1390                              "YIRADICALS");
1391 
1392         /**
1393          * Constant for the "Cyrillic Supplementary" Unicode character block, now named "Cyrillic Supplement".
1394          * @since 1.5
1395          */
1396         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1397             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1398                              "CYRILLIC SUPPLEMENTARY",
1399                              "CYRILLICSUPPLEMENTARY",
1400                              "CYRILLIC SUPPLEMENT",
1401                              "CYRILLICSUPPLEMENT");
1402 
1403         /**
1404          * Constant for the "Tagalog" Unicode character block.
1405          * @since 1.5
1406          */
1407         public static final UnicodeBlock TAGALOG =
1408             new UnicodeBlock("TAGALOG");
1409 
1410         /**
1411          * Constant for the "Hanunoo" Unicode character block.
1412          * @since 1.5
1413          */
1414         public static final UnicodeBlock HANUNOO =
1415             new UnicodeBlock("HANUNOO");
1416 
1417         /**
1418          * Constant for the "Buhid" Unicode character block.
1419          * @since 1.5
1420          */
1421         public static final UnicodeBlock BUHID =
1422             new UnicodeBlock("BUHID");
1423 
1424         /**
1425          * Constant for the "Tagbanwa" Unicode character block.
1426          * @since 1.5
1427          */
1428         public static final UnicodeBlock TAGBANWA =
1429             new UnicodeBlock("TAGBANWA");
1430 
1431         /**
1432          * Constant for the "Limbu" Unicode character block.
1433          * @since 1.5
1434          */
1435         public static final UnicodeBlock LIMBU =
1436             new UnicodeBlock("LIMBU");
1437 
1438         /**
1439          * Constant for the "Tai Le" Unicode character block.
1440          * @since 1.5
1441          */
1442         public static final UnicodeBlock TAI_LE =
1443             new UnicodeBlock("TAI_LE",
1444                              "TAI LE",
1445                              "TAILE");
1446 
1447         /**
1448          * Constant for the "Khmer Symbols" Unicode character block.
1449          * @since 1.5
1450          */
1451         public static final UnicodeBlock KHMER_SYMBOLS =
1452             new UnicodeBlock("KHMER_SYMBOLS",
1453                              "KHMER SYMBOLS",
1454                              "KHMERSYMBOLS");
1455 
1456         /**
1457          * Constant for the "Phonetic Extensions" Unicode character block.
1458          * @since 1.5
1459          */
1460         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1461             new UnicodeBlock("PHONETIC_EXTENSIONS",
1462                              "PHONETIC EXTENSIONS",
1463                              "PHONETICEXTENSIONS");
1464 
1465         /**
1466          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1467          * @since 1.5
1468          */
1469         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1470             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1471                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1472                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1473 
1474         /**
1475          * Constant for the "Supplemental Arrows-A" Unicode character block.
1476          * @since 1.5
1477          */
1478         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1479             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1480                              "SUPPLEMENTAL ARROWS-A",
1481                              "SUPPLEMENTALARROWS-A");
1482 
1483         /**
1484          * Constant for the "Supplemental Arrows-B" Unicode character block.
1485          * @since 1.5
1486          */
1487         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1488             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1489                              "SUPPLEMENTAL ARROWS-B",
1490                              "SUPPLEMENTALARROWS-B");
1491 
1492         /**
1493          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1494          * character block.
1495          * @since 1.5
1496          */
1497         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1498             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1499                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1500                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1501 
1502         /**
1503          * Constant for the "Supplemental Mathematical Operators" Unicode
1504          * character block.
1505          * @since 1.5
1506          */
1507         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1508             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1509                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1510                              "SUPPLEMENTALMATHEMATICALOPERATORS");
1511 
1512         /**
1513          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1514          * block.
1515          * @since 1.5
1516          */
1517         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1518             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1519                              "MISCELLANEOUS SYMBOLS AND ARROWS",
1520                              "MISCELLANEOUSSYMBOLSANDARROWS");
1521 
1522         /**
1523          * Constant for the "Katakana Phonetic Extensions" Unicode character
1524          * block.
1525          * @since 1.5
1526          */
1527         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1528             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1529                              "KATAKANA PHONETIC EXTENSIONS",
1530                              "KATAKANAPHONETICEXTENSIONS");
1531 
1532         /**
1533          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1534          * @since 1.5
1535          */
1536         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1537             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1538                              "YIJING HEXAGRAM SYMBOLS",
1539                              "YIJINGHEXAGRAMSYMBOLS");
1540 
1541         /**
1542          * Constant for the "Variation Selectors" Unicode character block.
1543          * @since 1.5
1544          */
1545         public static final UnicodeBlock VARIATION_SELECTORS =
1546             new UnicodeBlock("VARIATION_SELECTORS",
1547                              "VARIATION SELECTORS",
1548                              "VARIATIONSELECTORS");
1549 
1550         /**
1551          * Constant for the "Linear B Syllabary" Unicode character block.
1552          * @since 1.5
1553          */
1554         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1555             new UnicodeBlock("LINEAR_B_SYLLABARY",
1556                              "LINEAR B SYLLABARY",
1557                              "LINEARBSYLLABARY");
1558 
1559         /**
1560          * Constant for the "Linear B Ideograms" Unicode character block.
1561          * @since 1.5
1562          */
1563         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1564             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1565                              "LINEAR B IDEOGRAMS",
1566                              "LINEARBIDEOGRAMS");
1567 
1568         /**
1569          * Constant for the "Aegean Numbers" Unicode character block.
1570          * @since 1.5
1571          */
1572         public static final UnicodeBlock AEGEAN_NUMBERS =
1573             new UnicodeBlock("AEGEAN_NUMBERS",
1574                              "AEGEAN NUMBERS",
1575                              "AEGEANNUMBERS");
1576 
1577         /**
1578          * Constant for the "Old Italic" Unicode character block.
1579          * @since 1.5
1580          */
1581         public static final UnicodeBlock OLD_ITALIC =
1582             new UnicodeBlock("OLD_ITALIC",
1583                              "OLD ITALIC",
1584                              "OLDITALIC");
1585 
1586         /**
1587          * Constant for the "Gothic" Unicode character block.
1588          * @since 1.5
1589          */
1590         public static final UnicodeBlock GOTHIC =
1591             new UnicodeBlock("GOTHIC");
1592 
1593         /**
1594          * Constant for the "Ugaritic" Unicode character block.
1595          * @since 1.5
1596          */
1597         public static final UnicodeBlock UGARITIC =
1598             new UnicodeBlock("UGARITIC");
1599 
1600         /**
1601          * Constant for the "Deseret" Unicode character block.
1602          * @since 1.5
1603          */
1604         public static final UnicodeBlock DESERET =
1605             new UnicodeBlock("DESERET");
1606 
1607         /**
1608          * Constant for the "Shavian" Unicode character block.
1609          * @since 1.5
1610          */
1611         public static final UnicodeBlock SHAVIAN =
1612             new UnicodeBlock("SHAVIAN");
1613 
1614         /**
1615          * Constant for the "Osmanya" Unicode character block.
1616          * @since 1.5
1617          */
1618         public static final UnicodeBlock OSMANYA =
1619             new UnicodeBlock("OSMANYA");
1620 
1621         /**
1622          * Constant for the "Cypriot Syllabary" Unicode character block.
1623          * @since 1.5
1624          */
1625         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1626             new UnicodeBlock("CYPRIOT_SYLLABARY",
1627                              "CYPRIOT SYLLABARY",
1628                              "CYPRIOTSYLLABARY");
1629 
1630         /**
1631          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1632          * @since 1.5
1633          */
1634         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1635             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1636                              "BYZANTINE MUSICAL SYMBOLS",
1637                              "BYZANTINEMUSICALSYMBOLS");
1638 
1639         /**
1640          * Constant for the "Musical Symbols" Unicode character block.
1641          * @since 1.5
1642          */
1643         public static final UnicodeBlock MUSICAL_SYMBOLS =
1644             new UnicodeBlock("MUSICAL_SYMBOLS",
1645                              "MUSICAL SYMBOLS",
1646                              "MUSICALSYMBOLS");
1647 
1648         /**
1649          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1650          * @since 1.5
1651          */
1652         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1653             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1654                              "TAI XUAN JING SYMBOLS",
1655                              "TAIXUANJINGSYMBOLS");
1656 
1657         /**
1658          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1659          * character block.
1660          * @since 1.5
1661          */
1662         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1663             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1664                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1665                              "MATHEMATICALALPHANUMERICSYMBOLS");
1666 
1667         /**
1668          * Constant for the "CJK Unified Ideographs Extension B" Unicode
1669          * character block.
1670          * @since 1.5
1671          */
1672         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1673             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1674                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1675                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1676 
1677         /**
1678          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1679          * @since 1.5
1680          */
1681         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1682             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1683                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1684                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1685 
1686         /**
1687          * Constant for the "Tags" Unicode character block.
1688          * @since 1.5
1689          */
1690         public static final UnicodeBlock TAGS =
1691             new UnicodeBlock("TAGS");
1692 
1693         /**
1694          * Constant for the "Variation Selectors Supplement" Unicode character
1695          * block.
1696          * @since 1.5
1697          */
1698         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1699             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1700                              "VARIATION SELECTORS SUPPLEMENT",
1701                              "VARIATIONSELECTORSSUPPLEMENT");
1702 
1703         /**
1704          * Constant for the "Supplementary Private Use Area-A" Unicode character
1705          * block.
1706          * @since 1.5
1707          */
1708         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1709             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1710                              "SUPPLEMENTARY PRIVATE USE AREA-A",
1711                              "SUPPLEMENTARYPRIVATEUSEAREA-A");
1712 
1713         /**
1714          * Constant for the "Supplementary Private Use Area-B" Unicode character
1715          * block.
1716          * @since 1.5
1717          */
1718         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1719             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1720                              "SUPPLEMENTARY PRIVATE USE AREA-B",
1721                              "SUPPLEMENTARYPRIVATEUSEAREA-B");
1722 
1723         /**
1724          * Constant for the "High Surrogates" Unicode character block.
1725          * This block represents code point values in the high surrogate
1726          * range: U+D800 through U+DB7F.
1727          *
1728          * @since 1.5
1729          */
1730         public static final UnicodeBlock HIGH_SURROGATES =
1731             new UnicodeBlock("HIGH_SURROGATES",
1732                              "HIGH SURROGATES",
1733                              "HIGHSURROGATES");
1734 
1735         /**
1736          * Constant for the "High Private Use Surrogates" Unicode character
1737          * block.
1738          * This block represents code point values in the private use high
1739          * surrogate range: U+DB80 through U+DBFF.
1740          *
1741          * @since 1.5
1742          */
1743         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1744             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1745                              "HIGH PRIVATE USE SURROGATES",
1746                              "HIGHPRIVATEUSESURROGATES");
1747 
1748         /**
1749          * Constant for the "Low Surrogates" Unicode character block.
1750          * This block represents code point values in the low surrogate
1751          * range: U+DC00 through U+DFFF.
1752          *
1753          * @since 1.5
1754          */
1755         public static final UnicodeBlock LOW_SURROGATES =
1756             new UnicodeBlock("LOW_SURROGATES",
1757                              "LOW SURROGATES",
1758                              "LOWSURROGATES");
1759 
1760         /**
1761          * Constant for the "Arabic Supplement" Unicode character block.
1762          * @since 1.7
1763          */
1764         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1765             new UnicodeBlock("ARABIC_SUPPLEMENT",
1766                              "ARABIC SUPPLEMENT",
1767                              "ARABICSUPPLEMENT");
1768 
1769         /**
1770          * Constant for the "NKo" Unicode character block.
1771          * @since 1.7
1772          */
1773         public static final UnicodeBlock NKO =
1774             new UnicodeBlock("NKO");
1775 
1776         /**
1777          * Constant for the "Samaritan" Unicode character block.
1778          * @since 1.7
1779          */
1780         public static final UnicodeBlock SAMARITAN =
1781             new UnicodeBlock("SAMARITAN");
1782 
1783         /**
1784          * Constant for the "Mandaic" Unicode character block.
1785          * @since 1.7
1786          */
1787         public static final UnicodeBlock MANDAIC =
1788             new UnicodeBlock("MANDAIC");
1789 
1790         /**
1791          * Constant for the "Ethiopic Supplement" Unicode character block.
1792          * @since 1.7
1793          */
1794         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1795             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1796                              "ETHIOPIC SUPPLEMENT",
1797                              "ETHIOPICSUPPLEMENT");
1798 
1799         /**
1800          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1801          * Unicode character block.
1802          * @since 1.7
1803          */
1804         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1805             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1806                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1807                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1808 
1809         /**
1810          * Constant for the "New Tai Lue" Unicode character block.
1811          * @since 1.7
1812          */
1813         public static final UnicodeBlock NEW_TAI_LUE =
1814             new UnicodeBlock("NEW_TAI_LUE",
1815                              "NEW TAI LUE",
1816                              "NEWTAILUE");
1817 
1818         /**
1819          * Constant for the "Buginese" Unicode character block.
1820          * @since 1.7
1821          */
1822         public static final UnicodeBlock BUGINESE =
1823             new UnicodeBlock("BUGINESE");
1824 
1825         /**
1826          * Constant for the "Tai Tham" Unicode character block.
1827          * @since 1.7
1828          */
1829         public static final UnicodeBlock TAI_THAM =
1830             new UnicodeBlock("TAI_THAM",
1831                              "TAI THAM",
1832                              "TAITHAM");
1833 
1834         /**
1835          * Constant for the "Balinese" Unicode character block.
1836          * @since 1.7
1837          */
1838         public static final UnicodeBlock BALINESE =
1839             new UnicodeBlock("BALINESE");
1840 
1841         /**
1842          * Constant for the "Sundanese" Unicode character block.
1843          * @since 1.7
1844          */
1845         public static final UnicodeBlock SUNDANESE =
1846             new UnicodeBlock("SUNDANESE");
1847 
1848         /**
1849          * Constant for the "Batak" Unicode character block.
1850          * @since 1.7
1851          */
1852         public static final UnicodeBlock BATAK =
1853             new UnicodeBlock("BATAK");
1854 
1855         /**
1856          * Constant for the "Lepcha" Unicode character block.
1857          * @since 1.7
1858          */
1859         public static final UnicodeBlock LEPCHA =
1860             new UnicodeBlock("LEPCHA");
1861 
1862         /**
1863          * Constant for the "Ol Chiki" Unicode character block.
1864          * @since 1.7
1865          */
1866         public static final UnicodeBlock OL_CHIKI =
1867             new UnicodeBlock("OL_CHIKI",
1868                              "OL CHIKI",
1869                              "OLCHIKI");
1870 
1871         /**
1872          * Constant for the "Vedic Extensions" Unicode character block.
1873          * @since 1.7
1874          */
1875         public static final UnicodeBlock VEDIC_EXTENSIONS =
1876             new UnicodeBlock("VEDIC_EXTENSIONS",
1877                              "VEDIC EXTENSIONS",
1878                              "VEDICEXTENSIONS");
1879 
1880         /**
1881          * Constant for the "Phonetic Extensions Supplement" Unicode character
1882          * block.
1883          * @since 1.7
1884          */
1885         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1886             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1887                              "PHONETIC EXTENSIONS SUPPLEMENT",
1888                              "PHONETICEXTENSIONSSUPPLEMENT");
1889 
1890         /**
1891          * Constant for the "Combining Diacritical Marks Supplement" Unicode
1892          * character block.
1893          * @since 1.7
1894          */
1895         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1896             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1897                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1898                              "COMBININGDIACRITICALMARKSSUPPLEMENT");
1899 
1900         /**
1901          * Constant for the "Glagolitic" Unicode character block.
1902          * @since 1.7
1903          */
1904         public static final UnicodeBlock GLAGOLITIC =
1905             new UnicodeBlock("GLAGOLITIC");
1906 
1907         /**
1908          * Constant for the "Latin Extended-C" Unicode character block.
1909          * @since 1.7
1910          */
1911         public static final UnicodeBlock LATIN_EXTENDED_C =
1912             new UnicodeBlock("LATIN_EXTENDED_C",
1913                              "LATIN EXTENDED-C",
1914                              "LATINEXTENDED-C");
1915 
1916         /**
1917          * Constant for the "Coptic" Unicode character block.
1918          * @since 1.7
1919          */
1920         public static final UnicodeBlock COPTIC =
1921             new UnicodeBlock("COPTIC");
1922 
1923         /**
1924          * Constant for the "Georgian Supplement" Unicode character block.
1925          * @since 1.7
1926          */
1927         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1928             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1929                              "GEORGIAN SUPPLEMENT",
1930                              "GEORGIANSUPPLEMENT");
1931 
1932         /**
1933          * Constant for the "Tifinagh" Unicode character block.
1934          * @since 1.7
1935          */
1936         public static final UnicodeBlock TIFINAGH =
1937             new UnicodeBlock("TIFINAGH");
1938 
1939         /**
1940          * Constant for the "Ethiopic Extended" Unicode character block.
1941          * @since 1.7
1942          */
1943         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1944             new UnicodeBlock("ETHIOPIC_EXTENDED",
1945                              "ETHIOPIC EXTENDED",
1946                              "ETHIOPICEXTENDED");
1947 
1948         /**
1949          * Constant for the "Cyrillic Extended-A" Unicode character block.
1950          * @since 1.7
1951          */
1952         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1953             new UnicodeBlock("CYRILLIC_EXTENDED_A",
1954                              "CYRILLIC EXTENDED-A",
1955                              "CYRILLICEXTENDED-A");
1956 
1957         /**
1958          * Constant for the "Supplemental Punctuation" Unicode character block.
1959          * @since 1.7
1960          */
1961         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1962             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1963                              "SUPPLEMENTAL PUNCTUATION",
1964                              "SUPPLEMENTALPUNCTUATION");
1965 
1966         /**
1967          * Constant for the "CJK Strokes" Unicode character block.
1968          * @since 1.7
1969          */
1970         public static final UnicodeBlock CJK_STROKES =
1971             new UnicodeBlock("CJK_STROKES",
1972                              "CJK STROKES",
1973                              "CJKSTROKES");
1974 
1975         /**
1976          * Constant for the "Lisu" Unicode character block.
1977          * @since 1.7
1978          */
1979         public static final UnicodeBlock LISU =
1980             new UnicodeBlock("LISU");
1981 
1982         /**
1983          * Constant for the "Vai" Unicode character block.
1984          * @since 1.7
1985          */
1986         public static final UnicodeBlock VAI =
1987             new UnicodeBlock("VAI");
1988 
1989         /**
1990          * Constant for the "Cyrillic Extended-B" Unicode character block.
1991          * @since 1.7
1992          */
1993         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
1994             new UnicodeBlock("CYRILLIC_EXTENDED_B",
1995                              "CYRILLIC EXTENDED-B",
1996                              "CYRILLICEXTENDED-B");
1997 
1998         /**
1999          * Constant for the "Bamum" Unicode character block.
2000          * @since 1.7
2001          */
2002         public static final UnicodeBlock BAMUM =
2003             new UnicodeBlock("BAMUM");
2004 
2005         /**
2006          * Constant for the "Modifier Tone Letters" Unicode character block.
2007          * @since 1.7
2008          */
2009         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2010             new UnicodeBlock("MODIFIER_TONE_LETTERS",
2011                              "MODIFIER TONE LETTERS",
2012                              "MODIFIERTONELETTERS");
2013 
2014         /**
2015          * Constant for the "Latin Extended-D" Unicode character block.
2016          * @since 1.7
2017          */
2018         public static final UnicodeBlock LATIN_EXTENDED_D =
2019             new UnicodeBlock("LATIN_EXTENDED_D",
2020                              "LATIN EXTENDED-D",
2021                              "LATINEXTENDED-D");
2022 
2023         /**
2024          * Constant for the "Syloti Nagri" Unicode character block.
2025          * @since 1.7
2026          */
2027         public static final UnicodeBlock SYLOTI_NAGRI =
2028             new UnicodeBlock("SYLOTI_NAGRI",
2029                              "SYLOTI NAGRI",
2030                              "SYLOTINAGRI");
2031 
2032         /**
2033          * Constant for the "Common Indic Number Forms" Unicode character block.
2034          * @since 1.7
2035          */
2036         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2037             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2038                              "COMMON INDIC NUMBER FORMS",
2039                              "COMMONINDICNUMBERFORMS");
2040 
2041         /**
2042          * Constant for the "Phags-pa" Unicode character block.
2043          * @since 1.7
2044          */
2045         public static final UnicodeBlock PHAGS_PA =
2046             new UnicodeBlock("PHAGS_PA",
2047                              "PHAGS-PA");
2048 
2049         /**
2050          * Constant for the "Saurashtra" Unicode character block.
2051          * @since 1.7
2052          */
2053         public static final UnicodeBlock SAURASHTRA =
2054             new UnicodeBlock("SAURASHTRA");
2055 
2056         /**
2057          * Constant for the "Devanagari Extended" Unicode character block.
2058          * @since 1.7
2059          */
2060         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2061             new UnicodeBlock("DEVANAGARI_EXTENDED",
2062                              "DEVANAGARI EXTENDED",
2063                              "DEVANAGARIEXTENDED");
2064 
2065         /**
2066          * Constant for the "Kayah Li" Unicode character block.
2067          * @since 1.7
2068          */
2069         public static final UnicodeBlock KAYAH_LI =
2070             new UnicodeBlock("KAYAH_LI",
2071                              "KAYAH LI",
2072                              "KAYAHLI");
2073 
2074         /**
2075          * Constant for the "Rejang" Unicode character block.
2076          * @since 1.7
2077          */
2078         public static final UnicodeBlock REJANG =
2079             new UnicodeBlock("REJANG");
2080 
2081         /**
2082          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2083          * @since 1.7
2084          */
2085         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2086             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2087                              "HANGUL JAMO EXTENDED-A",
2088                              "HANGULJAMOEXTENDED-A");
2089 
2090         /**
2091          * Constant for the "Javanese" Unicode character block.
2092          * @since 1.7
2093          */
2094         public static final UnicodeBlock JAVANESE =
2095             new UnicodeBlock("JAVANESE");
2096 
2097         /**
2098          * Constant for the "Cham" Unicode character block.
2099          * @since 1.7
2100          */
2101         public static final UnicodeBlock CHAM =
2102             new UnicodeBlock("CHAM");
2103 
2104         /**
2105          * Constant for the "Myanmar Extended-A" Unicode character block.
2106          * @since 1.7
2107          */
2108         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2109             new UnicodeBlock("MYANMAR_EXTENDED_A",
2110                              "MYANMAR EXTENDED-A",
2111                              "MYANMAREXTENDED-A");
2112 
2113         /**
2114          * Constant for the "Tai Viet" Unicode character block.
2115          * @since 1.7
2116          */
2117         public static final UnicodeBlock TAI_VIET =
2118             new UnicodeBlock("TAI_VIET",
2119                              "TAI VIET",
2120                              "TAIVIET");
2121 
2122         /**
2123          * Constant for the "Ethiopic Extended-A" Unicode character block.
2124          * @since 1.7
2125          */
2126         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2127             new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2128                              "ETHIOPIC EXTENDED-A",
2129                              "ETHIOPICEXTENDED-A");
2130 
2131         /**
2132          * Constant for the "Meetei Mayek" Unicode character block.
2133          * @since 1.7
2134          */
2135         public static final UnicodeBlock MEETEI_MAYEK =
2136             new UnicodeBlock("MEETEI_MAYEK",
2137                              "MEETEI MAYEK",
2138                              "MEETEIMAYEK");
2139 
2140         /**
2141          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2142          * @since 1.7
2143          */
2144         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2145             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2146                              "HANGUL JAMO EXTENDED-B",
2147                              "HANGULJAMOEXTENDED-B");
2148 
2149         /**
2150          * Constant for the "Vertical Forms" Unicode character block.
2151          * @since 1.7
2152          */
2153         public static final UnicodeBlock VERTICAL_FORMS =
2154             new UnicodeBlock("VERTICAL_FORMS",
2155                              "VERTICAL FORMS",
2156                              "VERTICALFORMS");
2157 
2158         /**
2159          * Constant for the "Ancient Greek Numbers" Unicode character block.
2160          * @since 1.7
2161          */
2162         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2163             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2164                              "ANCIENT GREEK NUMBERS",
2165                              "ANCIENTGREEKNUMBERS");
2166 
2167         /**
2168          * Constant for the "Ancient Symbols" Unicode character block.
2169          * @since 1.7
2170          */
2171         public static final UnicodeBlock ANCIENT_SYMBOLS =
2172             new UnicodeBlock("ANCIENT_SYMBOLS",
2173                              "ANCIENT SYMBOLS",
2174                              "ANCIENTSYMBOLS");
2175 
2176         /**
2177          * Constant for the "Phaistos Disc" Unicode character block.
2178          * @since 1.7
2179          */
2180         public static final UnicodeBlock PHAISTOS_DISC =
2181             new UnicodeBlock("PHAISTOS_DISC",
2182                              "PHAISTOS DISC",
2183                              "PHAISTOSDISC");
2184 
2185         /**
2186          * Constant for the "Lycian" Unicode character block.
2187          * @since 1.7
2188          */
2189         public static final UnicodeBlock LYCIAN =
2190             new UnicodeBlock("LYCIAN");
2191 
2192         /**
2193          * Constant for the "Carian" Unicode character block.
2194          * @since 1.7
2195          */
2196         public static final UnicodeBlock CARIAN =
2197             new UnicodeBlock("CARIAN");
2198 
2199         /**
2200          * Constant for the "Old Persian" Unicode character block.
2201          * @since 1.7
2202          */
2203         public static final UnicodeBlock OLD_PERSIAN =
2204             new UnicodeBlock("OLD_PERSIAN",
2205                              "OLD PERSIAN",
2206                              "OLDPERSIAN");
2207 
2208         /**
2209          * Constant for the "Imperial Aramaic" Unicode character block.
2210          * @since 1.7
2211          */
2212         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2213             new UnicodeBlock("IMPERIAL_ARAMAIC",
2214                              "IMPERIAL ARAMAIC",
2215                              "IMPERIALARAMAIC");
2216 
2217         /**
2218          * Constant for the "Phoenician" Unicode character block.
2219          * @since 1.7
2220          */
2221         public static final UnicodeBlock PHOENICIAN =
2222             new UnicodeBlock("PHOENICIAN");
2223 
2224         /**
2225          * Constant for the "Lydian" Unicode character block.
2226          * @since 1.7
2227          */
2228         public static final UnicodeBlock LYDIAN =
2229             new UnicodeBlock("LYDIAN");
2230 
2231         /**
2232          * Constant for the "Kharoshthi" Unicode character block.
2233          * @since 1.7
2234          */
2235         public static final UnicodeBlock KHAROSHTHI =
2236             new UnicodeBlock("KHAROSHTHI");
2237 
2238         /**
2239          * Constant for the "Old South Arabian" Unicode character block.
2240          * @since 1.7
2241          */
2242         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2243             new UnicodeBlock("OLD_SOUTH_ARABIAN",
2244                              "OLD SOUTH ARABIAN",
2245                              "OLDSOUTHARABIAN");
2246 
2247         /**
2248          * Constant for the "Avestan" Unicode character block.
2249          * @since 1.7
2250          */
2251         public static final UnicodeBlock AVESTAN =
2252             new UnicodeBlock("AVESTAN");
2253 
2254         /**
2255          * Constant for the "Inscriptional Parthian" Unicode character block.
2256          * @since 1.7
2257          */
2258         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2259             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2260                              "INSCRIPTIONAL PARTHIAN",
2261                              "INSCRIPTIONALPARTHIAN");
2262 
2263         /**
2264          * Constant for the "Inscriptional Pahlavi" Unicode character block.
2265          * @since 1.7
2266          */
2267         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2268             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2269                              "INSCRIPTIONAL PAHLAVI",
2270                              "INSCRIPTIONALPAHLAVI");
2271 
2272         /**
2273          * Constant for the "Old Turkic" Unicode character block.
2274          * @since 1.7
2275          */
2276         public static final UnicodeBlock OLD_TURKIC =
2277             new UnicodeBlock("OLD_TURKIC",
2278                              "OLD TURKIC",
2279                              "OLDTURKIC");
2280 
2281         /**
2282          * Constant for the "Rumi Numeral Symbols" Unicode character block.
2283          * @since 1.7
2284          */
2285         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2286             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2287                              "RUMI NUMERAL SYMBOLS",
2288                              "RUMINUMERALSYMBOLS");
2289 
2290         /**
2291          * Constant for the "Brahmi" Unicode character block.
2292          * @since 1.7
2293          */
2294         public static final UnicodeBlock BRAHMI =
2295             new UnicodeBlock("BRAHMI");
2296 
2297         /**
2298          * Constant for the "Kaithi" Unicode character block.
2299          * @since 1.7
2300          */
2301         public static final UnicodeBlock KAITHI =
2302             new UnicodeBlock("KAITHI");
2303 
2304         /**
2305          * Constant for the "Cuneiform" Unicode character block.
2306          * @since 1.7
2307          */
2308         public static final UnicodeBlock CUNEIFORM =
2309             new UnicodeBlock("CUNEIFORM");
2310 
2311         /**
2312          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2313          * character block.
2314          * @since 1.7
2315          */
2316         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2317             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2318                              "CUNEIFORM NUMBERS AND PUNCTUATION",
2319                              "CUNEIFORMNUMBERSANDPUNCTUATION");
2320 
2321         /**
2322          * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2323          * @since 1.7
2324          */
2325         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2326             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2327                              "EGYPTIAN HIEROGLYPHS",
2328                              "EGYPTIANHIEROGLYPHS");
2329 
2330         /**
2331          * Constant for the "Bamum Supplement" Unicode character block.
2332          * @since 1.7
2333          */
2334         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2335             new UnicodeBlock("BAMUM_SUPPLEMENT",
2336                              "BAMUM SUPPLEMENT",
2337                              "BAMUMSUPPLEMENT");
2338 
2339         /**
2340          * Constant for the "Kana Supplement" Unicode character block.
2341          * @since 1.7
2342          */
2343         public static final UnicodeBlock KANA_SUPPLEMENT =
2344             new UnicodeBlock("KANA_SUPPLEMENT",
2345                              "KANA SUPPLEMENT",
2346                              "KANASUPPLEMENT");
2347 
2348         /**
2349          * Constant for the "Ancient Greek Musical Notation" Unicode character
2350          * block.
2351          * @since 1.7
2352          */
2353         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2354             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2355                              "ANCIENT GREEK MUSICAL NOTATION",
2356                              "ANCIENTGREEKMUSICALNOTATION");
2357 
2358         /**
2359          * Constant for the "Counting Rod Numerals" Unicode character block.
2360          * @since 1.7
2361          */
2362         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2363             new UnicodeBlock("COUNTING_ROD_NUMERALS",
2364                              "COUNTING ROD NUMERALS",
2365                              "COUNTINGRODNUMERALS");
2366 
2367         /**
2368          * Constant for the "Mahjong Tiles" Unicode character block.
2369          * @since 1.7
2370          */
2371         public static final UnicodeBlock MAHJONG_TILES =
2372             new UnicodeBlock("MAHJONG_TILES",
2373                              "MAHJONG TILES",
2374                              "MAHJONGTILES");
2375 
2376         /**
2377          * Constant for the "Domino Tiles" Unicode character block.
2378          * @since 1.7
2379          */
2380         public static final UnicodeBlock DOMINO_TILES =
2381             new UnicodeBlock("DOMINO_TILES",
2382                              "DOMINO TILES",
2383                              "DOMINOTILES");
2384 
2385         /**
2386          * Constant for the "Playing Cards" Unicode character block.
2387          * @since 1.7
2388          */
2389         public static final UnicodeBlock PLAYING_CARDS =
2390             new UnicodeBlock("PLAYING_CARDS",
2391                              "PLAYING CARDS",
2392                              "PLAYINGCARDS");
2393 
2394         /**
2395          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2396          * block.
2397          * @since 1.7
2398          */
2399         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2400             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2401                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2402                              "ENCLOSEDALPHANUMERICSUPPLEMENT");
2403 
2404         /**
2405          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2406          * block.
2407          * @since 1.7
2408          */
2409         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2410             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2411                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2412                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2413 
2414         /**
2415          * Constant for the "Miscellaneous Symbols and Pictographs" Unicode
2416          * character block.
2417          * @since 1.7
2418          */
2419         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2420             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2421                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2422                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2423 
2424         /**
2425          * Constant for the "Emoticons" Unicode character block.
2426          * @since 1.7
2427          */
2428         public static final UnicodeBlock EMOTICONS =
2429             new UnicodeBlock("EMOTICONS");
2430 
2431         /**
2432          * Constant for the "Transport and Map Symbols" Unicode character block.
2433          * @since 1.7
2434          */
2435         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2436             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2437                              "TRANSPORT AND MAP SYMBOLS",
2438                              "TRANSPORTANDMAPSYMBOLS");
2439 
2440         /**
2441          * Constant for the "Alchemical Symbols" Unicode character block.
2442          * @since 1.7
2443          */
2444         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2445             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2446                              "ALCHEMICAL SYMBOLS",
2447                              "ALCHEMICALSYMBOLS");
2448 
2449         /**
2450          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2451          * character block.
2452          * @since 1.7
2453          */
2454         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2455             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2456                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2457                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2458 
2459         /**
2460          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2461          * character block.
2462          * @since 1.7
2463          */
2464         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2465             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2466                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2467                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2468 
2469         /**
2470          * Constant for the "Arabic Extended-A" Unicode character block.
2471          * @since 1.8
2472          */
2473         public static final UnicodeBlock ARABIC_EXTENDED_A =
2474             new UnicodeBlock("ARABIC_EXTENDED_A",
2475                              "ARABIC EXTENDED-A",
2476                              "ARABICEXTENDED-A");
2477 
2478         /**
2479          * Constant for the "Sundanese Supplement" Unicode character block.
2480          * @since 1.8
2481          */
2482         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2483             new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2484                              "SUNDANESE SUPPLEMENT",
2485                              "SUNDANESESUPPLEMENT");
2486 
2487         /**
2488          * Constant for the "Meetei Mayek Extensions" Unicode character block.
2489          * @since 1.8
2490          */
2491         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2492             new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2493                              "MEETEI MAYEK EXTENSIONS",
2494                              "MEETEIMAYEKEXTENSIONS");
2495 
2496         /**
2497          * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2498          * @since 1.8
2499          */
2500         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2501             new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2502                              "MEROITIC HIEROGLYPHS",
2503                              "MEROITICHIEROGLYPHS");
2504 
2505         /**
2506          * Constant for the "Meroitic Cursive" Unicode character block.
2507          * @since 1.8
2508          */
2509         public static final UnicodeBlock MEROITIC_CURSIVE =
2510             new UnicodeBlock("MEROITIC_CURSIVE",
2511                              "MEROITIC CURSIVE",
2512                              "MEROITICCURSIVE");
2513 
2514         /**
2515          * Constant for the "Sora Sompeng" Unicode character block.
2516          * @since 1.8
2517          */
2518         public static final UnicodeBlock SORA_SOMPENG =
2519             new UnicodeBlock("SORA_SOMPENG",
2520                              "SORA SOMPENG",
2521                              "SORASOMPENG");
2522 
2523         /**
2524          * Constant for the "Chakma" Unicode character block.
2525          * @since 1.8
2526          */
2527         public static final UnicodeBlock CHAKMA =
2528             new UnicodeBlock("CHAKMA");
2529 
2530         /**
2531          * Constant for the "Sharada" Unicode character block.
2532          * @since 1.8
2533          */
2534         public static final UnicodeBlock SHARADA =
2535             new UnicodeBlock("SHARADA");
2536 
2537         /**
2538          * Constant for the "Takri" Unicode character block.
2539          * @since 1.8
2540          */
2541         public static final UnicodeBlock TAKRI =
2542             new UnicodeBlock("TAKRI");
2543 
2544         /**
2545          * Constant for the "Miao" Unicode character block.
2546          * @since 1.8
2547          */
2548         public static final UnicodeBlock MIAO =
2549             new UnicodeBlock("MIAO");
2550 
2551         /**
2552          * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2553          * character block.
2554          * @since 1.8
2555          */
2556         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2557             new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2558                              "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2559                              "ARABICMATHEMATICALALPHABETICSYMBOLS");
2560 
2561         private static final int blockStarts[] = {
2562             0x0000,   // 0000..007F; Basic Latin
2563             0x0080,   // 0080..00FF; Latin-1 Supplement
2564             0x0100,   // 0100..017F; Latin Extended-A
2565             0x0180,   // 0180..024F; Latin Extended-B
2566             0x0250,   // 0250..02AF; IPA Extensions
2567             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2568             0x0300,   // 0300..036F; Combining Diacritical Marks
2569             0x0370,   // 0370..03FF; Greek and Coptic
2570             0x0400,   // 0400..04FF; Cyrillic
2571             0x0500,   // 0500..052F; Cyrillic Supplement
2572             0x0530,   // 0530..058F; Armenian
2573             0x0590,   // 0590..05FF; Hebrew
2574             0x0600,   // 0600..06FF; Arabic
2575             0x0700,   // 0700..074F; Syriac
2576             0x0750,   // 0750..077F; Arabic Supplement
2577             0x0780,   // 0780..07BF; Thaana
2578             0x07C0,   // 07C0..07FF; NKo
2579             0x0800,   // 0800..083F; Samaritan
2580             0x0840,   // 0840..085F; Mandaic
2581             0x0860,   //             unassigned
2582             0x08A0,   // 08A0..08FF; Arabic Extended-A
2583             0x0900,   // 0900..097F; Devanagari
2584             0x0980,   // 0980..09FF; Bengali
2585             0x0A00,   // 0A00..0A7F; Gurmukhi
2586             0x0A80,   // 0A80..0AFF; Gujarati
2587             0x0B00,   // 0B00..0B7F; Oriya
2588             0x0B80,   // 0B80..0BFF; Tamil
2589             0x0C00,   // 0C00..0C7F; Telugu
2590             0x0C80,   // 0C80..0CFF; Kannada
2591             0x0D00,   // 0D00..0D7F; Malayalam
2592             0x0D80,   // 0D80..0DFF; Sinhala
2593             0x0E00,   // 0E00..0E7F; Thai
2594             0x0E80,   // 0E80..0EFF; Lao
2595             0x0F00,   // 0F00..0FFF; Tibetan
2596             0x1000,   // 1000..109F; Myanmar
2597             0x10A0,   // 10A0..10FF; Georgian
2598             0x1100,   // 1100..11FF; Hangul Jamo
2599             0x1200,   // 1200..137F; Ethiopic
2600             0x1380,   // 1380..139F; Ethiopic Supplement
2601             0x13A0,   // 13A0..13FF; Cherokee
2602             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2603             0x1680,   // 1680..169F; Ogham
2604             0x16A0,   // 16A0..16FF; Runic
2605             0x1700,   // 1700..171F; Tagalog
2606             0x1720,   // 1720..173F; Hanunoo
2607             0x1740,   // 1740..175F; Buhid
2608             0x1760,   // 1760..177F; Tagbanwa
2609             0x1780,   // 1780..17FF; Khmer
2610             0x1800,   // 1800..18AF; Mongolian
2611             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2612             0x1900,   // 1900..194F; Limbu
2613             0x1950,   // 1950..197F; Tai Le
2614             0x1980,   // 1980..19DF; New Tai Lue
2615             0x19E0,   // 19E0..19FF; Khmer Symbols
2616             0x1A00,   // 1A00..1A1F; Buginese
2617             0x1A20,   // 1A20..1AAF; Tai Tham
2618             0x1AB0,   //             unassigned
2619             0x1B00,   // 1B00..1B7F; Balinese
2620             0x1B80,   // 1B80..1BBF; Sundanese
2621             0x1BC0,   // 1BC0..1BFF; Batak
2622             0x1C00,   // 1C00..1C4F; Lepcha
2623             0x1C50,   // 1C50..1C7F; Ol Chiki
2624             0x1C80,   //             unassigned
2625             0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
2626             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
2627             0x1D00,   // 1D00..1D7F; Phonetic Extensions
2628             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2629             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2630             0x1E00,   // 1E00..1EFF; Latin Extended Additional
2631             0x1F00,   // 1F00..1FFF; Greek Extended
2632             0x2000,   // 2000..206F; General Punctuation
2633             0x2070,   // 2070..209F; Superscripts and Subscripts
2634             0x20A0,   // 20A0..20CF; Currency Symbols
2635             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2636             0x2100,   // 2100..214F; Letterlike Symbols
2637             0x2150,   // 2150..218F; Number Forms
2638             0x2190,   // 2190..21FF; Arrows
2639             0x2200,   // 2200..22FF; Mathematical Operators
2640             0x2300,   // 2300..23FF; Miscellaneous Technical
2641             0x2400,   // 2400..243F; Control Pictures
2642             0x2440,   // 2440..245F; Optical Character Recognition
2643             0x2460,   // 2460..24FF; Enclosed Alphanumerics
2644             0x2500,   // 2500..257F; Box Drawing
2645             0x2580,   // 2580..259F; Block Elements
2646             0x25A0,   // 25A0..25FF; Geometric Shapes
2647             0x2600,   // 2600..26FF; Miscellaneous Symbols
2648             0x2700,   // 2700..27BF; Dingbats
2649             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2650             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2651             0x2800,   // 2800..28FF; Braille Patterns
2652             0x2900,   // 2900..297F; Supplemental Arrows-B
2653             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2654             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2655             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2656             0x2C00,   // 2C00..2C5F; Glagolitic
2657             0x2C60,   // 2C60..2C7F; Latin Extended-C
2658             0x2C80,   // 2C80..2CFF; Coptic
2659             0x2D00,   // 2D00..2D2F; Georgian Supplement
2660             0x2D30,   // 2D30..2D7F; Tifinagh
2661             0x2D80,   // 2D80..2DDF; Ethiopic Extended
2662             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2663             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2664             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2665             0x2F00,   // 2F00..2FDF; Kangxi Radicals
2666             0x2FE0,   //             unassigned
2667             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2668             0x3000,   // 3000..303F; CJK Symbols and Punctuation
2669             0x3040,   // 3040..309F; Hiragana
2670             0x30A0,   // 30A0..30FF; Katakana
2671             0x3100,   // 3100..312F; Bopomofo
2672             0x3130,   // 3130..318F; Hangul Compatibility Jamo
2673             0x3190,   // 3190..319F; Kanbun
2674             0x31A0,   // 31A0..31BF; Bopomofo Extended
2675             0x31C0,   // 31C0..31EF; CJK Strokes
2676             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2677             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2678             0x3300,   // 3300..33FF; CJK Compatibility
2679             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2680             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2681             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2682             0xA000,   // A000..A48F; Yi Syllables
2683             0xA490,   // A490..A4CF; Yi Radicals
2684             0xA4D0,   // A4D0..A4FF; Lisu
2685             0xA500,   // A500..A63F; Vai
2686             0xA640,   // A640..A69F; Cyrillic Extended-B
2687             0xA6A0,   // A6A0..A6FF; Bamum
2688             0xA700,   // A700..A71F; Modifier Tone Letters
2689             0xA720,   // A720..A7FF; Latin Extended-D
2690             0xA800,   // A800..A82F; Syloti Nagri
2691             0xA830,   // A830..A83F; Common Indic Number Forms
2692             0xA840,   // A840..A87F; Phags-pa
2693             0xA880,   // A880..A8DF; Saurashtra
2694             0xA8E0,   // A8E0..A8FF; Devanagari Extended
2695             0xA900,   // A900..A92F; Kayah Li
2696             0xA930,   // A930..A95F; Rejang
2697             0xA960,   // A960..A97F; Hangul Jamo Extended-A
2698             0xA980,   // A980..A9DF; Javanese
2699             0xA9E0,   //             unassigned
2700             0xAA00,   // AA00..AA5F; Cham
2701             0xAA60,   // AA60..AA7F; Myanmar Extended-A
2702             0xAA80,   // AA80..AADF; Tai Viet
2703             0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
2704             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
2705             0xAB30,   //             unassigned
2706             0xABC0,   // ABC0..ABFF; Meetei Mayek
2707             0xAC00,   // AC00..D7AF; Hangul Syllables
2708             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
2709             0xD800,   // D800..DB7F; High Surrogates
2710             0xDB80,   // DB80..DBFF; High Private Use Surrogates
2711             0xDC00,   // DC00..DFFF; Low Surrogates
2712             0xE000,   // E000..F8FF; Private Use Area
2713             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2714             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2715             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2716             0xFE00,   // FE00..FE0F; Variation Selectors
2717             0xFE10,   // FE10..FE1F; Vertical Forms
2718             0xFE20,   // FE20..FE2F; Combining Half Marks
2719             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2720             0xFE50,   // FE50..FE6F; Small Form Variants
2721             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2722             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2723             0xFFF0,   // FFF0..FFFF; Specials
2724             0x10000,  // 10000..1007F; Linear B Syllabary
2725             0x10080,  // 10080..100FF; Linear B Ideograms
2726             0x10100,  // 10100..1013F; Aegean Numbers
2727             0x10140,  // 10140..1018F; Ancient Greek Numbers
2728             0x10190,  // 10190..101CF; Ancient Symbols
2729             0x101D0,  // 101D0..101FF; Phaistos Disc
2730             0x10200,  //               unassigned
2731             0x10280,  // 10280..1029F; Lycian
2732             0x102A0,  // 102A0..102DF; Carian
2733             0x102E0,  //               unassigned
2734             0x10300,  // 10300..1032F; Old Italic
2735             0x10330,  // 10330..1034F; Gothic
2736             0x10350,  //               unassigned
2737             0x10380,  // 10380..1039F; Ugaritic
2738             0x103A0,  // 103A0..103DF; Old Persian
2739             0x103E0,  //               unassigned
2740             0x10400,  // 10400..1044F; Deseret
2741             0x10450,  // 10450..1047F; Shavian
2742             0x10480,  // 10480..104AF; Osmanya
2743             0x104B0,  //               unassigned
2744             0x10800,  // 10800..1083F; Cypriot Syllabary
2745             0x10840,  // 10840..1085F; Imperial Aramaic
2746             0x10860,  //               unassigned
2747             0x10900,  // 10900..1091F; Phoenician
2748             0x10920,  // 10920..1093F; Lydian
2749             0x10940,  //               unassigned
2750             0x10980,  // 10980..1099F; Meroitic Hieroglyphs
2751             0x109A0,  // 109A0..109FF; Meroitic Cursive
2752             0x10A00,  // 10A00..10A5F; Kharoshthi
2753             0x10A60,  // 10A60..10A7F; Old South Arabian
2754             0x10A80,  //               unassigned
2755             0x10B00,  // 10B00..10B3F; Avestan
2756             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
2757             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
2758             0x10B80,  //               unassigned
2759             0x10C00,  // 10C00..10C4F; Old Turkic
2760             0x10C50,  //               unassigned
2761             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
2762             0x10E80,  //               unassigned
2763             0x11000,  // 11000..1107F; Brahmi
2764             0x11080,  // 11080..110CF; Kaithi
2765             0x110D0,  // 110D0..110FF; Sora Sompeng
2766             0x11100,  // 11100..1114F; Chakma
2767             0x11150,  //               unassigned
2768             0x11180,  // 11180..111DF; Sharada
2769             0x111E0,  //               unassigned
2770             0x11680,  // 11680..116CF; Takri
2771             0x116D0,  //               unassigned
2772             0x12000,  // 12000..123FF; Cuneiform
2773             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2774             0x12480,  //               unassigned
2775             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
2776             0x13430,  //               unassigned
2777             0x16800,  // 16800..16A3F; Bamum Supplement
2778             0x16A40,  //               unassigned
2779             0x16F00,  // 16F00..16F9F; Miao
2780             0x16FA0,  //               unassigned
2781             0x1B000,  // 1B000..1B0FF; Kana Supplement
2782             0x1B100,  //               unassigned
2783             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2784             0x1D100,  // 1D100..1D1FF; Musical Symbols
2785             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2786             0x1D250,  //               unassigned
2787             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2788             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2789             0x1D380,  //               unassigned
2790             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2791             0x1D800,  //               unassigned
2792             0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
2793             0x1EF00,  //               unassigned
2794             0x1F000,  // 1F000..1F02F; Mahjong Tiles
2795             0x1F030,  // 1F030..1F09F; Domino Tiles
2796             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
2797             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2798             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
2799             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2800             0x1F600,  // 1F600..1F64F; Emoticons
2801             0x1F650,  //               unassigned
2802             0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
2803             0x1F700,  // 1F700..1F77F; Alchemical Symbols
2804             0x1F780,  //               unassigned
2805             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2806             0x2A6E0,  //               unassigned
2807             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
2808             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
2809             0x2B820,  //               unassigned
2810             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2811             0x2FA20,  //               unassigned
2812             0xE0000,  // E0000..E007F; Tags
2813             0xE0080,  //               unassigned
2814             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2815             0xE01F0,  //               unassigned
2816             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2817             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
2818         };
2819 
2820         private static final UnicodeBlock[] blocks = {
2821             BASIC_LATIN,
2822             LATIN_1_SUPPLEMENT,
2823             LATIN_EXTENDED_A,
2824             LATIN_EXTENDED_B,
2825             IPA_EXTENSIONS,
2826             SPACING_MODIFIER_LETTERS,
2827             COMBINING_DIACRITICAL_MARKS,
2828             GREEK,
2829             CYRILLIC,
2830             CYRILLIC_SUPPLEMENTARY,
2831             ARMENIAN,
2832             HEBREW,
2833             ARABIC,
2834             SYRIAC,
2835             ARABIC_SUPPLEMENT,
2836             THAANA,
2837             NKO,
2838             SAMARITAN,
2839             MANDAIC,
2840             null,
2841             ARABIC_EXTENDED_A,
2842             DEVANAGARI,
2843             BENGALI,
2844             GURMUKHI,
2845             GUJARATI,
2846             ORIYA,
2847             TAMIL,
2848             TELUGU,
2849             KANNADA,
2850             MALAYALAM,
2851             SINHALA,
2852             THAI,
2853             LAO,
2854             TIBETAN,
2855             MYANMAR,
2856             GEORGIAN,
2857             HANGUL_JAMO,
2858             ETHIOPIC,
2859             ETHIOPIC_SUPPLEMENT,
2860             CHEROKEE,
2861             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2862             OGHAM,
2863             RUNIC,
2864             TAGALOG,
2865             HANUNOO,
2866             BUHID,
2867             TAGBANWA,
2868             KHMER,
2869             MONGOLIAN,
2870             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
2871             LIMBU,
2872             TAI_LE,
2873             NEW_TAI_LUE,
2874             KHMER_SYMBOLS,
2875             BUGINESE,
2876             TAI_THAM,
2877             null,
2878             BALINESE,
2879             SUNDANESE,
2880             BATAK,
2881             LEPCHA,
2882             OL_CHIKI,
2883             null,
2884             SUNDANESE_SUPPLEMENT,
2885             VEDIC_EXTENSIONS,
2886             PHONETIC_EXTENSIONS,
2887             PHONETIC_EXTENSIONS_SUPPLEMENT,
2888             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2889             LATIN_EXTENDED_ADDITIONAL,
2890             GREEK_EXTENDED,
2891             GENERAL_PUNCTUATION,
2892             SUPERSCRIPTS_AND_SUBSCRIPTS,
2893             CURRENCY_SYMBOLS,
2894             COMBINING_MARKS_FOR_SYMBOLS,
2895             LETTERLIKE_SYMBOLS,
2896             NUMBER_FORMS,
2897             ARROWS,
2898             MATHEMATICAL_OPERATORS,
2899             MISCELLANEOUS_TECHNICAL,
2900             CONTROL_PICTURES,
2901             OPTICAL_CHARACTER_RECOGNITION,
2902             ENCLOSED_ALPHANUMERICS,
2903             BOX_DRAWING,
2904             BLOCK_ELEMENTS,
2905             GEOMETRIC_SHAPES,
2906             MISCELLANEOUS_SYMBOLS,
2907             DINGBATS,
2908             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
2909             SUPPLEMENTAL_ARROWS_A,
2910             BRAILLE_PATTERNS,
2911             SUPPLEMENTAL_ARROWS_B,
2912             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
2913             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
2914             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
2915             GLAGOLITIC,
2916             LATIN_EXTENDED_C,
2917             COPTIC,
2918             GEORGIAN_SUPPLEMENT,
2919             TIFINAGH,
2920             ETHIOPIC_EXTENDED,
2921             CYRILLIC_EXTENDED_A,
2922             SUPPLEMENTAL_PUNCTUATION,
2923             CJK_RADICALS_SUPPLEMENT,
2924             KANGXI_RADICALS,
2925             null,
2926             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
2927             CJK_SYMBOLS_AND_PUNCTUATION,
2928             HIRAGANA,
2929             KATAKANA,
2930             BOPOMOFO,
2931             HANGUL_COMPATIBILITY_JAMO,
2932             KANBUN,
2933             BOPOMOFO_EXTENDED,
2934             CJK_STROKES,
2935             KATAKANA_PHONETIC_EXTENSIONS,
2936             ENCLOSED_CJK_LETTERS_AND_MONTHS,
2937             CJK_COMPATIBILITY,
2938             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2939             YIJING_HEXAGRAM_SYMBOLS,
2940             CJK_UNIFIED_IDEOGRAPHS,
2941             YI_SYLLABLES,
2942             YI_RADICALS,
2943             LISU,
2944             VAI,
2945             CYRILLIC_EXTENDED_B,
2946             BAMUM,
2947             MODIFIER_TONE_LETTERS,
2948             LATIN_EXTENDED_D,
2949             SYLOTI_NAGRI,
2950             COMMON_INDIC_NUMBER_FORMS,
2951             PHAGS_PA,
2952             SAURASHTRA,
2953             DEVANAGARI_EXTENDED,
2954             KAYAH_LI,
2955             REJANG,
2956             HANGUL_JAMO_EXTENDED_A,
2957             JAVANESE,
2958             null,
2959             CHAM,
2960             MYANMAR_EXTENDED_A,
2961             TAI_VIET,
2962             MEETEI_MAYEK_EXTENSIONS,
2963             ETHIOPIC_EXTENDED_A,
2964             null,
2965             MEETEI_MAYEK,
2966             HANGUL_SYLLABLES,
2967             HANGUL_JAMO_EXTENDED_B,
2968             HIGH_SURROGATES,
2969             HIGH_PRIVATE_USE_SURROGATES,
2970             LOW_SURROGATES,
2971             PRIVATE_USE_AREA,
2972             CJK_COMPATIBILITY_IDEOGRAPHS,
2973             ALPHABETIC_PRESENTATION_FORMS,
2974             ARABIC_PRESENTATION_FORMS_A,
2975             VARIATION_SELECTORS,
2976             VERTICAL_FORMS,
2977             COMBINING_HALF_MARKS,
2978             CJK_COMPATIBILITY_FORMS,
2979             SMALL_FORM_VARIANTS,
2980             ARABIC_PRESENTATION_FORMS_B,
2981             HALFWIDTH_AND_FULLWIDTH_FORMS,
2982             SPECIALS,
2983             LINEAR_B_SYLLABARY,
2984             LINEAR_B_IDEOGRAMS,
2985             AEGEAN_NUMBERS,
2986             ANCIENT_GREEK_NUMBERS,
2987             ANCIENT_SYMBOLS,
2988             PHAISTOS_DISC,
2989             null,
2990             LYCIAN,
2991             CARIAN,
2992             null,
2993             OLD_ITALIC,
2994             GOTHIC,
2995             null,
2996             UGARITIC,
2997             OLD_PERSIAN,
2998             null,
2999             DESERET,
3000             SHAVIAN,
3001             OSMANYA,
3002             null,
3003             CYPRIOT_SYLLABARY,
3004             IMPERIAL_ARAMAIC,
3005             null,
3006             PHOENICIAN,
3007             LYDIAN,
3008             null,
3009             MEROITIC_HIEROGLYPHS,
3010             MEROITIC_CURSIVE,
3011             KHAROSHTHI,
3012             OLD_SOUTH_ARABIAN,
3013             null,
3014             AVESTAN,
3015             INSCRIPTIONAL_PARTHIAN,
3016             INSCRIPTIONAL_PAHLAVI,
3017             null,
3018             OLD_TURKIC,
3019             null,
3020             RUMI_NUMERAL_SYMBOLS,
3021             null,
3022             BRAHMI,
3023             KAITHI,
3024             SORA_SOMPENG,
3025             CHAKMA,
3026             null,
3027             SHARADA,
3028             null,
3029             TAKRI,
3030             null,
3031             CUNEIFORM,
3032             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
3033             null,
3034             EGYPTIAN_HIEROGLYPHS,
3035             null,
3036             BAMUM_SUPPLEMENT,
3037             null,
3038             MIAO,
3039             null,
3040             KANA_SUPPLEMENT,
3041             null,
3042             BYZANTINE_MUSICAL_SYMBOLS,
3043             MUSICAL_SYMBOLS,
3044             ANCIENT_GREEK_MUSICAL_NOTATION,
3045             null,
3046             TAI_XUAN_JING_SYMBOLS,
3047             COUNTING_ROD_NUMERALS,
3048             null,
3049             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
3050             null,
3051             ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
3052             null,
3053             MAHJONG_TILES,
3054             DOMINO_TILES,
3055             PLAYING_CARDS,
3056             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
3057             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
3058             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
3059             EMOTICONS,
3060             null,
3061             TRANSPORT_AND_MAP_SYMBOLS,
3062             ALCHEMICAL_SYMBOLS,
3063             null,
3064             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
3065             null,
3066             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
3067             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
3068             null,
3069             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
3070             null,
3071             TAGS,
3072             null,
3073             VARIATION_SELECTORS_SUPPLEMENT,
3074             null,
3075             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
3076             SUPPLEMENTARY_PRIVATE_USE_AREA_B
3077         };
3078 
3079 
3080         /**
3081          * Returns the object representing the Unicode block containing the
3082          * given character, or {@code null} if the character is not a
3083          * member of a defined block.
3084          *
3085          * <p><b>Note:</b> This method cannot handle
3086          * <a href="Character.html#supplementary"> supplementary
3087          * characters</a>.  To support all Unicode characters, including
3088          * supplementary characters, use the {@link #of(int)} method.
3089          *
3090          * @param   c  The character in question
3091          * @return  The {@code UnicodeBlock} instance representing the
3092          *          Unicode block of which this character is a member, or
3093          *          {@code null} if the character is not a member of any
3094          *          Unicode block
3095          */
3096         public static UnicodeBlock of(char c) {
3097             return of((int)c);
3098         }
3099 
3100         /**
3101          * Returns the object representing the Unicode block
3102          * containing the given character (Unicode code point), or
3103          * {@code null} if the character is not a member of a
3104          * defined block.
3105          *
3106          * @param   codePoint the character (Unicode code point) in question.
3107          * @return  The {@code UnicodeBlock} instance representing the
3108          *          Unicode block of which this character is a member, or
3109          *          {@code null} if the character is not a member of any
3110          *          Unicode block
3111          * @exception IllegalArgumentException if the specified
3112          * {@code codePoint} is an invalid Unicode code point.
3113          * @see Character#isValidCodePoint(int)
3114          * @since   1.5
3115          */
3116         public static UnicodeBlock of(int codePoint) {
3117             if (!isValidCodePoint(codePoint)) {
3118                 throw new IllegalArgumentException();
3119             }
3120 
3121             int top, bottom, current;
3122             bottom = 0;
3123             top = blockStarts.length;
3124             current = top/2;
3125 
3126             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3127             while (top - bottom > 1) {
3128                 if (codePoint >= blockStarts[current]) {
3129                     bottom = current;
3130                 } else {
3131                     top = current;
3132                 }
3133                 current = (top + bottom) / 2;
3134             }
3135             return blocks[current];
3136         }
3137 
3138         /**
3139          * Returns the UnicodeBlock with the given name. Block
3140          * names are determined by The Unicode Standard. The file
3141          * Blocks-&lt;version&gt;.txt defines blocks for a particular
3142          * version of the standard. The {@link Character} class specifies
3143          * the version of the standard that it supports.
3144          * <p>
3145          * This method accepts block names in the following forms:
3146          * <ol>
3147          * <li> Canonical block names as defined by the Unicode Standard.
3148          * For example, the standard defines a "Basic Latin" block. Therefore, this
3149          * method accepts "Basic Latin" as a valid block name. The documentation of
3150          * each UnicodeBlock provides the canonical name.
3151          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3152          * is a valid block name for the "Basic Latin" block.
3153          * <li>The text representation of each constant UnicodeBlock identifier.
3154          * For example, this method will return the {@link #BASIC_LATIN} block if
3155          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3156          * hyphens in the canonical name with underscores.
3157          * </ol>
3158          * Finally, character case is ignored for all of the valid block name forms.
3159          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3160          * The en_US locale's case mapping rules are used to provide case-insensitive
3161          * string comparisons for block name validation.
3162          * <p>
3163          * If the Unicode Standard changes block names, both the previous and
3164          * current names will be accepted.
3165          *
3166          * @param blockName A {@code UnicodeBlock} name.
3167          * @return The {@code UnicodeBlock} instance identified
3168          *         by {@code blockName}
3169          * @throws IllegalArgumentException if {@code blockName} is an
3170          *         invalid name
3171          * @throws NullPointerException if {@code blockName} is null
3172          * @since 1.5
3173          */
3174         public static final UnicodeBlock forName(String blockName) {
3175             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3176             if (block == null) {
3177                 throw new IllegalArgumentException();
3178             }
3179             return block;
3180         }
3181     }
3182 
3183 
3184     /**
3185      * A family of character subsets representing the character scripts
3186      * defined in the <a href="http://www.unicode.org/reports/tr24/">
3187      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3188      * character is assigned to a single Unicode script, either a specific
3189      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3190      * one of the following three special values,
3191      * {@link Character.UnicodeScript#INHERITED Inherited},
3192      * {@link Character.UnicodeScript#COMMON Common} or
3193      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3194      *
3195      * @since 1.7
3196      */
3197     public static enum UnicodeScript {
3198         /**
3199          * Unicode script "Common".
3200          */
3201         COMMON,
3202 
3203         /**
3204          * Unicode script "Latin".
3205          */
3206         LATIN,
3207 
3208         /**
3209          * Unicode script "Greek".
3210          */
3211         GREEK,
3212 
3213         /**
3214          * Unicode script "Cyrillic".
3215          */
3216         CYRILLIC,
3217 
3218         /**
3219          * Unicode script "Armenian".
3220          */
3221         ARMENIAN,
3222 
3223         /**
3224          * Unicode script "Hebrew".
3225          */
3226         HEBREW,
3227 
3228         /**
3229          * Unicode script "Arabic".
3230          */
3231         ARABIC,
3232 
3233         /**
3234          * Unicode script "Syriac".
3235          */
3236         SYRIAC,
3237 
3238         /**
3239          * Unicode script "Thaana".
3240          */
3241         THAANA,
3242 
3243         /**
3244          * Unicode script "Devanagari".
3245          */
3246         DEVANAGARI,
3247 
3248         /**
3249          * Unicode script "Bengali".
3250          */
3251         BENGALI,
3252 
3253         /**
3254          * Unicode script "Gurmukhi".
3255          */
3256         GURMUKHI,
3257 
3258         /**
3259          * Unicode script "Gujarati".
3260          */
3261         GUJARATI,
3262 
3263         /**
3264          * Unicode script "Oriya".
3265          */
3266         ORIYA,
3267 
3268         /**
3269          * Unicode script "Tamil".
3270          */
3271         TAMIL,
3272 
3273         /**
3274          * Unicode script "Telugu".
3275          */
3276         TELUGU,
3277 
3278         /**
3279          * Unicode script "Kannada".
3280          */
3281         KANNADA,
3282 
3283         /**
3284          * Unicode script "Malayalam".
3285          */
3286         MALAYALAM,
3287 
3288         /**
3289          * Unicode script "Sinhala".
3290          */
3291         SINHALA,
3292 
3293         /**
3294          * Unicode script "Thai".
3295          */
3296         THAI,
3297 
3298         /**
3299          * Unicode script "Lao".
3300          */
3301         LAO,
3302 
3303         /**
3304          * Unicode script "Tibetan".
3305          */
3306         TIBETAN,
3307 
3308         /**
3309          * Unicode script "Myanmar".
3310          */
3311         MYANMAR,
3312 
3313         /**
3314          * Unicode script "Georgian".
3315          */
3316         GEORGIAN,
3317 
3318         /**
3319          * Unicode script "Hangul".
3320          */
3321         HANGUL,
3322 
3323         /**
3324          * Unicode script "Ethiopic".
3325          */
3326         ETHIOPIC,
3327 
3328         /**
3329          * Unicode script "Cherokee".
3330          */
3331         CHEROKEE,
3332 
3333         /**
3334          * Unicode script "Canadian_Aboriginal".
3335          */
3336         CANADIAN_ABORIGINAL,
3337 
3338         /**
3339          * Unicode script "Ogham".
3340          */
3341         OGHAM,
3342 
3343         /**
3344          * Unicode script "Runic".
3345          */
3346         RUNIC,
3347 
3348         /**
3349          * Unicode script "Khmer".
3350          */
3351         KHMER,
3352 
3353         /**
3354          * Unicode script "Mongolian".
3355          */
3356         MONGOLIAN,
3357 
3358         /**
3359          * Unicode script "Hiragana".
3360          */
3361         HIRAGANA,
3362 
3363         /**
3364          * Unicode script "Katakana".
3365          */
3366         KATAKANA,
3367 
3368         /**
3369          * Unicode script "Bopomofo".
3370          */
3371         BOPOMOFO,
3372 
3373         /**
3374          * Unicode script "Han".
3375          */
3376         HAN,
3377 
3378         /**
3379          * Unicode script "Yi".
3380          */
3381         YI,
3382 
3383         /**
3384          * Unicode script "Old_Italic".
3385          */
3386         OLD_ITALIC,
3387 
3388         /**
3389          * Unicode script "Gothic".
3390          */
3391         GOTHIC,
3392 
3393         /**
3394          * Unicode script "Deseret".
3395          */
3396         DESERET,
3397 
3398         /**
3399          * Unicode script "Inherited".
3400          */
3401         INHERITED,
3402 
3403         /**
3404          * Unicode script "Tagalog".
3405          */
3406         TAGALOG,
3407 
3408         /**
3409          * Unicode script "Hanunoo".
3410          */
3411         HANUNOO,
3412 
3413         /**
3414          * Unicode script "Buhid".
3415          */
3416         BUHID,
3417 
3418         /**
3419          * Unicode script "Tagbanwa".
3420          */
3421         TAGBANWA,
3422 
3423         /**
3424          * Unicode script "Limbu".
3425          */
3426         LIMBU,
3427 
3428         /**
3429          * Unicode script "Tai_Le".
3430          */
3431         TAI_LE,
3432 
3433         /**
3434          * Unicode script "Linear_B".
3435          */
3436         LINEAR_B,
3437 
3438         /**
3439          * Unicode script "Ugaritic".
3440          */
3441         UGARITIC,
3442 
3443         /**
3444          * Unicode script "Shavian".
3445          */
3446         SHAVIAN,
3447 
3448         /**
3449          * Unicode script "Osmanya".
3450          */
3451         OSMANYA,
3452 
3453         /**
3454          * Unicode script "Cypriot".
3455          */
3456         CYPRIOT,
3457 
3458         /**
3459          * Unicode script "Braille".
3460          */
3461         BRAILLE,
3462 
3463         /**
3464          * Unicode script "Buginese".
3465          */
3466         BUGINESE,
3467 
3468         /**
3469          * Unicode script "Coptic".
3470          */
3471         COPTIC,
3472 
3473         /**
3474          * Unicode script "New_Tai_Lue".
3475          */
3476         NEW_TAI_LUE,
3477 
3478         /**
3479          * Unicode script "Glagolitic".
3480          */
3481         GLAGOLITIC,
3482 
3483         /**
3484          * Unicode script "Tifinagh".
3485          */
3486         TIFINAGH,
3487 
3488         /**
3489          * Unicode script "Syloti_Nagri".
3490          */
3491         SYLOTI_NAGRI,
3492 
3493         /**
3494          * Unicode script "Old_Persian".
3495          */
3496         OLD_PERSIAN,
3497 
3498         /**
3499          * Unicode script "Kharoshthi".
3500          */
3501         KHAROSHTHI,
3502 
3503         /**
3504          * Unicode script "Balinese".
3505          */
3506         BALINESE,
3507 
3508         /**
3509          * Unicode script "Cuneiform".
3510          */
3511         CUNEIFORM,
3512 
3513         /**
3514          * Unicode script "Phoenician".
3515          */
3516         PHOENICIAN,
3517 
3518         /**
3519          * Unicode script "Phags_Pa".
3520          */
3521         PHAGS_PA,
3522 
3523         /**
3524          * Unicode script "Nko".
3525          */
3526         NKO,
3527 
3528         /**
3529          * Unicode script "Sundanese".
3530          */
3531         SUNDANESE,
3532 
3533         /**
3534          * Unicode script "Batak".
3535          */
3536         BATAK,
3537 
3538         /**
3539          * Unicode script "Lepcha".
3540          */
3541         LEPCHA,
3542 
3543         /**
3544          * Unicode script "Ol_Chiki".
3545          */
3546         OL_CHIKI,
3547 
3548         /**
3549          * Unicode script "Vai".
3550          */
3551         VAI,
3552 
3553         /**
3554          * Unicode script "Saurashtra".
3555          */
3556         SAURASHTRA,
3557 
3558         /**
3559          * Unicode script "Kayah_Li".
3560          */
3561         KAYAH_LI,
3562 
3563         /**
3564          * Unicode script "Rejang".
3565          */
3566         REJANG,
3567 
3568         /**
3569          * Unicode script "Lycian".
3570          */
3571         LYCIAN,
3572 
3573         /**
3574          * Unicode script "Carian".
3575          */
3576         CARIAN,
3577 
3578         /**
3579          * Unicode script "Lydian".
3580          */
3581         LYDIAN,
3582 
3583         /**
3584          * Unicode script "Cham".
3585          */
3586         CHAM,
3587 
3588         /**
3589          * Unicode script "Tai_Tham".
3590          */
3591         TAI_THAM,
3592 
3593         /**
3594          * Unicode script "Tai_Viet".
3595          */
3596         TAI_VIET,
3597 
3598         /**
3599          * Unicode script "Avestan".
3600          */
3601         AVESTAN,
3602 
3603         /**
3604          * Unicode script "Egyptian_Hieroglyphs".
3605          */
3606         EGYPTIAN_HIEROGLYPHS,
3607 
3608         /**
3609          * Unicode script "Samaritan".
3610          */
3611         SAMARITAN,
3612 
3613         /**
3614          * Unicode script "Mandaic".
3615          */
3616         MANDAIC,
3617 
3618         /**
3619          * Unicode script "Lisu".
3620          */
3621         LISU,
3622 
3623         /**
3624          * Unicode script "Bamum".
3625          */
3626         BAMUM,
3627 
3628         /**
3629          * Unicode script "Javanese".
3630          */
3631         JAVANESE,
3632 
3633         /**
3634          * Unicode script "Meetei_Mayek".
3635          */
3636         MEETEI_MAYEK,
3637 
3638         /**
3639          * Unicode script "Imperial_Aramaic".
3640          */
3641         IMPERIAL_ARAMAIC,
3642 
3643         /**
3644          * Unicode script "Old_South_Arabian".
3645          */
3646         OLD_SOUTH_ARABIAN,
3647 
3648         /**
3649          * Unicode script "Inscriptional_Parthian".
3650          */
3651         INSCRIPTIONAL_PARTHIAN,
3652 
3653         /**
3654          * Unicode script "Inscriptional_Pahlavi".
3655          */
3656         INSCRIPTIONAL_PAHLAVI,
3657 
3658         /**
3659          * Unicode script "Old_Turkic".
3660          */
3661         OLD_TURKIC,
3662 
3663         /**
3664          * Unicode script "Brahmi".
3665          */
3666         BRAHMI,
3667 
3668         /**
3669          * Unicode script "Kaithi".
3670          */
3671         KAITHI,
3672 
3673         /**
3674          * Unicode script "Meroitic_Hieroglyphs".
3675          */
3676         MEROITIC_HIEROGLYPHS,
3677 
3678         /**
3679          * Unicode script "Meroitic_Cursive".
3680          */
3681         MEROITIC_CURSIVE,
3682 
3683         /**
3684          * Unicode script "Sora_Sompeng".
3685          */
3686         SORA_SOMPENG,
3687 
3688         /**
3689          * Unicode script "Chakma".
3690          */
3691         CHAKMA,
3692 
3693         /**
3694          * Unicode script "Sharada".
3695          */
3696         SHARADA,
3697 
3698         /**
3699          * Unicode script "Takri".
3700          */
3701         TAKRI,
3702 
3703         /**
3704          * Unicode script "Miao".
3705          */
3706         MIAO,
3707 
3708         /**
3709          * Unicode script "Unknown".
3710          */
3711         UNKNOWN;
3712 
3713         private static final int[] scriptStarts = {
3714             0x0000,   // 0000..0040; COMMON
3715             0x0041,   // 0041..005A; LATIN
3716             0x005B,   // 005B..0060; COMMON
3717             0x0061,   // 0061..007A; LATIN
3718             0x007B,   // 007B..00A9; COMMON
3719             0x00AA,   // 00AA..00AA; LATIN
3720             0x00AB,   // 00AB..00B9; COMMON
3721             0x00BA,   // 00BA..00BA; LATIN
3722             0x00BB,   // 00BB..00BF; COMMON
3723             0x00C0,   // 00C0..00D6; LATIN
3724             0x00D7,   // 00D7..00D7; COMMON
3725             0x00D8,   // 00D8..00F6; LATIN
3726             0x00F7,   // 00F7..00F7; COMMON
3727             0x00F8,   // 00F8..02B8; LATIN
3728             0x02B9,   // 02B9..02DF; COMMON
3729             0x02E0,   // 02E0..02E4; LATIN
3730             0x02E5,   // 02E5..02E9; COMMON
3731             0x02EA,   // 02EA..02EB; BOPOMOFO
3732             0x02EC,   // 02EC..02FF; COMMON
3733             0x0300,   // 0300..036F; INHERITED
3734             0x0370,   // 0370..0373; GREEK
3735             0x0374,   // 0374..0374; COMMON
3736             0x0375,   // 0375..037D; GREEK
3737             0x037E,   // 037E..0383; COMMON
3738             0x0384,   // 0384..0384; GREEK
3739             0x0385,   // 0385..0385; COMMON
3740             0x0386,   // 0386..0386; GREEK
3741             0x0387,   // 0387..0387; COMMON
3742             0x0388,   // 0388..03E1; GREEK
3743             0x03E2,   // 03E2..03EF; COPTIC
3744             0x03F0,   // 03F0..03FF; GREEK
3745             0x0400,   // 0400..0484; CYRILLIC
3746             0x0485,   // 0485..0486; INHERITED
3747             0x0487,   // 0487..0530; CYRILLIC
3748             0x0531,   // 0531..0588; ARMENIAN
3749             0x0589,   // 0589..0589; COMMON
3750             0x058A,   // 058A..0590; ARMENIAN
3751             0x0591,   // 0591..05FF; HEBREW
3752             0x0600,   // 0600..060B; ARABIC
3753             0x060C,   // 060C..060C; COMMON
3754             0x060D,   // 060D..061A; ARABIC
3755             0x061B,   // 061B..061D; COMMON
3756             0x061E,   // 061E..061E; ARABIC
3757             0x061F,   // 061F..061F; COMMON
3758             0x0620,   // 0620..063F; ARABIC
3759             0x0640,   // 0640..0640; COMMON
3760             0x0641,   // 0641..064A; ARABIC
3761             0x064B,   // 064B..0655; INHERITED
3762             0x0656,   // 0656..065F; ARABIC
3763             0x0660,   // 0660..0669; COMMON
3764             0x066A,   // 066A..066F; ARABIC
3765             0x0670,   // 0670..0670; INHERITED
3766             0x0671,   // 0671..06DC; ARABIC
3767             0x06DD,   // 06DD..06DD; COMMON
3768             0x06DE,   // 06DE..06FF; ARABIC
3769             0x0700,   // 0700..074F; SYRIAC
3770             0x0750,   // 0750..077F; ARABIC
3771             0x0780,   // 0780..07BF; THAANA
3772             0x07C0,   // 07C0..07FF; NKO
3773             0x0800,   // 0800..083F; SAMARITAN
3774             0x0840,   // 0840..089F; MANDAIC
3775             0x08A0,   // 08A0..08FF; ARABIC
3776             0x0900,   // 0900..0950; DEVANAGARI
3777             0x0951,   // 0951..0952; INHERITED
3778             0x0953,   // 0953..0963; DEVANAGARI
3779             0x0964,   // 0964..0965; COMMON
3780             0x0966,   // 0966..0980; DEVANAGARI
3781             0x0981,   // 0981..0A00; BENGALI
3782             0x0A01,   // 0A01..0A80; GURMUKHI
3783             0x0A81,   // 0A81..0B00; GUJARATI
3784             0x0B01,   // 0B01..0B81; ORIYA
3785             0x0B82,   // 0B82..0C00; TAMIL
3786             0x0C01,   // 0C01..0C81; TELUGU
3787             0x0C82,   // 0C82..0CF0; KANNADA
3788             0x0D02,   // 0D02..0D81; MALAYALAM
3789             0x0D82,   // 0D82..0E00; SINHALA
3790             0x0E01,   // 0E01..0E3E; THAI
3791             0x0E3F,   // 0E3F..0E3F; COMMON
3792             0x0E40,   // 0E40..0E80; THAI
3793             0x0E81,   // 0E81..0EFF; LAO
3794             0x0F00,   // 0F00..0FD4; TIBETAN
3795             0x0FD5,   // 0FD5..0FD8; COMMON
3796             0x0FD9,   // 0FD9..0FFF; TIBETAN
3797             0x1000,   // 1000..109F; MYANMAR
3798             0x10A0,   // 10A0..10FA; GEORGIAN
3799             0x10FB,   // 10FB..10FB; COMMON
3800             0x10FC,   // 10FC..10FF; GEORGIAN
3801             0x1100,   // 1100..11FF; HANGUL
3802             0x1200,   // 1200..139F; ETHIOPIC
3803             0x13A0,   // 13A0..13FF; CHEROKEE
3804             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
3805             0x1680,   // 1680..169F; OGHAM
3806             0x16A0,   // 16A0..16EA; RUNIC
3807             0x16EB,   // 16EB..16ED; COMMON
3808             0x16EE,   // 16EE..16FF; RUNIC
3809             0x1700,   // 1700..171F; TAGALOG
3810             0x1720,   // 1720..1734; HANUNOO
3811             0x1735,   // 1735..173F; COMMON
3812             0x1740,   // 1740..175F; BUHID
3813             0x1760,   // 1760..177F; TAGBANWA
3814             0x1780,   // 1780..17FF; KHMER
3815             0x1800,   // 1800..1801; MONGOLIAN
3816             0x1802,   // 1802..1803; COMMON
3817             0x1804,   // 1804..1804; MONGOLIAN
3818             0x1805,   // 1805..1805; COMMON
3819             0x1806,   // 1806..18AF; MONGOLIAN
3820             0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
3821             0x1900,   // 1900..194F; LIMBU
3822             0x1950,   // 1950..197F; TAI_LE
3823             0x1980,   // 1980..19DF; NEW_TAI_LUE
3824             0x19E0,   // 19E0..19FF; KHMER
3825             0x1A00,   // 1A00..1A1F; BUGINESE
3826             0x1A20,   // 1A20..1AFF; TAI_THAM
3827             0x1B00,   // 1B00..1B7F; BALINESE
3828             0x1B80,   // 1B80..1BBF; SUNDANESE
3829             0x1BC0,   // 1BC0..1BFF; BATAK
3830             0x1C00,   // 1C00..1C4F; LEPCHA
3831             0x1C50,   // 1C50..1CBF; OL_CHIKI
3832             0x1CC0,   // 1CC0..1CCF; SUNDANESE
3833             0x1CD0,   // 1CD0..1CD2; INHERITED
3834             0x1CD3,   // 1CD3..1CD3; COMMON
3835             0x1CD4,   // 1CD4..1CE0; INHERITED
3836             0x1CE1,   // 1CE1..1CE1; COMMON
3837             0x1CE2,   // 1CE2..1CE8; INHERITED
3838             0x1CE9,   // 1CE9..1CEC; COMMON
3839             0x1CED,   // 1CED..1CED; INHERITED
3840             0x1CEE,   // 1CEE..1CF3; COMMON
3841             0x1CF4,   // 1CF4..1CF4; INHERITED
3842             0x1CF5,   // 1CF5..1CFF; COMMON
3843             0x1D00,   // 1D00..1D25; LATIN
3844             0x1D26,   // 1D26..1D2A; GREEK
3845             0x1D2B,   // 1D2B..1D2B; CYRILLIC
3846             0x1D2C,   // 1D2C..1D5C; LATIN
3847             0x1D5D,   // 1D5D..1D61; GREEK
3848             0x1D62,   // 1D62..1D65; LATIN
3849             0x1D66,   // 1D66..1D6A; GREEK
3850             0x1D6B,   // 1D6B..1D77; LATIN
3851             0x1D78,   // 1D78..1D78; CYRILLIC
3852             0x1D79,   // 1D79..1DBE; LATIN
3853             0x1DBF,   // 1DBF..1DBF; GREEK
3854             0x1DC0,   // 1DC0..1DFF; INHERITED
3855             0x1E00,   // 1E00..1EFF; LATIN
3856             0x1F00,   // 1F00..1FFF; GREEK
3857             0x2000,   // 2000..200B; COMMON
3858             0x200C,   // 200C..200D; INHERITED
3859             0x200E,   // 200E..2070; COMMON
3860             0x2071,   // 2071..2073; LATIN
3861             0x2074,   // 2074..207E; COMMON
3862             0x207F,   // 207F..207F; LATIN
3863             0x2080,   // 2080..208F; COMMON
3864             0x2090,   // 2090..209F; LATIN
3865             0x20A0,   // 20A0..20CF; COMMON
3866             0x20D0,   // 20D0..20FF; INHERITED
3867             0x2100,   // 2100..2125; COMMON
3868             0x2126,   // 2126..2126; GREEK
3869             0x2127,   // 2127..2129; COMMON
3870             0x212A,   // 212A..212B; LATIN
3871             0x212C,   // 212C..2131; COMMON
3872             0x2132,   // 2132..2132; LATIN
3873             0x2133,   // 2133..214D; COMMON
3874             0x214E,   // 214E..214E; LATIN
3875             0x214F,   // 214F..215F; COMMON
3876             0x2160,   // 2160..2188; LATIN
3877             0x2189,   // 2189..27FF; COMMON
3878             0x2800,   // 2800..28FF; BRAILLE
3879             0x2900,   // 2900..2BFF; COMMON
3880             0x2C00,   // 2C00..2C5F; GLAGOLITIC
3881             0x2C60,   // 2C60..2C7F; LATIN
3882             0x2C80,   // 2C80..2CFF; COPTIC
3883             0x2D00,   // 2D00..2D2F; GEORGIAN
3884             0x2D30,   // 2D30..2D7F; TIFINAGH
3885             0x2D80,   // 2D80..2DDF; ETHIOPIC
3886             0x2DE0,   // 2DE0..2DFF; CYRILLIC
3887             0x2E00,   // 2E00..2E7F; COMMON
3888             0x2E80,   // 2E80..2FEF; HAN
3889             0x2FF0,   // 2FF0..3004; COMMON
3890             0x3005,   // 3005..3005; HAN
3891             0x3006,   // 3006..3006; COMMON
3892             0x3007,   // 3007..3007; HAN
3893             0x3008,   // 3008..3020; COMMON
3894             0x3021,   // 3021..3029; HAN
3895             0x302A,   // 302A..302D; INHERITED
3896             0x302E,   // 302E..302F; HANGUL
3897             0x3030,   // 3030..3037; COMMON
3898             0x3038,   // 3038..303B; HAN
3899             0x303C,   // 303C..3040; COMMON
3900             0x3041,   // 3041..3098; HIRAGANA
3901             0x3099,   // 3099..309A; INHERITED
3902             0x309B,   // 309B..309C; COMMON
3903             0x309D,   // 309D..309F; HIRAGANA
3904             0x30A0,   // 30A0..30A0; COMMON
3905             0x30A1,   // 30A1..30FA; KATAKANA
3906             0x30FB,   // 30FB..30FC; COMMON
3907             0x30FD,   // 30FD..3104; KATAKANA
3908             0x3105,   // 3105..3130; BOPOMOFO
3909             0x3131,   // 3131..318F; HANGUL
3910             0x3190,   // 3190..319F; COMMON
3911             0x31A0,   // 31A0..31BF; BOPOMOFO
3912             0x31C0,   // 31C0..31EF; COMMON
3913             0x31F0,   // 31F0..31FF; KATAKANA
3914             0x3200,   // 3200..321F; HANGUL
3915             0x3220,   // 3220..325F; COMMON
3916             0x3260,   // 3260..327E; HANGUL
3917             0x327F,   // 327F..32CF; COMMON
3918             0x32D0,   // 32D0..3357; KATAKANA
3919             0x3358,   // 3358..33FF; COMMON
3920             0x3400,   // 3400..4DBF; HAN
3921             0x4DC0,   // 4DC0..4DFF; COMMON
3922             0x4E00,   // 4E00..9FFF; HAN
3923             0xA000,   // A000..A4CF; YI
3924             0xA4D0,   // A4D0..A4FF; LISU
3925             0xA500,   // A500..A63F; VAI
3926             0xA640,   // A640..A69F; CYRILLIC
3927             0xA6A0,   // A6A0..A6FF; BAMUM
3928             0xA700,   // A700..A721; COMMON
3929             0xA722,   // A722..A787; LATIN
3930             0xA788,   // A788..A78A; COMMON
3931             0xA78B,   // A78B..A7FF; LATIN
3932             0xA800,   // A800..A82F; SYLOTI_NAGRI
3933             0xA830,   // A830..A83F; COMMON
3934             0xA840,   // A840..A87F; PHAGS_PA
3935             0xA880,   // A880..A8DF; SAURASHTRA
3936             0xA8E0,   // A8E0..A8FF; DEVANAGARI
3937             0xA900,   // A900..A92F; KAYAH_LI
3938             0xA930,   // A930..A95F; REJANG
3939             0xA960,   // A960..A97F; HANGUL
3940             0xA980,   // A980..A9FF; JAVANESE
3941             0xAA00,   // AA00..AA5F; CHAM
3942             0xAA60,   // AA60..AA7F; MYANMAR
3943             0xAA80,   // AA80..AADF; TAI_VIET
3944             0xAAE0,   // AAE0..AB00; MEETEI_MAYEK
3945             0xAB01,   // AB01..ABBF; ETHIOPIC
3946             0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
3947             0xAC00,   // AC00..D7FB; HANGUL
3948             0xD7FC,   // D7FC..F8FF; UNKNOWN
3949             0xF900,   // F900..FAFF; HAN
3950             0xFB00,   // FB00..FB12; LATIN
3951             0xFB13,   // FB13..FB1C; ARMENIAN
3952             0xFB1D,   // FB1D..FB4F; HEBREW
3953             0xFB50,   // FB50..FD3D; ARABIC
3954             0xFD3E,   // FD3E..FD4F; COMMON
3955             0xFD50,   // FD50..FDFC; ARABIC
3956             0xFDFD,   // FDFD..FDFF; COMMON
3957             0xFE00,   // FE00..FE0F; INHERITED
3958             0xFE10,   // FE10..FE1F; COMMON
3959             0xFE20,   // FE20..FE2F; INHERITED
3960             0xFE30,   // FE30..FE6F; COMMON
3961             0xFE70,   // FE70..FEFE; ARABIC
3962             0xFEFF,   // FEFF..FF20; COMMON
3963             0xFF21,   // FF21..FF3A; LATIN
3964             0xFF3B,   // FF3B..FF40; COMMON
3965             0xFF41,   // FF41..FF5A; LATIN
3966             0xFF5B,   // FF5B..FF65; COMMON
3967             0xFF66,   // FF66..FF6F; KATAKANA
3968             0xFF70,   // FF70..FF70; COMMON
3969             0xFF71,   // FF71..FF9D; KATAKANA
3970             0xFF9E,   // FF9E..FF9F; COMMON
3971             0xFFA0,   // FFA0..FFDF; HANGUL
3972             0xFFE0,   // FFE0..FFFF; COMMON
3973             0x10000,  // 10000..100FF; LINEAR_B
3974             0x10100,  // 10100..1013F; COMMON
3975             0x10140,  // 10140..1018F; GREEK
3976             0x10190,  // 10190..101FC; COMMON
3977             0x101FD,  // 101FD..1027F; INHERITED
3978             0x10280,  // 10280..1029F; LYCIAN
3979             0x102A0,  // 102A0..102FF; CARIAN
3980             0x10300,  // 10300..1032F; OLD_ITALIC
3981             0x10330,  // 10330..1037F; GOTHIC
3982             0x10380,  // 10380..1039F; UGARITIC
3983             0x103A0,  // 103A0..103FF; OLD_PERSIAN
3984             0x10400,  // 10400..1044F; DESERET
3985             0x10450,  // 10450..1047F; SHAVIAN
3986             0x10480,  // 10480..107FF; OSMANYA
3987             0x10800,  // 10800..1083F; CYPRIOT
3988             0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
3989             0x10900,  // 10900..1091F; PHOENICIAN
3990             0x10920,  // 10920..1097F; LYDIAN
3991             0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
3992             0x109A0,  // 109A0..109FF; MEROITIC_CURSIVE
3993             0x10A00,  // 10A00..10A5F; KHAROSHTHI
3994             0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
3995             0x10B00,  // 10B00..10B3F; AVESTAN
3996             0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
3997             0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
3998             0x10C00,  // 10C00..10E5F; OLD_TURKIC
3999             0x10E60,  // 10E60..10FFF; ARABIC
4000             0x11000,  // 11000..1107F; BRAHMI
4001             0x11080,  // 11080..110CF; KAITHI
4002             0x110D0,  // 110D0..110FF; SORA_SOMPENG
4003             0x11100,  // 11100..1117F; CHAKMA
4004             0x11180,  // 11180..1167F; SHARADA
4005             0x11680,  // 11680..116CF; TAKRI
4006             0x12000,  // 12000..12FFF; CUNEIFORM
4007             0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
4008             0x16800,  // 16800..16A38; BAMUM
4009             0x16F00,  // 16F00..16F9F; MIAO
4010             0x1B000,  // 1B000..1B000; KATAKANA
4011             0x1B001,  // 1B001..1CFFF; HIRAGANA
4012             0x1D000,  // 1D000..1D166; COMMON
4013             0x1D167,  // 1D167..1D169; INHERITED
4014             0x1D16A,  // 1D16A..1D17A; COMMON
4015             0x1D17B,  // 1D17B..1D182; INHERITED
4016             0x1D183,  // 1D183..1D184; COMMON
4017             0x1D185,  // 1D185..1D18B; INHERITED
4018             0x1D18C,  // 1D18C..1D1A9; COMMON
4019             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
4020             0x1D1AE,  // 1D1AE..1D1FF; COMMON
4021             0x1D200,  // 1D200..1D2FF; GREEK
4022             0x1D300,  // 1D300..1EDFF; COMMON
4023             0x1EE00,  // 1EE00..1EFFF; ARABIC
4024             0x1F000,  // 1F000..1F1FF; COMMON
4025             0x1F200,  // 1F200..1F200; HIRAGANA
4026             0x1F201,  // 1F201..1FFFF; COMMON
4027             0x20000,  // 20000..E0000; HAN
4028             0xE0001,  // E0001..E00FF; COMMON
4029             0xE0100,  // E0100..E01EF; INHERITED
4030             0xE01F0   // E01F0..10FFFF; UNKNOWN
4031 
4032         };
4033 
        // Script assigned to each code point range. This table is parallel to
        // scriptStarts: scripts[i] is the script of the range that begins at
        // scriptStarts[i]. of(int) binary-searches scriptStarts and uses the
        // resulting index to read this array, so the two arrays must be kept
        // in step entry for entry.
        private static final UnicodeScript[] scripts = {
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            BOPOMOFO,
            COMMON,
            INHERITED,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COPTIC,
            GREEK,
            CYRILLIC,
            INHERITED,
            CYRILLIC,
            ARMENIAN,
            COMMON,
            ARMENIAN,
            HEBREW,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            INHERITED,
            ARABIC,
            COMMON,
            ARABIC,
            INHERITED,
            ARABIC,
            COMMON,
            ARABIC,
            SYRIAC,
            ARABIC,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            ARABIC,
            DEVANAGARI,
            INHERITED,
            DEVANAGARI,
            COMMON,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            COMMON,
            THAI,
            LAO,
            TIBETAN,
            COMMON,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            COMMON,
            GEORGIAN,
            HANGUL,
            ETHIOPIC,
            CHEROKEE,
            CANADIAN_ABORIGINAL,
            OGHAM,
            RUNIC,
            COMMON,
            RUNIC,
            TAGALOG,
            HANUNOO,
            COMMON,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            COMMON,
            MONGOLIAN,
            COMMON,
            MONGOLIAN,
            CANADIAN_ABORIGINAL,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER,
            BUGINESE,
            TAI_THAM,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            SUNDANESE,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            LATIN,
            GREEK,
            CYRILLIC,
            LATIN,
            GREEK,
            LATIN,
            GREEK,
            LATIN,
            CYRILLIC,
            LATIN,
            GREEK,
            INHERITED,
            LATIN,
            GREEK,
            COMMON,
            INHERITED,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            INHERITED,
            COMMON,
            GREEK,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            BRAILLE,
            COMMON,
            GLAGOLITIC,
            LATIN,
            COPTIC,
            GEORGIAN,
            TIFINAGH,
            ETHIOPIC,
            CYRILLIC,
            COMMON,
            HAN,
            COMMON,
            HAN,
            COMMON,
            HAN,
            COMMON,
            HAN,
            INHERITED,
            HANGUL,
            COMMON,
            HAN,
            COMMON,
            HIRAGANA,
            INHERITED,
            COMMON,
            HIRAGANA,
            COMMON,
            KATAKANA,
            COMMON,
            KATAKANA,
            BOPOMOFO,
            HANGUL,
            COMMON,
            BOPOMOFO,
            COMMON,
            KATAKANA,
            HANGUL,
            COMMON,
            HANGUL,
            COMMON,
            KATAKANA,
            COMMON,
            HAN,
            COMMON,
            HAN,
            YI,
            LISU,
            VAI,
            CYRILLIC,
            BAMUM,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            SYLOTI_NAGRI,
            COMMON,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI,
            KAYAH_LI,
            REJANG,
            HANGUL,
            JAVANESE,
            CHAM,
            MYANMAR,
            TAI_VIET,
            MEETEI_MAYEK,
            ETHIOPIC,
            MEETEI_MAYEK,
            HANGUL,
            UNKNOWN     ,
            HAN,
            LATIN,
            ARMENIAN,
            HEBREW,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            ARABIC,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            KATAKANA,
            COMMON,
            KATAKANA,
            COMMON,
            HANGUL,
            COMMON,
            LINEAR_B,
            COMMON,
            GREEK,
            COMMON,
            INHERITED,
            LYCIAN,
            CARIAN,
            OLD_ITALIC,
            GOTHIC,
            UGARITIC,
            OLD_PERSIAN,
            DESERET,
            SHAVIAN,
            OSMANYA,
            CYPRIOT,
            IMPERIAL_ARAMAIC,
            PHOENICIAN,
            LYDIAN,
            MEROITIC_HIEROGLYPHS,
            MEROITIC_CURSIVE,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            OLD_TURKIC,
            ARABIC,
            BRAHMI,
            KAITHI,
            SORA_SOMPENG,
            CHAKMA,
            SHARADA,
            TAKRI,
            CUNEIFORM,
            EGYPTIAN_HIEROGLYPHS,
            BAMUM,
            MIAO,
            KATAKANA,
            HIRAGANA,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            GREEK,
            COMMON,
            ARABIC,
            COMMON,
            HIRAGANA,
            COMMON,
            HAN,
            COMMON,
            INHERITED,
            UNKNOWN
        };
4353 
4354         private static HashMap<String, Character.UnicodeScript> aliases;
4355         static {
4356             aliases = new HashMap<>(128);
4357             aliases.put("ARAB", ARABIC);
4358             aliases.put("ARMI", IMPERIAL_ARAMAIC);
4359             aliases.put("ARMN", ARMENIAN);
4360             aliases.put("AVST", AVESTAN);
4361             aliases.put("BALI", BALINESE);
4362             aliases.put("BAMU", BAMUM);
4363             aliases.put("BATK", BATAK);
4364             aliases.put("BENG", BENGALI);
4365             aliases.put("BOPO", BOPOMOFO);
4366             aliases.put("BRAI", BRAILLE);
4367             aliases.put("BRAH", BRAHMI);
4368             aliases.put("BUGI", BUGINESE);
4369             aliases.put("BUHD", BUHID);
4370             aliases.put("CAKM", CHAKMA);
4371             aliases.put("CANS", CANADIAN_ABORIGINAL);
4372             aliases.put("CARI", CARIAN);
4373             aliases.put("CHAM", CHAM);
4374             aliases.put("CHER", CHEROKEE);
4375             aliases.put("COPT", COPTIC);
4376             aliases.put("CPRT", CYPRIOT);
4377             aliases.put("CYRL", CYRILLIC);
4378             aliases.put("DEVA", DEVANAGARI);
4379             aliases.put("DSRT", DESERET);
4380             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4381             aliases.put("ETHI", ETHIOPIC);
4382             aliases.put("GEOR", GEORGIAN);
4383             aliases.put("GLAG", GLAGOLITIC);
4384             aliases.put("GOTH", GOTHIC);
4385             aliases.put("GREK", GREEK);
4386             aliases.put("GUJR", GUJARATI);
4387             aliases.put("GURU", GURMUKHI);
4388             aliases.put("HANG", HANGUL);
4389             aliases.put("HANI", HAN);
4390             aliases.put("HANO", HANUNOO);
4391             aliases.put("HEBR", HEBREW);
4392             aliases.put("HIRA", HIRAGANA);
4393             // it appears we don't have the KATAKANA_OR_HIRAGANA
4394             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4395             aliases.put("ITAL", OLD_ITALIC);
4396             aliases.put("JAVA", JAVANESE);
4397             aliases.put("KALI", KAYAH_LI);
4398             aliases.put("KANA", KATAKANA);
4399             aliases.put("KHAR", KHAROSHTHI);
4400             aliases.put("KHMR", KHMER);
4401             aliases.put("KNDA", KANNADA);
4402             aliases.put("KTHI", KAITHI);
4403             aliases.put("LANA", TAI_THAM);
4404             aliases.put("LAOO", LAO);
4405             aliases.put("LATN", LATIN);
4406             aliases.put("LEPC", LEPCHA);
4407             aliases.put("LIMB", LIMBU);
4408             aliases.put("LINB", LINEAR_B);
4409             aliases.put("LISU", LISU);
4410             aliases.put("LYCI", LYCIAN);
4411             aliases.put("LYDI", LYDIAN);
4412             aliases.put("MAND", MANDAIC);
4413             aliases.put("MERC", MEROITIC_CURSIVE);
4414             aliases.put("MERO", MEROITIC_HIEROGLYPHS);
4415             aliases.put("MLYM", MALAYALAM);
4416             aliases.put("MONG", MONGOLIAN);
4417             aliases.put("MTEI", MEETEI_MAYEK);
4418             aliases.put("MYMR", MYANMAR);
4419             aliases.put("NKOO", NKO);
4420             aliases.put("OGAM", OGHAM);
4421             aliases.put("OLCK", OL_CHIKI);
4422             aliases.put("ORKH", OLD_TURKIC);
4423             aliases.put("ORYA", ORIYA);
4424             aliases.put("OSMA", OSMANYA);
4425             aliases.put("PHAG", PHAGS_PA);
4426             aliases.put("PLRD", MIAO);
4427             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4428             aliases.put("PHNX", PHOENICIAN);
4429             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4430             aliases.put("RJNG", REJANG);
4431             aliases.put("RUNR", RUNIC);
4432             aliases.put("SAMR", SAMARITAN);
4433             aliases.put("SARB", OLD_SOUTH_ARABIAN);
4434             aliases.put("SAUR", SAURASHTRA);
4435             aliases.put("SHAW", SHAVIAN);
4436             aliases.put("SHRD", SHARADA);
4437             aliases.put("SINH", SINHALA);
4438             aliases.put("SORA", SORA_SOMPENG);
4439             aliases.put("SUND", SUNDANESE);
4440             aliases.put("SYLO", SYLOTI_NAGRI);
4441             aliases.put("SYRC", SYRIAC);
4442             aliases.put("TAGB", TAGBANWA);
4443             aliases.put("TALE", TAI_LE);
4444             aliases.put("TAKR", TAKRI);
4445             aliases.put("TALU", NEW_TAI_LUE);
4446             aliases.put("TAML", TAMIL);
4447             aliases.put("TAVT", TAI_VIET);
4448             aliases.put("TELU", TELUGU);
4449             aliases.put("TFNG", TIFINAGH);
4450             aliases.put("TGLG", TAGALOG);
4451             aliases.put("THAA", THAANA);
4452             aliases.put("THAI", THAI);
4453             aliases.put("TIBT", TIBETAN);
4454             aliases.put("UGAR", UGARITIC);
4455             aliases.put("VAII", VAI);
4456             aliases.put("XPEO", OLD_PERSIAN);
4457             aliases.put("XSUX", CUNEIFORM);
4458             aliases.put("YIII", YI);
4459             aliases.put("ZINH", INHERITED);
4460             aliases.put("ZYYY", COMMON);
4461             aliases.put("ZZZZ", UNKNOWN);
4462         }
4463 
4464         /**
4465          * Returns the enum constant representing the Unicode script of which
4466          * the given character (Unicode code point) is assigned to.
4467          *
4468          * @param   codePoint the character (Unicode code point) in question.
4469          * @return  The {@code UnicodeScript} constant representing the
4470          *          Unicode script of which this character is assigned to.
4471          *
4472          * @exception IllegalArgumentException if the specified
4473          * {@code codePoint} is an invalid Unicode code point.
4474          * @see Character#isValidCodePoint(int)
4475          *
4476          */
4477         public static UnicodeScript of(int codePoint) {
4478             if (!isValidCodePoint(codePoint))
4479                 throw new IllegalArgumentException();
4480             int type = getType(codePoint);
4481             // leave SURROGATE and PRIVATE_USE for table lookup
4482             if (type == UNASSIGNED)
4483                 return UNKNOWN;
4484             int index = Arrays.binarySearch(scriptStarts, codePoint);
4485             if (index < 0)
4486                 index = -index - 2;
4487             return scripts[index];
4488         }
4489 
4490         /**
4491          * Returns the UnicodeScript constant with the given Unicode script
4492          * name or the script name alias. Script names and their aliases are
4493          * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4494          * and PropertyValueAliases&lt;version&gt;.txt define script names
4495          * and the script name aliases for a particular version of the
4496          * standard. The {@link Character} class specifies the version of
4497          * the standard that it supports.
4498          * <p>
4499          * Character case is ignored for all of the valid script names.
4500          * The en_US locale's case mapping rules are used to provide
4501          * case-insensitive string comparisons for script name validation.
4502          *
4503          * @param scriptName A {@code UnicodeScript} name.
4504          * @return The {@code UnicodeScript} constant identified
4505          *         by {@code scriptName}
4506          * @throws IllegalArgumentException if {@code scriptName} is an
4507          *         invalid name
4508          * @throws NullPointerException if {@code scriptName} is null
4509          */
4510         public static final UnicodeScript forName(String scriptName) {
4511             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4512                                  //.replace(' ', '_'));
4513             UnicodeScript sc = aliases.get(scriptName);
4514             if (sc != null)
4515                 return sc;
4516             return valueOf(scriptName);
4517         }
4518     }
4519 
    /**
     * The {@code char} value wrapped by this {@code Character}.
     *
     * @serial
     */
    private final char value;

    /**
     * Serialization version identifier; the value from JDK 1.0.2 is
     * kept for interoperability.
     */
    private static final long serialVersionUID = 3786198910865385080L;
4529 
    /**
     * Constructs a newly allocated {@code Character} object that
     * represents the specified {@code char} value.
     * <p>
     * When a new instance is not required, {@link #valueOf(char)} is
     * generally the better choice because it can return cached instances.
     *
     * @param  value   the value to be represented by the
     *                  {@code Character} object.
     */
    public Character(char value) {
        this.value = value;
    }
4540 
4541     private static class CharacterCache {
4542         private CharacterCache(){}
4543 
4544         static final Character cache[] = new Character[127 + 1];
4545 
4546         static {
4547             for (int i = 0; i < cache.length; i++)
4548                 cache[i] = new Character((char)i);
4549         }
4550     }
4551 
4552     /**
4553      * Returns a <tt>Character</tt> instance representing the specified
4554      * <tt>char</tt> value.
4555      * If a new <tt>Character</tt> instance is not required, this method
4556      * should generally be used in preference to the constructor
4557      * {@link #Character(char)}, as this method is likely to yield
4558      * significantly better space and time performance by caching
4559      * frequently requested values.
4560      *
4561      * This method will always cache values in the range {@code
4562      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
4563      * cache other values outside of this range.
4564      *
4565      * @param  c a char value.
4566      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
4567      * @since  1.5
4568      */
4569     public static Character valueOf(char c) {
4570         if (c <= 127) { // must cache
4571             return CharacterCache.cache[(int)c];
4572         }
4573         return new Character(c);
4574     }
4575 
4576     /**
4577      * Returns the value of this {@code Character} object.
4578      * @return  the primitive {@code char} value represented by
4579      *          this object.
4580      */
4581     public char charValue() {
4582         return value;
4583     }
4584 
4585     /**
4586      * Returns a hash code for this {@code Character}; equal to the result
4587      * of invoking {@code charValue()}.
4588      *
4589      * @return a hash code value for this {@code Character}
4590      */
4591     @Override
4592     public int hashCode() {
4593         return Character.hashCode(value);
4594     }
4595 
4596     /**
4597      * Returns a hash code for a {@code char} value; compatible with
4598      * {@code Character.hashCode()}.
4599      *
4600      * @since 1.8
4601      *
4602      * @param value The {@code char} for which to return a hash code.
4603      * @return a hash code value for a {@code char} value.
4604      */
4605     public static int hashCode(char value) {
4606         return (int)value;
4607     }
4608 
4609     /**
4610      * Compares this object against the specified object.
4611      * The result is {@code true} if and only if the argument is not
4612      * {@code null} and is a {@code Character} object that
4613      * represents the same {@code char} value as this object.
4614      *
4615      * @param   obj   the object to compare with.
4616      * @return  {@code true} if the objects are the same;
4617      *          {@code false} otherwise.
4618      */
4619     public boolean equals(Object obj) {
4620         if (obj instanceof Character) {
4621             return value == ((Character)obj).charValue();
4622         }
4623         return false;
4624     }
4625 
4626     /**
4627      * Returns a {@code String} object representing this
4628      * {@code Character}'s value.  The result is a string of
4629      * length 1 whose sole component is the primitive
4630      * {@code char} value represented by this
4631      * {@code Character} object.
4632      *
4633      * @return  a string representation of this object.
4634      */
4635     public String toString() {
4636         char buf[] = {value};
4637         return String.valueOf(buf);
4638     }
4639 
    /**
     * Returns a {@code String} object representing the
     * specified {@code char}.  The result is a string of length
     * 1 consisting solely of the specified {@code char}.
     * <p>
     * This is a static convenience that delegates to
     * {@link String#valueOf(char)}.
     *
     * @param c the {@code char} to be converted
     * @return the string representation of the specified {@code char}
     * @since 1.4
     */
    public static String toString(char c) {
        return String.valueOf(c);
    }
4652 
4653     /**
4654      * Determines whether the specified code point is a valid
4655      * <a href="http://www.unicode.org/glossary/#code_point">
4656      * Unicode code point value</a>.
4657      *
4658      * @param  codePoint the Unicode code point to be tested
4659      * @return {@code true} if the specified code point value is between
4660      *         {@link #MIN_CODE_POINT} and
4661      *         {@link #MAX_CODE_POINT} inclusive;
4662      *         {@code false} otherwise.
4663      * @since  1.5
4664      */
4665     public static boolean isValidCodePoint(int codePoint) {
4666         // Optimized form of:
4667         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4668         int plane = codePoint >>> 16;
4669         return plane < ((MAX_CODE_POINT + 1) >>> 16);
4670     }
4671 
4672     /**
4673      * Determines whether the specified character (Unicode code point)
4674      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4675      * Such code points can be represented using a single {@code char}.
4676      *
4677      * @param  codePoint the character (Unicode code point) to be tested
4678      * @return {@code true} if the specified code point is between
4679      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4680      *         {@code false} otherwise.
4681      * @since  1.7
4682      */
4683     public static boolean isBmpCodePoint(int codePoint) {
4684         return codePoint >>> 16 == 0;
4685         // Optimized form of:
4686         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4687         // We consistently use logical shift (>>>) to facilitate
4688         // additional runtime optimizations.
4689     }
4690 
4691     /**
4692      * Determines whether the specified character (Unicode code point)
4693      * is in the <a href="#supplementary">supplementary character</a> range.
4694      *
4695      * @param  codePoint the character (Unicode code point) to be tested
4696      * @return {@code true} if the specified code point is between
4697      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4698      *         {@link #MAX_CODE_POINT} inclusive;
4699      *         {@code false} otherwise.
4700      * @since  1.5
4701      */
4702     public static boolean isSupplementaryCodePoint(int codePoint) {
4703         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4704             && codePoint <  MAX_CODE_POINT + 1;
4705     }
4706 
4707     /**
4708      * Determines if the given {@code char} value is a
4709      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4710      * Unicode high-surrogate code unit</a>
4711      * (also known as <i>leading-surrogate code unit</i>).
4712      *
4713      * <p>Such values do not represent characters by themselves,
4714      * but are used in the representation of
4715      * <a href="#supplementary">supplementary characters</a>
4716      * in the UTF-16 encoding.
4717      *
4718      * @param  ch the {@code char} value to be tested.
4719      * @return {@code true} if the {@code char} value is between
4720      *         {@link #MIN_HIGH_SURROGATE} and
4721      *         {@link #MAX_HIGH_SURROGATE} inclusive;
4722      *         {@code false} otherwise.
4723      * @see    Character#isLowSurrogate(char)
4724      * @see    Character.UnicodeBlock#of(int)
4725      * @since  1.5
4726      */
4727     public static boolean isHighSurrogate(char ch) {
4728         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4729         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4730     }
4731 
4732     /**
4733      * Determines if the given {@code char} value is a
4734      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4735      * Unicode low-surrogate code unit</a>
4736      * (also known as <i>trailing-surrogate code unit</i>).
4737      *
4738      * <p>Such values do not represent characters by themselves,
4739      * but are used in the representation of
4740      * <a href="#supplementary">supplementary characters</a>
4741      * in the UTF-16 encoding.
4742      *
4743      * @param  ch the {@code char} value to be tested.
4744      * @return {@code true} if the {@code char} value is between
4745      *         {@link #MIN_LOW_SURROGATE} and
4746      *         {@link #MAX_LOW_SURROGATE} inclusive;
4747      *         {@code false} otherwise.
4748      * @see    Character#isHighSurrogate(char)
4749      * @since  1.5
4750      */
4751     public static boolean isLowSurrogate(char ch) {
4752         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4753     }
4754 
4755     /**
4756      * Determines if the given {@code char} value is a Unicode
4757      * <i>surrogate code unit</i>.
4758      *
4759      * <p>Such values do not represent characters by themselves,
4760      * but are used in the representation of
4761      * <a href="#supplementary">supplementary characters</a>
4762      * in the UTF-16 encoding.
4763      *
4764      * <p>A char value is a surrogate code unit if and only if it is either
4765      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4766      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4767      *
4768      * @param  ch the {@code char} value to be tested.
4769      * @return {@code true} if the {@code char} value is between
4770      *         {@link #MIN_SURROGATE} and
4771      *         {@link #MAX_SURROGATE} inclusive;
4772      *         {@code false} otherwise.
4773      * @since  1.7
4774      */
4775     public static boolean isSurrogate(char ch) {
4776         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4777     }
4778 
4779     /**
4780      * Determines whether the specified pair of {@code char}
4781      * values is a valid
4782      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4783      * Unicode surrogate pair</a>.
4784 
4785      * <p>This method is equivalent to the expression:
4786      * <blockquote><pre>{@code
4787      * isHighSurrogate(high) && isLowSurrogate(low)
4788      * }</pre></blockquote>
4789      *
4790      * @param  high the high-surrogate code value to be tested
4791      * @param  low the low-surrogate code value to be tested
4792      * @return {@code true} if the specified high and
4793      * low-surrogate code values represent a valid surrogate pair;
4794      * {@code false} otherwise.
4795      * @since  1.5
4796      */
4797     public static boolean isSurrogatePair(char high, char low) {
4798         return isHighSurrogate(high) && isLowSurrogate(low);
4799     }
4800 
4801     /**
4802      * Determines the number of {@code char} values needed to
4803      * represent the specified character (Unicode code point). If the
4804      * specified character is equal to or greater than 0x10000, then
4805      * the method returns 2. Otherwise, the method returns 1.
4806      *
4807      * <p>This method doesn't validate the specified character to be a
4808      * valid Unicode code point. The caller must validate the
4809      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
4810      * if necessary.
4811      *
4812      * @param   codePoint the character (Unicode code point) to be tested.
4813      * @return  2 if the character is a valid supplementary character; 1 otherwise.
4814      * @see     Character#isSupplementaryCodePoint(int)
4815      * @since   1.5
4816      */
4817     public static int charCount(int codePoint) {
4818         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4819     }
4820 
4821     /**
4822      * Converts the specified surrogate pair to its supplementary code
4823      * point value. This method does not validate the specified
4824      * surrogate pair. The caller must validate it using {@link
4825      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4826      *
4827      * @param  high the high-surrogate code unit
4828      * @param  low the low-surrogate code unit
4829      * @return the supplementary code point composed from the
4830      *         specified surrogate pair.
4831      * @since  1.5
4832      */
4833     public static int toCodePoint(char high, char low) {
4834         // Optimized form of:
4835         // return ((high - MIN_HIGH_SURROGATE) << 10)
4836         //         + (low - MIN_LOW_SURROGATE)
4837         //         + MIN_SUPPLEMENTARY_CODE_POINT;
4838         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4839                                        - (MIN_HIGH_SURROGATE << 10)
4840                                        - MIN_LOW_SURROGATE);
4841     }
4842 
4843     /**
4844      * Returns the code point at the given index of the
4845      * {@code CharSequence}. If the {@code char} value at
4846      * the given index in the {@code CharSequence} is in the
4847      * high-surrogate range, the following index is less than the
4848      * length of the {@code CharSequence}, and the
4849      * {@code char} value at the following index is in the
4850      * low-surrogate range, then the supplementary code point
4851      * corresponding to this surrogate pair is returned. Otherwise,
4852      * the {@code char} value at the given index is returned.
4853      *
4854      * @param seq a sequence of {@code char} values (Unicode code
4855      * units)
4856      * @param index the index to the {@code char} values (Unicode
4857      * code units) in {@code seq} to be converted
4858      * @return the Unicode code point at the given index
4859      * @exception NullPointerException if {@code seq} is null.
4860      * @exception IndexOutOfBoundsException if the value
4861      * {@code index} is negative or not less than
4862      * {@link CharSequence#length() seq.length()}.
4863      * @since  1.5
4864      */
4865     public static int codePointAt(CharSequence seq, int index) {
4866         char c1 = seq.charAt(index);
4867         if (isHighSurrogate(c1) && ++index < seq.length()) {
4868             char c2 = seq.charAt(index);
4869             if (isLowSurrogate(c2)) {
4870                 return toCodePoint(c1, c2);
4871             }
4872         }
4873         return c1;
4874     }
4875 
4876     /**
4877      * Returns the code point at the given index of the
4878      * {@code char} array. If the {@code char} value at
4879      * the given index in the {@code char} array is in the
4880      * high-surrogate range, the following index is less than the
4881      * length of the {@code char} array, and the
4882      * {@code char} value at the following index is in the
4883      * low-surrogate range, then the supplementary code point
4884      * corresponding to this surrogate pair is returned. Otherwise,
4885      * the {@code char} value at the given index is returned.
4886      *
4887      * @param a the {@code char} array
4888      * @param index the index to the {@code char} values (Unicode
4889      * code units) in the {@code char} array to be converted
4890      * @return the Unicode code point at the given index
4891      * @exception NullPointerException if {@code a} is null.
4892      * @exception IndexOutOfBoundsException if the value
4893      * {@code index} is negative or not less than
4894      * the length of the {@code char} array.
4895      * @since  1.5
4896      */
4897     public static int codePointAt(char[] a, int index) {
4898         return codePointAtImpl(a, index, a.length);
4899     }
4900 
4901     /**
4902      * Returns the code point at the given index of the
4903      * {@code char} array, where only array elements with
4904      * {@code index} less than {@code limit} can be used. If
4905      * the {@code char} value at the given index in the
4906      * {@code char} array is in the high-surrogate range, the
4907      * following index is less than the {@code limit}, and the
4908      * {@code char} value at the following index is in the
4909      * low-surrogate range, then the supplementary code point
4910      * corresponding to this surrogate pair is returned. Otherwise,
4911      * the {@code char} value at the given index is returned.
4912      *
4913      * @param a the {@code char} array
4914      * @param index the index to the {@code char} values (Unicode
4915      * code units) in the {@code char} array to be converted
4916      * @param limit the index after the last array element that
4917      * can be used in the {@code char} array
4918      * @return the Unicode code point at the given index
4919      * @exception NullPointerException if {@code a} is null.
4920      * @exception IndexOutOfBoundsException if the {@code index}
4921      * argument is negative or not less than the {@code limit}
4922      * argument, or if the {@code limit} argument is negative or
4923      * greater than the length of the {@code char} array.
4924      * @since  1.5
4925      */
4926     public static int codePointAt(char[] a, int index, int limit) {
4927         if (index >= limit || limit < 0 || limit > a.length) {
4928             throw new IndexOutOfBoundsException();
4929         }
4930         return codePointAtImpl(a, index, limit);
4931     }
4932 
4933     // throws ArrayIndexOutOfBoundsException if index out of bounds
4934     static int codePointAtImpl(char[] a, int index, int limit) {
4935         char c1 = a[index];
4936         if (isHighSurrogate(c1) && ++index < limit) {
4937             char c2 = a[index];
4938             if (isLowSurrogate(c2)) {
4939                 return toCodePoint(c1, c2);
4940             }
4941         }
4942         return c1;
4943     }
4944 
4945     /**
4946      * Returns the code point preceding the given index of the
4947      * {@code CharSequence}. If the {@code char} value at
4948      * {@code (index - 1)} in the {@code CharSequence} is in
4949      * the low-surrogate range, {@code (index - 2)} is not
4950      * negative, and the {@code char} value at {@code (index - 2)}
4951      * in the {@code CharSequence} is in the
4952      * high-surrogate range, then the supplementary code point
4953      * corresponding to this surrogate pair is returned. Otherwise,
4954      * the {@code char} value at {@code (index - 1)} is
4955      * returned.
4956      *
4957      * @param seq the {@code CharSequence} instance
4958      * @param index the index following the code point that should be returned
4959      * @return the Unicode code point value before the given index.
4960      * @exception NullPointerException if {@code seq} is null.
4961      * @exception IndexOutOfBoundsException if the {@code index}
4962      * argument is less than 1 or greater than {@link
4963      * CharSequence#length() seq.length()}.
4964      * @since  1.5
4965      */
4966     public static int codePointBefore(CharSequence seq, int index) {
4967         char c2 = seq.charAt(--index);
4968         if (isLowSurrogate(c2) && index > 0) {
4969             char c1 = seq.charAt(--index);
4970             if (isHighSurrogate(c1)) {
4971                 return toCodePoint(c1, c2);
4972             }
4973         }
4974         return c2;
4975     }
4976 
4977     /**
4978      * Returns the code point preceding the given index of the
4979      * {@code char} array. If the {@code char} value at
4980      * {@code (index - 1)} in the {@code char} array is in
4981      * the low-surrogate range, {@code (index - 2)} is not
4982      * negative, and the {@code char} value at {@code (index - 2)}
4983      * in the {@code char} array is in the
4984      * high-surrogate range, then the supplementary code point
4985      * corresponding to this surrogate pair is returned. Otherwise,
4986      * the {@code char} value at {@code (index - 1)} is
4987      * returned.
4988      *
4989      * @param a the {@code char} array
4990      * @param index the index following the code point that should be returned
4991      * @return the Unicode code point value before the given index.
4992      * @exception NullPointerException if {@code a} is null.
4993      * @exception IndexOutOfBoundsException if the {@code index}
4994      * argument is less than 1 or greater than the length of the
4995      * {@code char} array
4996      * @since  1.5
4997      */
4998     public static int codePointBefore(char[] a, int index) {
4999         return codePointBeforeImpl(a, index, 0);
5000     }
5001 
5002     /**
5003      * Returns the code point preceding the given index of the
5004      * {@code char} array, where only array elements with
5005      * {@code index} greater than or equal to {@code start}
5006      * can be used. If the {@code char} value at {@code (index - 1)}
5007      * in the {@code char} array is in the
5008      * low-surrogate range, {@code (index - 2)} is not less than
5009      * {@code start}, and the {@code char} value at
5010      * {@code (index - 2)} in the {@code char} array is in
5011      * the high-surrogate range, then the supplementary code point
5012      * corresponding to this surrogate pair is returned. Otherwise,
5013      * the {@code char} value at {@code (index - 1)} is
5014      * returned.
5015      *
5016      * @param a the {@code char} array
5017      * @param index the index following the code point that should be returned
5018      * @param start the index of the first array element in the
5019      * {@code char} array
5020      * @return the Unicode code point value before the given index.
5021      * @exception NullPointerException if {@code a} is null.
5022      * @exception IndexOutOfBoundsException if the {@code index}
5023      * argument is not greater than the {@code start} argument or
5024      * is greater than the length of the {@code char} array, or
5025      * if the {@code start} argument is negative or not less than
5026      * the length of the {@code char} array.
5027      * @since  1.5
5028      */
5029     public static int codePointBefore(char[] a, int index, int start) {
5030         if (index <= start || start < 0 || start >= a.length) {
5031             throw new IndexOutOfBoundsException();
5032         }
5033         return codePointBeforeImpl(a, index, start);
5034     }
5035 
5036     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
5037     static int codePointBeforeImpl(char[] a, int index, int start) {
5038         char c2 = a[--index];
5039         if (isLowSurrogate(c2) && index > start) {
5040             char c1 = a[--index];
5041             if (isHighSurrogate(c1)) {
5042                 return toCodePoint(c1, c2);
5043             }
5044         }
5045         return c2;
5046     }
5047 
5048     /**
5049      * Returns the leading surrogate (a
5050      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
5051      * high surrogate code unit</a>) of the
5052      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5053      * surrogate pair</a>
5054      * representing the specified supplementary character (Unicode
5055      * code point) in the UTF-16 encoding.  If the specified character
5056      * is not a
5057      * <a href="Character.html#supplementary">supplementary character</a>,
5058      * an unspecified {@code char} is returned.
5059      *
5060      * <p>If
5061      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5062      * is {@code true}, then
5063      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
5064      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
5065      * are also always {@code true}.
5066      *
5067      * @param   codePoint a supplementary character (Unicode code point)
5068      * @return  the leading surrogate code unit used to represent the
5069      *          character in the UTF-16 encoding
5070      * @since   1.7
5071      */
5072     public static char highSurrogate(int codePoint) {
5073         return (char) ((codePoint >>> 10)
5074             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
5075     }
5076 
5077     /**
5078      * Returns the trailing surrogate (a
5079      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
5080      * low surrogate code unit</a>) of the
5081      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5082      * surrogate pair</a>
5083      * representing the specified supplementary character (Unicode
5084      * code point) in the UTF-16 encoding.  If the specified character
5085      * is not a
5086      * <a href="Character.html#supplementary">supplementary character</a>,
5087      * an unspecified {@code char} is returned.
5088      *
5089      * <p>If
5090      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5091      * is {@code true}, then
5092      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
5093      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
5094      * are also always {@code true}.
5095      *
5096      * @param   codePoint a supplementary character (Unicode code point)
5097      * @return  the trailing surrogate code unit used to represent the
5098      *          character in the UTF-16 encoding
5099      * @since   1.7
5100      */
5101     public static char lowSurrogate(int codePoint) {
5102         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
5103     }
5104 
5105     /**
5106      * Converts the specified character (Unicode code point) to its
5107      * UTF-16 representation. If the specified code point is a BMP
5108      * (Basic Multilingual Plane or Plane 0) value, the same value is
5109      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
5110      * specified code point is a supplementary character, its
5111      * surrogate values are stored in {@code dst[dstIndex]}
5112      * (high-surrogate) and {@code dst[dstIndex+1]}
5113      * (low-surrogate), and 2 is returned.
5114      *
5115      * @param  codePoint the character (Unicode code point) to be converted.
5116      * @param  dst an array of {@code char} in which the
5117      * {@code codePoint}'s UTF-16 value is stored.
5118      * @param dstIndex the start index into the {@code dst}
5119      * array where the converted value is stored.
5120      * @return 1 if the code point is a BMP code point, 2 if the
5121      * code point is a supplementary code point.
5122      * @exception IllegalArgumentException if the specified
5123      * {@code codePoint} is not a valid Unicode code point.
5124      * @exception NullPointerException if the specified {@code dst} is null.
5125      * @exception IndexOutOfBoundsException if {@code dstIndex}
5126      * is negative or not less than {@code dst.length}, or if
5127      * {@code dst} at {@code dstIndex} doesn't have enough
5128      * array element(s) to store the resulting {@code char}
5129      * value(s). (If {@code dstIndex} is equal to
5130      * {@code dst.length-1} and the specified
5131      * {@code codePoint} is a supplementary character, the
5132      * high-surrogate value is not stored in
5133      * {@code dst[dstIndex]}.)
5134      * @since  1.5
5135      */
5136     public static int toChars(int codePoint, char[] dst, int dstIndex) {
5137         if (isBmpCodePoint(codePoint)) {
5138             dst[dstIndex] = (char) codePoint;
5139             return 1;
5140         } else if (isValidCodePoint(codePoint)) {
5141             toSurrogates(codePoint, dst, dstIndex);
5142             return 2;
5143         } else {
5144             throw new IllegalArgumentException();
5145         }
5146     }
5147 
5148     /**
5149      * Converts the specified character (Unicode code point) to its
5150      * UTF-16 representation stored in a {@code char} array. If
5151      * the specified code point is a BMP (Basic Multilingual Plane or
5152      * Plane 0) value, the resulting {@code char} array has
5153      * the same value as {@code codePoint}. If the specified code
5154      * point is a supplementary code point, the resulting
5155      * {@code char} array has the corresponding surrogate pair.
5156      *
5157      * @param  codePoint a Unicode code point
5158      * @return a {@code char} array having
5159      *         {@code codePoint}'s UTF-16 representation.
5160      * @exception IllegalArgumentException if the specified
5161      * {@code codePoint} is not a valid Unicode code point.
5162      * @since  1.5
5163      */
5164     public static char[] toChars(int codePoint) {
5165         if (isBmpCodePoint(codePoint)) {
5166             return new char[] { (char) codePoint };
5167         } else if (isValidCodePoint(codePoint)) {
5168             char[] result = new char[2];
5169             toSurrogates(codePoint, result, 0);
5170             return result;
5171         } else {
5172             throw new IllegalArgumentException();
5173         }
5174     }
5175 
5176     static void toSurrogates(int codePoint, char[] dst, int index) {
5177         // We write elements "backwards" to guarantee all-or-nothing
5178         dst[index+1] = lowSurrogate(codePoint);
5179         dst[index] = highSurrogate(codePoint);
5180     }
5181 
5182     /**
5183      * Returns the number of Unicode code points in the text range of
5184      * the specified char sequence. The text range begins at the
5185      * specified {@code beginIndex} and extends to the
5186      * {@code char} at index {@code endIndex - 1}. Thus the
5187      * length (in {@code char}s) of the text range is
5188      * {@code endIndex-beginIndex}. Unpaired surrogates within
5189      * the text range count as one code point each.
5190      *
5191      * @param seq the char sequence
5192      * @param beginIndex the index to the first {@code char} of
5193      * the text range.
5194      * @param endIndex the index after the last {@code char} of
5195      * the text range.
5196      * @return the number of Unicode code points in the specified text
5197      * range
5198      * @exception NullPointerException if {@code seq} is null.
5199      * @exception IndexOutOfBoundsException if the
5200      * {@code beginIndex} is negative, or {@code endIndex}
5201      * is larger than the length of the given sequence, or
5202      * {@code beginIndex} is larger than {@code endIndex}.
5203      * @since  1.5
5204      */
5205     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5206         int length = seq.length();
5207         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5208             throw new IndexOutOfBoundsException();
5209         }
5210         int n = endIndex - beginIndex;
5211         for (int i = beginIndex; i < endIndex; ) {
5212             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5213                 isLowSurrogate(seq.charAt(i))) {
5214                 n--;
5215                 i++;
5216             }
5217         }
5218         return n;
5219     }
5220 
5221     /**
5222      * Returns the number of Unicode code points in a subarray of the
5223      * {@code char} array argument. The {@code offset}
5224      * argument is the index of the first {@code char} of the
5225      * subarray and the {@code count} argument specifies the
5226      * length of the subarray in {@code char}s. Unpaired
5227      * surrogates within the subarray count as one code point each.
5228      *
5229      * @param a the {@code char} array
5230      * @param offset the index of the first {@code char} in the
5231      * given {@code char} array
5232      * @param count the length of the subarray in {@code char}s
5233      * @return the number of Unicode code points in the specified subarray
5234      * @exception NullPointerException if {@code a} is null.
5235      * @exception IndexOutOfBoundsException if {@code offset} or
5236      * {@code count} is negative, or if {@code offset +
5237      * count} is larger than the length of the given array.
5238      * @since  1.5
5239      */
5240     public static int codePointCount(char[] a, int offset, int count) {
5241         if (count > a.length - offset || offset < 0 || count < 0) {
5242             throw new IndexOutOfBoundsException();
5243         }
5244         return codePointCountImpl(a, offset, count);
5245     }
5246 
5247     static int codePointCountImpl(char[] a, int offset, int count) {
5248         int endIndex = offset + count;
5249         int n = count;
5250         for (int i = offset; i < endIndex; ) {
5251             if (isHighSurrogate(a[i++]) && i < endIndex &&
5252                 isLowSurrogate(a[i])) {
5253                 n--;
5254                 i++;
5255             }
5256         }
5257         return n;
5258     }
5259 
5260     /**
5261      * Returns the index within the given char sequence that is offset
5262      * from the given {@code index} by {@code codePointOffset}
5263      * code points. Unpaired surrogates within the text range given by
5264      * {@code index} and {@code codePointOffset} count as
5265      * one code point each.
5266      *
5267      * @param seq the char sequence
5268      * @param index the index to be offset
5269      * @param codePointOffset the offset in code points
5270      * @return the index within the char sequence
5271      * @exception NullPointerException if {@code seq} is null.
5272      * @exception IndexOutOfBoundsException if {@code index}
     *   is negative or larger than the length of the char sequence,
5274      *   or if {@code codePointOffset} is positive and the
5275      *   subsequence starting with {@code index} has fewer than
5276      *   {@code codePointOffset} code points, or if
5277      *   {@code codePointOffset} is negative and the subsequence
5278      *   before {@code index} has fewer than the absolute value
5279      *   of {@code codePointOffset} code points.
5280      * @since 1.5
5281      */
5282     public static int offsetByCodePoints(CharSequence seq, int index,
5283                                          int codePointOffset) {
5284         int length = seq.length();
5285         if (index < 0 || index > length) {
5286             throw new IndexOutOfBoundsException();
5287         }
5288 
5289         int x = index;
5290         if (codePointOffset >= 0) {
5291             int i;
5292             for (i = 0; x < length && i < codePointOffset; i++) {
5293                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5294                     isLowSurrogate(seq.charAt(x))) {
5295                     x++;
5296                 }
5297             }
5298             if (i < codePointOffset) {
5299                 throw new IndexOutOfBoundsException();
5300             }
5301         } else {
5302             int i;
5303             for (i = codePointOffset; x > 0 && i < 0; i++) {
5304                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5305                     isHighSurrogate(seq.charAt(x-1))) {
5306                     x--;
5307                 }
5308             }
5309             if (i < 0) {
5310                 throw new IndexOutOfBoundsException();
5311             }
5312         }
5313         return x;
5314     }
5315 
5316     /**
5317      * Returns the index within the given {@code char} subarray
5318      * that is offset from the given {@code index} by
5319      * {@code codePointOffset} code points. The
5320      * {@code start} and {@code count} arguments specify a
5321      * subarray of the {@code char} array. Unpaired surrogates
5322      * within the text range given by {@code index} and
5323      * {@code codePointOffset} count as one code point each.
5324      *
5325      * @param a the {@code char} array
5326      * @param start the index of the first {@code char} of the
5327      * subarray
5328      * @param count the length of the subarray in {@code char}s
5329      * @param index the index to be offset
5330      * @param codePointOffset the offset in code points
5331      * @return the index within the subarray
5332      * @exception NullPointerException if {@code a} is null.
5333      * @exception IndexOutOfBoundsException
5334      *   if {@code start} or {@code count} is negative,
5335      *   or if {@code start + count} is larger than the length of
5336      *   the given array,
5337      *   or if {@code index} is less than {@code start} or
     *   larger than {@code start + count},
5339      *   or if {@code codePointOffset} is positive and the text range
5340      *   starting with {@code index} and ending with {@code start + count - 1}
5341      *   has fewer than {@code codePointOffset} code
5342      *   points,
5343      *   or if {@code codePointOffset} is negative and the text range
5344      *   starting with {@code start} and ending with {@code index - 1}
5345      *   has fewer than the absolute value of
5346      *   {@code codePointOffset} code points.
5347      * @since 1.5
5348      */
5349     public static int offsetByCodePoints(char[] a, int start, int count,
5350                                          int index, int codePointOffset) {
5351         if (count > a.length-start || start < 0 || count < 0
5352             || index < start || index > start+count) {
5353             throw new IndexOutOfBoundsException();
5354         }
5355         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5356     }
5357 
5358     static int offsetByCodePointsImpl(char[]a, int start, int count,
5359                                       int index, int codePointOffset) {
5360         int x = index;
5361         if (codePointOffset >= 0) {
5362             int limit = start + count;
5363             int i;
5364             for (i = 0; x < limit && i < codePointOffset; i++) {
5365                 if (isHighSurrogate(a[x++]) && x < limit &&
5366                     isLowSurrogate(a[x])) {
5367                     x++;
5368                 }
5369             }
5370             if (i < codePointOffset) {
5371                 throw new IndexOutOfBoundsException();
5372             }
5373         } else {
5374             int i;
5375             for (i = codePointOffset; x > start && i < 0; i++) {
5376                 if (isLowSurrogate(a[--x]) && x > start &&
5377                     isHighSurrogate(a[x-1])) {
5378                     x--;
5379                 }
5380             }
5381             if (i < 0) {
5382                 throw new IndexOutOfBoundsException();
5383             }
5384         }
5385         return x;
5386     }
5387 
5388     /**
5389      * Determines if the specified character is a lowercase character.
5390      * <p>
5391      * A character is lowercase if its general category type, provided
5392      * by {@code Character.getType(ch)}, is
5393      * {@code LOWERCASE_LETTER}, or it has contributory property
5394      * Other_Lowercase as defined by the Unicode Standard.
5395      * <p>
5396      * The following are examples of lowercase characters:
5397      * <blockquote><pre>
5398      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5399      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
5400      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
5401      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
5402      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
5403      * </pre></blockquote>
5404      * <p> Many other Unicode characters are lowercase too.
5405      *
5406      * <p><b>Note:</b> This method cannot handle <a
5407      * href="#supplementary"> supplementary characters</a>. To support
5408      * all Unicode characters, including supplementary characters, use
5409      * the {@link #isLowerCase(int)} method.
5410      *
5411      * @param   ch   the character to be tested.
5412      * @return  {@code true} if the character is lowercase;
5413      *          {@code false} otherwise.
     * @see     Character#isUpperCase(char)
5415      * @see     Character#isTitleCase(char)
5416      * @see     Character#toLowerCase(char)
5417      * @see     Character#getType(char)
5418      */
5419     public static boolean isLowerCase(char ch) {
5420         return isLowerCase((int)ch);
5421     }
5422 
5423     /**
5424      * Determines if the specified character (Unicode code point) is a
5425      * lowercase character.
5426      * <p>
5427      * A character is lowercase if its general category type, provided
5428      * by {@link Character#getType getType(codePoint)}, is
5429      * {@code LOWERCASE_LETTER}, or it has contributory property
5430      * Other_Lowercase as defined by the Unicode Standard.
5431      * <p>
5432      * The following are examples of lowercase characters:
5433      * <blockquote><pre>
5434      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5435      * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
5436      * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
5437      * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
5438      * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
5439      * </pre></blockquote>
5440      * <p> Many other Unicode characters are lowercase too.
5441      *
5442      * @param   codePoint the character (Unicode code point) to be tested.
5443      * @return  {@code true} if the character is lowercase;
5444      *          {@code false} otherwise.
     * @see     Character#isUpperCase(int)
5446      * @see     Character#isTitleCase(int)
5447      * @see     Character#toLowerCase(int)
5448      * @see     Character#getType(int)
5449      * @since   1.5
5450      */
5451     public static boolean isLowerCase(int codePoint) {
5452         return getType(codePoint) == Character.LOWERCASE_LETTER ||
5453                CharacterData.of(codePoint).isOtherLowercase(codePoint);
5454     }
5455 
5456     /**
5457      * Determines if the specified character is an uppercase character.
5458      * <p>
5459      * A character is uppercase if its general category type, provided by
     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
5461      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5462      * <p>
5463      * The following are examples of uppercase characters:
5464      * <blockquote><pre>
5465      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5466      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
5467      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
5468      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
5469      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
5470      * </pre></blockquote>
5471      * <p> Many other Unicode characters are uppercase too.
5472      *
5473      * <p><b>Note:</b> This method cannot handle <a
5474      * href="#supplementary"> supplementary characters</a>. To support
5475      * all Unicode characters, including supplementary characters, use
5476      * the {@link #isUpperCase(int)} method.
5477      *
5478      * @param   ch   the character to be tested.
5479      * @return  {@code true} if the character is uppercase;
5480      *          {@code false} otherwise.
5481      * @see     Character#isLowerCase(char)
5482      * @see     Character#isTitleCase(char)
5483      * @see     Character#toUpperCase(char)
5484      * @see     Character#getType(char)
5485      * @since   1.0
5486      */
5487     public static boolean isUpperCase(char ch) {
5488         return isUpperCase((int)ch);
5489     }
5490 
5491     /**
5492      * Determines if the specified character (Unicode code point) is an uppercase character.
5493      * <p>
5494      * A character is uppercase if its general category type, provided by
5495      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5496      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5497      * <p>
5498      * The following are examples of uppercase characters:
5499      * <blockquote><pre>
5500      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5501      * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
5502      * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
5503      * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
5504      * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
5505      * </pre></blockquote>
5506      * <p> Many other Unicode characters are uppercase too.
5507      *
5508      * @param   codePoint the character (Unicode code point) to be tested.
5509      * @return  {@code true} if the character is uppercase;
5510      *          {@code false} otherwise.
5511      * @see     Character#isLowerCase(int)
5512      * @see     Character#isTitleCase(int)
5513      * @see     Character#toUpperCase(int)
5514      * @see     Character#getType(int)
5515      * @since   1.5
5516      */
5517     public static boolean isUpperCase(int codePoint) {
5518         return getType(codePoint) == Character.UPPERCASE_LETTER ||
5519                CharacterData.of(codePoint).isOtherUppercase(codePoint);
5520     }
5521 
5522     /**
5523      * Determines if the specified character is a titlecase character.
5524      * <p>
5525      * A character is a titlecase character if its general
5526      * category type, provided by {@code Character.getType(ch)},
5527      * is {@code TITLECASE_LETTER}.
5528      * <p>
5529      * Some characters look like pairs of Latin letters. For example, there
5530      * is an uppercase letter that looks like "LJ" and has a corresponding
5531      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5532      * is the appropriate form to use when rendering a word in lowercase
5533      * with initial capitals, as for a book title.
5534      * <p>
5535      * These are some of the Unicode characters for which this method returns
5536      * {@code true}:
5537      * <ul>
5538      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5539      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5540      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5541      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5542      * </ul>
5543      * <p> Many other Unicode characters are titlecase too.
5544      *
5545      * <p><b>Note:</b> This method cannot handle <a
5546      * href="#supplementary"> supplementary characters</a>. To support
5547      * all Unicode characters, including supplementary characters, use
5548      * the {@link #isTitleCase(int)} method.
5549      *
5550      * @param   ch   the character to be tested.
5551      * @return  {@code true} if the character is titlecase;
5552      *          {@code false} otherwise.
5553      * @see     Character#isLowerCase(char)
5554      * @see     Character#isUpperCase(char)
5555      * @see     Character#toTitleCase(char)
5556      * @see     Character#getType(char)
5557      * @since   1.0.2
5558      */
5559     public static boolean isTitleCase(char ch) {
5560         return isTitleCase((int)ch);
5561     }
5562 
5563     /**
5564      * Determines if the specified character (Unicode code point) is a titlecase character.
5565      * <p>
5566      * A character is a titlecase character if its general
5567      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5568      * is {@code TITLECASE_LETTER}.
5569      * <p>
5570      * Some characters look like pairs of Latin letters. For example, there
5571      * is an uppercase letter that looks like "LJ" and has a corresponding
5572      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5573      * is the appropriate form to use when rendering a word in lowercase
5574      * with initial capitals, as for a book title.
5575      * <p>
5576      * These are some of the Unicode characters for which this method returns
5577      * {@code true}:
5578      * <ul>
5579      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5580      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5581      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5582      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5583      * </ul>
5584      * <p> Many other Unicode characters are titlecase too.
5585      *
5586      * @param   codePoint the character (Unicode code point) to be tested.
5587      * @return  {@code true} if the character is titlecase;
5588      *          {@code false} otherwise.
5589      * @see     Character#isLowerCase(int)
5590      * @see     Character#isUpperCase(int)
5591      * @see     Character#toTitleCase(int)
5592      * @see     Character#getType(int)
5593      * @since   1.5
5594      */
5595     public static boolean isTitleCase(int codePoint) {
5596         return getType(codePoint) == Character.TITLECASE_LETTER;
5597     }
5598 
5599     /**
5600      * Determines if the specified character is a digit.
5601      * <p>
5602      * A character is a digit if its general category type, provided
5603      * by {@code Character.getType(ch)}, is
5604      * {@code DECIMAL_DIGIT_NUMBER}.
5605      * <p>
5606      * Some Unicode character ranges that contain digits:
5607      * <ul>
5608      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5609      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5610      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5611      *     Arabic-Indic digits
5612      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5613      *     Extended Arabic-Indic digits
5614      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5615      *     Devanagari digits
5616      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5617      *     Fullwidth digits
5618      * </ul>
5619      *
5620      * Many other character ranges contain digits as well.
5621      *
5622      * <p><b>Note:</b> This method cannot handle <a
5623      * href="#supplementary"> supplementary characters</a>. To support
5624      * all Unicode characters, including supplementary characters, use
5625      * the {@link #isDigit(int)} method.
5626      *
5627      * @param   ch   the character to be tested.
5628      * @return  {@code true} if the character is a digit;
5629      *          {@code false} otherwise.
5630      * @see     Character#digit(char, int)
5631      * @see     Character#forDigit(int, int)
5632      * @see     Character#getType(char)
5633      */
5634     public static boolean isDigit(char ch) {
5635         return isDigit((int)ch);
5636     }
5637 
5638     /**
5639      * Determines if the specified character (Unicode code point) is a digit.
5640      * <p>
5641      * A character is a digit if its general category type, provided
5642      * by {@link Character#getType(int) getType(codePoint)}, is
5643      * {@code DECIMAL_DIGIT_NUMBER}.
5644      * <p>
5645      * Some Unicode character ranges that contain digits:
5646      * <ul>
5647      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5648      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5649      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5650      *     Arabic-Indic digits
5651      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5652      *     Extended Arabic-Indic digits
5653      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5654      *     Devanagari digits
5655      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5656      *     Fullwidth digits
5657      * </ul>
5658      *
5659      * Many other character ranges contain digits as well.
5660      *
5661      * @param   codePoint the character (Unicode code point) to be tested.
5662      * @return  {@code true} if the character is a digit;
5663      *          {@code false} otherwise.
5664      * @see     Character#forDigit(int, int)
5665      * @see     Character#getType(int)
5666      * @since   1.5
5667      */
5668     public static boolean isDigit(int codePoint) {
5669         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
5670     }
5671 
5672     /**
5673      * Determines if a character is defined in Unicode.
5674      * <p>
5675      * A character is defined if at least one of the following is true:
5676      * <ul>
5677      * <li>It has an entry in the UnicodeData file.
5678      * <li>It has a value in a range defined by the UnicodeData file.
5679      * </ul>
5680      *
5681      * <p><b>Note:</b> This method cannot handle <a
5682      * href="#supplementary"> supplementary characters</a>. To support
5683      * all Unicode characters, including supplementary characters, use
5684      * the {@link #isDefined(int)} method.
5685      *
5686      * @param   ch   the character to be tested
5687      * @return  {@code true} if the character has a defined meaning
5688      *          in Unicode; {@code false} otherwise.
5689      * @see     Character#isDigit(char)
5690      * @see     Character#isLetter(char)
5691      * @see     Character#isLetterOrDigit(char)
5692      * @see     Character#isLowerCase(char)
5693      * @see     Character#isTitleCase(char)
5694      * @see     Character#isUpperCase(char)
5695      * @since   1.0.2
5696      */
5697     public static boolean isDefined(char ch) {
5698         return isDefined((int)ch);
5699     }
5700 
5701     /**
5702      * Determines if a character (Unicode code point) is defined in Unicode.
5703      * <p>
5704      * A character is defined if at least one of the following is true:
5705      * <ul>
5706      * <li>It has an entry in the UnicodeData file.
5707      * <li>It has a value in a range defined by the UnicodeData file.
5708      * </ul>
5709      *
5710      * @param   codePoint the character (Unicode code point) to be tested.
5711      * @return  {@code true} if the character has a defined meaning
5712      *          in Unicode; {@code false} otherwise.
5713      * @see     Character#isDigit(int)
5714      * @see     Character#isLetter(int)
5715      * @see     Character#isLetterOrDigit(int)
5716      * @see     Character#isLowerCase(int)
5717      * @see     Character#isTitleCase(int)
5718      * @see     Character#isUpperCase(int)
5719      * @since   1.5
5720      */
5721     public static boolean isDefined(int codePoint) {
5722         return getType(codePoint) != Character.UNASSIGNED;
5723     }
5724 
5725     /**
5726      * Determines if the specified character is a letter.
5727      * <p>
5728      * A character is considered to be a letter if its general
5729      * category type, provided by {@code Character.getType(ch)},
5730      * is any of the following:
5731      * <ul>
5732      * <li> {@code UPPERCASE_LETTER}
5733      * <li> {@code LOWERCASE_LETTER}
5734      * <li> {@code TITLECASE_LETTER}
5735      * <li> {@code MODIFIER_LETTER}
5736      * <li> {@code OTHER_LETTER}
5737      * </ul>
5738      *
5739      * Not all letters have case. Many characters are
5740      * letters but are neither uppercase nor lowercase nor titlecase.
5741      *
5742      * <p><b>Note:</b> This method cannot handle <a
5743      * href="#supplementary"> supplementary characters</a>. To support
5744      * all Unicode characters, including supplementary characters, use
5745      * the {@link #isLetter(int)} method.
5746      *
5747      * @param   ch   the character to be tested.
5748      * @return  {@code true} if the character is a letter;
5749      *          {@code false} otherwise.
5750      * @see     Character#isDigit(char)
5751      * @see     Character#isJavaIdentifierStart(char)
5752      * @see     Character#isJavaLetter(char)
5753      * @see     Character#isJavaLetterOrDigit(char)
5754      * @see     Character#isLetterOrDigit(char)
5755      * @see     Character#isLowerCase(char)
5756      * @see     Character#isTitleCase(char)
5757      * @see     Character#isUnicodeIdentifierStart(char)
5758      * @see     Character#isUpperCase(char)
5759      */
5760     public static boolean isLetter(char ch) {
5761         return isLetter((int)ch);
5762     }
5763 
5764     /**
5765      * Determines if the specified character (Unicode code point) is a letter.
5766      * <p>
5767      * A character is considered to be a letter if its general
5768      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5769      * is any of the following:
5770      * <ul>
5771      * <li> {@code UPPERCASE_LETTER}
5772      * <li> {@code LOWERCASE_LETTER}
5773      * <li> {@code TITLECASE_LETTER}
5774      * <li> {@code MODIFIER_LETTER}
5775      * <li> {@code OTHER_LETTER}
5776      * </ul>
5777      *
5778      * Not all letters have case. Many characters are
5779      * letters but are neither uppercase nor lowercase nor titlecase.
5780      *
5781      * @param   codePoint the character (Unicode code point) to be tested.
5782      * @return  {@code true} if the character is a letter;
5783      *          {@code false} otherwise.
5784      * @see     Character#isDigit(int)
5785      * @see     Character#isJavaIdentifierStart(int)
5786      * @see     Character#isLetterOrDigit(int)
5787      * @see     Character#isLowerCase(int)
5788      * @see     Character#isTitleCase(int)
5789      * @see     Character#isUnicodeIdentifierStart(int)
5790      * @see     Character#isUpperCase(int)
5791      * @since   1.5
5792      */
5793     public static boolean isLetter(int codePoint) {
5794         return ((((1 << Character.UPPERCASE_LETTER) |
5795             (1 << Character.LOWERCASE_LETTER) |
5796             (1 << Character.TITLECASE_LETTER) |
5797             (1 << Character.MODIFIER_LETTER) |
5798             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
5799             != 0;
5800     }
5801 
5802     /**
5803      * Determines if the specified character is a letter or digit.
5804      * <p>
5805      * A character is considered to be a letter or digit if either
5806      * {@code Character.isLetter(char ch)} or
5807      * {@code Character.isDigit(char ch)} returns
5808      * {@code true} for the character.
5809      *
5810      * <p><b>Note:</b> This method cannot handle <a
5811      * href="#supplementary"> supplementary characters</a>. To support
5812      * all Unicode characters, including supplementary characters, use
5813      * the {@link #isLetterOrDigit(int)} method.
5814      *
5815      * @param   ch   the character to be tested.
5816      * @return  {@code true} if the character is a letter or digit;
5817      *          {@code false} otherwise.
5818      * @see     Character#isDigit(char)
5819      * @see     Character#isJavaIdentifierPart(char)
5820      * @see     Character#isJavaLetter(char)
5821      * @see     Character#isJavaLetterOrDigit(char)
5822      * @see     Character#isLetter(char)
5823      * @see     Character#isUnicodeIdentifierPart(char)
5824      * @since   1.0.2
5825      */
5826     public static boolean isLetterOrDigit(char ch) {
5827         return isLetterOrDigit((int)ch);
5828     }
5829 
5830     /**
5831      * Determines if the specified character (Unicode code point) is a letter or digit.
5832      * <p>
5833      * A character is considered to be a letter or digit if either
5834      * {@link #isLetter(int) isLetter(codePoint)} or
5835      * {@link #isDigit(int) isDigit(codePoint)} returns
5836      * {@code true} for the character.
5837      *
5838      * @param   codePoint the character (Unicode code point) to be tested.
5839      * @return  {@code true} if the character is a letter or digit;
5840      *          {@code false} otherwise.
5841      * @see     Character#isDigit(int)
5842      * @see     Character#isJavaIdentifierPart(int)
5843      * @see     Character#isLetter(int)
5844      * @see     Character#isUnicodeIdentifierPart(int)
5845      * @since   1.5
5846      */
5847     public static boolean isLetterOrDigit(int codePoint) {
5848         return ((((1 << Character.UPPERCASE_LETTER) |
5849             (1 << Character.LOWERCASE_LETTER) |
5850             (1 << Character.TITLECASE_LETTER) |
5851             (1 << Character.MODIFIER_LETTER) |
5852             (1 << Character.OTHER_LETTER) |
5853             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
5854             != 0;
5855     }
5856 
5857     /**
5858      * Determines if the specified character is permissible as the first
5859      * character in a Java identifier.
5860      * <p>
5861      * A character may start a Java identifier if and only if
5862      * one of the following is true:
5863      * <ul>
5864      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5865      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5866      * <li> {@code ch} is a currency symbol (such as {@code '$'})
5867      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5868      * </ul>
5869      *
5870      * @param   ch the character to be tested.
5871      * @return  {@code true} if the character may start a Java
5872      *          identifier; {@code false} otherwise.
5873      * @see     Character#isJavaLetterOrDigit(char)
5874      * @see     Character#isJavaIdentifierStart(char)
5875      * @see     Character#isJavaIdentifierPart(char)
5876      * @see     Character#isLetter(char)
5877      * @see     Character#isLetterOrDigit(char)
5878      * @see     Character#isUnicodeIdentifierStart(char)
5879      * @since   1.0.2
5880      * @deprecated Replaced by isJavaIdentifierStart(char).
5881      */
5882     @Deprecated
5883     public static boolean isJavaLetter(char ch) {
5884         return isJavaIdentifierStart(ch);
5885     }
5886 
5887     /**
5888      * Determines if the specified character may be part of a Java
5889      * identifier as other than the first character.
5890      * <p>
5891      * A character may be part of a Java identifier if and only if any
5892      * of the following are true:
5893      * <ul>
5894      * <li>  it is a letter
5895      * <li>  it is a currency symbol (such as {@code '$'})
5896      * <li>  it is a connecting punctuation character (such as {@code '_'})
5897      * <li>  it is a digit
5898      * <li>  it is a numeric letter (such as a Roman numeral character)
5899      * <li>  it is a combining mark
5900      * <li>  it is a non-spacing mark
5901      * <li> {@code isIdentifierIgnorable} returns
5902      * {@code true} for the character.
5903      * </ul>
5904      *
5905      * @param   ch the character to be tested.
5906      * @return  {@code true} if the character may be part of a
5907      *          Java identifier; {@code false} otherwise.
5908      * @see     Character#isJavaLetter(char)
5909      * @see     Character#isJavaIdentifierStart(char)
5910      * @see     Character#isJavaIdentifierPart(char)
5911      * @see     Character#isLetter(char)
5912      * @see     Character#isLetterOrDigit(char)
5913      * @see     Character#isUnicodeIdentifierPart(char)
5914      * @see     Character#isIdentifierIgnorable(char)
5915      * @since   1.0.2
5916      * @deprecated Replaced by isJavaIdentifierPart(char).
5917      */
5918     @Deprecated
5919     public static boolean isJavaLetterOrDigit(char ch) {
5920         return isJavaIdentifierPart(ch);
5921     }
5922 
5923     /**
5924      * Determines if the specified character (Unicode code point) is an alphabet.
5925      * <p>
5926      * A character is considered to be alphabetic if its general category type,
5927      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
5928      * the following:
5929      * <ul>
5930      * <li> <code>UPPERCASE_LETTER</code>
5931      * <li> <code>LOWERCASE_LETTER</code>
5932      * <li> <code>TITLECASE_LETTER</code>
5933      * <li> <code>MODIFIER_LETTER</code>
5934      * <li> <code>OTHER_LETTER</code>
5935      * <li> <code>LETTER_NUMBER</code>
5936      * </ul>
5937      * or it has contributory property Other_Alphabetic as defined by the
5938      * Unicode Standard.
5939      *
5940      * @param   codePoint the character (Unicode code point) to be tested.
5941      * @return  <code>true</code> if the character is a Unicode alphabet
5942      *          character, <code>false</code> otherwise.
5943      * @since   1.7
5944      */
5945     public static boolean isAlphabetic(int codePoint) {
5946         return (((((1 << Character.UPPERCASE_LETTER) |
5947             (1 << Character.LOWERCASE_LETTER) |
5948             (1 << Character.TITLECASE_LETTER) |
5949             (1 << Character.MODIFIER_LETTER) |
5950             (1 << Character.OTHER_LETTER) |
5951             (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
5952             CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
5953     }
5954 
5955     /**
5956      * Determines if the specified character (Unicode code point) is a CJKV
5957      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
5958      * the Unicode Standard.
5959      *
5960      * @param   codePoint the character (Unicode code point) to be tested.
5961      * @return  <code>true</code> if the character is a Unicode ideograph
5962      *          character, <code>false</code> otherwise.
5963      * @since   1.7
5964      */
5965     public static boolean isIdeographic(int codePoint) {
5966         return CharacterData.of(codePoint).isIdeographic(codePoint);
5967     }
5968 
5969     /**
5970      * Determines if the specified character is
5971      * permissible as the first character in a Java identifier.
5972      * <p>
5973      * A character may start a Java identifier if and only if
5974      * one of the following conditions is true:
5975      * <ul>
5976      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5977      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5978      * <li> {@code ch} is a currency symbol (such as {@code '$'})
5979      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5980      * </ul>
5981      *
5982      * <p><b>Note:</b> This method cannot handle <a
5983      * href="#supplementary"> supplementary characters</a>. To support
5984      * all Unicode characters, including supplementary characters, use
5985      * the {@link #isJavaIdentifierStart(int)} method.
5986      *
5987      * @param   ch the character to be tested.
5988      * @return  {@code true} if the character may start a Java identifier;
5989      *          {@code false} otherwise.
5990      * @see     Character#isJavaIdentifierPart(char)
5991      * @see     Character#isLetter(char)
5992      * @see     Character#isUnicodeIdentifierStart(char)
5993      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5994      * @since   1.1
5995      */
5996     public static boolean isJavaIdentifierStart(char ch) {
5997         return isJavaIdentifierStart((int)ch);
5998     }
5999 
6000     /**
6001      * Determines if the character (Unicode code point) is
6002      * permissible as the first character in a Java identifier.
6003      * <p>
6004      * A character may start a Java identifier if and only if
6005      * one of the following conditions is true:
6006      * <ul>
6007      * <li> {@link #isLetter(int) isLetter(codePoint)}
6008      *      returns {@code true}
6009      * <li> {@link #getType(int) getType(codePoint)}
6010      *      returns {@code LETTER_NUMBER}
6011      * <li> the referenced character is a currency symbol (such as {@code '$'})
6012      * <li> the referenced character is a connecting punctuation character
6013      *      (such as {@code '_'}).
6014      * </ul>
6015      *
6016      * @param   codePoint the character (Unicode code point) to be tested.
6017      * @return  {@code true} if the character may start a Java identifier;
6018      *          {@code false} otherwise.
6019      * @see     Character#isJavaIdentifierPart(int)
6020      * @see     Character#isLetter(int)
6021      * @see     Character#isUnicodeIdentifierStart(int)
6022      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6023      * @since   1.5
6024      */
6025     public static boolean isJavaIdentifierStart(int codePoint) {
6026         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
6027     }
6028 
6029     /**
6030      * Determines if the specified character may be part of a Java
6031      * identifier as other than the first character.
6032      * <p>
6033      * A character may be part of a Java identifier if any of the following
6034      * are true:
6035      * <ul>
6036      * <li>  it is a letter
6037      * <li>  it is a currency symbol (such as {@code '$'})
6038      * <li>  it is a connecting punctuation character (such as {@code '_'})
6039      * <li>  it is a digit
6040      * <li>  it is a numeric letter (such as a Roman numeral character)
6041      * <li>  it is a combining mark
6042      * <li>  it is a non-spacing mark
6043      * <li> {@code isIdentifierIgnorable} returns
6044      * {@code true} for the character
6045      * </ul>
6046      *
6047      * <p><b>Note:</b> This method cannot handle <a
6048      * href="#supplementary"> supplementary characters</a>. To support
6049      * all Unicode characters, including supplementary characters, use
6050      * the {@link #isJavaIdentifierPart(int)} method.
6051      *
6052      * @param   ch      the character to be tested.
6053      * @return {@code true} if the character may be part of a
6054      *          Java identifier; {@code false} otherwise.
6055      * @see     Character#isIdentifierIgnorable(char)
6056      * @see     Character#isJavaIdentifierStart(char)
6057      * @see     Character#isLetterOrDigit(char)
6058      * @see     Character#isUnicodeIdentifierPart(char)
6059      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6060      * @since   1.1
6061      */
6062     public static boolean isJavaIdentifierPart(char ch) {
6063         return isJavaIdentifierPart((int)ch);
6064     }
6065 
6066     /**
6067      * Determines if the character (Unicode code point) may be part of a Java
6068      * identifier as other than the first character.
6069      * <p>
6070      * A character may be part of a Java identifier if any of the following
6071      * are true:
6072      * <ul>
6073      * <li>  it is a letter
6074      * <li>  it is a currency symbol (such as {@code '$'})
6075      * <li>  it is a connecting punctuation character (such as {@code '_'})
6076      * <li>  it is a digit
6077      * <li>  it is a numeric letter (such as a Roman numeral character)
6078      * <li>  it is a combining mark
6079      * <li>  it is a non-spacing mark
6080      * <li> {@link #isIdentifierIgnorable(int)
6081      * isIdentifierIgnorable(codePoint)} returns {@code true} for
6082      * the character
6083      * </ul>
6084      *
6085      * @param   codePoint the character (Unicode code point) to be tested.
6086      * @return {@code true} if the character may be part of a
6087      *          Java identifier; {@code false} otherwise.
6088      * @see     Character#isIdentifierIgnorable(int)
6089      * @see     Character#isJavaIdentifierStart(int)
6090      * @see     Character#isLetterOrDigit(int)
6091      * @see     Character#isUnicodeIdentifierPart(int)
6092      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6093      * @since   1.5
6094      */
6095     public static boolean isJavaIdentifierPart(int codePoint) {
6096         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
6097     }
6098 
6099     /**
6100      * Determines if the specified character is permissible as the
6101      * first character in a Unicode identifier.
6102      * <p>
6103      * A character may start a Unicode identifier if and only if
6104      * one of the following conditions is true:
6105      * <ul>
6106      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6107      * <li> {@link #getType(char) getType(ch)} returns
6108      *      {@code LETTER_NUMBER}.
6109      * </ul>
6110      *
6111      * <p><b>Note:</b> This method cannot handle <a
6112      * href="#supplementary"> supplementary characters</a>. To support
6113      * all Unicode characters, including supplementary characters, use
6114      * the {@link #isUnicodeIdentifierStart(int)} method.
6115      *
6116      * @param   ch      the character to be tested.
6117      * @return  {@code true} if the character may start a Unicode
6118      *          identifier; {@code false} otherwise.
6119      * @see     Character#isJavaIdentifierStart(char)
6120      * @see     Character#isLetter(char)
6121      * @see     Character#isUnicodeIdentifierPart(char)
6122      * @since   1.1
6123      */
6124     public static boolean isUnicodeIdentifierStart(char ch) {
6125         return isUnicodeIdentifierStart((int)ch);
6126     }
6127 
6128     /**
6129      * Determines if the specified character (Unicode code point) is permissible as the
6130      * first character in a Unicode identifier.
6131      * <p>
6132      * A character may start a Unicode identifier if and only if
6133      * one of the following conditions is true:
6134      * <ul>
6135      * <li> {@link #isLetter(int) isLetter(codePoint)}
6136      *      returns {@code true}
6137      * <li> {@link #getType(int) getType(codePoint)}
6138      *      returns {@code LETTER_NUMBER}.
6139      * </ul>
6140      * @param   codePoint the character (Unicode code point) to be tested.
6141      * @return  {@code true} if the character may start a Unicode
6142      *          identifier; {@code false} otherwise.
6143      * @see     Character#isJavaIdentifierStart(int)
6144      * @see     Character#isLetter(int)
6145      * @see     Character#isUnicodeIdentifierPart(int)
6146      * @since   1.5
6147      */
6148     public static boolean isUnicodeIdentifierStart(int codePoint) {
6149         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
6150     }
6151 
6152     /**
6153      * Determines if the specified character may be part of a Unicode
6154      * identifier as other than the first character.
6155      * <p>
6156      * A character may be part of a Unicode identifier if and only if
6157      * one of the following statements is true:
6158      * <ul>
6159      * <li>  it is a letter
6160      * <li>  it is a connecting punctuation character (such as {@code '_'})
6161      * <li>  it is a digit
6162      * <li>  it is a numeric letter (such as a Roman numeral character)
6163      * <li>  it is a combining mark
6164      * <li>  it is a non-spacing mark
6165      * <li> {@code isIdentifierIgnorable} returns
6166      * {@code true} for this character.
6167      * </ul>
6168      *
6169      * <p><b>Note:</b> This method cannot handle <a
6170      * href="#supplementary"> supplementary characters</a>. To support
6171      * all Unicode characters, including supplementary characters, use
6172      * the {@link #isUnicodeIdentifierPart(int)} method.
6173      *
6174      * @param   ch      the character to be tested.
6175      * @return  {@code true} if the character may be part of a
6176      *          Unicode identifier; {@code false} otherwise.
6177      * @see     Character#isIdentifierIgnorable(char)
6178      * @see     Character#isJavaIdentifierPart(char)
6179      * @see     Character#isLetterOrDigit(char)
6180      * @see     Character#isUnicodeIdentifierStart(char)
6181      * @since   1.1
6182      */
6183     public static boolean isUnicodeIdentifierPart(char ch) {
6184         return isUnicodeIdentifierPart((int)ch);
6185     }
6186 
6187     /**
6188      * Determines if the specified character (Unicode code point) may be part of a Unicode
6189      * identifier as other than the first character.
6190      * <p>
6191      * A character may be part of a Unicode identifier if and only if
6192      * one of the following statements is true:
6193      * <ul>
6194      * <li>  it is a letter
6195      * <li>  it is a connecting punctuation character (such as {@code '_'})
6196      * <li>  it is a digit
6197      * <li>  it is a numeric letter (such as a Roman numeral character)
6198      * <li>  it is a combining mark
6199      * <li>  it is a non-spacing mark
6200      * <li> {@code isIdentifierIgnorable} returns
6201      * {@code true} for this character.
6202      * </ul>
6203      * @param   codePoint the character (Unicode code point) to be tested.
6204      * @return  {@code true} if the character may be part of a
6205      *          Unicode identifier; {@code false} otherwise.
6206      * @see     Character#isIdentifierIgnorable(int)
6207      * @see     Character#isJavaIdentifierPart(int)
6208      * @see     Character#isLetterOrDigit(int)
6209      * @see     Character#isUnicodeIdentifierStart(int)
6210      * @since   1.5
6211      */
6212     public static boolean isUnicodeIdentifierPart(int codePoint) {
6213         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
6214     }
6215 
6216     /**
6217      * Determines if the specified character should be regarded as
6218      * an ignorable character in a Java identifier or a Unicode identifier.
6219      * <p>
6220      * The following Unicode characters are ignorable in a Java identifier
6221      * or a Unicode identifier:
6222      * <ul>
6223      * <li>ISO control characters that are not whitespace
6224      * <ul>
6225      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6226      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6227      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6228      * </ul>
6229      *
6230      * <li>all characters that have the {@code FORMAT} general
6231      * category value
6232      * </ul>
6233      *
6234      * <p><b>Note:</b> This method cannot handle <a
6235      * href="#supplementary"> supplementary characters</a>. To support
6236      * all Unicode characters, including supplementary characters, use
6237      * the {@link #isIdentifierIgnorable(int)} method.
6238      *
6239      * @param   ch      the character to be tested.
6240      * @return  {@code true} if the character is an ignorable control
6241      *          character that may be part of a Java or Unicode identifier;
6242      *           {@code false} otherwise.
6243      * @see     Character#isJavaIdentifierPart(char)
6244      * @see     Character#isUnicodeIdentifierPart(char)
6245      * @since   1.1
6246      */
6247     public static boolean isIdentifierIgnorable(char ch) {
6248         return isIdentifierIgnorable((int)ch);
6249     }
6250 
6251     /**
6252      * Determines if the specified character (Unicode code point) should be regarded as
6253      * an ignorable character in a Java identifier or a Unicode identifier.
6254      * <p>
6255      * The following Unicode characters are ignorable in a Java identifier
6256      * or a Unicode identifier:
6257      * <ul>
6258      * <li>ISO control characters that are not whitespace
6259      * <ul>
6260      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6261      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6262      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6263      * </ul>
6264      *
6265      * <li>all characters that have the {@code FORMAT} general
6266      * category value
6267      * </ul>
6268      *
6269      * @param   codePoint the character (Unicode code point) to be tested.
6270      * @return  {@code true} if the character is an ignorable control
6271      *          character that may be part of a Java or Unicode identifier;
6272      *          {@code false} otherwise.
6273      * @see     Character#isJavaIdentifierPart(int)
6274      * @see     Character#isUnicodeIdentifierPart(int)
6275      * @since   1.5
6276      */
6277     public static boolean isIdentifierIgnorable(int codePoint) {
6278         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
6279     }
6280 
6281     /**
6282      * Converts the character argument to lowercase using case
6283      * mapping information from the UnicodeData file.
6284      * <p>
6285      * Note that
6286      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
6287      * does not always return {@code true} for some ranges of
6288      * characters, particularly those that are symbols or ideographs.
6289      *
6290      * <p>In general, {@link String#toLowerCase()} should be used to map
6291      * characters to lowercase. {@code String} case mapping methods
6292      * have several benefits over {@code Character} case mapping methods.
6293      * {@code String} case mapping methods can perform locale-sensitive
6294      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6295      * the {@code Character} case mapping methods cannot.
6296      *
6297      * <p><b>Note:</b> This method cannot handle <a
6298      * href="#supplementary"> supplementary characters</a>. To support
6299      * all Unicode characters, including supplementary characters, use
6300      * the {@link #toLowerCase(int)} method.
6301      *
6302      * @param   ch   the character to be converted.
6303      * @return  the lowercase equivalent of the character, if any;
6304      *          otherwise, the character itself.
6305      * @see     Character#isLowerCase(char)
6306      * @see     String#toLowerCase()
6307      */
6308     public static char toLowerCase(char ch) {
6309         return (char)toLowerCase((int)ch);
6310     }
6311 
6312     /**
6313      * Converts the character (Unicode code point) argument to
6314      * lowercase using case mapping information from the UnicodeData
6315      * file.
6316      *
6317      * <p> Note that
6318      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
6319      * does not always return {@code true} for some ranges of
6320      * characters, particularly those that are symbols or ideographs.
6321      *
6322      * <p>In general, {@link String#toLowerCase()} should be used to map
6323      * characters to lowercase. {@code String} case mapping methods
6324      * have several benefits over {@code Character} case mapping methods.
6325      * {@code String} case mapping methods can perform locale-sensitive
6326      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6327      * the {@code Character} case mapping methods cannot.
6328      *
6329      * @param   codePoint   the character (Unicode code point) to be converted.
6330      * @return  the lowercase equivalent of the character (Unicode code
6331      *          point), if any; otherwise, the character itself.
6332      * @see     Character#isLowerCase(int)
6333      * @see     String#toLowerCase()
6334      *
6335      * @since   1.5
6336      */
6337     public static int toLowerCase(int codePoint) {
6338         return CharacterData.of(codePoint).toLowerCase(codePoint);
6339     }
6340 
6341     /**
6342      * Converts the character argument to uppercase using case mapping
6343      * information from the UnicodeData file.
6344      * <p>
6345      * Note that
6346      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
6347      * does not always return {@code true} for some ranges of
6348      * characters, particularly those that are symbols or ideographs.
6349      *
6350      * <p>In general, {@link String#toUpperCase()} should be used to map
6351      * characters to uppercase. {@code String} case mapping methods
6352      * have several benefits over {@code Character} case mapping methods.
6353      * {@code String} case mapping methods can perform locale-sensitive
6354      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6355      * the {@code Character} case mapping methods cannot.
6356      *
6357      * <p><b>Note:</b> This method cannot handle <a
6358      * href="#supplementary"> supplementary characters</a>. To support
6359      * all Unicode characters, including supplementary characters, use
6360      * the {@link #toUpperCase(int)} method.
6361      *
6362      * @param   ch   the character to be converted.
6363      * @return  the uppercase equivalent of the character, if any;
6364      *          otherwise, the character itself.
6365      * @see     Character#isUpperCase(char)
6366      * @see     String#toUpperCase()
6367      */
6368     public static char toUpperCase(char ch) {
6369         return (char)toUpperCase((int)ch);
6370     }
6371 
6372     /**
6373      * Converts the character (Unicode code point) argument to
6374      * uppercase using case mapping information from the UnicodeData
6375      * file.
6376      *
6377      * <p>Note that
6378      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
6379      * does not always return {@code true} for some ranges of
6380      * characters, particularly those that are symbols or ideographs.
6381      *
6382      * <p>In general, {@link String#toUpperCase()} should be used to map
6383      * characters to uppercase. {@code String} case mapping methods
6384      * have several benefits over {@code Character} case mapping methods.
6385      * {@code String} case mapping methods can perform locale-sensitive
6386      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6387      * the {@code Character} case mapping methods cannot.
6388      *
6389      * @param   codePoint   the character (Unicode code point) to be converted.
6390      * @return  the uppercase equivalent of the character, if any;
6391      *          otherwise, the character itself.
6392      * @see     Character#isUpperCase(int)
6393      * @see     String#toUpperCase()
6394      *
6395      * @since   1.5
6396      */
6397     public static int toUpperCase(int codePoint) {
6398         return CharacterData.of(codePoint).toUpperCase(codePoint);
6399     }
6400 
6401     /**
6402      * Converts the character argument to titlecase using case mapping
6403      * information from the UnicodeData file. If a character has no
6404      * explicit titlecase mapping and is not itself a titlecase char
6405      * according to UnicodeData, then the uppercase mapping is
6406      * returned as an equivalent titlecase mapping. If the
6407      * {@code char} argument is already a titlecase
6408      * {@code char}, the same {@code char} value will be
6409      * returned.
6410      * <p>
6411      * Note that
6412      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
6413      * does not always return {@code true} for some ranges of
6414      * characters.
6415      *
6416      * <p><b>Note:</b> This method cannot handle <a
6417      * href="#supplementary"> supplementary characters</a>. To support
6418      * all Unicode characters, including supplementary characters, use
6419      * the {@link #toTitleCase(int)} method.
6420      *
6421      * @param   ch   the character to be converted.
6422      * @return  the titlecase equivalent of the character, if any;
6423      *          otherwise, the character itself.
6424      * @see     Character#isTitleCase(char)
6425      * @see     Character#toLowerCase(char)
6426      * @see     Character#toUpperCase(char)
6427      * @since   1.0.2
6428      */
6429     public static char toTitleCase(char ch) {
6430         return (char)toTitleCase((int)ch);
6431     }
6432 
6433     /**
6434      * Converts the character (Unicode code point) argument to titlecase using case mapping
6435      * information from the UnicodeData file. If a character has no
6436      * explicit titlecase mapping and is not itself a titlecase char
6437      * according to UnicodeData, then the uppercase mapping is
6438      * returned as an equivalent titlecase mapping. If the
6439      * character argument is already a titlecase
6440      * character, the same character value will be
6441      * returned.
6442      *
6443      * <p>Note that
6444      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
6445      * does not always return {@code true} for some ranges of
6446      * characters.
6447      *
6448      * @param   codePoint   the character (Unicode code point) to be converted.
6449      * @return  the titlecase equivalent of the character, if any;
6450      *          otherwise, the character itself.
6451      * @see     Character#isTitleCase(int)
6452      * @see     Character#toLowerCase(int)
6453      * @see     Character#toUpperCase(int)
6454      * @since   1.5
6455      */
6456     public static int toTitleCase(int codePoint) {
6457         return CharacterData.of(codePoint).toTitleCase(codePoint);
6458     }
6459 
6460     /**
6461      * Returns the numeric value of the character {@code ch} in the
6462      * specified radix.
6463      * <p>
6464      * If the radix is not in the range {@code MIN_RADIX} &le;
6465      * {@code radix} &le; {@code MAX_RADIX} or if the
6466      * value of {@code ch} is not a valid digit in the specified
6467      * radix, {@code -1} is returned. A character is a valid digit
6468      * if at least one of the following is true:
6469      * <ul>
6470      * <li>The method {@code isDigit} is {@code true} of the character
6471      *     and the Unicode decimal digit value of the character (or its
6472      *     single-character decomposition) is less than the specified radix.
6473      *     In this case the decimal digit value is returned.
6474      * <li>The character is one of the uppercase Latin letters
6475      *     {@code 'A'} through {@code 'Z'} and its code is less than
6476      *     {@code radix + 'A' - 10}.
6477      *     In this case, {@code ch - 'A' + 10}
6478      *     is returned.
6479      * <li>The character is one of the lowercase Latin letters
6480      *     {@code 'a'} through {@code 'z'} and its code is less than
6481      *     {@code radix + 'a' - 10}.
6482      *     In this case, {@code ch - 'a' + 10}
6483      *     is returned.
6484      * <li>The character is one of the fullwidth uppercase Latin letters A
6485      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6486      *     and its code is less than
6487      *     {@code radix + '\u005CuFF21' - 10}.
6488      *     In this case, {@code ch - '\u005CuFF21' + 10}
6489      *     is returned.
6490      * <li>The character is one of the fullwidth lowercase Latin letters a
6491      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6492      *     and its code is less than
6493      *     {@code radix + '\u005CuFF41' - 10}.
6494      *     In this case, {@code ch - '\u005CuFF41' + 10}
6495      *     is returned.
6496      * </ul>
6497      *
6498      * <p><b>Note:</b> This method cannot handle <a
6499      * href="#supplementary"> supplementary characters</a>. To support
6500      * all Unicode characters, including supplementary characters, use
6501      * the {@link #digit(int, int)} method.
6502      *
6503      * @param   ch      the character to be converted.
6504      * @param   radix   the radix.
6505      * @return  the numeric value represented by the character in the
6506      *          specified radix.
6507      * @see     Character#forDigit(int, int)
6508      * @see     Character#isDigit(char)
6509      */
6510     public static int digit(char ch, int radix) {
6511         return digit((int)ch, radix);
6512     }
6513 
6514     /**
6515      * Returns the numeric value of the specified character (Unicode
6516      * code point) in the specified radix.
6517      *
6518      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
6519      * {@code radix} &le; {@code MAX_RADIX} or if the
6520      * character is not a valid digit in the specified
6521      * radix, {@code -1} is returned. A character is a valid digit
6522      * if at least one of the following is true:
6523      * <ul>
6524      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
6525      *     and the Unicode decimal digit value of the character (or its
6526      *     single-character decomposition) is less than the specified radix.
6527      *     In this case the decimal digit value is returned.
6528      * <li>The character is one of the uppercase Latin letters
6529      *     {@code 'A'} through {@code 'Z'} and its code is less than
6530      *     {@code radix + 'A' - 10}.
6531      *     In this case, {@code codePoint - 'A' + 10}
6532      *     is returned.
6533      * <li>The character is one of the lowercase Latin letters
6534      *     {@code 'a'} through {@code 'z'} and its code is less than
6535      *     {@code radix + 'a' - 10}.
6536      *     In this case, {@code codePoint - 'a' + 10}
6537      *     is returned.
6538      * <li>The character is one of the fullwidth uppercase Latin letters A
6539      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6540      *     and its code is less than
6541      *     {@code radix + '\u005CuFF21' - 10}.
6542      *     In this case,
6543      *     {@code codePoint - '\u005CuFF21' + 10}
6544      *     is returned.
6545      * <li>The character is one of the fullwidth lowercase Latin letters a
6546      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6547      *     and its code is less than
6548      *     {@code radix + '\u005CuFF41' - 10}.
6549      *     In this case,
6550      *     {@code codePoint - '\u005CuFF41' + 10}
6551      *     is returned.
6552      * </ul>
6553      *
6554      * @param   codePoint the character (Unicode code point) to be converted.
6555      * @param   radix   the radix.
6556      * @return  the numeric value represented by the character in the
6557      *          specified radix.
6558      * @see     Character#forDigit(int, int)
6559      * @see     Character#isDigit(int)
6560      * @since   1.5
6561      */
6562     public static int digit(int codePoint, int radix) {
6563         return CharacterData.of(codePoint).digit(codePoint, radix);
6564     }
6565 
6566     /**
6567      * Returns the {@code int} value that the specified Unicode
6568      * character represents. For example, the character
6569      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6570      * an int with a value of 50.
6571      * <p>
6572      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6573      * {@code '\u005Cu005A'}), lowercase
6574      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6575      * full width variant ({@code '\u005CuFF21'} through
6576      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6577      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6578      * through 35. This is independent of the Unicode specification,
6579      * which does not assign numeric values to these {@code char}
6580      * values.
6581      * <p>
6582      * If the character does not have a numeric value, then -1 is returned.
6583      * If the character has a numeric value that cannot be represented as a
6584      * nonnegative integer (for example, a fractional value), then -2
6585      * is returned.
6586      *
6587      * <p><b>Note:</b> This method cannot handle <a
6588      * href="#supplementary"> supplementary characters</a>. To support
6589      * all Unicode characters, including supplementary characters, use
6590      * the {@link #getNumericValue(int)} method.
6591      *
6592      * @param   ch      the character to be converted.
6593      * @return  the numeric value of the character, as a nonnegative {@code int}
6594      *          value; -2 if the character has a numeric value that is not a
6595      *          nonnegative integer; -1 if the character has no numeric value.
6596      * @see     Character#forDigit(int, int)
6597      * @see     Character#isDigit(char)
6598      * @since   1.1
6599      */
6600     public static int getNumericValue(char ch) {
6601         return getNumericValue((int)ch);
6602     }
6603 
6604     /**
6605      * Returns the {@code int} value that the specified
6606      * character (Unicode code point) represents. For example, the character
6607      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6608      * an {@code int} with a value of 50.
6609      * <p>
6610      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6611      * {@code '\u005Cu005A'}), lowercase
6612      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6613      * full width variant ({@code '\u005CuFF21'} through
6614      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6615      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6616      * through 35. This is independent of the Unicode specification,
6617      * which does not assign numeric values to these {@code char}
6618      * values.
6619      * <p>
6620      * If the character does not have a numeric value, then -1 is returned.
6621      * If the character has a numeric value that cannot be represented as a
6622      * nonnegative integer (for example, a fractional value), then -2
6623      * is returned.
6624      *
6625      * @param   codePoint the character (Unicode code point) to be converted.
6626      * @return  the numeric value of the character, as a nonnegative {@code int}
6627      *          value; -2 if the character has a numeric value that is not a
6628      *          nonnegative integer; -1 if the character has no numeric value.
6629      * @see     Character#forDigit(int, int)
6630      * @see     Character#isDigit(int)
6631      * @since   1.5
6632      */
6633     public static int getNumericValue(int codePoint) {
6634         return CharacterData.of(codePoint).getNumericValue(codePoint);
6635     }
6636 
6637     /**
6638      * Determines if the specified character is ISO-LATIN-1 white space.
6639      * This method returns {@code true} for the following five
6640      * characters only:
6641      * <table summary="truechars">
6642      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
6643      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
6644      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
6645      *     <td>{@code NEW LINE}</td></tr>
6646      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
6647      *     <td>{@code FORM FEED}</td></tr>
6648      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
6649      *     <td>{@code CARRIAGE RETURN}</td></tr>
6650      * <tr><td>{@code '&nbsp;'}</td>  <td>{@code U+0020}</td>
6651      *     <td>{@code SPACE}</td></tr>
6652      * </table>
6653      *
6654      * @param      ch   the character to be tested.
6655      * @return     {@code true} if the character is ISO-LATIN-1 white
6656      *             space; {@code false} otherwise.
6657      * @see        Character#isSpaceChar(char)
6658      * @see        Character#isWhitespace(char)
6659      * @deprecated Replaced by {@link #isWhitespace(char)}.
6660      */
6661     @Deprecated
6662     public static boolean isSpace(char ch) {
6663         return (ch <= 0x0020) &&
6664             (((((1L << 0x0009) |
6665             (1L << 0x000A) |
6666             (1L << 0x000C) |
6667             (1L << 0x000D) |
6668             (1L << 0x0020)) >> ch) & 1L) != 0);
6669     }
6670 
6671 
6672     /**
6673      * Determines if the specified character is a Unicode space character.
6674      * A character is considered to be a space character if and only if
6675      * it is specified to be a space character by the Unicode Standard. This
6676      * method returns true if the character's general category type is any of
6677      * the following:
6678      * <ul>
6679      * <li> {@code SPACE_SEPARATOR}
6680      * <li> {@code LINE_SEPARATOR}
6681      * <li> {@code PARAGRAPH_SEPARATOR}
6682      * </ul>
6683      *
6684      * <p><b>Note:</b> This method cannot handle <a
6685      * href="#supplementary"> supplementary characters</a>. To support
6686      * all Unicode characters, including supplementary characters, use
6687      * the {@link #isSpaceChar(int)} method.
6688      *
6689      * @param   ch      the character to be tested.
6690      * @return  {@code true} if the character is a space character;
6691      *          {@code false} otherwise.
6692      * @see     Character#isWhitespace(char)
6693      * @since   1.1
6694      */
6695     public static boolean isSpaceChar(char ch) {
6696         return isSpaceChar((int)ch);
6697     }
6698 
6699     /**
6700      * Determines if the specified character (Unicode code point) is a
6701      * Unicode space character.  A character is considered to be a
6702      * space character if and only if it is specified to be a space
6703      * character by the Unicode Standard. This method returns true if
6704      * the character's general category type is any of the following:
6705      *
6706      * <ul>
6707      * <li> {@link #SPACE_SEPARATOR}
6708      * <li> {@link #LINE_SEPARATOR}
6709      * <li> {@link #PARAGRAPH_SEPARATOR}
6710      * </ul>
6711      *
6712      * @param   codePoint the character (Unicode code point) to be tested.
6713      * @return  {@code true} if the character is a space character;
6714      *          {@code false} otherwise.
6715      * @see     Character#isWhitespace(int)
6716      * @since   1.5
6717      */
6718     public static boolean isSpaceChar(int codePoint) {
6719         return ((((1 << Character.SPACE_SEPARATOR) |
6720                   (1 << Character.LINE_SEPARATOR) |
6721                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
6722             != 0;
6723     }
6724 
6725     /**
6726      * Determines if the specified character is white space according to Java.
6727      * A character is a Java whitespace character if and only if it satisfies
6728      * one of the following criteria:
6729      * <ul>
6730      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
6731      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
6732      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6733      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6734      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6735      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6736      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6737      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6738      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6739      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6740      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6741      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6742      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6743      * </ul>
6744      *
6745      * <p><b>Note:</b> This method cannot handle <a
6746      * href="#supplementary"> supplementary characters</a>. To support
6747      * all Unicode characters, including supplementary characters, use
6748      * the {@link #isWhitespace(int)} method.
6749      *
6750      * @param   ch the character to be tested.
6751      * @return  {@code true} if the character is a Java whitespace
6752      *          character; {@code false} otherwise.
6753      * @see     Character#isSpaceChar(char)
6754      * @since   1.1
6755      */
6756     public static boolean isWhitespace(char ch) {
6757         return isWhitespace((int)ch);
6758     }
6759 
6760     /**
6761      * Determines if the specified character (Unicode code point) is
6762      * white space according to Java.  A character is a Java
6763      * whitespace character if and only if it satisfies one of the
6764      * following criteria:
6765      * <ul>
6766      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6767      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6768      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6769      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6770      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6771      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6772      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6773      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6774      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6775      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6776      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6777      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6778      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6779      * </ul>
6780      *
6781      * @param   codePoint the character (Unicode code point) to be tested.
6782      * @return  {@code true} if the character is a Java whitespace
6783      *          character; {@code false} otherwise.
6784      * @see     Character#isSpaceChar(int)
6785      * @since   1.5
6786      */
6787     public static boolean isWhitespace(int codePoint) {
6788         return CharacterData.of(codePoint).isWhitespace(codePoint);
6789     }
6790 
6791     /**
6792      * Determines if the specified character is an ISO control
6793      * character.  A character is considered to be an ISO control
6794      * character if its code is in the range {@code '\u005Cu0000'}
6795      * through {@code '\u005Cu001F'} or in the range
6796      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6797      *
6798      * <p><b>Note:</b> This method cannot handle <a
6799      * href="#supplementary"> supplementary characters</a>. To support
6800      * all Unicode characters, including supplementary characters, use
6801      * the {@link #isISOControl(int)} method.
6802      *
6803      * @param   ch      the character to be tested.
6804      * @return  {@code true} if the character is an ISO control character;
6805      *          {@code false} otherwise.
6806      *
6807      * @see     Character#isSpaceChar(char)
6808      * @see     Character#isWhitespace(char)
6809      * @since   1.1
6810      */
6811     public static boolean isISOControl(char ch) {
6812         return isISOControl((int)ch);
6813     }
6814 
6815     /**
6816      * Determines if the referenced character (Unicode code point) is an ISO control
6817      * character.  A character is considered to be an ISO control
6818      * character if its code is in the range {@code '\u005Cu0000'}
6819      * through {@code '\u005Cu001F'} or in the range
6820      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6821      *
6822      * @param   codePoint the character (Unicode code point) to be tested.
6823      * @return  {@code true} if the character is an ISO control character;
6824      *          {@code false} otherwise.
6825      * @see     Character#isSpaceChar(int)
6826      * @see     Character#isWhitespace(int)
6827      * @since   1.5
6828      */
6829     public static boolean isISOControl(int codePoint) {
6830         // Optimized form of:
6831         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
6832         //     (codePoint >= 0x7F && codePoint <= 0x9F);
6833         return codePoint <= 0x9F &&
6834             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
6835     }
6836 
6837     /**
6838      * Returns a value indicating a character's general category.
6839      *
6840      * <p><b>Note:</b> This method cannot handle <a
6841      * href="#supplementary"> supplementary characters</a>. To support
6842      * all Unicode characters, including supplementary characters, use
6843      * the {@link #getType(int)} method.
6844      *
6845      * @param   ch      the character to be tested.
6846      * @return  a value of type {@code int} representing the
6847      *          character's general category.
6848      * @see     Character#COMBINING_SPACING_MARK
6849      * @see     Character#CONNECTOR_PUNCTUATION
6850      * @see     Character#CONTROL
6851      * @see     Character#CURRENCY_SYMBOL
6852      * @see     Character#DASH_PUNCTUATION
6853      * @see     Character#DECIMAL_DIGIT_NUMBER
6854      * @see     Character#ENCLOSING_MARK
6855      * @see     Character#END_PUNCTUATION
6856      * @see     Character#FINAL_QUOTE_PUNCTUATION
6857      * @see     Character#FORMAT
6858      * @see     Character#INITIAL_QUOTE_PUNCTUATION
6859      * @see     Character#LETTER_NUMBER
6860      * @see     Character#LINE_SEPARATOR
6861      * @see     Character#LOWERCASE_LETTER
6862      * @see     Character#MATH_SYMBOL
6863      * @see     Character#MODIFIER_LETTER
6864      * @see     Character#MODIFIER_SYMBOL
6865      * @see     Character#NON_SPACING_MARK
6866      * @see     Character#OTHER_LETTER
6867      * @see     Character#OTHER_NUMBER
6868      * @see     Character#OTHER_PUNCTUATION
6869      * @see     Character#OTHER_SYMBOL
6870      * @see     Character#PARAGRAPH_SEPARATOR
6871      * @see     Character#PRIVATE_USE
6872      * @see     Character#SPACE_SEPARATOR
6873      * @see     Character#START_PUNCTUATION
6874      * @see     Character#SURROGATE
6875      * @see     Character#TITLECASE_LETTER
6876      * @see     Character#UNASSIGNED
6877      * @see     Character#UPPERCASE_LETTER
6878      * @since   1.1
6879      */
6880     public static int getType(char ch) {
6881         return getType((int)ch);
6882     }
6883 
6884     /**
6885      * Returns a value indicating a character's general category.
6886      *
6887      * @param   codePoint the character (Unicode code point) to be tested.
6888      * @return  a value of type {@code int} representing the
6889      *          character's general category.
6890      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
6891      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
6892      * @see     Character#CONTROL CONTROL
6893      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
6894      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
6895      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
6896      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
6897      * @see     Character#END_PUNCTUATION END_PUNCTUATION
6898      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
6899      * @see     Character#FORMAT FORMAT
6900      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
6901      * @see     Character#LETTER_NUMBER LETTER_NUMBER
6902      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
6903      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
6904      * @see     Character#MATH_SYMBOL MATH_SYMBOL
6905      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
6906      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
6907      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
6908      * @see     Character#OTHER_LETTER OTHER_LETTER
6909      * @see     Character#OTHER_NUMBER OTHER_NUMBER
6910      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
6911      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
6912      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
6913      * @see     Character#PRIVATE_USE PRIVATE_USE
6914      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
6915      * @see     Character#START_PUNCTUATION START_PUNCTUATION
6916      * @see     Character#SURROGATE SURROGATE
6917      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
6918      * @see     Character#UNASSIGNED UNASSIGNED
6919      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
6920      * @since   1.5
6921      */
6922     public static int getType(int codePoint) {
6923         return CharacterData.of(codePoint).getType(codePoint);
6924     }
6925 
6926     /**
6927      * Determines the character representation for a specific digit in
6928      * the specified radix. If the value of {@code radix} is not a
6929      * valid radix, or the value of {@code digit} is not a valid
6930      * digit in the specified radix, the null character
6931      * ({@code '\u005Cu0000'}) is returned.
6932      * <p>
6933      * The {@code radix} argument is valid if it is greater than or
6934      * equal to {@code MIN_RADIX} and less than or equal to
6935      * {@code MAX_RADIX}. The {@code digit} argument is valid if
6936      * {@code 0 <= digit < radix}.
6937      * <p>
6938      * If the digit is less than 10, then
6939      * {@code '0' + digit} is returned. Otherwise, the value
6940      * {@code 'a' + digit - 10} is returned.
6941      *
6942      * @param   digit   the number to convert to a character.
6943      * @param   radix   the radix.
6944      * @return  the {@code char} representation of the specified digit
6945      *          in the specified radix.
6946      * @see     Character#MIN_RADIX
6947      * @see     Character#MAX_RADIX
6948      * @see     Character#digit(char, int)
6949      */
6950     public static char forDigit(int digit, int radix) {
6951         if ((digit >= radix) || (digit < 0)) {
6952             return '\0';
6953         }
6954         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
6955             return '\0';
6956         }
6957         if (digit < 10) {
6958             return (char)('0' + digit);
6959         }
6960         return (char)('a' - 10 + digit);
6961     }
6962 
6963     /**
6964      * Returns the Unicode directionality property for the given
6965      * character.  Character directionality is used to calculate the
6966      * visual ordering of text. The directionality value of undefined
6967      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
6968      *
6969      * <p><b>Note:</b> This method cannot handle <a
6970      * href="#supplementary"> supplementary characters</a>. To support
6971      * all Unicode characters, including supplementary characters, use
6972      * the {@link #getDirectionality(int)} method.
6973      *
6974      * @param  ch {@code char} for which the directionality property
6975      *            is requested.
6976      * @return the directionality property of the {@code char} value.
6977      *
6978      * @see Character#DIRECTIONALITY_UNDEFINED
6979      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
6980      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
6981      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
6982      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
6983      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
6984      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
6985      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
6986      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
6987      * @see Character#DIRECTIONALITY_NONSPACING_MARK
6988      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
6989      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
6990      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
6991      * @see Character#DIRECTIONALITY_WHITESPACE
6992      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
6993      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
6994      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
6995      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
6996      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
6997      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
6998      * @since 1.4
6999      */
7000     public static byte getDirectionality(char ch) {
7001         return getDirectionality((int)ch);
7002     }
7003 
7004     /**
7005      * Returns the Unicode directionality property for the given
7006      * character (Unicode code point).  Character directionality is
7007      * used to calculate the visual ordering of text. The
7008      * directionality value of undefined character is {@link
7009      * #DIRECTIONALITY_UNDEFINED}.
7010      *
7011      * @param   codePoint the character (Unicode code point) for which
7012      *          the directionality property is requested.
7013      * @return the directionality property of the character.
7014      *
7015      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
7016      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
7017      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
7018      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7019      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
7020      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7021      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7022      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
7023      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7024      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
7025      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
7026      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
7027      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
7028      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
7029      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
7030      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7031      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7032      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7033      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7034      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7035      * @since    1.5
7036      */
7037     public static byte getDirectionality(int codePoint) {
7038         return CharacterData.of(codePoint).getDirectionality(codePoint);
7039     }
7040 
7041     /**
7042      * Determines whether the character is mirrored according to the
7043      * Unicode specification.  Mirrored characters should have their
7044      * glyphs horizontally mirrored when displayed in text that is
7045      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
7046      * PARENTHESIS is semantically defined to be an <i>opening
7047      * parenthesis</i>.  This will appear as a "(" in text that is
7048      * left-to-right but as a ")" in text that is right-to-left.
7049      *
7050      * <p><b>Note:</b> This method cannot handle <a
7051      * href="#supplementary"> supplementary characters</a>. To support
7052      * all Unicode characters, including supplementary characters, use
7053      * the {@link #isMirrored(int)} method.
7054      *
7055      * @param  ch {@code char} for which the mirrored property is requested
7056      * @return {@code true} if the char is mirrored, {@code false}
7057      *         if the {@code char} is not mirrored or is not defined.
7058      * @since 1.4
7059      */
7060     public static boolean isMirrored(char ch) {
7061         return isMirrored((int)ch);
7062     }
7063 
7064     /**
7065      * Determines whether the specified character (Unicode code point)
7066      * is mirrored according to the Unicode specification.  Mirrored
7067      * characters should have their glyphs horizontally mirrored when
7068      * displayed in text that is right-to-left.  For example,
7069      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
7070      * defined to be an <i>opening parenthesis</i>.  This will appear
7071      * as a "(" in text that is left-to-right but as a ")" in text
7072      * that is right-to-left.
7073      *
7074      * @param   codePoint the character (Unicode code point) to be tested.
7075      * @return  {@code true} if the character is mirrored, {@code false}
7076      *          if the character is not mirrored or is not defined.
7077      * @since   1.5
7078      */
7079     public static boolean isMirrored(int codePoint) {
7080         return CharacterData.of(codePoint).isMirrored(codePoint);
7081     }
7082 
7083     /**
7084      * Compares two {@code Character} objects numerically.
7085      *
7086      * @param   anotherCharacter   the {@code Character} to be compared.
7087 
7088      * @return  the value {@code 0} if the argument {@code Character}
7089      *          is equal to this {@code Character}; a value less than
7090      *          {@code 0} if this {@code Character} is numerically less
7091      *          than the {@code Character} argument; and a value greater than
7092      *          {@code 0} if this {@code Character} is numerically greater
7093      *          than the {@code Character} argument (unsigned comparison).
7094      *          Note that this is strictly a numerical comparison; it is not
7095      *          locale-dependent.
7096      * @since   1.2
7097      */
7098     public int compareTo(Character anotherCharacter) {
7099         return compare(this.value, anotherCharacter.value);
7100     }
7101 
7102     /**
7103      * Compares two {@code char} values numerically.
7104      * The value returned is identical to what would be returned by:
7105      * <pre>
7106      *    Character.valueOf(x).compareTo(Character.valueOf(y))
7107      * </pre>
7108      *
7109      * @param  x the first {@code char} to compare
7110      * @param  y the second {@code char} to compare
7111      * @return the value {@code 0} if {@code x == y};
7112      *         a value less than {@code 0} if {@code x < y}; and
7113      *         a value greater than {@code 0} if {@code x > y}
7114      * @since 1.7
7115      */
7116     public static int compare(char x, char y) {
7117         return x - y;
7118     }
7119 
7120     /**
7121      * Converts the character (Unicode code point) argument to uppercase using
7122      * information from the UnicodeData file.
7123      *
7124      * @param   codePoint   the character (Unicode code point) to be converted.
7125      * @return  either the uppercase equivalent of the character, if
7126      *          any, or an error flag ({@code Character.ERROR})
7127      *          that indicates that a 1:M {@code char} mapping exists.
7128      * @see     Character#isLowerCase(char)
7129      * @see     Character#isUpperCase(char)
7130      * @see     Character#toLowerCase(char)
7131      * @see     Character#toTitleCase(char)
7132      * @since 1.4
7133      */
7134     static int toUpperCaseEx(int codePoint) {
7135         assert isValidCodePoint(codePoint);
7136         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
7137     }
7138 
7139     /**
7140      * Converts the character (Unicode code point) argument to uppercase using case
7141      * mapping information from the SpecialCasing file in the Unicode
7142      * specification. If a character has no explicit uppercase
7143      * mapping, then the {@code char} itself is returned in the
7144      * {@code char[]}.
7145      *
7146      * @param   codePoint   the character (Unicode code point) to be converted.
7147      * @return a {@code char[]} with the uppercased character.
7148      * @since 1.4
7149      */
7150     static char[] toUpperCaseCharArray(int codePoint) {
7151         // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
7152         assert isBmpCodePoint(codePoint);
7153         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
7154     }
7155 
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
7163 
    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form. Computed as {@link #SIZE} divided by {@link Byte#SIZE}.
     *
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
7171 
7172     /**
7173      * Returns the value obtained by reversing the order of the bytes in the
7174      * specified <tt>char</tt> value.
7175      *
7176      * @param ch The {@code char} of which to reverse the byte order.
7177      * @return the value obtained by reversing (or, equivalently, swapping)
7178      *     the bytes in the specified <tt>char</tt> value.
7179      * @since 1.5
7180      */
7181     public static char reverseBytes(char ch) {
7182         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
7183     }
7184 
7185     /**
7186      * Returns the Unicode name of the specified character
7187      * {@code codePoint}, or null if the code point is
7188      * {@link #UNASSIGNED unassigned}.
7189      * <p>
7190      * Note: if the specified character is not assigned a name by
7191      * the <i>UnicodeData</i> file (part of the Unicode Character
7192      * Database maintained by the Unicode Consortium), the returned
7193      * name is the same as the result of expression.
7194      *
7195      * <blockquote>{@code
7196      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
7197      *     + " "
7198      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7199      *
7200      * }</blockquote>
7201      *
7202      * @param  codePoint the character (Unicode code point)
7203      *
7204      * @return the Unicode name of the specified character, or null if
7205      *         the code point is unassigned.
7206      *
7207      * @exception IllegalArgumentException if the specified
7208      *            {@code codePoint} is not a valid Unicode
7209      *            code point.
7210      *
7211      * @since 1.7
7212      */
7213     public static String getName(int codePoint) {
7214         if (!isValidCodePoint(codePoint)) {
7215             throw new IllegalArgumentException();
7216         }
7217         String name = CharacterName.get(codePoint);
7218         if (name != null)
7219             return name;
7220         if (getType(codePoint) == UNASSIGNED)
7221             return null;
7222         UnicodeBlock block = UnicodeBlock.of(codePoint);
7223         if (block != null)
7224             return block.toString().replace('_', ' ') + " "
7225                    + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7226         // should never come here
7227         return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7228     }
7229 }