1 /*
   2  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Map;
  30 import java.util.HashMap;
  31 import java.util.Locale;
  32 
  33 /**
  34  * The {@code Character} class wraps a value of the primitive
  35  * type {@code char} in an object. An object of type
  36  * {@code Character} contains a single field whose type is
  37  * {@code char}.
  38  * <p>
  39  * In addition, this class provides several methods for determining
  40  * a character's category (lowercase letter, digit, etc.) and for converting
  41  * characters from uppercase to lowercase and vice versa.
  42  * <p>
  43  * Character information is based on the Unicode Standard, version 6.0.0.
  44  * <p>
  45  * The methods and data of class {@code Character} are defined by
  46  * the information in the <i>UnicodeData</i> file that is part of the
  47  * Unicode Character Database maintained by the Unicode
  48  * Consortium. This file specifies various properties including name
  49  * and general category for every defined Unicode code point or
  50  * character range.
  51  * <p>
  52  * The file and its description are available from the Unicode Consortium at:
  53  * <ul>
  54  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
  55  * </ul>
  56  *
  57  * <h4><a name="unicode">Unicode Character Representations</a></h4>
  58  *
  59  * <p>The {@code char} data type (and therefore the value that a
  60  * {@code Character} object encapsulates) is based on the
  61  * original Unicode specification, which defined characters as
  62  * fixed-width 16-bit entities. The Unicode standard has since been
  63  * changed to allow for characters whose representation requires more
  64  * than 16 bits.  The range of legal <em>code point</em>s is now
  65  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
  66  * (Refer to the <a
  67  * href="http://www.unicode.org/reports/tr27/#notation"><i>
  68  * definition</i></a> of the U+<i>n</i> notation in the Unicode
  69  * standard.)
  70  *
  71  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
  72  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
  73  * <a name="supplementary">Characters</a> whose code points are greater
  74  * than U+FFFF are called <em>supplementary character</em>s.  The Java
  75  * platform uses the UTF-16 representation in {@code char} arrays and
  76  * in the {@code String} and {@code StringBuffer} classes. In
  77  * this representation, supplementary characters are represented as a pair
  78  * of {@code char} values, the first from the <em>high-surrogates</em>
  79  * range, (&#92;uD800-&#92;uDBFF), the second from the
  80  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
  81  *
  82  * <p>A {@code char} value, therefore, represents Basic
  83  * Multilingual Plane (BMP) code points, including the surrogate
  84  * code points, or code units of the UTF-16 encoding. An
  85  * {@code int} value represents all Unicode code points,
  86  * including supplementary code points. The lower (least significant)
  87  * 21 bits of {@code int} are used to represent Unicode code
  88  * points and the upper (most significant) 11 bits must be zero.
  89  * Unless otherwise specified, the behavior with respect to
  90  * supplementary characters and surrogate {@code char} values is
  91  * as follows:
  92  *
  93  * <ul>
  94  * <li>The methods that only accept a {@code char} value cannot support
  95  * supplementary characters. They treat {@code char} values from the
  96  * surrogate ranges as undefined characters. For example,
  97  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
  98  * this specific value if followed by any low-surrogate value in a string
  99  * would represent a letter.
 100  *
 101  * <li>The methods that accept an {@code int} value support all
 102  * Unicode characters, including supplementary characters. For
 103  * example, {@code Character.isLetter(0x2F81A)} returns
 104  * {@code true} because the code point value represents a letter
 105  * (a CJK ideograph).
 106  * </ul>
 107  *
 108  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
 109  * used for character values in the range between U+0000 and U+10FFFF,
 110  * and <em>Unicode code unit</em> is used for 16-bit
 111  * {@code char} values that are code units of the <em>UTF-16</em>
 112  * encoding. For more information on Unicode terminology, refer to the
 113  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
 114  *
 115  * @author  Lee Boynton
 116  * @author  Guy Steele
 117  * @author  Akira Tanaka
 118  * @author  Martin Buchholz
 119  * @author  Ulf Zibis
 120  * @since   1.0
 121  */
 122 public final
 123 class Character implements java.io.Serializable, Comparable<Character> {
 124     /**
 125      * The minimum radix available for conversion to and from strings.
 126      * The constant value of this field is the smallest value permitted
 127      * for the radix argument in radix-conversion methods such as the
 128      * {@code digit} method, the {@code forDigit} method, and the
 129      * {@code toString} method of class {@code Integer}.
 130      *
 131      * @see     Character#digit(char, int)
 132      * @see     Character#forDigit(int, int)
 133      * @see     Integer#toString(int, int)
 134      * @see     Integer#valueOf(String)
 135      */
 136     public static final int MIN_RADIX = 2;
 137 
 138     /**
 139      * The maximum radix available for conversion to and from strings.
 140      * The constant value of this field is the largest value permitted
 141      * for the radix argument in radix-conversion methods such as the
 142      * {@code digit} method, the {@code forDigit} method, and the
 143      * {@code toString} method of class {@code Integer}.
 144      *
 145      * @see     Character#digit(char, int)
 146      * @see     Character#forDigit(int, int)
 147      * @see     Integer#toString(int, int)
 148      * @see     Integer#valueOf(String)
 149      */
 150     public static final int MAX_RADIX = 36;
 151 
 152     /**
 153      * The constant value of this field is the smallest value of type
 154      * {@code char}, {@code '\u005Cu0000'}.
 155      *
 156      * @since   1.0.2
 157      */
 158     public static final char MIN_VALUE = '\u0000';
 159 
 160     /**
 161      * The constant value of this field is the largest value of type
 162      * {@code char}, {@code '\u005CuFFFF'}.
 163      *
 164      * @since   1.0.2
 165      */
 166     public static final char MAX_VALUE = '\uFFFF';
 167 
 168     /**
 169      * The {@code Class} instance representing the primitive type
 170      * {@code char}.
 171      *
 172      * @since   1.1
 173      */
 174     @SuppressWarnings("unchecked")
 175     public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
 176 
 177     /*
 178      * Normative general types
 179      */
 180 
 181     /*
 182      * General character types
 183      */
 184 
 185     /**
 186      * General category "Cn" in the Unicode specification.
 187      * @since   1.1
 188      */
 189     public static final byte UNASSIGNED = 0;
 190 
 191     /**
 192      * General category "Lu" in the Unicode specification.
 193      * @since   1.1
 194      */
 195     public static final byte UPPERCASE_LETTER = 1;
 196 
 197     /**
 198      * General category "Ll" in the Unicode specification.
 199      * @since   1.1
 200      */
 201     public static final byte LOWERCASE_LETTER = 2;
 202 
 203     /**
 204      * General category "Lt" in the Unicode specification.
 205      * @since   1.1
 206      */
 207     public static final byte TITLECASE_LETTER = 3;
 208 
 209     /**
 210      * General category "Lm" in the Unicode specification.
 211      * @since   1.1
 212      */
 213     public static final byte MODIFIER_LETTER = 4;
 214 
 215     /**
 216      * General category "Lo" in the Unicode specification.
 217      * @since   1.1
 218      */
 219     public static final byte OTHER_LETTER = 5;
 220 
 221     /**
 222      * General category "Mn" in the Unicode specification.
 223      * @since   1.1
 224      */
 225     public static final byte NON_SPACING_MARK = 6;
 226 
 227     /**
 228      * General category "Me" in the Unicode specification.
 229      * @since   1.1
 230      */
 231     public static final byte ENCLOSING_MARK = 7;
 232 
 233     /**
 234      * General category "Mc" in the Unicode specification.
 235      * @since   1.1
 236      */
 237     public static final byte COMBINING_SPACING_MARK = 8;
 238 
 239     /**
 240      * General category "Nd" in the Unicode specification.
 241      * @since   1.1
 242      */
 243     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
 244 
 245     /**
 246      * General category "Nl" in the Unicode specification.
 247      * @since   1.1
 248      */
 249     public static final byte LETTER_NUMBER = 10;
 250 
 251     /**
 252      * General category "No" in the Unicode specification.
 253      * @since   1.1
 254      */
 255     public static final byte OTHER_NUMBER = 11;
 256 
 257     /**
 258      * General category "Zs" in the Unicode specification.
 259      * @since   1.1
 260      */
 261     public static final byte SPACE_SEPARATOR = 12;
 262 
 263     /**
 264      * General category "Zl" in the Unicode specification.
 265      * @since   1.1
 266      */
 267     public static final byte LINE_SEPARATOR = 13;
 268 
 269     /**
 270      * General category "Zp" in the Unicode specification.
 271      * @since   1.1
 272      */
 273     public static final byte PARAGRAPH_SEPARATOR = 14;
 274 
 275     /**
 276      * General category "Cc" in the Unicode specification.
 277      * @since   1.1
 278      */
 279     public static final byte CONTROL = 15;
 280 
 281     /**
 282      * General category "Cf" in the Unicode specification.
 283      * @since   1.1
 284      */
 285     public static final byte FORMAT = 16;
 286 
 287     /**
 288      * General category "Co" in the Unicode specification.
 289      * @since   1.1
 290      */
 291     public static final byte PRIVATE_USE = 18;
 292 
 293     /**
 294      * General category "Cs" in the Unicode specification.
 295      * @since   1.1
 296      */
 297     public static final byte SURROGATE = 19;
 298 
 299     /**
 300      * General category "Pd" in the Unicode specification.
 301      * @since   1.1
 302      */
 303     public static final byte DASH_PUNCTUATION = 20;
 304 
 305     /**
 306      * General category "Ps" in the Unicode specification.
 307      * @since   1.1
 308      */
 309     public static final byte START_PUNCTUATION = 21;
 310 
 311     /**
 312      * General category "Pe" in the Unicode specification.
 313      * @since   1.1
 314      */
 315     public static final byte END_PUNCTUATION = 22;
 316 
 317     /**
 318      * General category "Pc" in the Unicode specification.
 319      * @since   1.1
 320      */
 321     public static final byte CONNECTOR_PUNCTUATION = 23;
 322 
 323     /**
 324      * General category "Po" in the Unicode specification.
 325      * @since   1.1
 326      */
 327     public static final byte OTHER_PUNCTUATION = 24;
 328 
 329     /**
 330      * General category "Sm" in the Unicode specification.
 331      * @since   1.1
 332      */
 333     public static final byte MATH_SYMBOL = 25;
 334 
 335     /**
 336      * General category "Sc" in the Unicode specification.
 337      * @since   1.1
 338      */
 339     public static final byte CURRENCY_SYMBOL = 26;
 340 
 341     /**
 342      * General category "Sk" in the Unicode specification.
 343      * @since   1.1
 344      */
 345     public static final byte MODIFIER_SYMBOL = 27;
 346 
 347     /**
 348      * General category "So" in the Unicode specification.
 349      * @since   1.1
 350      */
 351     public static final byte OTHER_SYMBOL = 28;
 352 
 353     /**
 354      * General category "Pi" in the Unicode specification.
 355      * @since   1.4
 356      */
 357     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
 358 
 359     /**
 360      * General category "Pf" in the Unicode specification.
 361      * @since   1.4
 362      */
 363     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
 364 
 365     /**
 366      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
 367      */
 368     static final int ERROR = 0xFFFFFFFF;
 369 
 370 
 371     /**
 372      * Undefined bidirectional character type. Undefined {@code char}
 373      * values have undefined directionality in the Unicode specification.
 374      * @since 1.4
 375      */
 376     public static final byte DIRECTIONALITY_UNDEFINED = -1;
 377 
 378     /**
 379      * Strong bidirectional character type "L" in the Unicode specification.
 380      * @since 1.4
 381      */
 382     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
 383 
 384     /**
 385      * Strong bidirectional character type "R" in the Unicode specification.
 386      * @since 1.4
 387      */
 388     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
 389 
 390     /**
 391      * Strong bidirectional character type "AL" in the Unicode specification.
 392      * @since 1.4
 393      */
 394     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
 395 
 396     /**
 397      * Weak bidirectional character type "EN" in the Unicode specification.
 398      * @since 1.4
 399      */
 400     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
 401 
 402     /**
 403      * Weak bidirectional character type "ES" in the Unicode specification.
 404      * @since 1.4
 405      */
 406     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
 407 
 408     /**
 409      * Weak bidirectional character type "ET" in the Unicode specification.
 410      * @since 1.4
 411      */
 412     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
 413 
 414     /**
 415      * Weak bidirectional character type "AN" in the Unicode specification.
 416      * @since 1.4
 417      */
 418     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
 419 
 420     /**
 421      * Weak bidirectional character type "CS" in the Unicode specification.
 422      * @since 1.4
 423      */
 424     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
 425 
 426     /**
 427      * Weak bidirectional character type "NSM" in the Unicode specification.
 428      * @since 1.4
 429      */
 430     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
 431 
 432     /**
 433      * Weak bidirectional character type "BN" in the Unicode specification.
 434      * @since 1.4
 435      */
 436     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
 437 
 438     /**
 439      * Neutral bidirectional character type "B" in the Unicode specification.
 440      * @since 1.4
 441      */
 442     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
 443 
 444     /**
 445      * Neutral bidirectional character type "S" in the Unicode specification.
 446      * @since 1.4
 447      */
 448     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
 449 
 450     /**
 451      * Neutral bidirectional character type "WS" in the Unicode specification.
 452      * @since 1.4
 453      */
 454     public static final byte DIRECTIONALITY_WHITESPACE = 12;
 455 
 456     /**
 457      * Neutral bidirectional character type "ON" in the Unicode specification.
 458      * @since 1.4
 459      */
 460     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
 461 
 462     /**
 463      * Strong bidirectional character type "LRE" in the Unicode specification.
 464      * @since 1.4
 465      */
 466     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
 467 
 468     /**
 469      * Strong bidirectional character type "LRO" in the Unicode specification.
 470      * @since 1.4
 471      */
 472     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
 473 
 474     /**
 475      * Strong bidirectional character type "RLE" in the Unicode specification.
 476      * @since 1.4
 477      */
 478     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
 479 
 480     /**
 481      * Strong bidirectional character type "RLO" in the Unicode specification.
 482      * @since 1.4
 483      */
 484     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
 485 
 486     /**
 487      * Weak bidirectional character type "PDF" in the Unicode specification.
 488      * @since 1.4
 489      */
 490     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 491 
 492     /**
 493      * The minimum value of a
 494      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 495      * Unicode high-surrogate code unit</a>
 496      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
 497      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 498      *
 499      * @since 1.5
 500      */
 501     public static final char MIN_HIGH_SURROGATE = '\uD800';
 502 
 503     /**
 504      * The maximum value of a
 505      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
 506      * Unicode high-surrogate code unit</a>
 507      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
 508      * A high-surrogate is also known as a <i>leading-surrogate</i>.
 509      *
 510      * @since 1.5
 511      */
 512     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
 513 
 514     /**
 515      * The minimum value of a
 516      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 517      * Unicode low-surrogate code unit</a>
 518      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
 519      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 520      *
 521      * @since 1.5
 522      */
 523     public static final char MIN_LOW_SURROGATE  = '\uDC00';
 524 
 525     /**
 526      * The maximum value of a
 527      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
 528      * Unicode low-surrogate code unit</a>
 529      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 530      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
 531      *
 532      * @since 1.5
 533      */
 534     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
 535 
 536     /**
 537      * The minimum value of a Unicode surrogate code unit in the
 538      * UTF-16 encoding, constant {@code '\u005CuD800'}.
 539      *
 540      * @since 1.5
 541      */
 542     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
 543 
 544     /**
 545      * The maximum value of a Unicode surrogate code unit in the
 546      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
 547      *
 548      * @since 1.5
 549      */
 550     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
 551 
 552     /**
 553      * The minimum value of a
 554      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
 555      * Unicode supplementary code point</a>, constant {@code U+10000}.
 556      *
 557      * @since 1.5
 558      */
 559     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
 560 
 561     /**
 562      * The minimum value of a
 563      * <a href="http://www.unicode.org/glossary/#code_point">
 564      * Unicode code point</a>, constant {@code U+0000}.
 565      *
 566      * @since 1.5
 567      */
 568     public static final int MIN_CODE_POINT = 0x000000;
 569 
 570     /**
 571      * The maximum value of a
 572      * <a href="http://www.unicode.org/glossary/#code_point">
 573      * Unicode code point</a>, constant {@code U+10FFFF}.
 574      *
 575      * @since 1.5
 576      */
 577     public static final int MAX_CODE_POINT = 0X10FFFF;
 578 
 579 
 580     /**
 581      * Instances of this class represent particular subsets of the Unicode
 582      * character set.  The only family of subsets defined in the
 583      * {@code Character} class is {@link Character.UnicodeBlock}.
 584      * Other portions of the Java API may define other subsets for their
 585      * own purposes.
 586      *
 587      * @since 1.2
 588      */
 589     public static class Subset  {
 590 
 591         private String name;
 592 
 593         /**
 594          * Constructs a new {@code Subset} instance.
 595          *
 596          * @param  name  The name of this subset
 597          * @exception NullPointerException if name is {@code null}
 598          */
 599         protected Subset(String name) {
 600             if (name == null) {
 601                 throw new NullPointerException("name");
 602             }
 603             this.name = name;
 604         }
 605 
 606         /**
 607          * Compares two {@code Subset} objects for equality.
 608          * This method returns {@code true} if and only if
 609          * {@code this} and the argument refer to the same
 610          * object; since this method is {@code final}, this
 611          * guarantee holds for all subclasses.
 612          */
 613         public final boolean equals(Object obj) {
 614             return (this == obj);
 615         }
 616 
 617         /**
 618          * Returns the standard hash code as defined by the
 619          * {@link Object#hashCode} method.  This method
 620          * is {@code final} in order to ensure that the
 621          * {@code equals} and {@code hashCode} methods will
 622          * be consistent in all subclasses.
 623          */
 624         public final int hashCode() {
 625             return super.hashCode();
 626         }
 627 
 628         /**
 629          * Returns the name of this subset.
 630          */
 631         public final String toString() {
 632             return name;
 633         }
 634     }
 635 
 636     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
 637     // for the latest specification of Unicode Blocks.
 638 
 639     /**
 640      * A family of character subsets representing the character blocks in the
 641      * Unicode specification. Character blocks generally define characters
 642      * used for a specific script or purpose. A character is contained by
 643      * at most one Unicode block.
 644      *
 645      * @since 1.2
 646      */
 647     public static final class UnicodeBlock extends Subset {
 648 
 649         private static Map<String, UnicodeBlock> map = new HashMap<>(256);
 650 
 651         /**
 652          * Creates a UnicodeBlock with the given identifier name.
 653          * This name must be the same as the block identifier.
 654          */
 655         private UnicodeBlock(String idName) {
 656             super(idName);
 657             map.put(idName, this);
 658         }
 659 
 660         /**
 661          * Creates a UnicodeBlock with the given identifier name and
 662          * alias name.
 663          */
 664         private UnicodeBlock(String idName, String alias) {
 665             this(idName);
 666             map.put(alias, this);
 667         }
 668 
 669         /**
 670          * Creates a UnicodeBlock with the given identifier name and
 671          * alias names.
 672          */
 673         private UnicodeBlock(String idName, String... aliases) {
 674             this(idName);
 675             for (String alias : aliases)
 676                 map.put(alias, this);
 677         }
 678 
 679         /**
 680          * Constant for the "Basic Latin" Unicode character block.
 681          * @since 1.2
 682          */
 683         public static final UnicodeBlock  BASIC_LATIN =
 684             new UnicodeBlock("BASIC_LATIN",
 685                              "BASIC LATIN",
 686                              "BASICLATIN");
 687 
 688         /**
 689          * Constant for the "Latin-1 Supplement" Unicode character block.
 690          * @since 1.2
 691          */
 692         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
 693             new UnicodeBlock("LATIN_1_SUPPLEMENT",
 694                              "LATIN-1 SUPPLEMENT",
 695                              "LATIN-1SUPPLEMENT");
 696 
 697         /**
 698          * Constant for the "Latin Extended-A" Unicode character block.
 699          * @since 1.2
 700          */
 701         public static final UnicodeBlock LATIN_EXTENDED_A =
 702             new UnicodeBlock("LATIN_EXTENDED_A",
 703                              "LATIN EXTENDED-A",
 704                              "LATINEXTENDED-A");
 705 
 706         /**
 707          * Constant for the "Latin Extended-B" Unicode character block.
 708          * @since 1.2
 709          */
 710         public static final UnicodeBlock LATIN_EXTENDED_B =
 711             new UnicodeBlock("LATIN_EXTENDED_B",
 712                              "LATIN EXTENDED-B",
 713                              "LATINEXTENDED-B");
 714 
 715         /**
 716          * Constant for the "IPA Extensions" Unicode character block.
 717          * @since 1.2
 718          */
 719         public static final UnicodeBlock IPA_EXTENSIONS =
 720             new UnicodeBlock("IPA_EXTENSIONS",
 721                              "IPA EXTENSIONS",
 722                              "IPAEXTENSIONS");
 723 
 724         /**
 725          * Constant for the "Spacing Modifier Letters" Unicode character block.
 726          * @since 1.2
 727          */
 728         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
 729             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
 730                              "SPACING MODIFIER LETTERS",
 731                              "SPACINGMODIFIERLETTERS");
 732 
 733         /**
 734          * Constant for the "Combining Diacritical Marks" Unicode character block.
 735          * @since 1.2
 736          */
 737         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
 738             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
 739                              "COMBINING DIACRITICAL MARKS",
 740                              "COMBININGDIACRITICALMARKS");
 741 
 742         /**
 743          * Constant for the "Greek and Coptic" Unicode character block.
 744          * <p>
 745          * This block was previously known as the "Greek" block.
 746          *
 747          * @since 1.2
 748          */
 749         public static final UnicodeBlock GREEK =
 750             new UnicodeBlock("GREEK",
 751                              "GREEK AND COPTIC",
 752                              "GREEKANDCOPTIC");
 753 
 754         /**
 755          * Constant for the "Cyrillic" Unicode character block.
 756          * @since 1.2
 757          */
 758         public static final UnicodeBlock CYRILLIC =
 759             new UnicodeBlock("CYRILLIC");
 760 
 761         /**
 762          * Constant for the "Armenian" Unicode character block.
 763          * @since 1.2
 764          */
 765         public static final UnicodeBlock ARMENIAN =
 766             new UnicodeBlock("ARMENIAN");
 767 
 768         /**
 769          * Constant for the "Hebrew" Unicode character block.
 770          * @since 1.2
 771          */
 772         public static final UnicodeBlock HEBREW =
 773             new UnicodeBlock("HEBREW");
 774 
 775         /**
 776          * Constant for the "Arabic" Unicode character block.
 777          * @since 1.2
 778          */
 779         public static final UnicodeBlock ARABIC =
 780             new UnicodeBlock("ARABIC");
 781 
 782         /**
 783          * Constant for the "Devanagari" Unicode character block.
 784          * @since 1.2
 785          */
 786         public static final UnicodeBlock DEVANAGARI =
 787             new UnicodeBlock("DEVANAGARI");
 788 
 789         /**
 790          * Constant for the "Bengali" Unicode character block.
 791          * @since 1.2
 792          */
 793         public static final UnicodeBlock BENGALI =
 794             new UnicodeBlock("BENGALI");
 795 
 796         /**
 797          * Constant for the "Gurmukhi" Unicode character block.
 798          * @since 1.2
 799          */
 800         public static final UnicodeBlock GURMUKHI =
 801             new UnicodeBlock("GURMUKHI");
 802 
 803         /**
 804          * Constant for the "Gujarati" Unicode character block.
 805          * @since 1.2
 806          */
 807         public static final UnicodeBlock GUJARATI =
 808             new UnicodeBlock("GUJARATI");
 809 
 810         /**
 811          * Constant for the "Oriya" Unicode character block.
 812          * @since 1.2
 813          */
 814         public static final UnicodeBlock ORIYA =
 815             new UnicodeBlock("ORIYA");
 816 
 817         /**
 818          * Constant for the "Tamil" Unicode character block.
 819          * @since 1.2
 820          */
 821         public static final UnicodeBlock TAMIL =
 822             new UnicodeBlock("TAMIL");
 823 
 824         /**
 825          * Constant for the "Telugu" Unicode character block.
 826          * @since 1.2
 827          */
 828         public static final UnicodeBlock TELUGU =
 829             new UnicodeBlock("TELUGU");
 830 
 831         /**
 832          * Constant for the "Kannada" Unicode character block.
 833          * @since 1.2
 834          */
 835         public static final UnicodeBlock KANNADA =
 836             new UnicodeBlock("KANNADA");
 837 
 838         /**
 839          * Constant for the "Malayalam" Unicode character block.
 840          * @since 1.2
 841          */
 842         public static final UnicodeBlock MALAYALAM =
 843             new UnicodeBlock("MALAYALAM");
 844 
 845         /**
 846          * Constant for the "Thai" Unicode character block.
 847          * @since 1.2
 848          */
 849         public static final UnicodeBlock THAI =
 850             new UnicodeBlock("THAI");
 851 
 852         /**
 853          * Constant for the "Lao" Unicode character block.
 854          * @since 1.2
 855          */
 856         public static final UnicodeBlock LAO =
 857             new UnicodeBlock("LAO");
 858 
 859         /**
 860          * Constant for the "Tibetan" Unicode character block.
 861          * @since 1.2
 862          */
 863         public static final UnicodeBlock TIBETAN =
 864             new UnicodeBlock("TIBETAN");
 865 
 866         /**
 867          * Constant for the "Georgian" Unicode character block.
 868          * @since 1.2
 869          */
 870         public static final UnicodeBlock GEORGIAN =
 871             new UnicodeBlock("GEORGIAN");
 872 
 873         /**
 874          * Constant for the "Hangul Jamo" Unicode character block.
 875          * @since 1.2
 876          */
 877         public static final UnicodeBlock HANGUL_JAMO =
 878             new UnicodeBlock("HANGUL_JAMO",   // spelled like the field name
 879                              "HANGUL JAMO",   // same name written with spaces
 880                              "HANGULJAMO");   // same name with the spaces removed
 881 
 882         /**
 883          * Constant for the "Latin Extended Additional" Unicode character block.
 884          * @since 1.2
 885          */
 886         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
 887             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
 888                              "LATIN EXTENDED ADDITIONAL",
 889                              "LATINEXTENDEDADDITIONAL");
 890 
 891         /**
 892          * Constant for the "Greek Extended" Unicode character block.
 893          * @since 1.2
 894          */
 895         public static final UnicodeBlock GREEK_EXTENDED =
 896             new UnicodeBlock("GREEK_EXTENDED",
 897                              "GREEK EXTENDED",
 898                              "GREEKEXTENDED");
 899 
 900         /**
 901          * Constant for the "General Punctuation" Unicode character block.
 902          * @since 1.2
 903          */
 904         public static final UnicodeBlock GENERAL_PUNCTUATION =
 905             new UnicodeBlock("GENERAL_PUNCTUATION",
 906                              "GENERAL PUNCTUATION",
 907                              "GENERALPUNCTUATION");
 908 
 909         /**
 910          * Constant for the "Superscripts and Subscripts" Unicode character
 911          * block.
 912          * @since 1.2
 913          */
 914         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
 915             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
 916                              "SUPERSCRIPTS AND SUBSCRIPTS",
 917                              "SUPERSCRIPTSANDSUBSCRIPTS");
 918 
 919         /**
 920          * Constant for the "Currency Symbols" Unicode character block.
 921          * @since 1.2
 922          */
 923         public static final UnicodeBlock CURRENCY_SYMBOLS =
 924             new UnicodeBlock("CURRENCY_SYMBOLS",
 925                              "CURRENCY SYMBOLS",
 926                              "CURRENCYSYMBOLS");
 927 
 928         /**
 929          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
 930          * character block.
 931          * <p>
 932          * This block was previously known as "Combining Marks for Symbols".
 933          * @since 1.2
 934          */
 935         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
 936             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
 937                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",  // current block name
 938                              "COMBININGDIACRITICALMARKSFORSYMBOLS",
 939                              "COMBINING MARKS FOR SYMBOLS",              // previous block name
 940                              "COMBININGMARKSFORSYMBOLS");
 941 
 942         /**
 943          * Constant for the "Letterlike Symbols" Unicode character block.
 944          * @since 1.2
 945          */
 946         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
 947             new UnicodeBlock("LETTERLIKE_SYMBOLS",
 948                              "LETTERLIKE SYMBOLS",
 949                              "LETTERLIKESYMBOLS");
 950 
 951         /**
 952          * Constant for the "Number Forms" Unicode character block.
 953          * @since 1.2
 954          */
 955         public static final UnicodeBlock NUMBER_FORMS =
 956             new UnicodeBlock("NUMBER_FORMS",
 957                              "NUMBER FORMS",
 958                              "NUMBERFORMS");
 959 
 960         /**
 961          * Constant for the "Arrows" Unicode character block.
 962          * @since 1.2
 963          */
 964         public static final UnicodeBlock ARROWS =
 965             new UnicodeBlock("ARROWS");
 966 
 967         /**
 968          * Constant for the "Mathematical Operators" Unicode character block.
 969          * @since 1.2
 970          */
 971         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
 972             new UnicodeBlock("MATHEMATICAL_OPERATORS",
 973                              "MATHEMATICAL OPERATORS",
 974                              "MATHEMATICALOPERATORS");
 975 
 976         /**
 977          * Constant for the "Miscellaneous Technical" Unicode character block.
 978          * @since 1.2
 979          */
 980         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
 981             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
 982                              "MISCELLANEOUS TECHNICAL",
 983                              "MISCELLANEOUSTECHNICAL");
 984 
 985         /**
 986          * Constant for the "Control Pictures" Unicode character block.
 987          * @since 1.2
 988          */
 989         public static final UnicodeBlock CONTROL_PICTURES =
 990             new UnicodeBlock("CONTROL_PICTURES",
 991                              "CONTROL PICTURES",
 992                              "CONTROLPICTURES");
 993 
 994         /**
 995          * Constant for the "Optical Character Recognition" Unicode character
              * block.
 996          * @since 1.2
 997          */
 998         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
 999             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1000                              "OPTICAL CHARACTER RECOGNITION",
1001                              "OPTICALCHARACTERRECOGNITION");
1002 
1003         /**
1004          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1005          * @since 1.2
1006          */
1007         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1008             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1009                              "ENCLOSED ALPHANUMERICS",
1010                              "ENCLOSEDALPHANUMERICS");
1011 
1012         /**
1013          * Constant for the "Box Drawing" Unicode character block.
1014          * @since 1.2
1015          */
1016         public static final UnicodeBlock BOX_DRAWING =
1017             new UnicodeBlock("BOX_DRAWING",
1018                              "BOX DRAWING",
1019                              "BOXDRAWING");
1020 
1021         /**
1022          * Constant for the "Block Elements" Unicode character block.
1023          * @since 1.2
1024          */
1025         public static final UnicodeBlock BLOCK_ELEMENTS =
1026             new UnicodeBlock("BLOCK_ELEMENTS",
1027                              "BLOCK ELEMENTS",
1028                              "BLOCKELEMENTS");
1029 
1030         /**
1031          * Constant for the "Geometric Shapes" Unicode character block.
1032          * @since 1.2
1033          */
1034         public static final UnicodeBlock GEOMETRIC_SHAPES =
1035             new UnicodeBlock("GEOMETRIC_SHAPES",
1036                              "GEOMETRIC SHAPES",
1037                              "GEOMETRICSHAPES");
1038 
1039         /**
1040          * Constant for the "Miscellaneous Symbols" Unicode character block.
1041          * @since 1.2
1042          */
1043         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1044             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1045                              "MISCELLANEOUS SYMBOLS",
1046                              "MISCELLANEOUSSYMBOLS");
1047 
1048         /**
1049          * Constant for the "Dingbats" Unicode character block.
1050          * @since 1.2
1051          */
1052         public static final UnicodeBlock DINGBATS =
1053             new UnicodeBlock("DINGBATS");
1054 
1055         /**
1056          * Constant for the "CJK Symbols and Punctuation" Unicode character
              * block.
1057          * @since 1.2
1058          */
1059         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1060             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1061                              "CJK SYMBOLS AND PUNCTUATION",
1062                              "CJKSYMBOLSANDPUNCTUATION");
1063 
1064         /**
1065          * Constant for the "Hiragana" Unicode character block.
1066          * @since 1.2
1067          */
1068         public static final UnicodeBlock HIRAGANA =
1069             new UnicodeBlock("HIRAGANA");
1070 
1071         /**
1072          * Constant for the "Katakana" Unicode character block.
1073          * @since 1.2
1074          */
1075         public static final UnicodeBlock KATAKANA =
1076             new UnicodeBlock("KATAKANA");
1077 
1078         /**
1079          * Constant for the "Bopomofo" Unicode character block.
1080          * @since 1.2
1081          */
1082         public static final UnicodeBlock BOPOMOFO =
1083             new UnicodeBlock("BOPOMOFO");
1084 
1085         /**
1086          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1087          * @since 1.2
1088          */
1089         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1090             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1091                              "HANGUL COMPATIBILITY JAMO",
1092                              "HANGULCOMPATIBILITYJAMO");
1093 
1094         /**
1095          * Constant for the "Kanbun" Unicode character block.
1096          * @since 1.2
1097          */
1098         public static final UnicodeBlock KANBUN =
1099             new UnicodeBlock("KANBUN");
1100 
1101         /**
1102          * Constant for the "Enclosed CJK Letters and Months" Unicode character
              * block.
1103          * @since 1.2
1104          */
1105         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1106             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1107                              "ENCLOSED CJK LETTERS AND MONTHS",
1108                              "ENCLOSEDCJKLETTERSANDMONTHS");
1109 
1110         /**
1111          * Constant for the "CJK Compatibility" Unicode character block.
1112          * @since 1.2
1113          */
1114         public static final UnicodeBlock CJK_COMPATIBILITY =
1115             new UnicodeBlock("CJK_COMPATIBILITY",
1116                              "CJK COMPATIBILITY",
1117                              "CJKCOMPATIBILITY");
1118 
1119         /**
1120          * Constant for the "CJK Unified Ideographs" Unicode character block.
1121          * @since 1.2
1122          */
1123         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1124             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1125                              "CJK UNIFIED IDEOGRAPHS",
1126                              "CJKUNIFIEDIDEOGRAPHS");
1127 
1128         /**
1129          * Constant for the "Hangul Syllables" Unicode character block.
1130          * @since 1.2
1131          */
1132         public static final UnicodeBlock HANGUL_SYLLABLES =
1133             new UnicodeBlock("HANGUL_SYLLABLES",
1134                              "HANGUL SYLLABLES",
1135                              "HANGULSYLLABLES");
1136 
1137         /**
1138          * Constant for the "Private Use Area" Unicode character block.
1139          * @since 1.2
1140          */
1141         public static final UnicodeBlock PRIVATE_USE_AREA =
1142             new UnicodeBlock("PRIVATE_USE_AREA",
1143                              "PRIVATE USE AREA",
1144                              "PRIVATEUSEAREA");
1145 
1146         /**
1147          * Constant for the "CJK Compatibility Ideographs" Unicode character
1148          * block.
1149          * @since 1.2
1150          */
1151         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1152             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1153                              "CJK COMPATIBILITY IDEOGRAPHS",
1154                              "CJKCOMPATIBILITYIDEOGRAPHS");
1155 
1156         /**
1157          * Constant for the "Alphabetic Presentation Forms" Unicode character
              * block.
1158          * @since 1.2
1159          */
1160         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1161             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1162                              "ALPHABETIC PRESENTATION FORMS",
1163                              "ALPHABETICPRESENTATIONFORMS");
1164 
1165         /**
1166          * Constant for the "Arabic Presentation Forms-A" Unicode character
1167          * block.
1168          * @since 1.2
1169          */
1170         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1171             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1172                              "ARABIC PRESENTATION FORMS-A",   // hyphen of the block name is kept
1173                              "ARABICPRESENTATIONFORMS-A");    // spaces removed; hyphen still kept
1174 
1175         /**
1176          * Constant for the "Combining Half Marks" Unicode character block.
1177          * @since 1.2
1178          */
1179         public static final UnicodeBlock COMBINING_HALF_MARKS =
1180             new UnicodeBlock("COMBINING_HALF_MARKS",
1181                              "COMBINING HALF MARKS",
1182                              "COMBININGHALFMARKS");
1183 
1184         /**
1185          * Constant for the "CJK Compatibility Forms" Unicode character block.
1186          * @since 1.2
1187          */
1188         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1189             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1190                              "CJK COMPATIBILITY FORMS",
1191                              "CJKCOMPATIBILITYFORMS");
1192 
1193         /**
1194          * Constant for the "Small Form Variants" Unicode character block.
1195          * @since 1.2
1196          */
1197         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1198             new UnicodeBlock("SMALL_FORM_VARIANTS",
1199                              "SMALL FORM VARIANTS",
1200                              "SMALLFORMVARIANTS");
1201 
1202         /**
1203          * Constant for the "Arabic Presentation Forms-B" Unicode character
              * block.
1204          * @since 1.2
1205          */
1206         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1207             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1208                              "ARABIC PRESENTATION FORMS-B",
1209                              "ARABICPRESENTATIONFORMS-B");
1210 
1211         /**
1212          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1213          * block.
1214          * @since 1.2
1215          */
1216         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1217             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1218                              "HALFWIDTH AND FULLWIDTH FORMS",
1219                              "HALFWIDTHANDFULLWIDTHFORMS");
1220 
1221         /**
1222          * Constant for the "Specials" Unicode character block.
1223          * @since 1.2
1224          */
1225         public static final UnicodeBlock SPECIALS =
1226             new UnicodeBlock("SPECIALS");
1227 
1228         /**
              * Constant for the legacy "Surrogates Area", which has been
              * superseded by separate constants for the individual surrogate
              * blocks.
              *
1229          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1230          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1231          *             {@link #LOW_SURROGATES}. These new constants match
1232          *             the block definitions of the Unicode Standard.
1233          *             The {@link #of(char)} and {@link #of(int)} methods
1234          *             return the new constants, not SURROGATES_AREA.
1235          */
1236         @Deprecated
1237         public static final UnicodeBlock SURROGATES_AREA =
1238             new UnicodeBlock("SURROGATES_AREA");
1239 
1240         /**
1241          * Constant for the "Syriac" Unicode character block.
1242          * @since 1.4
1243          */
1244         public static final UnicodeBlock SYRIAC =
1245             new UnicodeBlock("SYRIAC");
1246 
1247         /**
1248          * Constant for the "Thaana" Unicode character block.
1249          * @since 1.4
1250          */
1251         public static final UnicodeBlock THAANA =
1252             new UnicodeBlock("THAANA");
1253 
1254         /**
1255          * Constant for the "Sinhala" Unicode character block.
1256          * @since 1.4
1257          */
1258         public static final UnicodeBlock SINHALA =
1259             new UnicodeBlock("SINHALA");
1260 
1261         /**
1262          * Constant for the "Myanmar" Unicode character block.
1263          * @since 1.4
1264          */
1265         public static final UnicodeBlock MYANMAR =
1266             new UnicodeBlock("MYANMAR");
1267 
1268         /**
1269          * Constant for the "Ethiopic" Unicode character block.
1270          * @since 1.4
1271          */
1272         public static final UnicodeBlock ETHIOPIC =
1273             new UnicodeBlock("ETHIOPIC");
1274 
1275         /**
1276          * Constant for the "Cherokee" Unicode character block.
1277          * @since 1.4
1278          */
1279         public static final UnicodeBlock CHEROKEE =
1280             new UnicodeBlock("CHEROKEE");
1281 
1282         /**
1283          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode
              * character block.
1284          * @since 1.4
1285          */
1286         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1287             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1288                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1289                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
1290 
1291         /**
1292          * Constant for the "Ogham" Unicode character block.
1293          * @since 1.4
1294          */
1295         public static final UnicodeBlock OGHAM =
1296             new UnicodeBlock("OGHAM");
1297 
1298         /**
1299          * Constant for the "Runic" Unicode character block.
1300          * @since 1.4
1301          */
1302         public static final UnicodeBlock RUNIC =
1303             new UnicodeBlock("RUNIC");
1304 
1305         /**
1306          * Constant for the "Khmer" Unicode character block.
1307          * @since 1.4
1308          */
1309         public static final UnicodeBlock KHMER =
1310             new UnicodeBlock("KHMER");
1311 
1312         /**
1313          * Constant for the "Mongolian" Unicode character block.
1314          * @since 1.4
1315          */
1316         public static final UnicodeBlock MONGOLIAN =
1317             new UnicodeBlock("MONGOLIAN");
1318 
1319         /**
1320          * Constant for the "Braille Patterns" Unicode character block.
1321          * @since 1.4
1322          */
1323         public static final UnicodeBlock BRAILLE_PATTERNS =
1324             new UnicodeBlock("BRAILLE_PATTERNS",
1325                              "BRAILLE PATTERNS",
1326                              "BRAILLEPATTERNS");
1327 
1328         /**
1329          * Constant for the "CJK Radicals Supplement" Unicode character block.
1330          * @since 1.4
1331          */
1332         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1333             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1334                              "CJK RADICALS SUPPLEMENT",
1335                              "CJKRADICALSSUPPLEMENT");
1336 
1337         /**
1338          * Constant for the "Kangxi Radicals" Unicode character block.
1339          * @since 1.4
1340          */
1341         public static final UnicodeBlock KANGXI_RADICALS =
1342             new UnicodeBlock("KANGXI_RADICALS",
1343                              "KANGXI RADICALS",
1344                              "KANGXIRADICALS");
1345 
1346         /**
1347          * Constant for the "Ideographic Description Characters" Unicode
              * character block.
1348          * @since 1.4
1349          */
1350         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1351             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1352                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1353                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1354 
1355         /**
1356          * Constant for the "Bopomofo Extended" Unicode character block.
1357          * @since 1.4
1358          */
1359         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1360             new UnicodeBlock("BOPOMOFO_EXTENDED",
1361                              "BOPOMOFO EXTENDED",
1362                              "BOPOMOFOEXTENDED");
1363 
1364         /**
1365          * Constant for the "CJK Unified Ideographs Extension A" Unicode
              * character block.
1366          * @since 1.4
1367          */
1368         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1369             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1370                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1371                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1372 
1373         /**
1374          * Constant for the "Yi Syllables" Unicode character block.
1375          * @since 1.4
1376          */
1377         public static final UnicodeBlock YI_SYLLABLES =
1378             new UnicodeBlock("YI_SYLLABLES",
1379                              "YI SYLLABLES",
1380                              "YISYLLABLES");
1381 
1382         /**
1383          * Constant for the "Yi Radicals" Unicode character block.
1384          * @since 1.4
1385          */
1386         public static final UnicodeBlock YI_RADICALS =
1387             new UnicodeBlock("YI_RADICALS",
1388                              "YI RADICALS",
1389                              "YIRADICALS");
1390 
1391         /**
1392          * Constant for the "Cyrillic Supplementary" Unicode character block.
1393          * @since 1.5
1394          */
1395         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1396             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1397                              "CYRILLIC SUPPLEMENTARY",
1398                              "CYRILLICSUPPLEMENTARY",
1399                              "CYRILLIC SUPPLEMENT",    // shorter spelling of the same name;
1400                              "CYRILLICSUPPLEMENT");    // NOTE(review): presumably the block's other official name -- confirm
1401 
1402         /**
1403          * Constant for the "Tagalog" Unicode character block.
1404          * @since 1.5
1405          */
1406         public static final UnicodeBlock TAGALOG =
1407             new UnicodeBlock("TAGALOG");
1408 
1409         /**
1410          * Constant for the "Hanunoo" Unicode character block.
1411          * @since 1.5
1412          */
1413         public static final UnicodeBlock HANUNOO =
1414             new UnicodeBlock("HANUNOO");
1415 
1416         /**
1417          * Constant for the "Buhid" Unicode character block.
1418          * @since 1.5
1419          */
1420         public static final UnicodeBlock BUHID =
1421             new UnicodeBlock("BUHID");
1422 
1423         /**
1424          * Constant for the "Tagbanwa" Unicode character block.
1425          * @since 1.5
1426          */
1427         public static final UnicodeBlock TAGBANWA =
1428             new UnicodeBlock("TAGBANWA");
1429 
1430         /**
1431          * Constant for the "Limbu" Unicode character block.
1432          * @since 1.5
1433          */
1434         public static final UnicodeBlock LIMBU =
1435             new UnicodeBlock("LIMBU");
1436 
1437         /**
1438          * Constant for the "Tai Le" Unicode character block.
1439          * @since 1.5
1440          */
1441         public static final UnicodeBlock TAI_LE =
1442             new UnicodeBlock("TAI_LE",
1443                              "TAI LE",
1444                              "TAILE");
1445 
1446         /**
1447          * Constant for the "Khmer Symbols" Unicode character block.
1448          * @since 1.5
1449          */
1450         public static final UnicodeBlock KHMER_SYMBOLS =
1451             new UnicodeBlock("KHMER_SYMBOLS",
1452                              "KHMER SYMBOLS",
1453                              "KHMERSYMBOLS");
1454 
1455         /**
1456          * Constant for the "Phonetic Extensions" Unicode character block.
1457          * @since 1.5
1458          */
1459         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1460             new UnicodeBlock("PHONETIC_EXTENSIONS",
1461                              "PHONETIC EXTENSIONS",
1462                              "PHONETICEXTENSIONS");
1463 
1464         /**
1465          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode
              * character block.
1466          * @since 1.5
1467          */
1468         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1469             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1470                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1471                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1472 
1473         /**
1474          * Constant for the "Supplemental Arrows-A" Unicode character block.
1475          * @since 1.5
1476          */
1477         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1478             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1479                              "SUPPLEMENTAL ARROWS-A",
1480                              "SUPPLEMENTALARROWS-A");
1481 
1482         /**
1483          * Constant for the "Supplemental Arrows-B" Unicode character block.
1484          * @since 1.5
1485          */
1486         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1487             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1488                              "SUPPLEMENTAL ARROWS-B",
1489                              "SUPPLEMENTALARROWS-B");
1490 
1491         /**
1492          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1493          * character block.
1494          * @since 1.5
1495          */
1496         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1497             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1498                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1499                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1500 
1501         /**
1502          * Constant for the "Supplemental Mathematical Operators" Unicode
1503          * character block.
1504          * @since 1.5
1505          */
1506         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1507             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1508                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1509                              "SUPPLEMENTALMATHEMATICALOPERATORS");
1510 
1511         /**
1512          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1513          * block.
1514          * @since 1.5
1515          */
1516         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1517             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1518                              "MISCELLANEOUS SYMBOLS AND ARROWS",
1519                              "MISCELLANEOUSSYMBOLSANDARROWS");
1520 
1521         /**
1522          * Constant for the "Katakana Phonetic Extensions" Unicode character
1523          * block.
1524          * @since 1.5
1525          */
1526         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1527             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1528                              "KATAKANA PHONETIC EXTENSIONS",
1529                              "KATAKANAPHONETICEXTENSIONS");
1530 
1531         /**
1532          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1533          * @since 1.5
1534          */
1535         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1536             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1537                              "YIJING HEXAGRAM SYMBOLS",
1538                              "YIJINGHEXAGRAMSYMBOLS");
1539 
1540         /**
1541          * Constant for the "Variation Selectors" Unicode character block.
1542          * @since 1.5
1543          */
1544         public static final UnicodeBlock VARIATION_SELECTORS =
1545             new UnicodeBlock("VARIATION_SELECTORS",
1546                              "VARIATION SELECTORS",
1547                              "VARIATIONSELECTORS");
1548 
1549         /**
1550          * Constant for the "Linear B Syllabary" Unicode character block.
1551          * @since 1.5
1552          */
1553         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1554             new UnicodeBlock("LINEAR_B_SYLLABARY",
1555                              "LINEAR B SYLLABARY",
1556                              "LINEARBSYLLABARY");
1557 
1558         /**
1559          * Constant for the "Linear B Ideograms" Unicode character block.
1560          * @since 1.5
1561          */
1562         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1563             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1564                              "LINEAR B IDEOGRAMS",
1565                              "LINEARBIDEOGRAMS");
1566 
1567         /**
1568          * Constant for the "Aegean Numbers" Unicode character block.
1569          * @since 1.5
1570          */
1571         public static final UnicodeBlock AEGEAN_NUMBERS =
1572             new UnicodeBlock("AEGEAN_NUMBERS",
1573                              "AEGEAN NUMBERS",
1574                              "AEGEANNUMBERS");
1575 
1576         /**
1577          * Constant for the "Old Italic" Unicode character block.
1578          * @since 1.5
1579          */
1580         public static final UnicodeBlock OLD_ITALIC =
1581             new UnicodeBlock("OLD_ITALIC",
1582                              "OLD ITALIC",
1583                              "OLDITALIC");
1584 
1585         /**
1586          * Constant for the "Gothic" Unicode character block.
1587          * @since 1.5
1588          */
1589         public static final UnicodeBlock GOTHIC =
1590             new UnicodeBlock("GOTHIC");
1591 
1592         /**
1593          * Constant for the "Ugaritic" Unicode character block.
1594          * @since 1.5
1595          */
1596         public static final UnicodeBlock UGARITIC =
1597             new UnicodeBlock("UGARITIC");
1598 
1599         /**
1600          * Constant for the "Deseret" Unicode character block.
1601          * @since 1.5
1602          */
1603         public static final UnicodeBlock DESERET =
1604             new UnicodeBlock("DESERET");
1605 
1606         /**
1607          * Constant for the "Shavian" Unicode character block.
1608          * @since 1.5
1609          */
1610         public static final UnicodeBlock SHAVIAN =
1611             new UnicodeBlock("SHAVIAN");
1612 
1613         /**
1614          * Constant for the "Osmanya" Unicode character block.
1615          * @since 1.5
1616          */
1617         public static final UnicodeBlock OSMANYA =
1618             new UnicodeBlock("OSMANYA");
1619 
1620         /**
1621          * Constant for the "Cypriot Syllabary" Unicode character block.
1622          * @since 1.5
1623          */
1624         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1625             new UnicodeBlock("CYPRIOT_SYLLABARY",
1626                              "CYPRIOT SYLLABARY",
1627                              "CYPRIOTSYLLABARY");
1628 
1629         /**
1630          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1631          * @since 1.5
1632          */
1633         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1634             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1635                              "BYZANTINE MUSICAL SYMBOLS",
1636                              "BYZANTINEMUSICALSYMBOLS");
1637 
1638         /**
1639          * Constant for the "Musical Symbols" Unicode character block.
1640          * @since 1.5
1641          */
1642         public static final UnicodeBlock MUSICAL_SYMBOLS =
1643             new UnicodeBlock("MUSICAL_SYMBOLS",
1644                              "MUSICAL SYMBOLS",  // block name in upper case
1645                              "MUSICALSYMBOLS");  // same, spaces removed
1646 
1647         /**
1648          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1649          * @since 1.5
1650          */
1651         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1652             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1653                              "TAI XUAN JING SYMBOLS",  // block name in upper case
1654                              "TAIXUANJINGSYMBOLS");  // same, spaces removed
1655 
1656         /**
1657          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1658          * character block.
1659          * @since 1.5
1660          */
1661         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1662             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1663                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",  // block name in upper case
1664                              "MATHEMATICALALPHANUMERICSYMBOLS");  // same, spaces removed
1665 
1666         /**
1667          * Constant for the "CJK Unified Ideographs Extension B" Unicode
1668          * character block.
1669          * @since 1.5
1670          */
1671         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1672             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1673                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",  // block name in upper case
1674                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");  // same, spaces removed
1675 
1676         /**
1677          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1678          * @since 1.5
1679          */
1680         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1681             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1682                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",  // block name in upper case
1683                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");  // same, spaces removed
1684 
1685         /**
1686          * Constant for the "Tags" Unicode character block.
1687          * @since 1.5
1688          */
1689         public static final UnicodeBlock TAGS =
1690             new UnicodeBlock("TAGS");  // one-word name: identifier only
1691 
1692         /**
1693          * Constant for the "Variation Selectors Supplement" Unicode character
1694          * block.
1695          * @since 1.5
1696          */
1697         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1698             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1699                              "VARIATION SELECTORS SUPPLEMENT",  // block name in upper case
1700                              "VARIATIONSELECTORSSUPPLEMENT");  // same, spaces removed
1701 
1702         /**
1703          * Constant for the "Supplementary Private Use Area-A" Unicode character
1704          * block.
1705          * @since 1.5
1706          */
1707         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1708             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1709                              "SUPPLEMENTARY PRIVATE USE AREA-A",  // block name in upper case
1710                              "SUPPLEMENTARYPRIVATEUSEAREA-A");  // same, spaces removed (hyphen kept)
1711 
1712         /**
1713          * Constant for the "Supplementary Private Use Area-B" Unicode character
1714          * block.
1715          * @since 1.5
1716          */
1717         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1718             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1719                              "SUPPLEMENTARY PRIVATE USE AREA-B",  // block name in upper case
1720                              "SUPPLEMENTARYPRIVATEUSEAREA-B");  // same, spaces removed (hyphen kept)
1721 
1722         /**
1723          * Constant for the "High Surrogates" Unicode character block.
1724          * This block represents code point values in the high surrogate
1725          * range: U+D800 through U+DB7F.
1726          *
1727          * @since 1.5
1728          */
1729         public static final UnicodeBlock HIGH_SURROGATES =
1730             new UnicodeBlock("HIGH_SURROGATES",
1731                              "HIGH SURROGATES",  // block name in upper case
1732                              "HIGHSURROGATES");  // same, spaces removed
1733 
1734         /**
1735          * Constant for the "High Private Use Surrogates" Unicode character
1736          * block.
1737          * This block represents code point values in the private use high
1738          * surrogate range: U+DB80 through U+DBFF.
1739          *
1740          * @since 1.5
1741          */
1742         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1743             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1744                              "HIGH PRIVATE USE SURROGATES",  // block name in upper case
1745                              "HIGHPRIVATEUSESURROGATES");  // same, spaces removed
1746 
1747         /**
1748          * Constant for the "Low Surrogates" Unicode character block.
1749          * This block represents code point values in the low surrogate
1750          * range: U+DC00 through U+DFFF.
1751          *
1752          * @since 1.5
1753          */
1754         public static final UnicodeBlock LOW_SURROGATES =
1755             new UnicodeBlock("LOW_SURROGATES",
1756                              "LOW SURROGATES",  // block name in upper case
1757                              "LOWSURROGATES");  // same, spaces removed
1758 
1759         /**
1760          * Constant for the "Arabic Supplement" Unicode character block.
1761          * @since 1.7
1762          */
1763         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1764             new UnicodeBlock("ARABIC_SUPPLEMENT",
1765                              "ARABIC SUPPLEMENT",  // block name in upper case
1766                              "ARABICSUPPLEMENT");  // same, spaces removed
1767 
1768         /**
1769          * Constant for the "NKo" Unicode character block.
1770          * @since 1.7
1771          */
1772         public static final UnicodeBlock NKO =
1773             new UnicodeBlock("NKO");  // one-word name: identifier only
1774 
1775         /**
1776          * Constant for the "Samaritan" Unicode character block.
1777          * @since 1.7
1778          */
1779         public static final UnicodeBlock SAMARITAN =
1780             new UnicodeBlock("SAMARITAN");  // one-word name: identifier only
1781 
1782         /**
1783          * Constant for the "Mandaic" Unicode character block.
1784          * @since 1.7
1785          */
1786         public static final UnicodeBlock MANDAIC =
1787             new UnicodeBlock("MANDAIC");  // one-word name: identifier only
1788 
1789         /**
1790          * Constant for the "Ethiopic Supplement" Unicode character block.
1791          * @since 1.7
1792          */
1793         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1794             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1795                              "ETHIOPIC SUPPLEMENT",  // block name in upper case
1796                              "ETHIOPICSUPPLEMENT");  // same, spaces removed
1797 
1798         /**
1799          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1800          * Unicode character block.
1801          * @since 1.7
1802          */
1803         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1804             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1805                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",  // block name in upper case
1806                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");  // same, spaces removed
1807 
1808         /**
1809          * Constant for the "New Tai Lue" Unicode character block.
1810          * @since 1.7
1811          */
1812         public static final UnicodeBlock NEW_TAI_LUE =
1813             new UnicodeBlock("NEW_TAI_LUE",
1814                              "NEW TAI LUE",  // block name in upper case
1815                              "NEWTAILUE");  // same, spaces removed
1816 
1817         /**
1818          * Constant for the "Buginese" Unicode character block.
1819          * @since 1.7
1820          */
1821         public static final UnicodeBlock BUGINESE =
1822             new UnicodeBlock("BUGINESE");  // one-word name: identifier only
1823 
1824         /**
1825          * Constant for the "Tai Tham" Unicode character block.
1826          * @since 1.7
1827          */
1828         public static final UnicodeBlock TAI_THAM =
1829             new UnicodeBlock("TAI_THAM",
1830                              "TAI THAM",  // block name in upper case
1831                              "TAITHAM");  // same, spaces removed
1832 
1833         /**
1834          * Constant for the "Balinese" Unicode character block.
1835          * @since 1.7
1836          */
1837         public static final UnicodeBlock BALINESE =
1838             new UnicodeBlock("BALINESE");  // one-word name: identifier only
1839 
1840         /**
1841          * Constant for the "Sundanese" Unicode character block.
1842          * @since 1.7
1843          */
1844         public static final UnicodeBlock SUNDANESE =
1845             new UnicodeBlock("SUNDANESE");  // one-word name: identifier only
1846 
1847         /**
1848          * Constant for the "Batak" Unicode character block.
1849          * @since 1.7
1850          */
1851         public static final UnicodeBlock BATAK =
1852             new UnicodeBlock("BATAK");  // one-word name: identifier only
1853 
1854         /**
1855          * Constant for the "Lepcha" Unicode character block.
1856          * @since 1.7
1857          */
1858         public static final UnicodeBlock LEPCHA =
1859             new UnicodeBlock("LEPCHA");  // one-word name: identifier only
1860 
1861         /**
1862          * Constant for the "Ol Chiki" Unicode character block.
1863          * @since 1.7
1864          */
1865         public static final UnicodeBlock OL_CHIKI =
1866             new UnicodeBlock("OL_CHIKI",
1867                              "OL CHIKI",  // block name in upper case
1868                              "OLCHIKI");  // same, spaces removed
1869 
1870         /**
1871          * Constant for the "Vedic Extensions" Unicode character block.
1872          * @since 1.7
1873          */
1874         public static final UnicodeBlock VEDIC_EXTENSIONS =
1875             new UnicodeBlock("VEDIC_EXTENSIONS",
1876                              "VEDIC EXTENSIONS",  // block name in upper case
1877                              "VEDICEXTENSIONS");  // same, spaces removed
1878 
1879         /**
1880          * Constant for the "Phonetic Extensions Supplement" Unicode character
1881          * block.
1882          * @since 1.7
1883          */
1884         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1885             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1886                              "PHONETIC EXTENSIONS SUPPLEMENT",  // block name in upper case
1887                              "PHONETICEXTENSIONSSUPPLEMENT");  // same, spaces removed
1888 
1889         /**
1890          * Constant for the "Combining Diacritical Marks Supplement" Unicode
1891          * character block.
1892          * @since 1.7
1893          */
1894         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1895             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1896                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",  // block name in upper case
1897                              "COMBININGDIACRITICALMARKSSUPPLEMENT");  // same, spaces removed
1898 
1899         /**
1900          * Constant for the "Glagolitic" Unicode character block.
1901          * @since 1.7
1902          */
1903         public static final UnicodeBlock GLAGOLITIC =
1904             new UnicodeBlock("GLAGOLITIC");  // one-word name: identifier only
1905 
1906         /**
1907          * Constant for the "Latin Extended-C" Unicode character block.
1908          * @since 1.7
1909          */
1910         public static final UnicodeBlock LATIN_EXTENDED_C =
1911             new UnicodeBlock("LATIN_EXTENDED_C",
1912                              "LATIN EXTENDED-C",  // block name in upper case
1913                              "LATINEXTENDED-C");  // same, spaces removed (hyphen kept)
1914 
1915         /**
1916          * Constant for the "Coptic" Unicode character block.
1917          * @since 1.7
1918          */
1919         public static final UnicodeBlock COPTIC =
1920             new UnicodeBlock("COPTIC");  // one-word name: identifier only
1921 
1922         /**
1923          * Constant for the "Georgian Supplement" Unicode character block.
1924          * @since 1.7
1925          */
1926         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1927             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1928                              "GEORGIAN SUPPLEMENT",  // block name in upper case
1929                              "GEORGIANSUPPLEMENT");  // same, spaces removed
1930 
1931         /**
1932          * Constant for the "Tifinagh" Unicode character block.
1933          * @since 1.7
1934          */
1935         public static final UnicodeBlock TIFINAGH =
1936             new UnicodeBlock("TIFINAGH");  // one-word name: identifier only
1937 
1938         /**
1939          * Constant for the "Ethiopic Extended" Unicode character block.
1940          * @since 1.7
1941          */
1942         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1943             new UnicodeBlock("ETHIOPIC_EXTENDED",
1944                              "ETHIOPIC EXTENDED",  // block name in upper case
1945                              "ETHIOPICEXTENDED");  // same, spaces removed
1946 
1947         /**
1948          * Constant for the "Cyrillic Extended-A" Unicode character block.
1949          * @since 1.7
1950          */
1951         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1952             new UnicodeBlock("CYRILLIC_EXTENDED_A",
1953                              "CYRILLIC EXTENDED-A",  // block name in upper case
1954                              "CYRILLICEXTENDED-A");  // same, spaces removed (hyphen kept)
1955 
1956         /**
1957          * Constant for the "Supplemental Punctuation" Unicode character block.
1958          * @since 1.7
1959          */
1960         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1961             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1962                              "SUPPLEMENTAL PUNCTUATION",  // block name in upper case
1963                              "SUPPLEMENTALPUNCTUATION");  // same, spaces removed
1964 
1965         /**
1966          * Constant for the "CJK Strokes" Unicode character block.
1967          * @since 1.7
1968          */
1969         public static final UnicodeBlock CJK_STROKES =
1970             new UnicodeBlock("CJK_STROKES",
1971                              "CJK STROKES",  // block name in upper case
1972                              "CJKSTROKES");  // same, spaces removed
1973 
1974         /**
1975          * Constant for the "Lisu" Unicode character block.
1976          * @since 1.7
1977          */
1978         public static final UnicodeBlock LISU =
1979             new UnicodeBlock("LISU");  // one-word name: identifier only
1980 
1981         /**
1982          * Constant for the "Vai" Unicode character block.
1983          * @since 1.7
1984          */
1985         public static final UnicodeBlock VAI =
1986             new UnicodeBlock("VAI");  // one-word name: identifier only
1987 
1988         /**
1989          * Constant for the "Cyrillic Extended-B" Unicode character block.
1990          * @since 1.7
1991          */
1992         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
1993             new UnicodeBlock("CYRILLIC_EXTENDED_B",
1994                              "CYRILLIC EXTENDED-B",  // block name in upper case
1995                              "CYRILLICEXTENDED-B");  // same, spaces removed (hyphen kept)
1996 
1997         /**
1998          * Constant for the "Bamum" Unicode character block.
1999          * @since 1.7
2000          */
2001         public static final UnicodeBlock BAMUM =
2002             new UnicodeBlock("BAMUM");  // one-word name: identifier only
2003 
2004         /**
2005          * Constant for the "Modifier Tone Letters" Unicode character block.
2006          * @since 1.7
2007          */
2008         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2009             new UnicodeBlock("MODIFIER_TONE_LETTERS",
2010                              "MODIFIER TONE LETTERS",  // block name in upper case
2011                              "MODIFIERTONELETTERS");  // same, spaces removed
2012 
2013         /**
2014          * Constant for the "Latin Extended-D" Unicode character block.
2015          * @since 1.7
2016          */
2017         public static final UnicodeBlock LATIN_EXTENDED_D =
2018             new UnicodeBlock("LATIN_EXTENDED_D",
2019                              "LATIN EXTENDED-D",  // block name in upper case
2020                              "LATINEXTENDED-D");  // same, spaces removed (hyphen kept)
2021 
2022         /**
2023          * Constant for the "Syloti Nagri" Unicode character block.
2024          * @since 1.7
2025          */
2026         public static final UnicodeBlock SYLOTI_NAGRI =
2027             new UnicodeBlock("SYLOTI_NAGRI",
2028                              "SYLOTI NAGRI",  // block name in upper case
2029                              "SYLOTINAGRI");  // same, spaces removed
2030 
2031         /**
2032          * Constant for the "Common Indic Number Forms" Unicode character block.
2033          * @since 1.7
2034          */
2035         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2036             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2037                              "COMMON INDIC NUMBER FORMS",  // block name in upper case
2038                              "COMMONINDICNUMBERFORMS");  // same, spaces removed
2039 
2040         /**
2041          * Constant for the "Phags-pa" Unicode character block.
2042          * @since 1.7
2043          */
2044         public static final UnicodeBlock PHAGS_PA =
2045             new UnicodeBlock("PHAGS_PA",
2046                              "PHAGS-PA");  // block name in upper case; it has no spaces, so no third spelling
2047 
2048         /**
2049          * Constant for the "Saurashtra" Unicode character block.
2050          * @since 1.7
2051          */
2052         public static final UnicodeBlock SAURASHTRA =
2053             new UnicodeBlock("SAURASHTRA");  // one-word name: identifier only
2054 
2055         /**
2056          * Constant for the "Devanagari Extended" Unicode character block.
2057          * @since 1.7
2058          */
2059         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2060             new UnicodeBlock("DEVANAGARI_EXTENDED",
2061                              "DEVANAGARI EXTENDED",  // block name in upper case
2062                              "DEVANAGARIEXTENDED");  // same, spaces removed
2063 
2064         /**
2065          * Constant for the "Kayah Li" Unicode character block.
2066          * @since 1.7
2067          */
2068         public static final UnicodeBlock KAYAH_LI =
2069             new UnicodeBlock("KAYAH_LI",
2070                              "KAYAH LI",  // block name in upper case
2071                              "KAYAHLI");  // same, spaces removed
2072 
2073         /**
2074          * Constant for the "Rejang" Unicode character block.
2075          * @since 1.7
2076          */
2077         public static final UnicodeBlock REJANG =
2078             new UnicodeBlock("REJANG");  // one-word name: identifier only
2079 
2080         /**
2081          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2082          * @since 1.7
2083          */
2084         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2085             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2086                              "HANGUL JAMO EXTENDED-A",  // block name in upper case
2087                              "HANGULJAMOEXTENDED-A");  // same, spaces removed (hyphen kept)
2088 
2089         /**
2090          * Constant for the "Javanese" Unicode character block.
2091          * @since 1.7
2092          */
2093         public static final UnicodeBlock JAVANESE =
2094             new UnicodeBlock("JAVANESE");  // one-word name: identifier only
2095 
2096         /**
2097          * Constant for the "Cham" Unicode character block.
2098          * @since 1.7
2099          */
2100         public static final UnicodeBlock CHAM =
2101             new UnicodeBlock("CHAM");  // one-word name: identifier only
2102 
2103         /**
2104          * Constant for the "Myanmar Extended-A" Unicode character block.
2105          * @since 1.7
2106          */
2107         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2108             new UnicodeBlock("MYANMAR_EXTENDED_A",
2109                              "MYANMAR EXTENDED-A",  // block name in upper case
2110                              "MYANMAREXTENDED-A");  // same, spaces removed (hyphen kept)
2111 
2112         /**
2113          * Constant for the "Tai Viet" Unicode character block.
2114          * @since 1.7
2115          */
2116         public static final UnicodeBlock TAI_VIET =
2117             new UnicodeBlock("TAI_VIET",
2118                              "TAI VIET",  // block name in upper case
2119                              "TAIVIET");  // same, spaces removed
2120 
2121         /**
2122          * Constant for the "Ethiopic Extended-A" Unicode character block.
2123          * @since 1.7
2124          */
2125         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2126             new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2127                              "ETHIOPIC EXTENDED-A",  // block name in upper case
2128                              "ETHIOPICEXTENDED-A");  // same, spaces removed (hyphen kept)
2129 
2130         /**
2131          * Constant for the "Meetei Mayek" Unicode character block.
2132          * @since 1.7
2133          */
2134         public static final UnicodeBlock MEETEI_MAYEK =
2135             new UnicodeBlock("MEETEI_MAYEK",
2136                              "MEETEI MAYEK",  // block name in upper case
2137                              "MEETEIMAYEK");  // same, spaces removed
2138 
2139         /**
2140          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2141          * @since 1.7
2142          */
2143         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2144             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2145                              "HANGUL JAMO EXTENDED-B",  // block name in upper case
2146                              "HANGULJAMOEXTENDED-B");  // same, spaces removed (hyphen kept)
2147 
2148         /**
2149          * Constant for the "Vertical Forms" Unicode character block.
2150          * @since 1.7
2151          */
2152         public static final UnicodeBlock VERTICAL_FORMS =
2153             new UnicodeBlock("VERTICAL_FORMS",
2154                              "VERTICAL FORMS",  // block name in upper case
2155                              "VERTICALFORMS");  // same, spaces removed
2156 
2157         /**
2158          * Constant for the "Ancient Greek Numbers" Unicode character block.
2159          * @since 1.7
2160          */
2161         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2162             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2163                              "ANCIENT GREEK NUMBERS",  // block name in upper case
2164                              "ANCIENTGREEKNUMBERS");  // same, spaces removed
2165 
2166         /**
2167          * Constant for the "Ancient Symbols" Unicode character block.
2168          * @since 1.7
2169          */
2170         public static final UnicodeBlock ANCIENT_SYMBOLS =
2171             new UnicodeBlock("ANCIENT_SYMBOLS",
2172                              "ANCIENT SYMBOLS",  // block name in upper case
2173                              "ANCIENTSYMBOLS");  // same, spaces removed
2174 
2175         /**
2176          * Constant for the "Phaistos Disc" Unicode character block.
2177          * @since 1.7
2178          */
2179         public static final UnicodeBlock PHAISTOS_DISC =
2180             new UnicodeBlock("PHAISTOS_DISC",
2181                              "PHAISTOS DISC",  // block name in upper case
2182                              "PHAISTOSDISC");  // same, spaces removed
2183 
2184         /**
2185          * Constant for the "Lycian" Unicode character block.
2186          * @since 1.7
2187          */
2188         public static final UnicodeBlock LYCIAN =
2189             new UnicodeBlock("LYCIAN");  // one-word name: identifier only
2190 
2191         /**
2192          * Constant for the "Carian" Unicode character block.
2193          * @since 1.7
2194          */
2195         public static final UnicodeBlock CARIAN =
2196             new UnicodeBlock("CARIAN");  // one-word name: identifier only
2197 
2198         /**
2199          * Constant for the "Old Persian" Unicode character block.
2200          * @since 1.7
2201          */
2202         public static final UnicodeBlock OLD_PERSIAN =
2203             new UnicodeBlock("OLD_PERSIAN",
2204                              "OLD PERSIAN",  // block name in upper case
2205                              "OLDPERSIAN");  // same, spaces removed
2206 
2207         /**
2208          * Constant for the "Imperial Aramaic" Unicode character block.
2209          * @since 1.7
2210          */
2211         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2212             new UnicodeBlock("IMPERIAL_ARAMAIC",
2213                              "IMPERIAL ARAMAIC",  // block name in upper case
2214                              "IMPERIALARAMAIC");  // same, spaces removed
2215 
2216         /**
2217          * Constant for the "Phoenician" Unicode character block.
2218          * @since 1.7
2219          */
2220         public static final UnicodeBlock PHOENICIAN =
2221             new UnicodeBlock("PHOENICIAN");  // one-word name: identifier only
2222 
2223         /**
2224          * Constant for the "Lydian" Unicode character block.
2225          * @since 1.7
2226          */
2227         public static final UnicodeBlock LYDIAN =
2228             new UnicodeBlock("LYDIAN");  // one-word name: identifier only
2229 
2230         /**
2231          * Constant for the "Kharoshthi" Unicode character block.
2232          * @since 1.7
2233          */
2234         public static final UnicodeBlock KHAROSHTHI =
2235             new UnicodeBlock("KHAROSHTHI");  // one-word name: identifier only
2236 
2237         /**
2238          * Constant for the "Old South Arabian" Unicode character block.
2239          * @since 1.7
2240          */
2241         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2242             new UnicodeBlock("OLD_SOUTH_ARABIAN",
2243                              "OLD SOUTH ARABIAN",  // block name in upper case
2244                              "OLDSOUTHARABIAN");  // same, spaces removed
2245 
2246         /**
2247          * Constant for the "Avestan" Unicode character block.
2248          * @since 1.7
2249          */
2250         public static final UnicodeBlock AVESTAN =
2251             new UnicodeBlock("AVESTAN");  // one-word name: identifier only
2252 
2253         /**
2254          * Constant for the "Inscriptional Parthian" Unicode character block.
2255          * @since 1.7
2256          */
2257         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2258             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2259                              "INSCRIPTIONAL PARTHIAN",  // block name in upper case
2260                              "INSCRIPTIONALPARTHIAN");  // same, spaces removed
2261 
2262         /**
2263          * Constant for the "Inscriptional Pahlavi" Unicode character block.
2264          * @since 1.7
2265          */
2266         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2267             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2268                              "INSCRIPTIONAL PAHLAVI",  // block name in upper case
2269                              "INSCRIPTIONALPAHLAVI");  // same, spaces removed
2270 
2271         /**
2272          * Constant for the "Old Turkic" Unicode character block.
2273          * @since 1.7
2274          */
2275         public static final UnicodeBlock OLD_TURKIC =
2276             new UnicodeBlock("OLD_TURKIC",
2277                              "OLD TURKIC",  // block name in upper case
2278                              "OLDTURKIC");  // same, spaces removed
2279 
2280         /**
2281          * Constant for the "Rumi Numeral Symbols" Unicode character block.
2282          * @since 1.7
2283          */
2284         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2285             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2286                              "RUMI NUMERAL SYMBOLS",  // block name in upper case
2287                              "RUMINUMERALSYMBOLS");  // same, spaces removed
2288 
2289         /**
2290          * Constant for the "Brahmi" Unicode character block.
2291          * @since 1.7
2292          */
2293         public static final UnicodeBlock BRAHMI =
2294             new UnicodeBlock("BRAHMI");  // one-word name: identifier only
2295 
2296         /**
2297          * Constant for the "Kaithi" Unicode character block.
2298          * @since 1.7
2299          */
2300         public static final UnicodeBlock KAITHI =
2301             new UnicodeBlock("KAITHI");  // one-word name: identifier only
2302 
2303         /**
2304          * Constant for the "Cuneiform" Unicode character block.
2305          * @since 1.7
2306          */
2307         public static final UnicodeBlock CUNEIFORM =
2308             new UnicodeBlock("CUNEIFORM");  // one-word name: identifier only
2309 
2310         /**
2311          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2312          * character block.
2313          * @since 1.7
2314          */
2315         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2316             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2317                              "CUNEIFORM NUMBERS AND PUNCTUATION",  // block name in upper case
2318                              "CUNEIFORMNUMBERSANDPUNCTUATION");  // same, spaces removed
2319 
2320         /**
2321          * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2322          * @since 1.7
2323          */
2324         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2325             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2326                              "EGYPTIAN HIEROGLYPHS",  // block name in upper case
2327                              "EGYPTIANHIEROGLYPHS");  // same, spaces removed
2328 
2329         /**
2330          * Constant for the "Bamum Supplement" Unicode character block.
2331          * @since 1.7
2332          */
2333         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2334             new UnicodeBlock("BAMUM_SUPPLEMENT",
2335                              "BAMUM SUPPLEMENT",  // block name in upper case
2336                              "BAMUMSUPPLEMENT");  // same, spaces removed
2337 
2338         /**
2339          * Constant for the "Kana Supplement" Unicode character block.
2340          * @since 1.7
2341          */
2342         public static final UnicodeBlock KANA_SUPPLEMENT =
2343             new UnicodeBlock("KANA_SUPPLEMENT",
2344                              "KANA SUPPLEMENT",  // block name in upper case
2345                              "KANASUPPLEMENT");  // same, spaces removed
2346 
2347         /**
2348          * Constant for the "Ancient Greek Musical Notation" Unicode character
2349          * block.
2350          * @since 1.7
2351          */
2352         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2353             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2354                              "ANCIENT GREEK MUSICAL NOTATION",  // block name in upper case
2355                              "ANCIENTGREEKMUSICALNOTATION");  // same, spaces removed
2356 
2357         /**
2358          * Constant for the "Counting Rod Numerals" Unicode character block.
2359          * @since 1.7
2360          */
2361         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2362             new UnicodeBlock("COUNTING_ROD_NUMERALS",
2363                              "COUNTING ROD NUMERALS",  // block name in upper case
2364                              "COUNTINGRODNUMERALS");  // same, spaces removed
2365 
2366         /**
2367          * Constant for the "Mahjong Tiles" Unicode character block.
2368          * @since 1.7
2369          */
2370         public static final UnicodeBlock MAHJONG_TILES =
2371             new UnicodeBlock("MAHJONG_TILES",
2372                              "MAHJONG TILES",  // block name in upper case
2373                              "MAHJONGTILES");  // same, spaces removed
2374 
2375         /**
2376          * Constant for the "Domino Tiles" Unicode character block.
2377          * @since 1.7
2378          */
2379         public static final UnicodeBlock DOMINO_TILES =
2380             new UnicodeBlock("DOMINO_TILES",
2381                              "DOMINO TILES",  // block name in upper case
2382                              "DOMINOTILES");  // same, spaces removed
2383 
2384         /**
2385          * Constant for the "Playing Cards" Unicode character block.
2386          * @since 1.7
2387          */
2388         public static final UnicodeBlock PLAYING_CARDS =
2389             new UnicodeBlock("PLAYING_CARDS",
2390                              "PLAYING CARDS",  // block name in upper case
2391                              "PLAYINGCARDS");  // same, spaces removed
2392 
2393         /**
2394          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2395          * block.
2396          * @since 1.7
2397          */
2398         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2399             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2400                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",  // block name in upper case
2401                              "ENCLOSEDALPHANUMERICSUPPLEMENT");  // same, spaces removed
2402 
2403         /**
2404          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2405          * block.
2406          * @since 1.7
2407          */
2408         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2409             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2410                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",  // block name in upper case
2411                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");  // same, spaces removed
2412 
2413         /**
2414          * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2415          * character block.
2416          * @since 1.7
2417          */
2418         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2419             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2420                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",  // block name in upper case
2421                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");  // same, spaces removed
2422 
2423         /**
2424          * Constant for the "Emoticons" Unicode character block.
2425          * @since 1.7
2426          */
2427         public static final UnicodeBlock EMOTICONS =
2428             new UnicodeBlock("EMOTICONS");  // one-word name: identifier only
2429 
2430         /**
2431          * Constant for the "Transport And Map Symbols" Unicode character block (U+1F680..U+1F6FF).
2432          * @since 1.7
2433          */
2434         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2435             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2436                              "TRANSPORT AND MAP SYMBOLS",
2437                              "TRANSPORTANDMAPSYMBOLS");
2438 
2439         /**
2440          * Constant for the "Alchemical Symbols" Unicode character block (U+1F700..U+1F77F).
2441          * @since 1.7
2442          */
2443         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2444             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2445                              "ALCHEMICAL SYMBOLS",
2446                              "ALCHEMICALSYMBOLS");
2447 
2448         /**
2449          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2450          * character block (U+2A700..U+2B73F).
2451          * @since 1.7
2452          */
2453         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2454             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2455                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2456                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2457 
2458         /**
2459          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2460          * character block (U+2B740..U+2B81F).
2461          * @since 1.7
2462          */
2463         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2464             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2465                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2466                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2467 
2468         private static final int blockStarts[] = {   // ascending range starts, binary-searched by of(int); entry i pairs with blocks[i]
2469             0x0000,   // 0000..007F; Basic Latin
2470             0x0080,   // 0080..00FF; Latin-1 Supplement
2471             0x0100,   // 0100..017F; Latin Extended-A
2472             0x0180,   // 0180..024F; Latin Extended-B
2473             0x0250,   // 0250..02AF; IPA Extensions
2474             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2475             0x0300,   // 0300..036F; Combining Diacritical Marks
2476             0x0370,   // 0370..03FF; Greek and Coptic
2477             0x0400,   // 0400..04FF; Cyrillic
2478             0x0500,   // 0500..052F; Cyrillic Supplement
2479             0x0530,   // 0530..058F; Armenian
2480             0x0590,   // 0590..05FF; Hebrew
2481             0x0600,   // 0600..06FF; Arabic
2482             0x0700,   // 0700..074F; Syriac
2483             0x0750,   // 0750..077F; Arabic Supplement
2484             0x0780,   // 0780..07BF; Thaana
2485             0x07C0,   // 07C0..07FF; NKo
2486             0x0800,   // 0800..083F; Samaritan
2487             0x0840,   // 0840..085F; Mandaic
2488             0x0860,   // 0860..08FF; unassigned
2489             0x0900,   // 0900..097F; Devanagari
2490             0x0980,   // 0980..09FF; Bengali
2491             0x0A00,   // 0A00..0A7F; Gurmukhi
2492             0x0A80,   // 0A80..0AFF; Gujarati
2493             0x0B00,   // 0B00..0B7F; Oriya
2494             0x0B80,   // 0B80..0BFF; Tamil
2495             0x0C00,   // 0C00..0C7F; Telugu
2496             0x0C80,   // 0C80..0CFF; Kannada
2497             0x0D00,   // 0D00..0D7F; Malayalam
2498             0x0D80,   // 0D80..0DFF; Sinhala
2499             0x0E00,   // 0E00..0E7F; Thai
2500             0x0E80,   // 0E80..0EFF; Lao
2501             0x0F00,   // 0F00..0FFF; Tibetan
2502             0x1000,   // 1000..109F; Myanmar
2503             0x10A0,   // 10A0..10FF; Georgian
2504             0x1100,   // 1100..11FF; Hangul Jamo
2505             0x1200,   // 1200..137F; Ethiopic
2506             0x1380,   // 1380..139F; Ethiopic Supplement
2507             0x13A0,   // 13A0..13FF; Cherokee
2508             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2509             0x1680,   // 1680..169F; Ogham
2510             0x16A0,   // 16A0..16FF; Runic
2511             0x1700,   // 1700..171F; Tagalog
2512             0x1720,   // 1720..173F; Hanunoo
2513             0x1740,   // 1740..175F; Buhid
2514             0x1760,   // 1760..177F; Tagbanwa
2515             0x1780,   // 1780..17FF; Khmer
2516             0x1800,   // 1800..18AF; Mongolian
2517             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2518             0x1900,   // 1900..194F; Limbu
2519             0x1950,   // 1950..197F; Tai Le
2520             0x1980,   // 1980..19DF; New Tai Lue
2521             0x19E0,   // 19E0..19FF; Khmer Symbols
2522             0x1A00,   // 1A00..1A1F; Buginese
2523             0x1A20,   // 1A20..1AAF; Tai Tham
2524             0x1AB0,   // 1AB0..1AFF; unassigned
2525             0x1B00,   // 1B00..1B7F; Balinese
2526             0x1B80,   // 1B80..1BBF; Sundanese
2527             0x1BC0,   // 1BC0..1BFF; Batak
2528             0x1C00,   // 1C00..1C4F; Lepcha
2529             0x1C50,   // 1C50..1C7F; Ol Chiki
2530             0x1C80,   // 1C80..1CCF; unassigned
2531             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
2532             0x1D00,   // 1D00..1D7F; Phonetic Extensions
2533             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2534             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2535             0x1E00,   // 1E00..1EFF; Latin Extended Additional
2536             0x1F00,   // 1F00..1FFF; Greek Extended
2537             0x2000,   // 2000..206F; General Punctuation
2538             0x2070,   // 2070..209F; Superscripts and Subscripts
2539             0x20A0,   // 20A0..20CF; Currency Symbols
2540             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2541             0x2100,   // 2100..214F; Letterlike Symbols
2542             0x2150,   // 2150..218F; Number Forms
2543             0x2190,   // 2190..21FF; Arrows
2544             0x2200,   // 2200..22FF; Mathematical Operators
2545             0x2300,   // 2300..23FF; Miscellaneous Technical
2546             0x2400,   // 2400..243F; Control Pictures
2547             0x2440,   // 2440..245F; Optical Character Recognition
2548             0x2460,   // 2460..24FF; Enclosed Alphanumerics
2549             0x2500,   // 2500..257F; Box Drawing
2550             0x2580,   // 2580..259F; Block Elements
2551             0x25A0,   // 25A0..25FF; Geometric Shapes
2552             0x2600,   // 2600..26FF; Miscellaneous Symbols
2553             0x2700,   // 2700..27BF; Dingbats
2554             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2555             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2556             0x2800,   // 2800..28FF; Braille Patterns
2557             0x2900,   // 2900..297F; Supplemental Arrows-B
2558             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2559             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2560             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2561             0x2C00,   // 2C00..2C5F; Glagolitic
2562             0x2C60,   // 2C60..2C7F; Latin Extended-C
2563             0x2C80,   // 2C80..2CFF; Coptic
2564             0x2D00,   // 2D00..2D2F; Georgian Supplement
2565             0x2D30,   // 2D30..2D7F; Tifinagh
2566             0x2D80,   // 2D80..2DDF; Ethiopic Extended
2567             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2568             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2569             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2570             0x2F00,   // 2F00..2FDF; Kangxi Radicals
2571             0x2FE0,   // 2FE0..2FEF; unassigned
2572             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2573             0x3000,   // 3000..303F; CJK Symbols and Punctuation
2574             0x3040,   // 3040..309F; Hiragana
2575             0x30A0,   // 30A0..30FF; Katakana
2576             0x3100,   // 3100..312F; Bopomofo
2577             0x3130,   // 3130..318F; Hangul Compatibility Jamo
2578             0x3190,   // 3190..319F; Kanbun
2579             0x31A0,   // 31A0..31BF; Bopomofo Extended
2580             0x31C0,   // 31C0..31EF; CJK Strokes
2581             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2582             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2583             0x3300,   // 3300..33FF; CJK Compatibility
2584             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2585             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2586             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2587             0xA000,   // A000..A48F; Yi Syllables
2588             0xA490,   // A490..A4CF; Yi Radicals
2589             0xA4D0,   // A4D0..A4FF; Lisu
2590             0xA500,   // A500..A63F; Vai
2591             0xA640,   // A640..A69F; Cyrillic Extended-B
2592             0xA6A0,   // A6A0..A6FF; Bamum
2593             0xA700,   // A700..A71F; Modifier Tone Letters
2594             0xA720,   // A720..A7FF; Latin Extended-D
2595             0xA800,   // A800..A82F; Syloti Nagri
2596             0xA830,   // A830..A83F; Common Indic Number Forms
2597             0xA840,   // A840..A87F; Phags-pa
2598             0xA880,   // A880..A8DF; Saurashtra
2599             0xA8E0,   // A8E0..A8FF; Devanagari Extended
2600             0xA900,   // A900..A92F; Kayah Li
2601             0xA930,   // A930..A95F; Rejang
2602             0xA960,   // A960..A97F; Hangul Jamo Extended-A
2603             0xA980,   // A980..A9DF; Javanese
2604             0xA9E0,   // A9E0..A9FF; unassigned
2605             0xAA00,   // AA00..AA5F; Cham
2606             0xAA60,   // AA60..AA7F; Myanmar Extended-A
2607             0xAA80,   // AA80..AADF; Tai Viet
2608             0xAAE0,   // AAE0..AAFF; unassigned
2609             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
2610             0xAB30,   // AB30..ABBF; unassigned
2611             0xABC0,   // ABC0..ABFF; Meetei Mayek
2612             0xAC00,   // AC00..D7AF; Hangul Syllables
2613             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
2614             0xD800,   // D800..DB7F; High Surrogates
2615             0xDB80,   // DB80..DBFF; High Private Use Surrogates
2616             0xDC00,   // DC00..DFFF; Low Surrogates
2617             0xE000,   // E000..F8FF; Private Use Area
2618             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2619             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2620             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2621             0xFE00,   // FE00..FE0F; Variation Selectors
2622             0xFE10,   // FE10..FE1F; Vertical Forms
2623             0xFE20,   // FE20..FE2F; Combining Half Marks
2624             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2625             0xFE50,   // FE50..FE6F; Small Form Variants
2626             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2627             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2628             0xFFF0,   // FFF0..FFFF; Specials
2629             0x10000,  // 10000..1007F; Linear B Syllabary
2630             0x10080,  // 10080..100FF; Linear B Ideograms
2631             0x10100,  // 10100..1013F; Aegean Numbers
2632             0x10140,  // 10140..1018F; Ancient Greek Numbers
2633             0x10190,  // 10190..101CF; Ancient Symbols
2634             0x101D0,  // 101D0..101FF; Phaistos Disc
2635             0x10200,  // 10200..1027F; unassigned
2636             0x10280,  // 10280..1029F; Lycian
2637             0x102A0,  // 102A0..102DF; Carian
2638             0x102E0,  // 102E0..102FF; unassigned
2639             0x10300,  // 10300..1032F; Old Italic
2640             0x10330,  // 10330..1034F; Gothic
2641             0x10350,  // 10350..1037F; unassigned
2642             0x10380,  // 10380..1039F; Ugaritic
2643             0x103A0,  // 103A0..103DF; Old Persian
2644             0x103E0,  // 103E0..103FF; unassigned
2645             0x10400,  // 10400..1044F; Deseret
2646             0x10450,  // 10450..1047F; Shavian
2647             0x10480,  // 10480..104AF; Osmanya
2648             0x104B0,  // 104B0..107FF; unassigned
2649             0x10800,  // 10800..1083F; Cypriot Syllabary
2650             0x10840,  // 10840..1085F; Imperial Aramaic
2651             0x10860,  // 10860..108FF; unassigned
2652             0x10900,  // 10900..1091F; Phoenician
2653             0x10920,  // 10920..1093F; Lydian
2654             0x10940,  // 10940..109FF; unassigned
2655             0x10A00,  // 10A00..10A5F; Kharoshthi
2656             0x10A60,  // 10A60..10A7F; Old South Arabian
2657             0x10A80,  // 10A80..10AFF; unassigned
2658             0x10B00,  // 10B00..10B3F; Avestan
2659             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
2660             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
2661             0x10B80,  // 10B80..10BFF; unassigned
2662             0x10C00,  // 10C00..10C4F; Old Turkic
2663             0x10C50,  // 10C50..10E5F; unassigned
2664             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
2665             0x10E80,  // 10E80..10FFF; unassigned
2666             0x11000,  // 11000..1107F; Brahmi
2667             0x11080,  // 11080..110CF; Kaithi
2668             0x110D0,  // 110D0..11FFF; unassigned
2669             0x12000,  // 12000..123FF; Cuneiform
2670             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2671             0x12480,  // 12480..12FFF; unassigned
2672             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
2673             0x13430,  // 13430..167FF; unassigned
2674             0x16800,  // 16800..16A3F; Bamum Supplement
2675             0x16A40,  // 16A40..1AFFF; unassigned
2676             0x1B000,  // 1B000..1B0FF; Kana Supplement
2677             0x1B100,  // 1B100..1CFFF; unassigned
2678             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2679             0x1D100,  // 1D100..1D1FF; Musical Symbols
2680             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2681             0x1D250,  // 1D250..1D2FF; unassigned
2682             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2683             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2684             0x1D380,  // 1D380..1D3FF; unassigned
2685             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2686             0x1D800,  // 1D800..1EFFF; unassigned
2687             0x1F000,  // 1F000..1F02F; Mahjong Tiles
2688             0x1F030,  // 1F030..1F09F; Domino Tiles
2689             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
2690             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2691             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
2692             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2693             0x1F600,  // 1F600..1F64F; Emoticons
2694             0x1F650,  // 1F650..1F67F; unassigned
2695             0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
2696             0x1F700,  // 1F700..1F77F; Alchemical Symbols
2697             0x1F780,  // 1F780..1FFFF; unassigned
2698             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2699             0x2A6E0,  // 2A6E0..2A6FF; unassigned
2700             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
2701             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
2702             0x2B820,  // 2B820..2F7FF; unassigned
2703             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2704             0x2FA20,  // 2FA20..DFFFF; unassigned
2705             0xE0000,  // E0000..E007F; Tags
2706             0xE0080,  // E0080..E00FF; unassigned
2707             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2708             0xE01F0,  // E01F0..EFFFF; unassigned
2709             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2710             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
2711         };
2712 
2713         private static final UnicodeBlock[] blocks = {   // blocks[i] is the block starting at blockStarts[i]; null marks an unassigned range
2714             BASIC_LATIN,
2715             LATIN_1_SUPPLEMENT,
2716             LATIN_EXTENDED_A,
2717             LATIN_EXTENDED_B,
2718             IPA_EXTENSIONS,
2719             SPACING_MODIFIER_LETTERS,
2720             COMBINING_DIACRITICAL_MARKS,
2721             GREEK,
2722             CYRILLIC,
2723             CYRILLIC_SUPPLEMENTARY,
2724             ARMENIAN,
2725             HEBREW,
2726             ARABIC,
2727             SYRIAC,
2728             ARABIC_SUPPLEMENT,
2729             THAANA,
2730             NKO,
2731             SAMARITAN,
2732             MANDAIC,
2733             null,   // unassigned 0860..08FF
2734             DEVANAGARI,
2735             BENGALI,
2736             GURMUKHI,
2737             GUJARATI,
2738             ORIYA,
2739             TAMIL,
2740             TELUGU,
2741             KANNADA,
2742             MALAYALAM,
2743             SINHALA,
2744             THAI,
2745             LAO,
2746             TIBETAN,
2747             MYANMAR,
2748             GEORGIAN,
2749             HANGUL_JAMO,
2750             ETHIOPIC,
2751             ETHIOPIC_SUPPLEMENT,
2752             CHEROKEE,
2753             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2754             OGHAM,
2755             RUNIC,
2756             TAGALOG,
2757             HANUNOO,
2758             BUHID,
2759             TAGBANWA,
2760             KHMER,
2761             MONGOLIAN,
2762             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
2763             LIMBU,
2764             TAI_LE,
2765             NEW_TAI_LUE,
2766             KHMER_SYMBOLS,
2767             BUGINESE,
2768             TAI_THAM,
2769             null,   // unassigned 1AB0..1AFF
2770             BALINESE,
2771             SUNDANESE,
2772             BATAK,
2773             LEPCHA,
2774             OL_CHIKI,
2775             null,   // unassigned 1C80..1CCF
2776             VEDIC_EXTENSIONS,
2777             PHONETIC_EXTENSIONS,
2778             PHONETIC_EXTENSIONS_SUPPLEMENT,
2779             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2780             LATIN_EXTENDED_ADDITIONAL,
2781             GREEK_EXTENDED,
2782             GENERAL_PUNCTUATION,
2783             SUPERSCRIPTS_AND_SUBSCRIPTS,
2784             CURRENCY_SYMBOLS,
2785             COMBINING_MARKS_FOR_SYMBOLS,
2786             LETTERLIKE_SYMBOLS,
2787             NUMBER_FORMS,
2788             ARROWS,
2789             MATHEMATICAL_OPERATORS,
2790             MISCELLANEOUS_TECHNICAL,
2791             CONTROL_PICTURES,
2792             OPTICAL_CHARACTER_RECOGNITION,
2793             ENCLOSED_ALPHANUMERICS,
2794             BOX_DRAWING,
2795             BLOCK_ELEMENTS,
2796             GEOMETRIC_SHAPES,
2797             MISCELLANEOUS_SYMBOLS,
2798             DINGBATS,
2799             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
2800             SUPPLEMENTAL_ARROWS_A,
2801             BRAILLE_PATTERNS,
2802             SUPPLEMENTAL_ARROWS_B,
2803             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
2804             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
2805             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
2806             GLAGOLITIC,
2807             LATIN_EXTENDED_C,
2808             COPTIC,
2809             GEORGIAN_SUPPLEMENT,
2810             TIFINAGH,
2811             ETHIOPIC_EXTENDED,
2812             CYRILLIC_EXTENDED_A,
2813             SUPPLEMENTAL_PUNCTUATION,
2814             CJK_RADICALS_SUPPLEMENT,
2815             KANGXI_RADICALS,
2816             null,   // unassigned 2FE0..2FEF
2817             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
2818             CJK_SYMBOLS_AND_PUNCTUATION,
2819             HIRAGANA,
2820             KATAKANA,
2821             BOPOMOFO,
2822             HANGUL_COMPATIBILITY_JAMO,
2823             KANBUN,
2824             BOPOMOFO_EXTENDED,
2825             CJK_STROKES,
2826             KATAKANA_PHONETIC_EXTENSIONS,
2827             ENCLOSED_CJK_LETTERS_AND_MONTHS,
2828             CJK_COMPATIBILITY,
2829             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2830             YIJING_HEXAGRAM_SYMBOLS,
2831             CJK_UNIFIED_IDEOGRAPHS,
2832             YI_SYLLABLES,
2833             YI_RADICALS,
2834             LISU,
2835             VAI,
2836             CYRILLIC_EXTENDED_B,
2837             BAMUM,
2838             MODIFIER_TONE_LETTERS,
2839             LATIN_EXTENDED_D,
2840             SYLOTI_NAGRI,
2841             COMMON_INDIC_NUMBER_FORMS,
2842             PHAGS_PA,
2843             SAURASHTRA,
2844             DEVANAGARI_EXTENDED,
2845             KAYAH_LI,
2846             REJANG,
2847             HANGUL_JAMO_EXTENDED_A,
2848             JAVANESE,
2849             null,   // unassigned A9E0..A9FF
2850             CHAM,
2851             MYANMAR_EXTENDED_A,
2852             TAI_VIET,
2853             null,   // unassigned AAE0..AAFF
2854             ETHIOPIC_EXTENDED_A,
2855             null,   // unassigned AB30..ABBF
2856             MEETEI_MAYEK,
2857             HANGUL_SYLLABLES,
2858             HANGUL_JAMO_EXTENDED_B,
2859             HIGH_SURROGATES,
2860             HIGH_PRIVATE_USE_SURROGATES,
2861             LOW_SURROGATES,
2862             PRIVATE_USE_AREA,
2863             CJK_COMPATIBILITY_IDEOGRAPHS,
2864             ALPHABETIC_PRESENTATION_FORMS,
2865             ARABIC_PRESENTATION_FORMS_A,
2866             VARIATION_SELECTORS,
2867             VERTICAL_FORMS,
2868             COMBINING_HALF_MARKS,
2869             CJK_COMPATIBILITY_FORMS,
2870             SMALL_FORM_VARIANTS,
2871             ARABIC_PRESENTATION_FORMS_B,
2872             HALFWIDTH_AND_FULLWIDTH_FORMS,
2873             SPECIALS,
2874             LINEAR_B_SYLLABARY,
2875             LINEAR_B_IDEOGRAMS,
2876             AEGEAN_NUMBERS,
2877             ANCIENT_GREEK_NUMBERS,
2878             ANCIENT_SYMBOLS,
2879             PHAISTOS_DISC,
2880             null,   // unassigned 10200..1027F
2881             LYCIAN,
2882             CARIAN,
2883             null,   // unassigned 102E0..102FF
2884             OLD_ITALIC,
2885             GOTHIC,
2886             null,   // unassigned 10350..1037F
2887             UGARITIC,
2888             OLD_PERSIAN,
2889             null,   // unassigned 103E0..103FF
2890             DESERET,
2891             SHAVIAN,
2892             OSMANYA,
2893             null,   // unassigned 104B0..107FF
2894             CYPRIOT_SYLLABARY,
2895             IMPERIAL_ARAMAIC,
2896             null,   // unassigned 10860..108FF
2897             PHOENICIAN,
2898             LYDIAN,
2899             null,   // unassigned 10940..109FF
2900             KHAROSHTHI,
2901             OLD_SOUTH_ARABIAN,
2902             null,   // unassigned 10A80..10AFF
2903             AVESTAN,
2904             INSCRIPTIONAL_PARTHIAN,
2905             INSCRIPTIONAL_PAHLAVI,
2906             null,   // unassigned 10B80..10BFF
2907             OLD_TURKIC,
2908             null,   // unassigned 10C50..10E5F
2909             RUMI_NUMERAL_SYMBOLS,
2910             null,   // unassigned 10E80..10FFF
2911             BRAHMI,
2912             KAITHI,
2913             null,   // unassigned 110D0..11FFF
2914             CUNEIFORM,
2915             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
2916             null,   // unassigned 12480..12FFF
2917             EGYPTIAN_HIEROGLYPHS,
2918             null,   // unassigned 13430..167FF
2919             BAMUM_SUPPLEMENT,
2920             null,   // unassigned 16A40..1AFFF
2921             KANA_SUPPLEMENT,
2922             null,   // unassigned 1B100..1CFFF
2923             BYZANTINE_MUSICAL_SYMBOLS,
2924             MUSICAL_SYMBOLS,
2925             ANCIENT_GREEK_MUSICAL_NOTATION,
2926             null,   // unassigned 1D250..1D2FF
2927             TAI_XUAN_JING_SYMBOLS,
2928             COUNTING_ROD_NUMERALS,
2929             null,   // unassigned 1D380..1D3FF
2930             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
2931             null,   // unassigned 1D800..1EFFF
2932             MAHJONG_TILES,
2933             DOMINO_TILES,
2934             PLAYING_CARDS,
2935             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
2936             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
2937             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
2938             EMOTICONS,
2939             null,   // unassigned 1F650..1F67F
2940             TRANSPORT_AND_MAP_SYMBOLS,
2941             ALCHEMICAL_SYMBOLS,
2942             null,   // unassigned 1F780..1FFFF
2943             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
2944             null,   // unassigned 2A6E0..2A6FF
2945             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
2946             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
2947             null,   // unassigned 2B820..2F7FF
2948             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
2949             null,   // unassigned 2FA20..DFFFF
2950             TAGS,
2951             null,   // unassigned E0080..E00FF
2952             VARIATION_SELECTORS_SUPPLEMENT,
2953             null,   // unassigned E01F0..EFFFF
2954             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
2955             SUPPLEMENTARY_PRIVATE_USE_AREA_B
2956         };
2957 
2958 
2959         /**
2960          * Looks up the Unicode block that contains the given {@code char}
2961          * value. The result is {@code null} when the character does not
2962          * belong to any defined block.
2963          *
2964          * <p><b>Note:</b> This method cannot handle
2965          * <a href="Character.html#supplementary"> supplementary
2966          * characters</a>.  To support all Unicode characters, including
2967          * supplementary characters, use the {@link #of(int)} method.
2968          *
2969          * @param   c  The character in question
2970          * @return  The {@code UnicodeBlock} instance representing the Unicode
2971          *          block of which this character is a member, or {@code null}
2972          *          if the character is not a member of any Unicode block
2973          */
2974         public static UnicodeBlock of(char c) {
2975             int codePoint = c;   // widen first so the call below binds to of(int), not of(char)
2976             return of(codePoint);
2977         }
2978 
2979         /**
2980          * Returns the object representing the Unicode block
2981          * containing the given character (Unicode code point), or
2982          * {@code null} if the character is not a member of a
2983          * defined block.
2984          *
2985          * @param   codePoint the character (Unicode code point) in question.
2986          * @return  The {@code UnicodeBlock} instance representing the
2987          *          Unicode block of which this character is a member, or
2988          *          {@code null} if the character is not a member of any
2989          *          Unicode block
2990          * @exception IllegalArgumentException if the specified
2991          * {@code codePoint} is an invalid Unicode code point.
2992          * @see Character#isValidCodePoint(int)
2993          * @since   1.5
2994          */
2995         public static UnicodeBlock of(int codePoint) {
2996             if (!isValidCodePoint(codePoint)) {
2997                 throw new IllegalArgumentException();
2998             }
2999 
3000             int top, bottom, current;
3001             bottom = 0;
3002             top = blockStarts.length;
3003             current = top/2;
3004 
3005             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3006             while (top - bottom > 1) {
3007                 if (codePoint >= blockStarts[current]) {
3008                     bottom = current;
3009                 } else {
3010                     top = current;
3011                 }
3012                 current = (top + bottom) / 2;
3013             }
3014             return blocks[current];
3015         }
3016 
3017         /**
3018          * Returns the UnicodeBlock with the given name. Block
3019          * names are determined by The Unicode Standard. The file
3020          * Blocks-&lt;version&gt;.txt defines blocks for a particular
3021          * version of the standard. The {@link Character} class specifies
3022          * the version of the standard that it supports.
3023          * <p>
3024          * This method accepts block names in the following forms:
3025          * <ol>
3026          * <li> Canonical block names as defined by the Unicode Standard.
3027          * For example, the standard defines a "Basic Latin" block. Therefore, this
3028          * method accepts "Basic Latin" as a valid block name. The documentation of
3029          * each UnicodeBlock provides the canonical name.
3030          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3031          * is a valid block name for the "Basic Latin" block.
3032          * <li>The text representation of each constant UnicodeBlock identifier.
3033          * For example, this method will return the {@link #BASIC_LATIN} block if
3034          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3035          * hyphens in the canonical name with underscores.
3036          * </ol>
3037          * Finally, character case is ignored for all of the valid block name forms.
3038          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3039          * The en_US locale's case mapping rules are used to provide case-insensitive
3040          * string comparisons for block name validation.
3041          * <p>
3042          * If the Unicode Standard changes block names, both the previous and
3043          * current names will be accepted.
3044          *
3045          * @param blockName A {@code UnicodeBlock} name.
3046          * @return The {@code UnicodeBlock} instance identified
3047          *         by {@code blockName}
3048          * @throws IllegalArgumentException if {@code blockName} is an
3049          *         invalid name; the exception message includes the rejected name
3050          * @throws NullPointerException if {@code blockName} is null
3051          * @since 1.5
3052          */
3053         public static final UnicodeBlock forName(String blockName) {
3054             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3055             if (block == null) {
3056                 throw new IllegalArgumentException("Not a valid block name: " + blockName);
3057             }
3058             return block;
3059         }
3060     }
3061 
3062 
3063     /**
3064      * A family of character subsets representing the character scripts
3065      * defined in the <a href="http://www.unicode.org/reports/tr24/">
3066      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3067      * character is assigned to a single Unicode script, either a specific
3068      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3069      * one of the following three special values,
3070      * {@link Character.UnicodeScript#INHERITED Inherited},
3071      * {@link Character.UnicodeScript#COMMON Common} or
3072      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3073      *
3074      * @since 1.7
3075      */
3076     public static enum UnicodeScript {
3077         /**
3078          * Unicode script "Common".
3079          */
3080         COMMON,
3081 
3082         /**
3083          * Unicode script "Latin".
3084          */
3085         LATIN,
3086 
3087         /**
3088          * Unicode script "Greek".
3089          */
3090         GREEK,
3091 
3092         /**
3093          * Unicode script "Cyrillic".
3094          */
3095         CYRILLIC,
3096 
3097         /**
3098          * Unicode script "Armenian".
3099          */
3100         ARMENIAN,
3101 
3102         /**
3103          * Unicode script "Hebrew".
3104          */
3105         HEBREW,
3106 
3107         /**
3108          * Unicode script "Arabic".
3109          */
3110         ARABIC,
3111 
3112         /**
3113          * Unicode script "Syriac".
3114          */
3115         SYRIAC,
3116 
3117         /**
3118          * Unicode script "Thaana".
3119          */
3120         THAANA,
3121 
3122         /**
3123          * Unicode script "Devanagari".
3124          */
3125         DEVANAGARI,
3126 
3127         /**
3128          * Unicode script "Bengali".
3129          */
3130         BENGALI,
3131 
3132         /**
3133          * Unicode script "Gurmukhi".
3134          */
3135         GURMUKHI,
3136 
3137         /**
3138          * Unicode script "Gujarati".
3139          */
3140         GUJARATI,
3141 
3142         /**
3143          * Unicode script "Oriya".
3144          */
3145         ORIYA,
3146 
3147         /**
3148          * Unicode script "Tamil".
3149          */
3150         TAMIL,
3151 
3152         /**
3153          * Unicode script "Telugu".
3154          */
3155         TELUGU,
3156 
3157         /**
3158          * Unicode script "Kannada".
3159          */
3160         KANNADA,
3161 
3162         /**
3163          * Unicode script "Malayalam".
3164          */
3165         MALAYALAM,
3166 
3167         /**
3168          * Unicode script "Sinhala".
3169          */
3170         SINHALA,
3171 
3172         /**
3173          * Unicode script "Thai".
3174          */
3175         THAI,
3176 
3177         /**
3178          * Unicode script "Lao".
3179          */
3180         LAO,
3181 
3182         /**
3183          * Unicode script "Tibetan".
3184          */
3185         TIBETAN,
3186 
3187         /**
3188          * Unicode script "Myanmar".
3189          */
3190         MYANMAR,
3191 
3192         /**
3193          * Unicode script "Georgian".
3194          */
3195         GEORGIAN,
3196 
3197         /**
3198          * Unicode script "Hangul".
3199          */
3200         HANGUL,
3201 
3202         /**
3203          * Unicode script "Ethiopic".
3204          */
3205         ETHIOPIC,
3206 
3207         /**
3208          * Unicode script "Cherokee".
3209          */
3210         CHEROKEE,
3211 
3212         /**
3213          * Unicode script "Canadian_Aboriginal".
3214          */
3215         CANADIAN_ABORIGINAL,
3216 
3217         /**
3218          * Unicode script "Ogham".
3219          */
3220         OGHAM,
3221 
3222         /**
3223          * Unicode script "Runic".
3224          */
3225         RUNIC,
3226 
3227         /**
3228          * Unicode script "Khmer".
3229          */
3230         KHMER,
3231 
3232         /**
3233          * Unicode script "Mongolian".
3234          */
3235         MONGOLIAN,
3236 
3237         /**
3238          * Unicode script "Hiragana".
3239          */
3240         HIRAGANA,
3241 
3242         /**
3243          * Unicode script "Katakana".
3244          */
3245         KATAKANA,
3246 
3247         /**
3248          * Unicode script "Bopomofo".
3249          */
3250         BOPOMOFO,
3251 
3252         /**
3253          * Unicode script "Han".
3254          */
3255         HAN,
3256 
3257         /**
3258          * Unicode script "Yi".
3259          */
3260         YI,
3261 
3262         /**
3263          * Unicode script "Old_Italic".
3264          */
3265         OLD_ITALIC,
3266 
3267         /**
3268          * Unicode script "Gothic".
3269          */
3270         GOTHIC,
3271 
3272         /**
3273          * Unicode script "Deseret".
3274          */
3275         DESERET,
3276 
3277         /**
3278          * Unicode script "Inherited".
3279          */
3280         INHERITED,
3281 
3282         /**
3283          * Unicode script "Tagalog".
3284          */
3285         TAGALOG,
3286 
3287         /**
3288          * Unicode script "Hanunoo".
3289          */
3290         HANUNOO,
3291 
3292         /**
3293          * Unicode script "Buhid".
3294          */
3295         BUHID,
3296 
3297         /**
3298          * Unicode script "Tagbanwa".
3299          */
3300         TAGBANWA,
3301 
3302         /**
3303          * Unicode script "Limbu".
3304          */
3305         LIMBU,
3306 
3307         /**
3308          * Unicode script "Tai_Le".
3309          */
3310         TAI_LE,
3311 
3312         /**
3313          * Unicode script "Linear_B".
3314          */
3315         LINEAR_B,
3316 
3317         /**
3318          * Unicode script "Ugaritic".
3319          */
3320         UGARITIC,
3321 
3322         /**
3323          * Unicode script "Shavian".
3324          */
3325         SHAVIAN,
3326 
3327         /**
3328          * Unicode script "Osmanya".
3329          */
3330         OSMANYA,
3331 
3332         /**
3333          * Unicode script "Cypriot".
3334          */
3335         CYPRIOT,
3336 
3337         /**
3338          * Unicode script "Braille".
3339          */
3340         BRAILLE,
3341 
3342         /**
3343          * Unicode script "Buginese".
3344          */
3345         BUGINESE,
3346 
3347         /**
3348          * Unicode script "Coptic".
3349          */
3350         COPTIC,
3351 
3352         /**
3353          * Unicode script "New_Tai_Lue".
3354          */
3355         NEW_TAI_LUE,
3356 
3357         /**
3358          * Unicode script "Glagolitic".
3359          */
3360         GLAGOLITIC,
3361 
3362         /**
3363          * Unicode script "Tifinagh".
3364          */
3365         TIFINAGH,
3366 
3367         /**
3368          * Unicode script "Syloti_Nagri".
3369          */
3370         SYLOTI_NAGRI,
3371 
3372         /**
3373          * Unicode script "Old_Persian".
3374          */
3375         OLD_PERSIAN,
3376 
3377         /**
3378          * Unicode script "Kharoshthi".
3379          */
3380         KHAROSHTHI,
3381 
3382         /**
3383          * Unicode script "Balinese".
3384          */
3385         BALINESE,
3386 
3387         /**
3388          * Unicode script "Cuneiform".
3389          */
3390         CUNEIFORM,
3391 
3392         /**
3393          * Unicode script "Phoenician".
3394          */
3395         PHOENICIAN,
3396 
3397         /**
3398          * Unicode script "Phags_Pa".
3399          */
3400         PHAGS_PA,
3401 
3402         /**
3403          * Unicode script "Nko".
3404          */
3405         NKO,
3406 
3407         /**
3408          * Unicode script "Sundanese".
3409          */
3410         SUNDANESE,
3411 
3412         /**
3413          * Unicode script "Batak".
3414          */
3415         BATAK,
3416 
3417         /**
3418          * Unicode script "Lepcha".
3419          */
3420         LEPCHA,
3421 
3422         /**
3423          * Unicode script "Ol_Chiki".
3424          */
3425         OL_CHIKI,
3426 
3427         /**
3428          * Unicode script "Vai".
3429          */
3430         VAI,
3431 
3432         /**
3433          * Unicode script "Saurashtra".
3434          */
3435         SAURASHTRA,
3436 
3437         /**
3438          * Unicode script "Kayah_Li".
3439          */
3440         KAYAH_LI,
3441 
3442         /**
3443          * Unicode script "Rejang".
3444          */
3445         REJANG,
3446 
3447         /**
3448          * Unicode script "Lycian".
3449          */
3450         LYCIAN,
3451 
3452         /**
3453          * Unicode script "Carian".
3454          */
3455         CARIAN,
3456 
3457         /**
3458          * Unicode script "Lydian".
3459          */
3460         LYDIAN,
3461 
3462         /**
3463          * Unicode script "Cham".
3464          */
3465         CHAM,
3466 
3467         /**
3468          * Unicode script "Tai_Tham".
3469          */
3470         TAI_THAM,
3471 
3472         /**
3473          * Unicode script "Tai_Viet".
3474          */
3475         TAI_VIET,
3476 
3477         /**
3478          * Unicode script "Avestan".
3479          */
3480         AVESTAN,
3481 
3482         /**
3483          * Unicode script "Egyptian_Hieroglyphs".
3484          */
3485         EGYPTIAN_HIEROGLYPHS,
3486 
3487         /**
3488          * Unicode script "Samaritan".
3489          */
3490         SAMARITAN,
3491 
3492         /**
3493          * Unicode script "Mandaic".
3494          */
3495         MANDAIC,
3496 
3497         /**
3498          * Unicode script "Lisu".
3499          */
3500         LISU,
3501 
3502         /**
3503          * Unicode script "Bamum".
3504          */
3505         BAMUM,
3506 
3507         /**
3508          * Unicode script "Javanese".
3509          */
3510         JAVANESE,
3511 
3512         /**
3513          * Unicode script "Meetei_Mayek".
3514          */
3515         MEETEI_MAYEK,
3516 
3517         /**
3518          * Unicode script "Imperial_Aramaic".
3519          */
3520         IMPERIAL_ARAMAIC,
3521 
3522         /**
3523          * Unicode script "Old_South_Arabian".
3524          */
3525         OLD_SOUTH_ARABIAN,
3526 
3527         /**
3528          * Unicode script "Inscriptional_Parthian".
3529          */
3530         INSCRIPTIONAL_PARTHIAN,
3531 
3532         /**
3533          * Unicode script "Inscriptional_Pahlavi".
3534          */
3535         INSCRIPTIONAL_PAHLAVI,
3536 
3537         /**
3538          * Unicode script "Old_Turkic".
3539          */
3540         OLD_TURKIC,
3541 
3542         /**
3543          * Unicode script "Brahmi".
3544          */
3545         BRAHMI,
3546 
3547         /**
3548          * Unicode script "Kaithi".
3549          */
3550         KAITHI,
3551 
3552         /**
3553          * Unicode script "Unknown".
3554          */
3555         UNKNOWN;
3556 
3557         private static final int[] scriptStarts = {
                 // First code point of each script range, listed in ascending order.
                 // The trailing comment on each entry gives the range and the script
                 // name; the parallel scripts table lists those same script names in
                 // the same order, one per entry.
                 // NOTE(review): presumably a code point is resolved by locating the
                 // greatest start value <= the code point and reading the script at
                 // that index; the lookup code is outside this excerpt -- confirm.
                 // The range ends in the comments are informational only: the
                 // KANNADA (..0CF0) and BAMUM (..16A38) entries stop short of the
                 // next start value.
3558             0x0000,   // 0000..0040; COMMON
3559             0x0041,   // 0041..005A; LATIN
3560             0x005B,   // 005B..0060; COMMON
3561             0x0061,   // 0061..007A; LATIN
3562             0x007B,   // 007B..00A9; COMMON
3563             0x00AA,   // 00AA..00AA; LATIN
3564             0x00AB,   // 00AB..00B9; COMMON
3565             0x00BA,   // 00BA..00BA; LATIN
3566             0x00BB,   // 00BB..00BF; COMMON
3567             0x00C0,   // 00C0..00D6; LATIN
3568             0x00D7,   // 00D7..00D7; COMMON
3569             0x00D8,   // 00D8..00F6; LATIN
3570             0x00F7,   // 00F7..00F7; COMMON
3571             0x00F8,   // 00F8..02B8; LATIN
3572             0x02B9,   // 02B9..02DF; COMMON
3573             0x02E0,   // 02E0..02E4; LATIN
3574             0x02E5,   // 02E5..02E9; COMMON
3575             0x02EA,   // 02EA..02EB; BOPOMOFO
3576             0x02EC,   // 02EC..02FF; COMMON
3577             0x0300,   // 0300..036F; INHERITED
3578             0x0370,   // 0370..0373; GREEK
3579             0x0374,   // 0374..0374; COMMON
3580             0x0375,   // 0375..037D; GREEK
3581             0x037E,   // 037E..0383; COMMON
3582             0x0384,   // 0384..0384; GREEK
3583             0x0385,   // 0385..0385; COMMON
3584             0x0386,   // 0386..0386; GREEK
3585             0x0387,   // 0387..0387; COMMON
3586             0x0388,   // 0388..03E1; GREEK
3587             0x03E2,   // 03E2..03EF; COPTIC
3588             0x03F0,   // 03F0..03FF; GREEK
3589             0x0400,   // 0400..0484; CYRILLIC
3590             0x0485,   // 0485..0486; INHERITED
3591             0x0487,   // 0487..0530; CYRILLIC
3592             0x0531,   // 0531..0588; ARMENIAN
3593             0x0589,   // 0589..0589; COMMON
3594             0x058A,   // 058A..0590; ARMENIAN
3595             0x0591,   // 0591..05FF; HEBREW
3596             0x0600,   // 0600..060B; ARABIC
3597             0x060C,   // 060C..060C; COMMON
3598             0x060D,   // 060D..061A; ARABIC
3599             0x061B,   // 061B..061D; COMMON
3600             0x061E,   // 061E..061E; ARABIC
3601             0x061F,   // 061F..061F; COMMON
3602             0x0620,   // 0620..063F; ARABIC
3603             0x0640,   // 0640..0640; COMMON
3604             0x0641,   // 0641..064A; ARABIC
3605             0x064B,   // 064B..0655; INHERITED
3606             0x0656,   // 0656..065E; ARABIC
3607             0x065F,   // 065F..065F; INHERITED
3608             0x0660,   // 0660..0669; COMMON
3609             0x066A,   // 066A..066F; ARABIC
3610             0x0670,   // 0670..0670; INHERITED
3611             0x0671,   // 0671..06DC; ARABIC
3612             0x06DD,   // 06DD..06DD; COMMON
3613             0x06DE,   // 06DE..06FF; ARABIC
3614             0x0700,   // 0700..074F; SYRIAC
3615             0x0750,   // 0750..077F; ARABIC
3616             0x0780,   // 0780..07BF; THAANA
3617             0x07C0,   // 07C0..07FF; NKO
3618             0x0800,   // 0800..083F; SAMARITAN
3619             0x0840,   // 0840..08FF; MANDAIC
3620             0x0900,   // 0900..0950; DEVANAGARI
3621             0x0951,   // 0951..0952; INHERITED
3622             0x0953,   // 0953..0963; DEVANAGARI
3623             0x0964,   // 0964..0965; COMMON
3624             0x0966,   // 0966..096F; DEVANAGARI
3625             0x0970,   // 0970..0970; COMMON
3626             0x0971,   // 0971..0980; DEVANAGARI
3627             0x0981,   // 0981..0A00; BENGALI
3628             0x0A01,   // 0A01..0A80; GURMUKHI
3629             0x0A81,   // 0A81..0B00; GUJARATI
3630             0x0B01,   // 0B01..0B81; ORIYA
3631             0x0B82,   // 0B82..0C00; TAMIL
3632             0x0C01,   // 0C01..0C81; TELUGU
3633             0x0C82,   // 0C82..0CF0; KANNADA
3634             0x0D02,   // 0D02..0D81; MALAYALAM
3635             0x0D82,   // 0D82..0E00; SINHALA
3636             0x0E01,   // 0E01..0E3E; THAI
3637             0x0E3F,   // 0E3F..0E3F; COMMON
3638             0x0E40,   // 0E40..0E80; THAI
3639             0x0E81,   // 0E81..0EFF; LAO
3640             0x0F00,   // 0F00..0FD4; TIBETAN
3641             0x0FD5,   // 0FD5..0FD8; COMMON
3642             0x0FD9,   // 0FD9..0FFF; TIBETAN
3643             0x1000,   // 1000..109F; MYANMAR
3644             0x10A0,   // 10A0..10FA; GEORGIAN
3645             0x10FB,   // 10FB..10FB; COMMON
3646             0x10FC,   // 10FC..10FF; GEORGIAN
3647             0x1100,   // 1100..11FF; HANGUL
3648             0x1200,   // 1200..139F; ETHIOPIC
3649             0x13A0,   // 13A0..13FF; CHEROKEE
3650             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
3651             0x1680,   // 1680..169F; OGHAM
3652             0x16A0,   // 16A0..16EA; RUNIC
3653             0x16EB,   // 16EB..16ED; COMMON
3654             0x16EE,   // 16EE..16FF; RUNIC
3655             0x1700,   // 1700..171F; TAGALOG
3656             0x1720,   // 1720..1734; HANUNOO
3657             0x1735,   // 1735..173F; COMMON
3658             0x1740,   // 1740..175F; BUHID
3659             0x1760,   // 1760..177F; TAGBANWA
3660             0x1780,   // 1780..17FF; KHMER
3661             0x1800,   // 1800..1801; MONGOLIAN
3662             0x1802,   // 1802..1803; COMMON
3663             0x1804,   // 1804..1804; MONGOLIAN
3664             0x1805,   // 1805..1805; COMMON
3665             0x1806,   // 1806..18AF; MONGOLIAN
3666             0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
3667             0x1900,   // 1900..194F; LIMBU
3668             0x1950,   // 1950..197F; TAI_LE
3669             0x1980,   // 1980..19DF; NEW_TAI_LUE
3670             0x19E0,   // 19E0..19FF; KHMER
3671             0x1A00,   // 1A00..1A1F; BUGINESE
3672             0x1A20,   // 1A20..1AFF; TAI_THAM
3673             0x1B00,   // 1B00..1B7F; BALINESE
3674             0x1B80,   // 1B80..1BBF; SUNDANESE
3675             0x1BC0,   // 1BC0..1BFF; BATAK
3676             0x1C00,   // 1C00..1C4F; LEPCHA
3677             0x1C50,   // 1C50..1CCF; OL_CHIKI
3678             0x1CD0,   // 1CD0..1CD2; INHERITED
3679             0x1CD3,   // 1CD3..1CD3; COMMON
3680             0x1CD4,   // 1CD4..1CE0; INHERITED
3681             0x1CE1,   // 1CE1..1CE1; COMMON
3682             0x1CE2,   // 1CE2..1CE8; INHERITED
3683             0x1CE9,   // 1CE9..1CEC; COMMON
3684             0x1CED,   // 1CED..1CED; INHERITED
3685             0x1CEE,   // 1CEE..1CFF; COMMON
3686             0x1D00,   // 1D00..1D25; LATIN
3687             0x1D26,   // 1D26..1D2A; GREEK
3688             0x1D2B,   // 1D2B..1D2B; CYRILLIC
3689             0x1D2C,   // 1D2C..1D5C; LATIN
3690             0x1D5D,   // 1D5D..1D61; GREEK
3691             0x1D62,   // 1D62..1D65; LATIN
3692             0x1D66,   // 1D66..1D6A; GREEK
3693             0x1D6B,   // 1D6B..1D77; LATIN
3694             0x1D78,   // 1D78..1D78; CYRILLIC
3695             0x1D79,   // 1D79..1DBE; LATIN
3696             0x1DBF,   // 1DBF..1DBF; GREEK
3697             0x1DC0,   // 1DC0..1DFF; INHERITED
3698             0x1E00,   // 1E00..1EFF; LATIN
3699             0x1F00,   // 1F00..1FFF; GREEK
3700             0x2000,   // 2000..200B; COMMON
3701             0x200C,   // 200C..200D; INHERITED
3702             0x200E,   // 200E..2070; COMMON
3703             0x2071,   // 2071..2073; LATIN
3704             0x2074,   // 2074..207E; COMMON
3705             0x207F,   // 207F..207F; LATIN
3706             0x2080,   // 2080..208F; COMMON
3707             0x2090,   // 2090..209F; LATIN
3708             0x20A0,   // 20A0..20CF; COMMON
3709             0x20D0,   // 20D0..20FF; INHERITED
3710             0x2100,   // 2100..2125; COMMON
3711             0x2126,   // 2126..2126; GREEK
3712             0x2127,   // 2127..2129; COMMON
3713             0x212A,   // 212A..212B; LATIN
3714             0x212C,   // 212C..2131; COMMON
3715             0x2132,   // 2132..2132; LATIN
3716             0x2133,   // 2133..214D; COMMON
3717             0x214E,   // 214E..214E; LATIN
3718             0x214F,   // 214F..215F; COMMON
3719             0x2160,   // 2160..2188; LATIN
3720             0x2189,   // 2189..27FF; COMMON
3721             0x2800,   // 2800..28FF; BRAILLE
3722             0x2900,   // 2900..2BFF; COMMON
3723             0x2C00,   // 2C00..2C5F; GLAGOLITIC
3724             0x2C60,   // 2C60..2C7F; LATIN
3725             0x2C80,   // 2C80..2CFF; COPTIC
3726             0x2D00,   // 2D00..2D2F; GEORGIAN
3727             0x2D30,   // 2D30..2D7F; TIFINAGH
3728             0x2D80,   // 2D80..2DDF; ETHIOPIC
3729             0x2DE0,   // 2DE0..2DFF; CYRILLIC
3730             0x2E00,   // 2E00..2E7F; COMMON
3731             0x2E80,   // 2E80..2FEF; HAN
3732             0x2FF0,   // 2FF0..3004; COMMON
3733             0x3005,   // 3005..3005; HAN
3734             0x3006,   // 3006..3006; COMMON
3735             0x3007,   // 3007..3007; HAN
3736             0x3008,   // 3008..3020; COMMON
3737             0x3021,   // 3021..3029; HAN
3738             0x302A,   // 302A..302D; INHERITED
3739             0x302E,   // 302E..302F; HANGUL
3740             0x3030,   // 3030..3037; COMMON
3741             0x3038,   // 3038..303B; HAN
3742             0x303C,   // 303C..3040; COMMON
3743             0x3041,   // 3041..3098; HIRAGANA
3744             0x3099,   // 3099..309A; INHERITED
3745             0x309B,   // 309B..309C; COMMON
3746             0x309D,   // 309D..309F; HIRAGANA
3747             0x30A0,   // 30A0..30A0; COMMON
3748             0x30A1,   // 30A1..30FA; KATAKANA
3749             0x30FB,   // 30FB..30FC; COMMON
3750             0x30FD,   // 30FD..3104; KATAKANA
3751             0x3105,   // 3105..3130; BOPOMOFO
3752             0x3131,   // 3131..318F; HANGUL
3753             0x3190,   // 3190..319F; COMMON
3754             0x31A0,   // 31A0..31BF; BOPOMOFO
3755             0x31C0,   // 31C0..31EF; COMMON
3756             0x31F0,   // 31F0..31FF; KATAKANA
3757             0x3200,   // 3200..321F; HANGUL
3758             0x3220,   // 3220..325F; COMMON
3759             0x3260,   // 3260..327E; HANGUL
3760             0x327F,   // 327F..32CF; COMMON
3761             0x32D0,   // 32D0..3357; KATAKANA
3762             0x3358,   // 3358..33FF; COMMON
3763             0x3400,   // 3400..4DBF; HAN
3764             0x4DC0,   // 4DC0..4DFF; COMMON
3765             0x4E00,   // 4E00..9FFF; HAN
3766             0xA000,   // A000..A4CF; YI
3767             0xA4D0,   // A4D0..A4FF; LISU
3768             0xA500,   // A500..A63F; VAI
3769             0xA640,   // A640..A69F; CYRILLIC
3770             0xA6A0,   // A6A0..A6FF; BAMUM
3771             0xA700,   // A700..A721; COMMON
3772             0xA722,   // A722..A787; LATIN
3773             0xA788,   // A788..A78A; COMMON
3774             0xA78B,   // A78B..A7FF; LATIN
3775             0xA800,   // A800..A82F; SYLOTI_NAGRI
3776             0xA830,   // A830..A83F; COMMON
3777             0xA840,   // A840..A87F; PHAGS_PA
3778             0xA880,   // A880..A8DF; SAURASHTRA
3779             0xA8E0,   // A8E0..A8FF; DEVANAGARI
3780             0xA900,   // A900..A92F; KAYAH_LI
3781             0xA930,   // A930..A95F; REJANG
3782             0xA960,   // A960..A97F; HANGUL
3783             0xA980,   // A980..A9FF; JAVANESE
3784             0xAA00,   // AA00..AA5F; CHAM
3785             0xAA60,   // AA60..AA7F; MYANMAR
3786             0xAA80,   // AA80..AB00; TAI_VIET
3787             0xAB01,   // AB01..ABBF; ETHIOPIC
3788             0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
3789             0xAC00,   // AC00..D7FB; HANGUL
3790             0xD7FC,   // D7FC..F8FF; UNKNOWN
3791             0xF900,   // F900..FAFF; HAN
3792             0xFB00,   // FB00..FB12; LATIN
3793             0xFB13,   // FB13..FB1C; ARMENIAN
3794             0xFB1D,   // FB1D..FB4F; HEBREW
3795             0xFB50,   // FB50..FD3D; ARABIC
3796             0xFD3E,   // FD3E..FD4F; COMMON
3797             0xFD50,   // FD50..FDFC; ARABIC
3798             0xFDFD,   // FDFD..FDFF; COMMON
3799             0xFE00,   // FE00..FE0F; INHERITED
3800             0xFE10,   // FE10..FE1F; COMMON
3801             0xFE20,   // FE20..FE2F; INHERITED
3802             0xFE30,   // FE30..FE6F; COMMON
3803             0xFE70,   // FE70..FEFE; ARABIC
3804             0xFEFF,   // FEFF..FF20; COMMON
3805             0xFF21,   // FF21..FF3A; LATIN
3806             0xFF3B,   // FF3B..FF40; COMMON
3807             0xFF41,   // FF41..FF5A; LATIN
3808             0xFF5B,   // FF5B..FF65; COMMON
3809             0xFF66,   // FF66..FF6F; KATAKANA
3810             0xFF70,   // FF70..FF70; COMMON
3811             0xFF71,   // FF71..FF9D; KATAKANA
3812             0xFF9E,   // FF9E..FF9F; COMMON
3813             0xFFA0,   // FFA0..FFDF; HANGUL
3814             0xFFE0,   // FFE0..FFFF; COMMON
3815             0x10000,  // 10000..100FF; LINEAR_B
3816             0x10100,  // 10100..1013F; COMMON
3817             0x10140,  // 10140..1018F; GREEK
3818             0x10190,  // 10190..101FC; COMMON
3819             0x101FD,  // 101FD..1027F; INHERITED
3820             0x10280,  // 10280..1029F; LYCIAN
3821             0x102A0,  // 102A0..102FF; CARIAN
3822             0x10300,  // 10300..1032F; OLD_ITALIC
3823             0x10330,  // 10330..1037F; GOTHIC
3824             0x10380,  // 10380..1039F; UGARITIC
3825             0x103A0,  // 103A0..103FF; OLD_PERSIAN
3826             0x10400,  // 10400..1044F; DESERET
3827             0x10450,  // 10450..1047F; SHAVIAN
3828             0x10480,  // 10480..107FF; OSMANYA
3829             0x10800,  // 10800..1083F; CYPRIOT
3830             0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
3831             0x10900,  // 10900..1091F; PHOENICIAN
3832             0x10920,  // 10920..109FF; LYDIAN
3833             0x10A00,  // 10A00..10A5F; KHAROSHTHI
3834             0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
3835             0x10B00,  // 10B00..10B3F; AVESTAN
3836             0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
3837             0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
3838             0x10C00,  // 10C00..10E5F; OLD_TURKIC
3839             0x10E60,  // 10E60..10FFF; ARABIC
3840             0x11000,  // 11000..1107F; BRAHMI
3841             0x11080,  // 11080..11FFF; KAITHI
3842             0x12000,  // 12000..12FFF; CUNEIFORM
3843             0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
3844             0x16800,  // 16800..16A38; BAMUM
3845             0x1B000,  // 1B000..1B000; KATAKANA
3846             0x1B001,  // 1B001..1CFFF; HIRAGANA
3847             0x1D000,  // 1D000..1D166; COMMON
3848             0x1D167,  // 1D167..1D169; INHERITED
3849             0x1D16A,  // 1D16A..1D17A; COMMON
3850             0x1D17B,  // 1D17B..1D182; INHERITED
3851             0x1D183,  // 1D183..1D184; COMMON
3852             0x1D185,  // 1D185..1D18B; INHERITED
3853             0x1D18C,  // 1D18C..1D1A9; COMMON
3854             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
3855             0x1D1AE,  // 1D1AE..1D1FF; COMMON
3856             0x1D200,  // 1D200..1D2FF; GREEK
3857             0x1D300,  // 1D300..1F1FF; COMMON
3858             0x1F200,  // 1F200..1F200; HIRAGANA
3859             0x1F201,  // 1F201..1FFFF; COMMON
3860             0x20000,  // 20000..E0000; HAN
3861             0xE0001,  // E0001..E00FF; COMMON
3862             0xE0100,  // E0100..E01EF; INHERITED
3863             0xE01F0   // E01F0..10FFFF; UNKNOWN
3864 
3865         };
3866 
3867         private static final UnicodeScript[] scripts = {
                 // Script for each range in scriptStarts: the entry at a given
                 // position here is the script named in the trailing comment of the
                 // scriptStarts entry at that same position. The pairing is purely
                 // positional and both tables have the same number of entries, so
                 // any insertion or removal has to be mirrored in the other table.
                 // NOTE(review): presumably indexed with the position found in
                 // scriptStarts; the lookup code is outside this excerpt -- confirm.
3868             COMMON,
3869             LATIN,
3870             COMMON,
3871             LATIN,
3872             COMMON,
3873             LATIN,
3874             COMMON,
3875             LATIN,
3876             COMMON,
3877             LATIN,
3878             COMMON,
3879             LATIN,
3880             COMMON,
3881             LATIN,
3882             COMMON,
3883             LATIN,
3884             COMMON,
3885             BOPOMOFO,
3886             COMMON,
3887             INHERITED,
3888             GREEK,
3889             COMMON,
3890             GREEK,
3891             COMMON,
3892             GREEK,
3893             COMMON,
3894             GREEK,
3895             COMMON,
3896             GREEK,
3897             COPTIC,
3898             GREEK,
3899             CYRILLIC,
3900             INHERITED,
3901             CYRILLIC,
3902             ARMENIAN,
3903             COMMON,
3904             ARMENIAN,
3905             HEBREW,
3906             ARABIC,
3907             COMMON,
3908             ARABIC,
3909             COMMON,
3910             ARABIC,
3911             COMMON,
3912             ARABIC,
3913             COMMON,
3914             ARABIC,
3915             INHERITED,
3916             ARABIC,
3917             INHERITED,
3918             COMMON,
3919             ARABIC,
3920             INHERITED,
3921             ARABIC,
3922             COMMON,
3923             ARABIC,
3924             SYRIAC,
3925             ARABIC,
3926             THAANA,
3927             NKO,
3928             SAMARITAN,
3929             MANDAIC,
3930             DEVANAGARI,
3931             INHERITED,
3932             DEVANAGARI,
3933             COMMON,
3934             DEVANAGARI,
3935             COMMON,
3936             DEVANAGARI,
3937             BENGALI,
3938             GURMUKHI,
3939             GUJARATI,
3940             ORIYA,
3941             TAMIL,
3942             TELUGU,
3943             KANNADA,
3944             MALAYALAM,
3945             SINHALA,
3946             THAI,
3947             COMMON,
3948             THAI,
3949             LAO,
3950             TIBETAN,
3951             COMMON,
3952             TIBETAN,
3953             MYANMAR,
3954             GEORGIAN,
3955             COMMON,
3956             GEORGIAN,
3957             HANGUL,
3958             ETHIOPIC,
3959             CHEROKEE,
3960             CANADIAN_ABORIGINAL,
3961             OGHAM,
3962             RUNIC,
3963             COMMON,
3964             RUNIC,
3965             TAGALOG,
3966             HANUNOO,
3967             COMMON,
3968             BUHID,
3969             TAGBANWA,
3970             KHMER,
3971             MONGOLIAN,
3972             COMMON,
3973             MONGOLIAN,
3974             COMMON,
3975             MONGOLIAN,
3976             CANADIAN_ABORIGINAL,
3977             LIMBU,
3978             TAI_LE,
3979             NEW_TAI_LUE,
3980             KHMER,
3981             BUGINESE,
3982             TAI_THAM,
3983             BALINESE,
3984             SUNDANESE,
3985             BATAK,
3986             LEPCHA,
3987             OL_CHIKI,
3988             INHERITED,
3989             COMMON,
3990             INHERITED,
3991             COMMON,
3992             INHERITED,
3993             COMMON,
3994             INHERITED,
3995             COMMON,
3996             LATIN,
3997             GREEK,
3998             CYRILLIC,
3999             LATIN,
4000             GREEK,
4001             LATIN,
4002             GREEK,
4003             LATIN,
4004             CYRILLIC,
4005             LATIN,
4006             GREEK,
4007             INHERITED,
4008             LATIN,
4009             GREEK,
4010             COMMON,
4011             INHERITED,
4012             COMMON,
4013             LATIN,
4014             COMMON,
4015             LATIN,
4016             COMMON,
4017             LATIN,
4018             COMMON,
4019             INHERITED,
4020             COMMON,
4021             GREEK,
4022             COMMON,
4023             LATIN,
4024             COMMON,
4025             LATIN,
4026             COMMON,
4027             LATIN,
4028             COMMON,
4029             LATIN,
4030             COMMON,
4031             BRAILLE,
4032             COMMON,
4033             GLAGOLITIC,
4034             LATIN,
4035             COPTIC,
4036             GEORGIAN,
4037             TIFINAGH,
4038             ETHIOPIC,
4039             CYRILLIC,
4040             COMMON,
4041             HAN,
4042             COMMON,
4043             HAN,
4044             COMMON,
4045             HAN,
4046             COMMON,
4047             HAN,
4048             INHERITED,
4049             HANGUL,
4050             COMMON,
4051             HAN,
4052             COMMON,
4053             HIRAGANA,
4054             INHERITED,
4055             COMMON,
4056             HIRAGANA,
4057             COMMON,
4058             KATAKANA,
4059             COMMON,
4060             KATAKANA,
4061             BOPOMOFO,
4062             HANGUL,
4063             COMMON,
4064             BOPOMOFO,
4065             COMMON,
4066             KATAKANA,
4067             HANGUL,
4068             COMMON,
4069             HANGUL,
4070             COMMON,
4071             KATAKANA,
4072             COMMON,
4073             HAN,
4074             COMMON,
4075             HAN,
4076             YI,
4077             LISU,
4078             VAI,
4079             CYRILLIC,
4080             BAMUM,
4081             COMMON,
4082             LATIN,
4083             COMMON,
4084             LATIN,
4085             SYLOTI_NAGRI,
4086             COMMON,
4087             PHAGS_PA,
4088             SAURASHTRA,
4089             DEVANAGARI,
4090             KAYAH_LI,
4091             REJANG,
4092             HANGUL,
4093             JAVANESE,
4094             CHAM,
4095             MYANMAR,
4096             TAI_VIET,
4097             ETHIOPIC,
4098             MEETEI_MAYEK,
4099             HANGUL,
4100             UNKNOWN,
4101             HAN,
4102             LATIN,
4103             ARMENIAN,
4104             HEBREW,
4105             ARABIC,
4106             COMMON,
4107             ARABIC,
4108             COMMON,
4109             INHERITED,
4110             COMMON,
4111             INHERITED,
4112             COMMON,
4113             ARABIC,
4114             COMMON,
4115             LATIN,
4116             COMMON,
4117             LATIN,
4118             COMMON,
4119             KATAKANA,
4120             COMMON,
4121             KATAKANA,
4122             COMMON,
4123             HANGUL,
4124             COMMON,
4125             LINEAR_B,
4126             COMMON,
4127             GREEK,
4128             COMMON,
4129             INHERITED,
4130             LYCIAN,
4131             CARIAN,
4132             OLD_ITALIC,
4133             GOTHIC,
4134             UGARITIC,
4135             OLD_PERSIAN,
4136             DESERET,
4137             SHAVIAN,
4138             OSMANYA,
4139             CYPRIOT,
4140             IMPERIAL_ARAMAIC,
4141             PHOENICIAN,
4142             LYDIAN,
4143             KHAROSHTHI,
4144             OLD_SOUTH_ARABIAN,
4145             AVESTAN,
4146             INSCRIPTIONAL_PARTHIAN,
4147             INSCRIPTIONAL_PAHLAVI,
4148             OLD_TURKIC,
4149             ARABIC,
4150             BRAHMI,
4151             KAITHI,
4152             CUNEIFORM,
4153             EGYPTIAN_HIEROGLYPHS,
4154             BAMUM,
4155             KATAKANA,
4156             HIRAGANA,
4157             COMMON,
4158             INHERITED,
4159             COMMON,
4160             INHERITED,
4161             COMMON,
4162             INHERITED,
4163             COMMON,
4164             INHERITED,
4165             COMMON,
4166             GREEK,
4167             COMMON,
4168             HIRAGANA,
4169             COMMON,
4170             HAN,
4171             COMMON,
4172             INHERITED,
4173             UNKNOWN
4174         };
4175 
4176         private static HashMap<String, Character.UnicodeScript> aliases;
4177         static {
4178             aliases = new HashMap<>(128);
4179             aliases.put("ARAB", ARABIC);
4180             aliases.put("ARMI", IMPERIAL_ARAMAIC);
4181             aliases.put("ARMN", ARMENIAN);
4182             aliases.put("AVST", AVESTAN);
4183             aliases.put("BALI", BALINESE);
4184             aliases.put("BAMU", BAMUM);
4185             aliases.put("BATK", BATAK);
4186             aliases.put("BENG", BENGALI);
4187             aliases.put("BOPO", BOPOMOFO);
4188             aliases.put("BRAI", BRAILLE);
4189             aliases.put("BRAH", BRAHMI);
4190             aliases.put("BUGI", BUGINESE);
4191             aliases.put("BUHD", BUHID);
4192             aliases.put("CANS", CANADIAN_ABORIGINAL);
4193             aliases.put("CARI", CARIAN);
4194             aliases.put("CHAM", CHAM);
4195             aliases.put("CHER", CHEROKEE);
4196             aliases.put("COPT", COPTIC);
4197             aliases.put("CPRT", CYPRIOT);
4198             aliases.put("CYRL", CYRILLIC);
4199             aliases.put("DEVA", DEVANAGARI);
4200             aliases.put("DSRT", DESERET);
4201             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4202             aliases.put("ETHI", ETHIOPIC);
4203             aliases.put("GEOR", GEORGIAN);
4204             aliases.put("GLAG", GLAGOLITIC);
4205             aliases.put("GOTH", GOTHIC);
4206             aliases.put("GREK", GREEK);
4207             aliases.put("GUJR", GUJARATI);
4208             aliases.put("GURU", GURMUKHI);
4209             aliases.put("HANG", HANGUL);
4210             aliases.put("HANI", HAN);
4211             aliases.put("HANO", HANUNOO);
4212             aliases.put("HEBR", HEBREW);
4213             aliases.put("HIRA", HIRAGANA);
4214             // it appears we don't have the KATAKANA_OR_HIRAGANA
4215             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4216             aliases.put("ITAL", OLD_ITALIC);
4217             aliases.put("JAVA", JAVANESE);
4218             aliases.put("KALI", KAYAH_LI);
4219             aliases.put("KANA", KATAKANA);
4220             aliases.put("KHAR", KHAROSHTHI);
4221             aliases.put("KHMR", KHMER);
4222             aliases.put("KNDA", KANNADA);
4223             aliases.put("KTHI", KAITHI);
4224             aliases.put("LANA", TAI_THAM);
4225             aliases.put("LAOO", LAO);
4226             aliases.put("LATN", LATIN);
4227             aliases.put("LEPC", LEPCHA);
4228             aliases.put("LIMB", LIMBU);
4229             aliases.put("LINB", LINEAR_B);
4230             aliases.put("LISU", LISU);
4231             aliases.put("LYCI", LYCIAN);
4232             aliases.put("LYDI", LYDIAN);
4233             aliases.put("MAND", MANDAIC);
4234             aliases.put("MLYM", MALAYALAM);
4235             aliases.put("MONG", MONGOLIAN);
4236             aliases.put("MTEI", MEETEI_MAYEK);
4237             aliases.put("MYMR", MYANMAR);
4238             aliases.put("NKOO", NKO);
4239             aliases.put("OGAM", OGHAM);
4240             aliases.put("OLCK", OL_CHIKI);
4241             aliases.put("ORKH", OLD_TURKIC);
4242             aliases.put("ORYA", ORIYA);
4243             aliases.put("OSMA", OSMANYA);
4244             aliases.put("PHAG", PHAGS_PA);
4245             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4246             aliases.put("PHNX", PHOENICIAN);
4247             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4248             aliases.put("RJNG", REJANG);
4249             aliases.put("RUNR", RUNIC);
4250             aliases.put("SAMR", SAMARITAN);
4251             aliases.put("SARB", OLD_SOUTH_ARABIAN);
4252             aliases.put("SAUR", SAURASHTRA);
4253             aliases.put("SHAW", SHAVIAN);
4254             aliases.put("SINH", SINHALA);
4255             aliases.put("SUND", SUNDANESE);
4256             aliases.put("SYLO", SYLOTI_NAGRI);
4257             aliases.put("SYRC", SYRIAC);
4258             aliases.put("TAGB", TAGBANWA);
4259             aliases.put("TALE", TAI_LE);
4260             aliases.put("TALU", NEW_TAI_LUE);
4261             aliases.put("TAML", TAMIL);
4262             aliases.put("TAVT", TAI_VIET);
4263             aliases.put("TELU", TELUGU);
4264             aliases.put("TFNG", TIFINAGH);
4265             aliases.put("TGLG", TAGALOG);
4266             aliases.put("THAA", THAANA);
4267             aliases.put("THAI", THAI);
4268             aliases.put("TIBT", TIBETAN);
4269             aliases.put("UGAR", UGARITIC);
4270             aliases.put("VAII", VAI);
4271             aliases.put("XPEO", OLD_PERSIAN);
4272             aliases.put("XSUX", CUNEIFORM);
4273             aliases.put("YIII", YI);
4274             aliases.put("ZINH", INHERITED);
4275             aliases.put("ZYYY", COMMON);
4276             aliases.put("ZZZZ", UNKNOWN);
4277         }
4278 
4279         /**
4280          * Returns the enum constant representing the Unicode script of which
4281          * the given character (Unicode code point) is assigned to.
4282          *
4283          * @param   codePoint the character (Unicode code point) in question.
4284          * @return  The {@code UnicodeScript} constant representing the
4285          *          Unicode script of which this character is assigned to.
4286          *
4287          * @exception IllegalArgumentException if the specified
4288          * {@code codePoint} is an invalid Unicode code point.
4289          * @see Character#isValidCodePoint(int)
4290          *
4291          */
4292         public static UnicodeScript of(int codePoint) {
4293             if (!isValidCodePoint(codePoint))
4294                 throw new IllegalArgumentException();
4295             int type = getType(codePoint);
4296             // leave SURROGATE and PRIVATE_USE for table lookup
4297             if (type == UNASSIGNED)
4298                 return UNKNOWN;
4299             int index = Arrays.binarySearch(scriptStarts, codePoint);
4300             if (index < 0)
4301                 index = -index - 2;
4302             return scripts[index];
4303         }
4304 
4305         /**
4306          * Returns the UnicodeScript constant with the given Unicode script
4307          * name or the script name alias. Script names and their aliases are
4308          * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4309          * and PropertyValueAliases&lt;version&gt;.txt define script names
4310          * and the script name aliases for a particular version of the
4311          * standard. The {@link Character} class specifies the version of
4312          * the standard that it supports.
4313          * <p>
4314          * Character case is ignored for all of the valid script names.
4315          * The en_US locale's case mapping rules are used to provide
4316          * case-insensitive string comparisons for script name validation.
4317          * <p>
4318          *
4319          * @param scriptName A {@code UnicodeScript} name.
4320          * @return The {@code UnicodeScript} constant identified
4321          *         by {@code scriptName}
4322          * @throws IllegalArgumentException if {@code scriptName} is an
4323          *         invalid name
4324          * @throws NullPointerException if {@code scriptName} is null
4325          */
4326         public static final UnicodeScript forName(String scriptName) {
4327             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4328                                  //.replace(' ', '_'));
4329             UnicodeScript sc = aliases.get(scriptName);
4330             if (sc != null)
4331                 return sc;
4332             return valueOf(scriptName);
4333         }
4334     }
4335 
    /**
     * The value of the {@code Character}: the primitive {@code char}
     * wrapped by this object. It is assigned once, in the constructor.
     *
     * @serial
     */
    private final char value;

    /**
     * Serialization version identifier. The value is the serialVersionUID
     * from JDK 1.0.2 and is kept for interoperability.
     */
    private static final long serialVersionUID = 3786198910865385080L;
4345 
    /**
     * Constructs a newly allocated {@code Character} object that
     * represents the specified {@code char} value. Every call creates a
     * distinct object.
     *
     * @param  value   the value to be represented by the
     *                  {@code Character} object.
     */
    public Character(char value) {
        this.value = value;
    }
4356 
4357     private static class CharacterCache {
4358         private CharacterCache(){}
4359 
4360         static final Character cache[] = new Character[127 + 1];
4361 
4362         static {
4363             for (int i = 0; i < cache.length; i++)
4364                 cache[i] = new Character((char)i);
4365         }
4366     }
4367 
4368     /**
4369      * Returns a <tt>Character</tt> instance representing the specified
4370      * <tt>char</tt> value.
4371      * If a new <tt>Character</tt> instance is not required, this method
4372      * should generally be used in preference to the constructor
4373      * {@link #Character(char)}, as this method is likely to yield
4374      * significantly better space and time performance by caching
4375      * frequently requested values.
4376      *
4377      * This method will always cache values in the range {@code
4378      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
4379      * cache other values outside of this range.
4380      *
4381      * @param  c a char value.
4382      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
4383      * @since  1.5
4384      */
4385     public static Character valueOf(char c) {
4386         if (c <= 127) { // must cache
4387             return CharacterCache.cache[(int)c];
4388         }
4389         return new Character(c);
4390     }
4391 
4392     /**
4393      * Returns the value of this {@code Character} object.
4394      * @return  the primitive {@code char} value represented by
4395      *          this object.
4396      */
4397     public char charValue() {
4398         return value;
4399     }
4400 
4401     /**
4402      * Returns a hash code for this {@code Character}; equal to the result
4403      * of invoking {@code charValue()}.
4404      *
4405      * @return a hash code value for this {@code Character}
4406      */
4407     public int hashCode() {
4408         return (int)value;
4409     }
4410 
4411     /**
4412      * Compares this object against the specified object.
4413      * The result is {@code true} if and only if the argument is not
4414      * {@code null} and is a {@code Character} object that
4415      * represents the same {@code char} value as this object.
4416      *
4417      * @param   obj   the object to compare with.
4418      * @return  {@code true} if the objects are the same;
4419      *          {@code false} otherwise.
4420      */
4421     public boolean equals(Object obj) {
4422         if (obj instanceof Character) {
4423             return value == ((Character)obj).charValue();
4424         }
4425         return false;
4426     }
4427 
4428     /**
4429      * Returns a {@code String} object representing this
4430      * {@code Character}'s value.  The result is a string of
4431      * length 1 whose sole component is the primitive
4432      * {@code char} value represented by this
4433      * {@code Character} object.
4434      *
4435      * @return  a string representation of this object.
4436      */
4437     public String toString() {
4438         char buf[] = {value};
4439         return String.valueOf(buf);
4440     }
4441 
4442     /**
4443      * Returns a {@code String} object representing the
4444      * specified {@code char}.  The result is a string of length
4445      * 1 consisting solely of the specified {@code char}.
4446      *
4447      * @param c the {@code char} to be converted
4448      * @return the string representation of the specified {@code char}
4449      * @since 1.4
4450      */
4451     public static String toString(char c) {
4452         return String.valueOf(c);
4453     }
4454 
4455     /**
4456      * Determines whether the specified code point is a valid
4457      * <a href="http://www.unicode.org/glossary/#code_point">
4458      * Unicode code point value</a>.
4459      *
4460      * @param  codePoint the Unicode code point to be tested
4461      * @return {@code true} if the specified code point value is between
4462      *         {@link #MIN_CODE_POINT} and
4463      *         {@link #MAX_CODE_POINT} inclusive;
4464      *         {@code false} otherwise.
4465      * @since  1.5
4466      */
4467     public static boolean isValidCodePoint(int codePoint) {
4468         // Optimized form of:
4469         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4470         int plane = codePoint >>> 16;
4471         return plane < ((MAX_CODE_POINT + 1) >>> 16);
4472     }
4473 
4474     /**
4475      * Determines whether the specified character (Unicode code point)
4476      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4477      * Such code points can be represented using a single {@code char}.
4478      *
4479      * @param  codePoint the character (Unicode code point) to be tested
4480      * @return {@code true} if the specified code point is between
4481      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4482      *         {@code false} otherwise.
4483      * @since  1.7
4484      */
4485     public static boolean isBmpCodePoint(int codePoint) {
4486         return codePoint >>> 16 == 0;
4487         // Optimized form of:
4488         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4489         // We consistently use logical shift (>>>) to facilitate
4490         // additional runtime optimizations.
4491     }
4492 
4493     /**
4494      * Determines whether the specified character (Unicode code point)
4495      * is in the <a href="#supplementary">supplementary character</a> range.
4496      *
4497      * @param  codePoint the character (Unicode code point) to be tested
4498      * @return {@code true} if the specified code point is between
4499      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4500      *         {@link #MAX_CODE_POINT} inclusive;
4501      *         {@code false} otherwise.
4502      * @since  1.5
4503      */
4504     public static boolean isSupplementaryCodePoint(int codePoint) {
4505         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4506             && codePoint <  MAX_CODE_POINT + 1;
4507     }
4508 
4509     /**
4510      * Determines if the given {@code char} value is a
4511      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4512      * Unicode high-surrogate code unit</a>
4513      * (also known as <i>leading-surrogate code unit</i>).
4514      *
4515      * <p>Such values do not represent characters by themselves,
4516      * but are used in the representation of
4517      * <a href="#supplementary">supplementary characters</a>
4518      * in the UTF-16 encoding.
4519      *
4520      * @param  ch the {@code char} value to be tested.
4521      * @return {@code true} if the {@code char} value is between
4522      *         {@link #MIN_HIGH_SURROGATE} and
4523      *         {@link #MAX_HIGH_SURROGATE} inclusive;
4524      *         {@code false} otherwise.
4525      * @see    Character#isLowSurrogate(char)
4526      * @see    Character.UnicodeBlock#of(int)
4527      * @since  1.5
4528      */
4529     public static boolean isHighSurrogate(char ch) {
4530         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4531         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4532     }
4533 
4534     /**
4535      * Determines if the given {@code char} value is a
4536      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4537      * Unicode low-surrogate code unit</a>
4538      * (also known as <i>trailing-surrogate code unit</i>).
4539      *
4540      * <p>Such values do not represent characters by themselves,
4541      * but are used in the representation of
4542      * <a href="#supplementary">supplementary characters</a>
4543      * in the UTF-16 encoding.
4544      *
4545      * @param  ch the {@code char} value to be tested.
4546      * @return {@code true} if the {@code char} value is between
4547      *         {@link #MIN_LOW_SURROGATE} and
4548      *         {@link #MAX_LOW_SURROGATE} inclusive;
4549      *         {@code false} otherwise.
4550      * @see    Character#isHighSurrogate(char)
4551      * @since  1.5
4552      */
4553     public static boolean isLowSurrogate(char ch) {
4554         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4555     }
4556 
4557     /**
4558      * Determines if the given {@code char} value is a Unicode
4559      * <i>surrogate code unit</i>.
4560      *
4561      * <p>Such values do not represent characters by themselves,
4562      * but are used in the representation of
4563      * <a href="#supplementary">supplementary characters</a>
4564      * in the UTF-16 encoding.
4565      *
4566      * <p>A char value is a surrogate code unit if and only if it is either
4567      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4568      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4569      *
4570      * @param  ch the {@code char} value to be tested.
4571      * @return {@code true} if the {@code char} value is between
4572      *         {@link #MIN_SURROGATE} and
4573      *         {@link #MAX_SURROGATE} inclusive;
4574      *         {@code false} otherwise.
4575      * @since  1.7
4576      */
4577     public static boolean isSurrogate(char ch) {
4578         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4579     }
4580 
4581     /**
4582      * Determines whether the specified pair of {@code char}
4583      * values is a valid
4584      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4585      * Unicode surrogate pair</a>.
4586 
4587      * <p>This method is equivalent to the expression:
4588      * <blockquote><pre>
4589      * isHighSurrogate(high) && isLowSurrogate(low)
4590      * </pre></blockquote>
4591      *
4592      * @param  high the high-surrogate code value to be tested
4593      * @param  low the low-surrogate code value to be tested
4594      * @return {@code true} if the specified high and
4595      * low-surrogate code values represent a valid surrogate pair;
4596      * {@code false} otherwise.
4597      * @since  1.5
4598      */
4599     public static boolean isSurrogatePair(char high, char low) {
4600         return isHighSurrogate(high) && isLowSurrogate(low);
4601     }
4602 
4603     /**
4604      * Determines the number of {@code char} values needed to
4605      * represent the specified character (Unicode code point). If the
4606      * specified character is equal to or greater than 0x10000, then
4607      * the method returns 2. Otherwise, the method returns 1.
4608      *
4609      * <p>This method doesn't validate the specified character to be a
4610      * valid Unicode code point. The caller must validate the
4611      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
4612      * if necessary.
4613      *
4614      * @param   codePoint the character (Unicode code point) to be tested.
4615      * @return  2 if the character is a valid supplementary character; 1 otherwise.
4616      * @see     Character#isSupplementaryCodePoint(int)
4617      * @since   1.5
4618      */
4619     public static int charCount(int codePoint) {
4620         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4621     }
4622 
4623     /**
4624      * Converts the specified surrogate pair to its supplementary code
4625      * point value. This method does not validate the specified
4626      * surrogate pair. The caller must validate it using {@link
4627      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4628      *
4629      * @param  high the high-surrogate code unit
4630      * @param  low the low-surrogate code unit
4631      * @return the supplementary code point composed from the
4632      *         specified surrogate pair.
4633      * @since  1.5
4634      */
4635     public static int toCodePoint(char high, char low) {
4636         // Optimized form of:
4637         // return ((high - MIN_HIGH_SURROGATE) << 10)
4638         //         + (low - MIN_LOW_SURROGATE)
4639         //         + MIN_SUPPLEMENTARY_CODE_POINT;
4640         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4641                                        - (MIN_HIGH_SURROGATE << 10)
4642                                        - MIN_LOW_SURROGATE);
4643     }
4644 
4645     /**
4646      * Returns the code point at the given index of the
4647      * {@code CharSequence}. If the {@code char} value at
4648      * the given index in the {@code CharSequence} is in the
4649      * high-surrogate range, the following index is less than the
4650      * length of the {@code CharSequence}, and the
4651      * {@code char} value at the following index is in the
4652      * low-surrogate range, then the supplementary code point
4653      * corresponding to this surrogate pair is returned. Otherwise,
4654      * the {@code char} value at the given index is returned.
4655      *
4656      * @param seq a sequence of {@code char} values (Unicode code
4657      * units)
4658      * @param index the index to the {@code char} values (Unicode
4659      * code units) in {@code seq} to be converted
4660      * @return the Unicode code point at the given index
4661      * @exception NullPointerException if {@code seq} is null.
4662      * @exception IndexOutOfBoundsException if the value
4663      * {@code index} is negative or not less than
4664      * {@link CharSequence#length() seq.length()}.
4665      * @since  1.5
4666      */
4667     public static int codePointAt(CharSequence seq, int index) {
4668         char c1 = seq.charAt(index++);
4669         if (isHighSurrogate(c1)) {
4670             if (index < seq.length()) {
4671                 char c2 = seq.charAt(index);
4672                 if (isLowSurrogate(c2)) {
4673                     return toCodePoint(c1, c2);
4674                 }
4675             }
4676         }
4677         return c1;
4678     }
4679 
4680     /**
4681      * Returns the code point at the given index of the
4682      * {@code char} array. If the {@code char} value at
4683      * the given index in the {@code char} array is in the
4684      * high-surrogate range, the following index is less than the
4685      * length of the {@code char} array, and the
4686      * {@code char} value at the following index is in the
4687      * low-surrogate range, then the supplementary code point
4688      * corresponding to this surrogate pair is returned. Otherwise,
4689      * the {@code char} value at the given index is returned.
4690      *
4691      * @param a the {@code char} array
4692      * @param index the index to the {@code char} values (Unicode
4693      * code units) in the {@code char} array to be converted
4694      * @return the Unicode code point at the given index
4695      * @exception NullPointerException if {@code a} is null.
4696      * @exception IndexOutOfBoundsException if the value
4697      * {@code index} is negative or not less than
4698      * the length of the {@code char} array.
4699      * @since  1.5
4700      */
4701     public static int codePointAt(char[] a, int index) {
4702         return codePointAtImpl(a, index, a.length);
4703     }
4704 
4705     /**
4706      * Returns the code point at the given index of the
4707      * {@code char} array, where only array elements with
4708      * {@code index} less than {@code limit} can be used. If
4709      * the {@code char} value at the given index in the
4710      * {@code char} array is in the high-surrogate range, the
4711      * following index is less than the {@code limit}, and the
4712      * {@code char} value at the following index is in the
4713      * low-surrogate range, then the supplementary code point
4714      * corresponding to this surrogate pair is returned. Otherwise,
4715      * the {@code char} value at the given index is returned.
4716      *
4717      * @param a the {@code char} array
4718      * @param index the index to the {@code char} values (Unicode
4719      * code units) in the {@code char} array to be converted
4720      * @param limit the index after the last array element that
4721      * can be used in the {@code char} array
4722      * @return the Unicode code point at the given index
4723      * @exception NullPointerException if {@code a} is null.
4724      * @exception IndexOutOfBoundsException if the {@code index}
4725      * argument is negative or not less than the {@code limit}
4726      * argument, or if the {@code limit} argument is negative or
4727      * greater than the length of the {@code char} array.
4728      * @since  1.5
4729      */
4730     public static int codePointAt(char[] a, int index, int limit) {
4731         if (index >= limit || limit < 0 || limit > a.length) {
4732             throw new IndexOutOfBoundsException();
4733         }
4734         return codePointAtImpl(a, index, limit);
4735     }
4736 
4737     // throws ArrayIndexOutofBoundsException if index out of bounds
4738     static int codePointAtImpl(char[] a, int index, int limit) {
4739         char c1 = a[index++];
4740         if (isHighSurrogate(c1)) {
4741             if (index < limit) {
4742                 char c2 = a[index];
4743                 if (isLowSurrogate(c2)) {
4744                     return toCodePoint(c1, c2);
4745                 }
4746             }
4747         }
4748         return c1;
4749     }
4750 
4751     /**
4752      * Returns the code point preceding the given index of the
4753      * {@code CharSequence}. If the {@code char} value at
4754      * {@code (index - 1)} in the {@code CharSequence} is in
4755      * the low-surrogate range, {@code (index - 2)} is not
4756      * negative, and the {@code char} value at {@code (index - 2)}
4757      * in the {@code CharSequence} is in the
4758      * high-surrogate range, then the supplementary code point
4759      * corresponding to this surrogate pair is returned. Otherwise,
4760      * the {@code char} value at {@code (index - 1)} is
4761      * returned.
4762      *
4763      * @param seq the {@code CharSequence} instance
4764      * @param index the index following the code point that should be returned
4765      * @return the Unicode code point value before the given index.
4766      * @exception NullPointerException if {@code seq} is null.
4767      * @exception IndexOutOfBoundsException if the {@code index}
4768      * argument is less than 1 or greater than {@link
4769      * CharSequence#length() seq.length()}.
4770      * @since  1.5
4771      */
4772     public static int codePointBefore(CharSequence seq, int index) {
4773         char c2 = seq.charAt(--index);
4774         if (isLowSurrogate(c2)) {
4775             if (index > 0) {
4776                 char c1 = seq.charAt(--index);
4777                 if (isHighSurrogate(c1)) {
4778                     return toCodePoint(c1, c2);
4779                 }
4780             }
4781         }
4782         return c2;
4783     }
4784 
4785     /**
4786      * Returns the code point preceding the given index of the
4787      * {@code char} array. If the {@code char} value at
4788      * {@code (index - 1)} in the {@code char} array is in
4789      * the low-surrogate range, {@code (index - 2)} is not
4790      * negative, and the {@code char} value at {@code (index - 2)}
4791      * in the {@code char} array is in the
4792      * high-surrogate range, then the supplementary code point
4793      * corresponding to this surrogate pair is returned. Otherwise,
4794      * the {@code char} value at {@code (index - 1)} is
4795      * returned.
4796      *
4797      * @param a the {@code char} array
4798      * @param index the index following the code point that should be returned
4799      * @return the Unicode code point value before the given index.
4800      * @exception NullPointerException if {@code a} is null.
4801      * @exception IndexOutOfBoundsException if the {@code index}
4802      * argument is less than 1 or greater than the length of the
4803      * {@code char} array
4804      * @since  1.5
4805      */
4806     public static int codePointBefore(char[] a, int index) {
4807         return codePointBeforeImpl(a, index, 0);
4808     }
4809 
4810     /**
4811      * Returns the code point preceding the given index of the
4812      * {@code char} array, where only array elements with
4813      * {@code index} greater than or equal to {@code start}
4814      * can be used. If the {@code char} value at {@code (index - 1)}
4815      * in the {@code char} array is in the
4816      * low-surrogate range, {@code (index - 2)} is not less than
4817      * {@code start}, and the {@code char} value at
4818      * {@code (index - 2)} in the {@code char} array is in
4819      * the high-surrogate range, then the supplementary code point
4820      * corresponding to this surrogate pair is returned. Otherwise,
4821      * the {@code char} value at {@code (index - 1)} is
4822      * returned.
4823      *
4824      * @param a the {@code char} array
4825      * @param index the index following the code point that should be returned
4826      * @param start the index of the first array element in the
4827      * {@code char} array
4828      * @return the Unicode code point value before the given index.
4829      * @exception NullPointerException if {@code a} is null.
4830      * @exception IndexOutOfBoundsException if the {@code index}
4831      * argument is not greater than the {@code start} argument or
4832      * is greater than the length of the {@code char} array, or
4833      * if the {@code start} argument is negative or not less than
4834      * the length of the {@code char} array.
4835      * @since  1.5
4836      */
4837     public static int codePointBefore(char[] a, int index, int start) {
4838         if (index <= start || start < 0 || start >= a.length) {
4839             throw new IndexOutOfBoundsException();
4840         }
4841         return codePointBeforeImpl(a, index, start);
4842     }
4843 
4844     // throws ArrayIndexOutofBoundsException if index-1 out of bounds
4845     static int codePointBeforeImpl(char[] a, int index, int start) {
4846         char c2 = a[--index];
4847         if (isLowSurrogate(c2)) {
4848             if (index > start) {
4849                 char c1 = a[--index];
4850                 if (isHighSurrogate(c1)) {
4851                     return toCodePoint(c1, c2);
4852                 }
4853             }
4854         }
4855         return c2;
4856     }
4857 
4858     /**
4859      * Returns the leading surrogate (a
4860      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4861      * high surrogate code unit</a>) of the
4862      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4863      * surrogate pair</a>
4864      * representing the specified supplementary character (Unicode
4865      * code point) in the UTF-16 encoding.  If the specified character
4866      * is not a
4867      * <a href="Character.html#supplementary">supplementary character</a>,
4868      * an unspecified {@code char} is returned.
4869      *
4870      * <p>If
4871      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
4872      * is {@code true}, then
4873      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
4874      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
4875      * are also always {@code true}.
4876      *
4877      * @param   codePoint a supplementary character (Unicode code point)
4878      * @return  the leading surrogate code unit used to represent the
4879      *          character in the UTF-16 encoding
4880      * @since   1.7
4881      */
4882     public static char highSurrogate(int codePoint) {
4883         return (char) ((codePoint >>> 10)
4884             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
4885     }
4886 
4887     /**
4888      * Returns the trailing surrogate (a
4889      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4890      * low surrogate code unit</a>) of the
4891      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4892      * surrogate pair</a>
4893      * representing the specified supplementary character (Unicode
4894      * code point) in the UTF-16 encoding.  If the specified character
4895      * is not a
4896      * <a href="Character.html#supplementary">supplementary character</a>,
4897      * an unspecified {@code char} is returned.
4898      *
4899      * <p>If
4900      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
4901      * is {@code true}, then
4902      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
4903      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
4904      * are also always {@code true}.
4905      *
4906      * @param   codePoint a supplementary character (Unicode code point)
4907      * @return  the trailing surrogate code unit used to represent the
4908      *          character in the UTF-16 encoding
4909      * @since   1.7
4910      */
4911     public static char lowSurrogate(int codePoint) {
4912         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
4913     }
4914 
4915     /**
4916      * Converts the specified character (Unicode code point) to its
4917      * UTF-16 representation. If the specified code point is a BMP
4918      * (Basic Multilingual Plane or Plane 0) value, the same value is
4919      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
4920      * specified code point is a supplementary character, its
4921      * surrogate values are stored in {@code dst[dstIndex]}
4922      * (high-surrogate) and {@code dst[dstIndex+1]}
4923      * (low-surrogate), and 2 is returned.
4924      *
4925      * @param  codePoint the character (Unicode code point) to be converted.
4926      * @param  dst an array of {@code char} in which the
4927      * {@code codePoint}'s UTF-16 value is stored.
4928      * @param dstIndex the start index into the {@code dst}
4929      * array where the converted value is stored.
4930      * @return 1 if the code point is a BMP code point, 2 if the
4931      * code point is a supplementary code point.
4932      * @exception IllegalArgumentException if the specified
4933      * {@code codePoint} is not a valid Unicode code point.
4934      * @exception NullPointerException if the specified {@code dst} is null.
4935      * @exception IndexOutOfBoundsException if {@code dstIndex}
4936      * is negative or not less than {@code dst.length}, or if
4937      * {@code dst} at {@code dstIndex} doesn't have enough
4938      * array element(s) to store the resulting {@code char}
4939      * value(s). (If {@code dstIndex} is equal to
4940      * {@code dst.length-1} and the specified
4941      * {@code codePoint} is a supplementary character, the
4942      * high-surrogate value is not stored in
4943      * {@code dst[dstIndex]}.)
4944      * @since  1.5
4945      */
4946     public static int toChars(int codePoint, char[] dst, int dstIndex) {
4947         if (isBmpCodePoint(codePoint)) {
4948             dst[dstIndex] = (char) codePoint;
4949             return 1;
4950         } else if (isValidCodePoint(codePoint)) {
4951             toSurrogates(codePoint, dst, dstIndex);
4952             return 2;
4953         } else {
4954             throw new IllegalArgumentException();
4955         }
4956     }
4957 
4958     /**
4959      * Converts the specified character (Unicode code point) to its
4960      * UTF-16 representation stored in a {@code char} array. If
4961      * the specified code point is a BMP (Basic Multilingual Plane or
4962      * Plane 0) value, the resulting {@code char} array has
4963      * the same value as {@code codePoint}. If the specified code
4964      * point is a supplementary code point, the resulting
4965      * {@code char} array has the corresponding surrogate pair.
4966      *
4967      * @param  codePoint a Unicode code point
4968      * @return a {@code char} array having
4969      *         {@code codePoint}'s UTF-16 representation.
4970      * @exception IllegalArgumentException if the specified
4971      * {@code codePoint} is not a valid Unicode code point.
4972      * @since  1.5
4973      */
4974     public static char[] toChars(int codePoint) {
4975         if (isBmpCodePoint(codePoint)) {
4976             return new char[] { (char) codePoint };
4977         } else if (isValidCodePoint(codePoint)) {
4978             char[] result = new char[2];
4979             toSurrogates(codePoint, result, 0);
4980             return result;
4981         } else {
4982             throw new IllegalArgumentException();
4983         }
4984     }
4985 
4986     static void toSurrogates(int codePoint, char[] dst, int index) {
4987         // We write elements "backwards" to guarantee all-or-nothing
4988         dst[index+1] = lowSurrogate(codePoint);
4989         dst[index] = highSurrogate(codePoint);
4990     }
4991 
4992     /**
4993      * Returns the number of Unicode code points in the text range of
4994      * the specified char sequence. The text range begins at the
4995      * specified {@code beginIndex} and extends to the
4996      * {@code char} at index {@code endIndex - 1}. Thus the
4997      * length (in {@code char}s) of the text range is
4998      * {@code endIndex-beginIndex}. Unpaired surrogates within
4999      * the text range count as one code point each.
5000      *
5001      * @param seq the char sequence
5002      * @param beginIndex the index to the first {@code char} of
5003      * the text range.
5004      * @param endIndex the index after the last {@code char} of
5005      * the text range.
5006      * @return the number of Unicode code points in the specified text
5007      * range
5008      * @exception NullPointerException if {@code seq} is null.
5009      * @exception IndexOutOfBoundsException if the
5010      * {@code beginIndex} is negative, or {@code endIndex}
5011      * is larger than the length of the given sequence, or
5012      * {@code beginIndex} is larger than {@code endIndex}.
5013      * @since  1.5
5014      */
5015     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5016         int length = seq.length();
5017         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5018             throw new IndexOutOfBoundsException();
5019         }
5020         int n = endIndex - beginIndex;
5021         for (int i = beginIndex; i < endIndex; ) {
5022             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5023                 isLowSurrogate(seq.charAt(i))) {
5024                 n--;
5025                 i++;
5026             }
5027         }
5028         return n;
5029     }
5030 
5031     /**
5032      * Returns the number of Unicode code points in a subarray of the
5033      * {@code char} array argument. The {@code offset}
5034      * argument is the index of the first {@code char} of the
5035      * subarray and the {@code count} argument specifies the
5036      * length of the subarray in {@code char}s. Unpaired
5037      * surrogates within the subarray count as one code point each.
5038      *
5039      * @param a the {@code char} array
5040      * @param offset the index of the first {@code char} in the
5041      * given {@code char} array
5042      * @param count the length of the subarray in {@code char}s
5043      * @return the number of Unicode code points in the specified subarray
5044      * @exception NullPointerException if {@code a} is null.
5045      * @exception IndexOutOfBoundsException if {@code offset} or
5046      * {@code count} is negative, or if {@code offset +
5047      * count} is larger than the length of the given array.
5048      * @since  1.5
5049      */
5050     public static int codePointCount(char[] a, int offset, int count) {
5051         if (count > a.length - offset || offset < 0 || count < 0) {
5052             throw new IndexOutOfBoundsException();
5053         }
5054         return codePointCountImpl(a, offset, count);
5055     }
5056 
5057     static int codePointCountImpl(char[] a, int offset, int count) {
5058         int endIndex = offset + count;
5059         int n = count;
5060         for (int i = offset; i < endIndex; ) {
5061             if (isHighSurrogate(a[i++]) && i < endIndex &&
5062                 isLowSurrogate(a[i])) {
5063                 n--;
5064                 i++;
5065             }
5066         }
5067         return n;
5068     }
5069 
5070     /**
5071      * Returns the index within the given char sequence that is offset
5072      * from the given {@code index} by {@code codePointOffset}
5073      * code points. Unpaired surrogates within the text range given by
5074      * {@code index} and {@code codePointOffset} count as
5075      * one code point each.
5076      *
5077      * @param seq the char sequence
5078      * @param index the index to be offset
5079      * @param codePointOffset the offset in code points
5080      * @return the index within the char sequence
5081      * @exception NullPointerException if {@code seq} is null.
5082      * @exception IndexOutOfBoundsException if {@code index}
     *   is negative or larger than the length of the char sequence,
5084      *   or if {@code codePointOffset} is positive and the
5085      *   subsequence starting with {@code index} has fewer than
5086      *   {@code codePointOffset} code points, or if
5087      *   {@code codePointOffset} is negative and the subsequence
5088      *   before {@code index} has fewer than the absolute value
5089      *   of {@code codePointOffset} code points.
5090      * @since 1.5
5091      */
5092     public static int offsetByCodePoints(CharSequence seq, int index,
5093                                          int codePointOffset) {
5094         int length = seq.length();
5095         if (index < 0 || index > length) {
5096             throw new IndexOutOfBoundsException();
5097         }
5098 
5099         int x = index;
5100         if (codePointOffset >= 0) {
5101             int i;
5102             for (i = 0; x < length && i < codePointOffset; i++) {
5103                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5104                     isLowSurrogate(seq.charAt(x))) {
5105                     x++;
5106                 }
5107             }
5108             if (i < codePointOffset) {
5109                 throw new IndexOutOfBoundsException();
5110             }
5111         } else {
5112             int i;
5113             for (i = codePointOffset; x > 0 && i < 0; i++) {
5114                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5115                     isHighSurrogate(seq.charAt(x-1))) {
5116                     x--;
5117                 }
5118             }
5119             if (i < 0) {
5120                 throw new IndexOutOfBoundsException();
5121             }
5122         }
5123         return x;
5124     }
5125 
5126     /**
5127      * Returns the index within the given {@code char} subarray
5128      * that is offset from the given {@code index} by
5129      * {@code codePointOffset} code points. The
5130      * {@code start} and {@code count} arguments specify a
5131      * subarray of the {@code char} array. Unpaired surrogates
5132      * within the text range given by {@code index} and
5133      * {@code codePointOffset} count as one code point each.
5134      *
5135      * @param a the {@code char} array
5136      * @param start the index of the first {@code char} of the
5137      * subarray
5138      * @param count the length of the subarray in {@code char}s
5139      * @param index the index to be offset
5140      * @param codePointOffset the offset in code points
5141      * @return the index within the subarray
5142      * @exception NullPointerException if {@code a} is null.
5143      * @exception IndexOutOfBoundsException
5144      *   if {@code start} or {@code count} is negative,
5145      *   or if {@code start + count} is larger than the length of
5146      *   the given array,
5147      *   or if {@code index} is less than {@code start} or
     *   larger than {@code start + count},
5149      *   or if {@code codePointOffset} is positive and the text range
5150      *   starting with {@code index} and ending with {@code start + count - 1}
5151      *   has fewer than {@code codePointOffset} code
5152      *   points,
5153      *   or if {@code codePointOffset} is negative and the text range
5154      *   starting with {@code start} and ending with {@code index - 1}
5155      *   has fewer than the absolute value of
5156      *   {@code codePointOffset} code points.
5157      * @since 1.5
5158      */
5159     public static int offsetByCodePoints(char[] a, int start, int count,
5160                                          int index, int codePointOffset) {
5161         if (count > a.length-start || start < 0 || count < 0
5162             || index < start || index > start+count) {
5163             throw new IndexOutOfBoundsException();
5164         }
5165         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5166     }
5167 
5168     static int offsetByCodePointsImpl(char[]a, int start, int count,
5169                                       int index, int codePointOffset) {
5170         int x = index;
5171         if (codePointOffset >= 0) {
5172             int limit = start + count;
5173             int i;
5174             for (i = 0; x < limit && i < codePointOffset; i++) {
5175                 if (isHighSurrogate(a[x++]) && x < limit &&
5176                     isLowSurrogate(a[x])) {
5177                     x++;
5178                 }
5179             }
5180             if (i < codePointOffset) {
5181                 throw new IndexOutOfBoundsException();
5182             }
5183         } else {
5184             int i;
5185             for (i = codePointOffset; x > start && i < 0; i++) {
5186                 if (isLowSurrogate(a[--x]) && x > start &&
5187                     isHighSurrogate(a[x-1])) {
5188                     x--;
5189                 }
5190             }
5191             if (i < 0) {
5192                 throw new IndexOutOfBoundsException();
5193             }
5194         }
5195         return x;
5196     }
5197 
5198     /**
5199      * Determines if the specified character is a lowercase character.
5200      * <p>
5201      * A character is lowercase if its general category type, provided
5202      * by {@code Character.getType(ch)}, is
5203      * {@code LOWERCASE_LETTER}.
5204      * <p>
5205      * The following are examples of lowercase characters:
5206      * <p><blockquote><pre>
5207      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5208      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5209      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5210      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5211      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5212      * </pre></blockquote>
5213      * <p> Many other Unicode characters are lowercase too.
5214      *
5215      * <p><b>Note:</b> This method cannot handle <a
5216      * href="#supplementary"> supplementary characters</a>. To support
5217      * all Unicode characters, including supplementary characters, use
5218      * the {@link #isLowerCase(int)} method.
5219      *
5220      * @param   ch   the character to be tested.
5221      * @return  {@code true} if the character is lowercase;
5222      *          {@code false} otherwise.
     * @see     Character#isUpperCase(char)
5224      * @see     Character#isTitleCase(char)
5225      * @see     Character#toLowerCase(char)
5226      * @see     Character#getType(char)
5227      */
5228     public static boolean isLowerCase(char ch) {
5229         return isLowerCase((int)ch);
5230     }
5231 
5232     /**
5233      * Determines if the specified character (Unicode code point) is a
5234      * lowercase character.
5235      * <p>
5236      * A character is lowercase if its general category type, provided
5237      * by {@link Character#getType getType(codePoint)}, is
5238      * {@code LOWERCASE_LETTER}.
5239      * <p>
5240      * The following are examples of lowercase characters:
5241      * <p><blockquote><pre>
5242      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5243      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5244      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5245      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5246      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5247      * </pre></blockquote>
5248      * <p> Many other Unicode characters are lowercase too.
5249      *
5250      * @param   codePoint the character (Unicode code point) to be tested.
5251      * @return  {@code true} if the character is lowercase;
5252      *          {@code false} otherwise.
     * @see     Character#isUpperCase(int)
5254      * @see     Character#isTitleCase(int)
5255      * @see     Character#toLowerCase(int)
5256      * @see     Character#getType(int)
5257      * @since   1.5
5258      */
5259     public static boolean isLowerCase(int codePoint) {
5260         return getType(codePoint) == Character.LOWERCASE_LETTER;
5261     }
5262 
5263     /**
5264      * Determines if the specified character is an uppercase character.
5265      * <p>
5266      * A character is uppercase if its general category type, provided by
5267      * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}.
5268      * <p>
5269      * The following are examples of uppercase characters:
5270      * <p><blockquote><pre>
5271      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5272      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5273      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5274      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5275      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5276      * </pre></blockquote>
     * <p> Many other Unicode characters are uppercase too.
5278      *
5279      * <p><b>Note:</b> This method cannot handle <a
5280      * href="#supplementary"> supplementary characters</a>. To support
5281      * all Unicode characters, including supplementary characters, use
5282      * the {@link #isUpperCase(int)} method.
5283      *
5284      * @param   ch   the character to be tested.
5285      * @return  {@code true} if the character is uppercase;
5286      *          {@code false} otherwise.
5287      * @see     Character#isLowerCase(char)
5288      * @see     Character#isTitleCase(char)
5289      * @see     Character#toUpperCase(char)
5290      * @see     Character#getType(char)
5291      * @since   1.0
5292      */
5293     public static boolean isUpperCase(char ch) {
5294         return isUpperCase((int)ch);
5295     }
5296 
5297     /**
5298      * Determines if the specified character (Unicode code point) is an uppercase character.
5299      * <p>
5300      * A character is uppercase if its general category type, provided by
5301      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}.
5302      * <p>
5303      * The following are examples of uppercase characters:
5304      * <p><blockquote><pre>
5305      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5306      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5307      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5308      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5309      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5310      * </pre></blockquote>
     * <p> Many other Unicode characters are uppercase too.
5312      *
5313      * @param   codePoint the character (Unicode code point) to be tested.
5314      * @return  {@code true} if the character is uppercase;
5315      *          {@code false} otherwise.
5316      * @see     Character#isLowerCase(int)
5317      * @see     Character#isTitleCase(int)
5318      * @see     Character#toUpperCase(int)
5319      * @see     Character#getType(int)
5320      * @since   1.5
5321      */
5322     public static boolean isUpperCase(int codePoint) {
5323         return getType(codePoint) == Character.UPPERCASE_LETTER;
5324     }
5325 
5326     /**
5327      * Determines if the specified character is a titlecase character.
5328      * <p>
5329      * A character is a titlecase character if its general
5330      * category type, provided by {@code Character.getType(ch)},
5331      * is {@code TITLECASE_LETTER}.
5332      * <p>
5333      * Some characters look like pairs of Latin letters. For example, there
5334      * is an uppercase letter that looks like "LJ" and has a corresponding
5335      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5336      * is the appropriate form to use when rendering a word in lowercase
5337      * with initial capitals, as for a book title.
5338      * <p>
5339      * These are some of the Unicode characters for which this method returns
5340      * {@code true}:
5341      * <ul>
5342      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5343      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5344      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5345      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5346      * </ul>
     * <p> Many other Unicode characters are titlecase too.
5348      *
5349      * <p><b>Note:</b> This method cannot handle <a
5350      * href="#supplementary"> supplementary characters</a>. To support
5351      * all Unicode characters, including supplementary characters, use
5352      * the {@link #isTitleCase(int)} method.
5353      *
5354      * @param   ch   the character to be tested.
5355      * @return  {@code true} if the character is titlecase;
5356      *          {@code false} otherwise.
5357      * @see     Character#isLowerCase(char)
5358      * @see     Character#isUpperCase(char)
5359      * @see     Character#toTitleCase(char)
5360      * @see     Character#getType(char)
5361      * @since   1.0.2
5362      */
5363     public static boolean isTitleCase(char ch) {
5364         return isTitleCase((int)ch);
5365     }
5366 
5367     /**
5368      * Determines if the specified character (Unicode code point) is a titlecase character.
5369      * <p>
5370      * A character is a titlecase character if its general
5371      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5372      * is {@code TITLECASE_LETTER}.
5373      * <p>
5374      * Some characters look like pairs of Latin letters. For example, there
5375      * is an uppercase letter that looks like "LJ" and has a corresponding
5376      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5377      * is the appropriate form to use when rendering a word in lowercase
5378      * with initial capitals, as for a book title.
5379      * <p>
5380      * These are some of the Unicode characters for which this method returns
5381      * {@code true}:
5382      * <ul>
5383      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5384      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5385      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5386      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5387      * </ul>
     * <p> Many other Unicode characters are titlecase too.
5389      *
5390      * @param   codePoint the character (Unicode code point) to be tested.
5391      * @return  {@code true} if the character is titlecase;
5392      *          {@code false} otherwise.
5393      * @see     Character#isLowerCase(int)
5394      * @see     Character#isUpperCase(int)
5395      * @see     Character#toTitleCase(int)
5396      * @see     Character#getType(int)
5397      * @since   1.5
5398      */
5399     public static boolean isTitleCase(int codePoint) {
5400         return getType(codePoint) == Character.TITLECASE_LETTER;
5401     }
5402 
5403     /**
5404      * Determines if the specified character is a digit.
5405      * <p>
5406      * A character is a digit if its general category type, provided
5407      * by {@code Character.getType(ch)}, is
5408      * {@code DECIMAL_DIGIT_NUMBER}.
5409      * <p>
5410      * Some Unicode character ranges that contain digits:
5411      * <ul>
5412      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5413      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5414      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5415      *     Arabic-Indic digits
5416      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5417      *     Extended Arabic-Indic digits
5418      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5419      *     Devanagari digits
5420      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5421      *     Fullwidth digits
5422      * </ul>
5423      *
5424      * Many other character ranges contain digits as well.
5425      *
5426      * <p><b>Note:</b> This method cannot handle <a
5427      * href="#supplementary"> supplementary characters</a>. To support
5428      * all Unicode characters, including supplementary characters, use
5429      * the {@link #isDigit(int)} method.
5430      *
5431      * @param   ch   the character to be tested.
5432      * @return  {@code true} if the character is a digit;
5433      *          {@code false} otherwise.
5434      * @see     Character#digit(char, int)
5435      * @see     Character#forDigit(int, int)
5436      * @see     Character#getType(char)
5437      */
5438     public static boolean isDigit(char ch) {
5439         return isDigit((int)ch);
5440     }
5441 
5442     /**
5443      * Determines if the specified character (Unicode code point) is a digit.
5444      * <p>
5445      * A character is a digit if its general category type, provided
5446      * by {@link Character#getType(int) getType(codePoint)}, is
5447      * {@code DECIMAL_DIGIT_NUMBER}.
5448      * <p>
5449      * Some Unicode character ranges that contain digits:
5450      * <ul>
5451      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5452      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5453      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5454      *     Arabic-Indic digits
5455      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5456      *     Extended Arabic-Indic digits
5457      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5458      *     Devanagari digits
5459      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5460      *     Fullwidth digits
5461      * </ul>
5462      *
5463      * Many other character ranges contain digits as well.
5464      *
5465      * @param   codePoint the character (Unicode code point) to be tested.
5466      * @return  {@code true} if the character is a digit;
5467      *          {@code false} otherwise.
5468      * @see     Character#forDigit(int, int)
5469      * @see     Character#getType(int)
5470      * @since   1.5
5471      */
5472     public static boolean isDigit(int codePoint) {
5473         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
5474     }
5475 
5476     /**
5477      * Determines if a character is defined in Unicode.
5478      * <p>
5479      * A character is defined if at least one of the following is true:
5480      * <ul>
5481      * <li>It has an entry in the UnicodeData file.
5482      * <li>It has a value in a range defined by the UnicodeData file.
5483      * </ul>
5484      *
5485      * <p><b>Note:</b> This method cannot handle <a
5486      * href="#supplementary"> supplementary characters</a>. To support
5487      * all Unicode characters, including supplementary characters, use
5488      * the {@link #isDefined(int)} method.
5489      *
5490      * @param   ch   the character to be tested
5491      * @return  {@code true} if the character has a defined meaning
5492      *          in Unicode; {@code false} otherwise.
5493      * @see     Character#isDigit(char)
5494      * @see     Character#isLetter(char)
5495      * @see     Character#isLetterOrDigit(char)
5496      * @see     Character#isLowerCase(char)
5497      * @see     Character#isTitleCase(char)
5498      * @see     Character#isUpperCase(char)
5499      * @since   1.0.2
5500      */
5501     public static boolean isDefined(char ch) {
5502         return isDefined((int)ch);
5503     }
5504 
5505     /**
5506      * Determines if a character (Unicode code point) is defined in Unicode.
5507      * <p>
5508      * A character is defined if at least one of the following is true:
5509      * <ul>
5510      * <li>It has an entry in the UnicodeData file.
5511      * <li>It has a value in a range defined by the UnicodeData file.
5512      * </ul>
5513      *
5514      * @param   codePoint the character (Unicode code point) to be tested.
5515      * @return  {@code true} if the character has a defined meaning
5516      *          in Unicode; {@code false} otherwise.
5517      * @see     Character#isDigit(int)
5518      * @see     Character#isLetter(int)
5519      * @see     Character#isLetterOrDigit(int)
5520      * @see     Character#isLowerCase(int)
5521      * @see     Character#isTitleCase(int)
5522      * @see     Character#isUpperCase(int)
5523      * @since   1.5
5524      */
5525     public static boolean isDefined(int codePoint) {
5526         return getType(codePoint) != Character.UNASSIGNED;
5527     }
5528 
5529     /**
5530      * Determines if the specified character is a letter.
5531      * <p>
5532      * A character is considered to be a letter if its general
5533      * category type, provided by {@code Character.getType(ch)},
5534      * is any of the following:
5535      * <ul>
5536      * <li> {@code UPPERCASE_LETTER}
5537      * <li> {@code LOWERCASE_LETTER}
5538      * <li> {@code TITLECASE_LETTER}
5539      * <li> {@code MODIFIER_LETTER}
5540      * <li> {@code OTHER_LETTER}
5541      * </ul>
5542      *
5543      * Not all letters have case. Many characters are
5544      * letters but are neither uppercase nor lowercase nor titlecase.
5545      *
5546      * <p><b>Note:</b> This method cannot handle <a
5547      * href="#supplementary"> supplementary characters</a>. To support
5548      * all Unicode characters, including supplementary characters, use
5549      * the {@link #isLetter(int)} method.
5550      *
5551      * @param   ch   the character to be tested.
5552      * @return  {@code true} if the character is a letter;
5553      *          {@code false} otherwise.
5554      * @see     Character#isDigit(char)
5555      * @see     Character#isJavaIdentifierStart(char)
5556      * @see     Character#isJavaLetter(char)
5557      * @see     Character#isJavaLetterOrDigit(char)
5558      * @see     Character#isLetterOrDigit(char)
5559      * @see     Character#isLowerCase(char)
5560      * @see     Character#isTitleCase(char)
5561      * @see     Character#isUnicodeIdentifierStart(char)
5562      * @see     Character#isUpperCase(char)
5563      */
5564     public static boolean isLetter(char ch) {
5565         return isLetter((int)ch);
5566     }
5567 
5568     /**
5569      * Determines if the specified character (Unicode code point) is a letter.
5570      * <p>
5571      * A character is considered to be a letter if its general
5572      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5573      * is any of the following:
5574      * <ul>
5575      * <li> {@code UPPERCASE_LETTER}
5576      * <li> {@code LOWERCASE_LETTER}
5577      * <li> {@code TITLECASE_LETTER}
5578      * <li> {@code MODIFIER_LETTER}
5579      * <li> {@code OTHER_LETTER}
5580      * </ul>
5581      *
5582      * Not all letters have case. Many characters are
5583      * letters but are neither uppercase nor lowercase nor titlecase.
5584      *
5585      * @param   codePoint the character (Unicode code point) to be tested.
5586      * @return  {@code true} if the character is a letter;
5587      *          {@code false} otherwise.
5588      * @see     Character#isDigit(int)
5589      * @see     Character#isJavaIdentifierStart(int)
5590      * @see     Character#isLetterOrDigit(int)
5591      * @see     Character#isLowerCase(int)
5592      * @see     Character#isTitleCase(int)
5593      * @see     Character#isUnicodeIdentifierStart(int)
5594      * @see     Character#isUpperCase(int)
5595      * @since   1.5
5596      */
5597     public static boolean isLetter(int codePoint) {
5598         return ((((1 << Character.UPPERCASE_LETTER) |
5599             (1 << Character.LOWERCASE_LETTER) |
5600             (1 << Character.TITLECASE_LETTER) |
5601             (1 << Character.MODIFIER_LETTER) |
5602             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
5603             != 0;
5604     }
5605 
5606     /**
5607      * Determines if the specified character is a letter or digit.
5608      * <p>
5609      * A character is considered to be a letter or digit if either
5610      * {@code Character.isLetter(char ch)} or
5611      * {@code Character.isDigit(char ch)} returns
5612      * {@code true} for the character.
5613      *
5614      * <p><b>Note:</b> This method cannot handle <a
5615      * href="#supplementary"> supplementary characters</a>. To support
5616      * all Unicode characters, including supplementary characters, use
5617      * the {@link #isLetterOrDigit(int)} method.
5618      *
5619      * @param   ch   the character to be tested.
5620      * @return  {@code true} if the character is a letter or digit;
5621      *          {@code false} otherwise.
5622      * @see     Character#isDigit(char)
5623      * @see     Character#isJavaIdentifierPart(char)
5624      * @see     Character#isJavaLetter(char)
5625      * @see     Character#isJavaLetterOrDigit(char)
5626      * @see     Character#isLetter(char)
5627      * @see     Character#isUnicodeIdentifierPart(char)
5628      * @since   1.0.2
5629      */
5630     public static boolean isLetterOrDigit(char ch) {
5631         return isLetterOrDigit((int)ch);
5632     }
5633 
5634     /**
5635      * Determines if the specified character (Unicode code point) is a letter or digit.
5636      * <p>
5637      * A character is considered to be a letter or digit if either
5638      * {@link #isLetter(int) isLetter(codePoint)} or
5639      * {@link #isDigit(int) isDigit(codePoint)} returns
5640      * {@code true} for the character.
5641      *
5642      * @param   codePoint the character (Unicode code point) to be tested.
5643      * @return  {@code true} if the character is a letter or digit;
5644      *          {@code false} otherwise.
5645      * @see     Character#isDigit(int)
5646      * @see     Character#isJavaIdentifierPart(int)
5647      * @see     Character#isLetter(int)
5648      * @see     Character#isUnicodeIdentifierPart(int)
5649      * @since   1.5
5650      */
5651     public static boolean isLetterOrDigit(int codePoint) {
5652         return ((((1 << Character.UPPERCASE_LETTER) |
5653             (1 << Character.LOWERCASE_LETTER) |
5654             (1 << Character.TITLECASE_LETTER) |
5655             (1 << Character.MODIFIER_LETTER) |
5656             (1 << Character.OTHER_LETTER) |
5657             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
5658             != 0;
5659     }
5660 
5661     /**
5662      * Determines if the specified character is permissible as the first
5663      * character in a Java identifier.
5664      * <p>
5665      * A character may start a Java identifier if and only if
5666      * one of the following is true:
5667      * <ul>
5668      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5669      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5670      * <li> {@code ch} is a currency symbol (such as {@code '$'})
5671      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5672      * </ul>
5673      *
5674      * @param   ch the character to be tested.
5675      * @return  {@code true} if the character may start a Java
5676      *          identifier; {@code false} otherwise.
5677      * @see     Character#isJavaLetterOrDigit(char)
5678      * @see     Character#isJavaIdentifierStart(char)
5679      * @see     Character#isJavaIdentifierPart(char)
5680      * @see     Character#isLetter(char)
5681      * @see     Character#isLetterOrDigit(char)
5682      * @see     Character#isUnicodeIdentifierStart(char)
     * @since   1.0.2
5684      * @deprecated Replaced by isJavaIdentifierStart(char).
5685      */
5686     @Deprecated
5687     public static boolean isJavaLetter(char ch) {
5688         return isJavaIdentifierStart(ch);
5689     }
5690 
5691     /**
5692      * Determines if the specified character may be part of a Java
5693      * identifier as other than the first character.
5694      * <p>
5695      * A character may be part of a Java identifier if and only if any
5696      * of the following are true:
5697      * <ul>
5698      * <li>  it is a letter
5699      * <li>  it is a currency symbol (such as {@code '$'})
5700      * <li>  it is a connecting punctuation character (such as {@code '_'})
5701      * <li>  it is a digit
5702      * <li>  it is a numeric letter (such as a Roman numeral character)
5703      * <li>  it is a combining mark
5704      * <li>  it is a non-spacing mark
5705      * <li> {@code isIdentifierIgnorable} returns
5706      * {@code true} for the character.
5707      * </ul>
5708      *
5709      * @param   ch the character to be tested.
5710      * @return  {@code true} if the character may be part of a
5711      *          Java identifier; {@code false} otherwise.
5712      * @see     Character#isJavaLetter(char)
5713      * @see     Character#isJavaIdentifierStart(char)
5714      * @see     Character#isJavaIdentifierPart(char)
5715      * @see     Character#isLetter(char)
5716      * @see     Character#isLetterOrDigit(char)
5717      * @see     Character#isUnicodeIdentifierPart(char)
5718      * @see     Character#isIdentifierIgnorable(char)
     * @since   1.0.2
5720      * @deprecated Replaced by isJavaIdentifierPart(char).
5721      */
5722     @Deprecated
5723     public static boolean isJavaLetterOrDigit(char ch) {
5724         return isJavaIdentifierPart(ch);
5725     }
5726 
5727     /**
5728      * Determines if the specified character is
5729      * permissible as the first character in a Java identifier.
5730      * <p>
5731      * A character may start a Java identifier if and only if
5732      * one of the following conditions is true:
5733      * <ul>
5734      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5735      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5736      * <li> {@code ch} is a currency symbol (such as {@code '$'})
5737      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5738      * </ul>
5739      *
5740      * <p><b>Note:</b> This method cannot handle <a
5741      * href="#supplementary"> supplementary characters</a>. To support
5742      * all Unicode characters, including supplementary characters, use
5743      * the {@link #isJavaIdentifierStart(int)} method.
5744      *
5745      * @param   ch the character to be tested.
5746      * @return  {@code true} if the character may start a Java identifier;
5747      *          {@code false} otherwise.
5748      * @see     Character#isJavaIdentifierPart(char)
5749      * @see     Character#isLetter(char)
5750      * @see     Character#isUnicodeIdentifierStart(char)
5751      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5752      * @since   1.1
5753      */
5754     public static boolean isJavaIdentifierStart(char ch) {
5755         return isJavaIdentifierStart((int)ch);
5756     }
5757 
5758     /**
5759      * Determines if the character (Unicode code point) is
5760      * permissible as the first character in a Java identifier.
5761      * <p>
5762      * A character may start a Java identifier if and only if
5763      * one of the following conditions is true:
5764      * <ul>
5765      * <li> {@link #isLetter(int) isLetter(codePoint)}
5766      *      returns {@code true}
5767      * <li> {@link #getType(int) getType(codePoint)}
5768      *      returns {@code LETTER_NUMBER}
5769      * <li> the referenced character is a currency symbol (such as {@code '$'})
5770      * <li> the referenced character is a connecting punctuation character
5771      *      (such as {@code '_'}).
5772      * </ul>
5773      *
5774      * @param   codePoint the character (Unicode code point) to be tested.
5775      * @return  {@code true} if the character may start a Java identifier;
5776      *          {@code false} otherwise.
5777      * @see     Character#isJavaIdentifierPart(int)
5778      * @see     Character#isLetter(int)
5779      * @see     Character#isUnicodeIdentifierStart(int)
5780      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5781      * @since   1.5
5782      */
5783     public static boolean isJavaIdentifierStart(int codePoint) {
5784         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
5785     }
5786 
5787     /**
5788      * Determines if the specified character may be part of a Java
5789      * identifier as other than the first character.
5790      * <p>
5791      * A character may be part of a Java identifier if any of the following
5792      * are true:
5793      * <ul>
5794      * <li>  it is a letter
5795      * <li>  it is a currency symbol (such as {@code '$'})
5796      * <li>  it is a connecting punctuation character (such as {@code '_'})
5797      * <li>  it is a digit
5798      * <li>  it is a numeric letter (such as a Roman numeral character)
5799      * <li>  it is a combining mark
5800      * <li>  it is a non-spacing mark
5801      * <li> {@code isIdentifierIgnorable} returns
5802      * {@code true} for the character
5803      * </ul>
5804      *
5805      * <p><b>Note:</b> This method cannot handle <a
5806      * href="#supplementary"> supplementary characters</a>. To support
5807      * all Unicode characters, including supplementary characters, use
5808      * the {@link #isJavaIdentifierPart(int)} method.
5809      *
5810      * @param   ch      the character to be tested.
5811      * @return {@code true} if the character may be part of a
5812      *          Java identifier; {@code false} otherwise.
5813      * @see     Character#isIdentifierIgnorable(char)
5814      * @see     Character#isJavaIdentifierStart(char)
5815      * @see     Character#isLetterOrDigit(char)
5816      * @see     Character#isUnicodeIdentifierPart(char)
5817      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5818      * @since   1.1
5819      */
5820     public static boolean isJavaIdentifierPart(char ch) {
5821         return isJavaIdentifierPart((int)ch);
5822     }
5823 
5824     /**
5825      * Determines if the character (Unicode code point) may be part of a Java
5826      * identifier as other than the first character.
5827      * <p>
5828      * A character may be part of a Java identifier if any of the following
5829      * are true:
5830      * <ul>
5831      * <li>  it is a letter
5832      * <li>  it is a currency symbol (such as {@code '$'})
5833      * <li>  it is a connecting punctuation character (such as {@code '_'})
5834      * <li>  it is a digit
5835      * <li>  it is a numeric letter (such as a Roman numeral character)
5836      * <li>  it is a combining mark
5837      * <li>  it is a non-spacing mark
5838      * <li> {@link #isIdentifierIgnorable(int)
5839      * isIdentifierIgnorable(codePoint)} returns {@code true} for
5840      * the character
5841      * </ul>
5842      *
5843      * @param   codePoint the character (Unicode code point) to be tested.
5844      * @return {@code true} if the character may be part of a
5845      *          Java identifier; {@code false} otherwise.
5846      * @see     Character#isIdentifierIgnorable(int)
5847      * @see     Character#isJavaIdentifierStart(int)
5848      * @see     Character#isLetterOrDigit(int)
5849      * @see     Character#isUnicodeIdentifierPart(int)
5850      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5851      * @since   1.5
5852      */
5853     public static boolean isJavaIdentifierPart(int codePoint) {
5854         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
5855     }
5856 
5857     /**
5858      * Determines if the specified character is permissible as the
5859      * first character in a Unicode identifier.
5860      * <p>
5861      * A character may start a Unicode identifier if and only if
5862      * one of the following conditions is true:
5863      * <ul>
5864      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5865      * <li> {@link #getType(char) getType(ch)} returns
5866      *      {@code LETTER_NUMBER}.
5867      * </ul>
5868      *
5869      * <p><b>Note:</b> This method cannot handle <a
5870      * href="#supplementary"> supplementary characters</a>. To support
5871      * all Unicode characters, including supplementary characters, use
5872      * the {@link #isUnicodeIdentifierStart(int)} method.
5873      *
5874      * @param   ch      the character to be tested.
5875      * @return  {@code true} if the character may start a Unicode
5876      *          identifier; {@code false} otherwise.
5877      * @see     Character#isJavaIdentifierStart(char)
5878      * @see     Character#isLetter(char)
5879      * @see     Character#isUnicodeIdentifierPart(char)
5880      * @since   1.1
5881      */
5882     public static boolean isUnicodeIdentifierStart(char ch) {
5883         return isUnicodeIdentifierStart((int)ch);
5884     }
5885 
5886     /**
5887      * Determines if the specified character (Unicode code point) is permissible as the
5888      * first character in a Unicode identifier.
5889      * <p>
5890      * A character may start a Unicode identifier if and only if
5891      * one of the following conditions is true:
5892      * <ul>
5893      * <li> {@link #isLetter(int) isLetter(codePoint)}
5894      *      returns {@code true}
5895      * <li> {@link #getType(int) getType(codePoint)}
5896      *      returns {@code LETTER_NUMBER}.
5897      * </ul>
5898      * @param   codePoint the character (Unicode code point) to be tested.
5899      * @return  {@code true} if the character may start a Unicode
5900      *          identifier; {@code false} otherwise.
5901      * @see     Character#isJavaIdentifierStart(int)
5902      * @see     Character#isLetter(int)
5903      * @see     Character#isUnicodeIdentifierPart(int)
5904      * @since   1.5
5905      */
5906     public static boolean isUnicodeIdentifierStart(int codePoint) {
5907         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
5908     }
5909 
5910     /**
5911      * Determines if the specified character may be part of a Unicode
5912      * identifier as other than the first character.
5913      * <p>
5914      * A character may be part of a Unicode identifier if and only if
5915      * one of the following statements is true:
5916      * <ul>
5917      * <li>  it is a letter
5918      * <li>  it is a connecting punctuation character (such as {@code '_'})
5919      * <li>  it is a digit
5920      * <li>  it is a numeric letter (such as a Roman numeral character)
5921      * <li>  it is a combining mark
5922      * <li>  it is a non-spacing mark
5923      * <li> {@code isIdentifierIgnorable} returns
5924      * {@code true} for this character.
5925      * </ul>
5926      *
5927      * <p><b>Note:</b> This method cannot handle <a
5928      * href="#supplementary"> supplementary characters</a>. To support
5929      * all Unicode characters, including supplementary characters, use
5930      * the {@link #isUnicodeIdentifierPart(int)} method.
5931      *
5932      * @param   ch      the character to be tested.
5933      * @return  {@code true} if the character may be part of a
5934      *          Unicode identifier; {@code false} otherwise.
5935      * @see     Character#isIdentifierIgnorable(char)
5936      * @see     Character#isJavaIdentifierPart(char)
5937      * @see     Character#isLetterOrDigit(char)
5938      * @see     Character#isUnicodeIdentifierStart(char)
5939      * @since   1.1
5940      */
5941     public static boolean isUnicodeIdentifierPart(char ch) {
5942         return isUnicodeIdentifierPart((int)ch);
5943     }
5944 
5945     /**
5946      * Determines if the specified character (Unicode code point) may be part of a Unicode
5947      * identifier as other than the first character.
5948      * <p>
5949      * A character may be part of a Unicode identifier if and only if
5950      * one of the following statements is true:
5951      * <ul>
5952      * <li>  it is a letter
5953      * <li>  it is a connecting punctuation character (such as {@code '_'})
5954      * <li>  it is a digit
5955      * <li>  it is a numeric letter (such as a Roman numeral character)
5956      * <li>  it is a combining mark
5957      * <li>  it is a non-spacing mark
5958      * <li> {@code isIdentifierIgnorable} returns
5959      * {@code true} for this character.
5960      * </ul>
5961      * @param   codePoint the character (Unicode code point) to be tested.
5962      * @return  {@code true} if the character may be part of a
5963      *          Unicode identifier; {@code false} otherwise.
5964      * @see     Character#isIdentifierIgnorable(int)
5965      * @see     Character#isJavaIdentifierPart(int)
5966      * @see     Character#isLetterOrDigit(int)
5967      * @see     Character#isUnicodeIdentifierStart(int)
5968      * @since   1.5
5969      */
5970     public static boolean isUnicodeIdentifierPart(int codePoint) {
5971         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
5972     }
5973 
5974     /**
5975      * Determines if the specified character should be regarded as
5976      * an ignorable character in a Java identifier or a Unicode identifier.
5977      * <p>
5978      * The following Unicode characters are ignorable in a Java identifier
5979      * or a Unicode identifier:
5980      * <ul>
5981      * <li>ISO control characters that are not whitespace
5982      * <ul>
5983      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
5984      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
5985      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
5986      * </ul>
5987      *
5988      * <li>all characters that have the {@code FORMAT} general
5989      * category value
5990      * </ul>
5991      *
5992      * <p><b>Note:</b> This method cannot handle <a
5993      * href="#supplementary"> supplementary characters</a>. To support
5994      * all Unicode characters, including supplementary characters, use
5995      * the {@link #isIdentifierIgnorable(int)} method.
5996      *
5997      * @param   ch      the character to be tested.
5998      * @return  {@code true} if the character is an ignorable control
5999      *          character that may be part of a Java or Unicode identifier;
6000      *           {@code false} otherwise.
6001      * @see     Character#isJavaIdentifierPart(char)
6002      * @see     Character#isUnicodeIdentifierPart(char)
6003      * @since   1.1
6004      */
6005     public static boolean isIdentifierIgnorable(char ch) {
6006         return isIdentifierIgnorable((int)ch);
6007     }
6008 
6009     /**
6010      * Determines if the specified character (Unicode code point) should be regarded as
6011      * an ignorable character in a Java identifier or a Unicode identifier.
6012      * <p>
6013      * The following Unicode characters are ignorable in a Java identifier
6014      * or a Unicode identifier:
6015      * <ul>
6016      * <li>ISO control characters that are not whitespace
6017      * <ul>
6018      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6019      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6020      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6021      * </ul>
6022      *
6023      * <li>all characters that have the {@code FORMAT} general
6024      * category value
6025      * </ul>
6026      *
6027      * @param   codePoint the character (Unicode code point) to be tested.
6028      * @return  {@code true} if the character is an ignorable control
6029      *          character that may be part of a Java or Unicode identifier;
6030      *          {@code false} otherwise.
6031      * @see     Character#isJavaIdentifierPart(int)
6032      * @see     Character#isUnicodeIdentifierPart(int)
6033      * @since   1.5
6034      */
6035     public static boolean isIdentifierIgnorable(int codePoint) {
6036         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
6037     }
6038 
6039     /**
6040      * Converts the character argument to lowercase using case
6041      * mapping information from the UnicodeData file.
6042      * <p>
6043      * Note that
6044      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
6045      * does not always return {@code true} for some ranges of
6046      * characters, particularly those that are symbols or ideographs.
6047      *
6048      * <p>In general, {@link String#toLowerCase()} should be used to map
6049      * characters to lowercase. {@code String} case mapping methods
6050      * have several benefits over {@code Character} case mapping methods.
6051      * {@code String} case mapping methods can perform locale-sensitive
6052      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6053      * the {@code Character} case mapping methods cannot.
6054      *
6055      * <p><b>Note:</b> This method cannot handle <a
6056      * href="#supplementary"> supplementary characters</a>. To support
6057      * all Unicode characters, including supplementary characters, use
6058      * the {@link #toLowerCase(int)} method.
6059      *
6060      * @param   ch   the character to be converted.
6061      * @return  the lowercase equivalent of the character, if any;
6062      *          otherwise, the character itself.
6063      * @see     Character#isLowerCase(char)
6064      * @see     String#toLowerCase()
6065      */
6066     public static char toLowerCase(char ch) {
6067         return (char)toLowerCase((int)ch);
6068     }
6069 
6070     /**
6071      * Converts the character (Unicode code point) argument to
6072      * lowercase using case mapping information from the UnicodeData
6073      * file.
6074      *
6075      * <p> Note that
6076      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
6077      * does not always return {@code true} for some ranges of
6078      * characters, particularly those that are symbols or ideographs.
6079      *
6080      * <p>In general, {@link String#toLowerCase()} should be used to map
6081      * characters to lowercase. {@code String} case mapping methods
6082      * have several benefits over {@code Character} case mapping methods.
6083      * {@code String} case mapping methods can perform locale-sensitive
6084      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6085      * the {@code Character} case mapping methods cannot.
6086      *
6087      * @param   codePoint   the character (Unicode code point) to be converted.
6088      * @return  the lowercase equivalent of the character (Unicode code
6089      *          point), if any; otherwise, the character itself.
6090      * @see     Character#isLowerCase(int)
6091      * @see     String#toLowerCase()
6092      *
6093      * @since   1.5
6094      */
6095     public static int toLowerCase(int codePoint) {
6096         return CharacterData.of(codePoint).toLowerCase(codePoint);
6097     }
6098 
6099     /**
6100      * Converts the character argument to uppercase using case mapping
6101      * information from the UnicodeData file.
6102      * <p>
6103      * Note that
6104      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
6105      * does not always return {@code true} for some ranges of
6106      * characters, particularly those that are symbols or ideographs.
6107      *
6108      * <p>In general, {@link String#toUpperCase()} should be used to map
6109      * characters to uppercase. {@code String} case mapping methods
6110      * have several benefits over {@code Character} case mapping methods.
6111      * {@code String} case mapping methods can perform locale-sensitive
6112      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6113      * the {@code Character} case mapping methods cannot.
6114      *
6115      * <p><b>Note:</b> This method cannot handle <a
6116      * href="#supplementary"> supplementary characters</a>. To support
6117      * all Unicode characters, including supplementary characters, use
6118      * the {@link #toUpperCase(int)} method.
6119      *
6120      * @param   ch   the character to be converted.
6121      * @return  the uppercase equivalent of the character, if any;
6122      *          otherwise, the character itself.
6123      * @see     Character#isUpperCase(char)
6124      * @see     String#toUpperCase()
6125      */
6126     public static char toUpperCase(char ch) {
6127         return (char)toUpperCase((int)ch);
6128     }
6129 
6130     /**
6131      * Converts the character (Unicode code point) argument to
6132      * uppercase using case mapping information from the UnicodeData
6133      * file.
6134      *
6135      * <p>Note that
6136      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
6137      * does not always return {@code true} for some ranges of
6138      * characters, particularly those that are symbols or ideographs.
6139      *
6140      * <p>In general, {@link String#toUpperCase()} should be used to map
6141      * characters to uppercase. {@code String} case mapping methods
6142      * have several benefits over {@code Character} case mapping methods.
6143      * {@code String} case mapping methods can perform locale-sensitive
6144      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6145      * the {@code Character} case mapping methods cannot.
6146      *
6147      * @param   codePoint   the character (Unicode code point) to be converted.
6148      * @return  the uppercase equivalent of the character, if any;
6149      *          otherwise, the character itself.
6150      * @see     Character#isUpperCase(int)
6151      * @see     String#toUpperCase()
6152      *
6153      * @since   1.5
6154      */
6155     public static int toUpperCase(int codePoint) {
6156         return CharacterData.of(codePoint).toUpperCase(codePoint);
6157     }
6158 
6159     /**
6160      * Converts the character argument to titlecase using case mapping
6161      * information from the UnicodeData file. If a character has no
6162      * explicit titlecase mapping and is not itself a titlecase char
6163      * according to UnicodeData, then the uppercase mapping is
6164      * returned as an equivalent titlecase mapping. If the
6165      * {@code char} argument is already a titlecase
6166      * {@code char}, the same {@code char} value will be
6167      * returned.
6168      * <p>
6169      * Note that
6170      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
6171      * does not always return {@code true} for some ranges of
6172      * characters.
6173      *
6174      * <p><b>Note:</b> This method cannot handle <a
6175      * href="#supplementary"> supplementary characters</a>. To support
6176      * all Unicode characters, including supplementary characters, use
6177      * the {@link #toTitleCase(int)} method.
6178      *
6179      * @param   ch   the character to be converted.
6180      * @return  the titlecase equivalent of the character, if any;
6181      *          otherwise, the character itself.
6182      * @see     Character#isTitleCase(char)
6183      * @see     Character#toLowerCase(char)
6184      * @see     Character#toUpperCase(char)
6185      * @since   1.0.2
6186      */
6187     public static char toTitleCase(char ch) {
6188         return (char)toTitleCase((int)ch);
6189     }
6190 
6191     /**
6192      * Converts the character (Unicode code point) argument to titlecase using case mapping
6193      * information from the UnicodeData file. If a character has no
6194      * explicit titlecase mapping and is not itself a titlecase char
6195      * according to UnicodeData, then the uppercase mapping is
6196      * returned as an equivalent titlecase mapping. If the
6197      * character argument is already a titlecase
6198      * character, the same character value will be
6199      * returned.
6200      *
6201      * <p>Note that
6202      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
6203      * does not always return {@code true} for some ranges of
6204      * characters.
6205      *
6206      * @param   codePoint   the character (Unicode code point) to be converted.
6207      * @return  the titlecase equivalent of the character, if any;
6208      *          otherwise, the character itself.
6209      * @see     Character#isTitleCase(int)
6210      * @see     Character#toLowerCase(int)
6211      * @see     Character#toUpperCase(int)
6212      * @since   1.5
6213      */
6214     public static int toTitleCase(int codePoint) {
6215         return CharacterData.of(codePoint).toTitleCase(codePoint);
6216     }
6217 
6218     /**
6219      * Returns the numeric value of the character {@code ch} in the
6220      * specified radix.
6221      * <p>
6222      * If the radix is not in the range {@code MIN_RADIX} &le;
6223      * {@code radix} &le; {@code MAX_RADIX} or if the
6224      * value of {@code ch} is not a valid digit in the specified
6225      * radix, {@code -1} is returned. A character is a valid digit
6226      * if at least one of the following is true:
6227      * <ul>
6228      * <li>The method {@code isDigit} is {@code true} of the character
6229      *     and the Unicode decimal digit value of the character (or its
6230      *     single-character decomposition) is less than the specified radix.
6231      *     In this case the decimal digit value is returned.
6232      * <li>The character is one of the uppercase Latin letters
6233      *     {@code 'A'} through {@code 'Z'} and its code is less than
6234      *     {@code radix + 'A' - 10}.
6235      *     In this case, {@code ch - 'A' + 10}
6236      *     is returned.
6237      * <li>The character is one of the lowercase Latin letters
6238      *     {@code 'a'} through {@code 'z'} and its code is less than
6239      *     {@code radix + 'a' - 10}.
6240      *     In this case, {@code ch - 'a' + 10}
6241      *     is returned.
6242      * <li>The character is one of the fullwidth uppercase Latin letters A
6243      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6244      *     and its code is less than
6245      *     {@code radix + '\u005CuFF21' - 10}.
6246      *     In this case, {@code ch - '\u005CuFF21' + 10}
6247      *     is returned.
6248      * <li>The character is one of the fullwidth lowercase Latin letters a
6249      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6250      *     and its code is less than
6251      *     {@code radix + '\u005CuFF41' - 10}.
6252      *     In this case, {@code ch - '\u005CuFF41' + 10}
6253      *     is returned.
6254      * </ul>
6255      *
6256      * <p><b>Note:</b> This method cannot handle <a
6257      * href="#supplementary"> supplementary characters</a>. To support
6258      * all Unicode characters, including supplementary characters, use
6259      * the {@link #digit(int, int)} method.
6260      *
6261      * @param   ch      the character to be converted.
6262      * @param   radix   the radix.
6263      * @return  the numeric value represented by the character in the
6264      *          specified radix.
6265      * @see     Character#forDigit(int, int)
6266      * @see     Character#isDigit(char)
6267      */
6268     public static int digit(char ch, int radix) {
6269         return digit((int)ch, radix);
6270     }
6271 
6272     /**
6273      * Returns the numeric value of the specified character (Unicode
6274      * code point) in the specified radix.
6275      *
6276      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
6277      * {@code radix} &le; {@code MAX_RADIX} or if the
6278      * character is not a valid digit in the specified
6279      * radix, {@code -1} is returned. A character is a valid digit
6280      * if at least one of the following is true:
6281      * <ul>
6282      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
6283      *     and the Unicode decimal digit value of the character (or its
6284      *     single-character decomposition) is less than the specified radix.
6285      *     In this case the decimal digit value is returned.
6286      * <li>The character is one of the uppercase Latin letters
6287      *     {@code 'A'} through {@code 'Z'} and its code is less than
6288      *     {@code radix + 'A' - 10}.
6289      *     In this case, {@code codePoint - 'A' + 10}
6290      *     is returned.
6291      * <li>The character is one of the lowercase Latin letters
6292      *     {@code 'a'} through {@code 'z'} and its code is less than
6293      *     {@code radix + 'a' - 10}.
6294      *     In this case, {@code codePoint - 'a' + 10}
6295      *     is returned.
6296      * <li>The character is one of the fullwidth uppercase Latin letters A
6297      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6298      *     and its code is less than
6299      *     {@code radix + '\u005CuFF21' - 10}.
6300      *     In this case,
6301      *     {@code codePoint - '\u005CuFF21' + 10}
6302      *     is returned.
6303      * <li>The character is one of the fullwidth lowercase Latin letters a
6304      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6305      *     and its code is less than
     *     {@code radix + '\u005CuFF41' - 10}.
6307      *     In this case,
6308      *     {@code codePoint - '\u005CuFF41' + 10}
6309      *     is returned.
6310      * </ul>
6311      *
6312      * @param   codePoint the character (Unicode code point) to be converted.
6313      * @param   radix   the radix.
6314      * @return  the numeric value represented by the character in the
6315      *          specified radix.
6316      * @see     Character#forDigit(int, int)
6317      * @see     Character#isDigit(int)
6318      * @since   1.5
6319      */
6320     public static int digit(int codePoint, int radix) {
6321         return CharacterData.of(codePoint).digit(codePoint, radix);
6322     }
6323 
6324     /**
6325      * Returns the {@code int} value that the specified Unicode
6326      * character represents. For example, the character
6327      * {@code '\u005Cu216C'} (the roman numeral fifty) will return
6328      * an int with a value of 50.
6329      * <p>
6330      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6331      * {@code '\u005Cu005A'}), lowercase
6332      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6333      * full width variant ({@code '\u005CuFF21'} through
6334      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6335      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6336      * through 35. This is independent of the Unicode specification,
6337      * which does not assign numeric values to these {@code char}
6338      * values.
6339      * <p>
6340      * If the character does not have a numeric value, then -1 is returned.
6341      * If the character has a numeric value that cannot be represented as a
6342      * nonnegative integer (for example, a fractional value), then -2
6343      * is returned.
6344      *
6345      * <p><b>Note:</b> This method cannot handle <a
6346      * href="#supplementary"> supplementary characters</a>. To support
6347      * all Unicode characters, including supplementary characters, use
6348      * the {@link #getNumericValue(int)} method.
6349      *
6350      * @param   ch      the character to be converted.
6351      * @return  the numeric value of the character, as a nonnegative {@code int}
6352      *           value; -2 if the character has a numeric value that is not a
6353      *          nonnegative integer; -1 if the character has no numeric value.
6354      * @see     Character#forDigit(int, int)
6355      * @see     Character#isDigit(char)
6356      * @since   1.1
6357      */
6358     public static int getNumericValue(char ch) {
6359         return getNumericValue((int)ch);
6360     }
6361 
6362     /**
6363      * Returns the {@code int} value that the specified
6364      * character (Unicode code point) represents. For example, the character
6365      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6366      * an {@code int} with a value of 50.
6367      * <p>
6368      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6369      * {@code '\u005Cu005A'}), lowercase
6370      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6371      * full width variant ({@code '\u005CuFF21'} through
6372      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6373      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6374      * through 35. This is independent of the Unicode specification,
6375      * which does not assign numeric values to these {@code char}
6376      * values.
6377      * <p>
6378      * If the character does not have a numeric value, then -1 is returned.
6379      * If the character has a numeric value that cannot be represented as a
6380      * nonnegative integer (for example, a fractional value), then -2
6381      * is returned.
6382      *
6383      * @param   codePoint the character (Unicode code point) to be converted.
6384      * @return  the numeric value of the character, as a nonnegative {@code int}
6385      *          value; -2 if the character has a numeric value that is not a
6386      *          nonnegative integer; -1 if the character has no numeric value.
6387      * @see     Character#forDigit(int, int)
6388      * @see     Character#isDigit(int)
6389      * @since   1.5
6390      */
6391     public static int getNumericValue(int codePoint) {
6392         return CharacterData.of(codePoint).getNumericValue(codePoint);
6393     }
6394 
6395     /**
6396      * Determines if the specified character is ISO-LATIN-1 white space.
6397      * This method returns {@code true} for the following five
6398      * characters only:
6399      * <table>
6400      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
6401      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
6402      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
6403      *     <td>{@code NEW LINE}</td></tr>
6404      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
6405      *     <td>{@code FORM FEED}</td></tr>
6406      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
6407      *     <td>{@code CARRIAGE RETURN}</td></tr>
6408      * <tr><td>{@code '&nbsp;'}</td>  <td>{@code U+0020}</td>
6409      *     <td>{@code SPACE}</td></tr>
6410      * </table>
6411      *
6412      * @param      ch   the character to be tested.
6413      * @return     {@code true} if the character is ISO-LATIN-1 white
6414      *             space; {@code false} otherwise.
6415      * @see        Character#isSpaceChar(char)
6416      * @see        Character#isWhitespace(char)
6417      * @deprecated Replaced by isWhitespace(char).
6418      */
6419     @Deprecated
6420     public static boolean isSpace(char ch) {
6421         return (ch <= 0x0020) &&
6422             (((((1L << 0x0009) |
6423             (1L << 0x000A) |
6424             (1L << 0x000C) |
6425             (1L << 0x000D) |
6426             (1L << 0x0020)) >> ch) & 1L) != 0);
6427     }
6428 
6429 
6430     /**
6431      * Determines if the specified character is a Unicode space character.
6432      * A character is considered to be a space character if and only if
6433      * it is specified to be a space character by the Unicode standard. This
6434      * method returns true if the character's general category type is any of
6435      * the following:
6436      * <ul>
6437      * <li> {@code SPACE_SEPARATOR}
6438      * <li> {@code LINE_SEPARATOR}
6439      * <li> {@code PARAGRAPH_SEPARATOR}
6440      * </ul>
6441      *
6442      * <p><b>Note:</b> This method cannot handle <a
6443      * href="#supplementary"> supplementary characters</a>. To support
6444      * all Unicode characters, including supplementary characters, use
6445      * the {@link #isSpaceChar(int)} method.
6446      *
6447      * @param   ch      the character to be tested.
6448      * @return  {@code true} if the character is a space character;
6449      *          {@code false} otherwise.
6450      * @see     Character#isWhitespace(char)
6451      * @since   1.1
6452      */
6453     public static boolean isSpaceChar(char ch) {
6454         return isSpaceChar((int)ch);
6455     }
6456 
6457     /**
6458      * Determines if the specified character (Unicode code point) is a
6459      * Unicode space character.  A character is considered to be a
6460      * space character if and only if it is specified to be a space
6461      * character by the Unicode standard. This method returns true if
6462      * the character's general category type is any of the following:
6463      *
6464      * <ul>
6465      * <li> {@link #SPACE_SEPARATOR}
6466      * <li> {@link #LINE_SEPARATOR}
6467      * <li> {@link #PARAGRAPH_SEPARATOR}
6468      * </ul>
6469      *
6470      * @param   codePoint the character (Unicode code point) to be tested.
6471      * @return  {@code true} if the character is a space character;
6472      *          {@code false} otherwise.
6473      * @see     Character#isWhitespace(int)
6474      * @since   1.5
6475      */
6476     public static boolean isSpaceChar(int codePoint) {
6477         return ((((1 << Character.SPACE_SEPARATOR) |
6478                   (1 << Character.LINE_SEPARATOR) |
6479                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
6480             != 0;
6481     }
6482 
6483     /**
6484      * Determines if the specified character is white space according to Java.
6485      * A character is a Java whitespace character if and only if it satisfies
6486      * one of the following criteria:
6487      * <ul>
6488      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
6489      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
6490      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6491      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6492      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6493      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6494      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6495      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6496      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6497      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6498      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6499      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6500      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6501      * </ul>
6502      *
6503      * <p><b>Note:</b> This method cannot handle <a
6504      * href="#supplementary"> supplementary characters</a>. To support
6505      * all Unicode characters, including supplementary characters, use
6506      * the {@link #isWhitespace(int)} method.
6507      *
6508      * @param   ch the character to be tested.
6509      * @return  {@code true} if the character is a Java whitespace
6510      *          character; {@code false} otherwise.
6511      * @see     Character#isSpaceChar(char)
6512      * @since   1.1
6513      */
6514     public static boolean isWhitespace(char ch) {
6515         return isWhitespace((int)ch);
6516     }
6517 
6518     /**
6519      * Determines if the specified character (Unicode code point) is
6520      * white space according to Java.  A character is a Java
6521      * whitespace character if and only if it satisfies one of the
6522      * following criteria:
6523      * <ul>
6524      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6525      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6526      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6527      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6528      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6529      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6530      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6531      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6532      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6533      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6534      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6535      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6536      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6537      * </ul>
     *
6540      * @param   codePoint the character (Unicode code point) to be tested.
6541      * @return  {@code true} if the character is a Java whitespace
6542      *          character; {@code false} otherwise.
6543      * @see     Character#isSpaceChar(int)
6544      * @since   1.5
6545      */
6546     public static boolean isWhitespace(int codePoint) {
6547         return CharacterData.of(codePoint).isWhitespace(codePoint);
6548     }
6549 
6550     /**
6551      * Determines if the specified character is an ISO control
6552      * character.  A character is considered to be an ISO control
6553      * character if its code is in the range {@code '\u005Cu0000'}
6554      * through {@code '\u005Cu001F'} or in the range
6555      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6556      *
6557      * <p><b>Note:</b> This method cannot handle <a
6558      * href="#supplementary"> supplementary characters</a>. To support
6559      * all Unicode characters, including supplementary characters, use
6560      * the {@link #isISOControl(int)} method.
6561      *
6562      * @param   ch      the character to be tested.
6563      * @return  {@code true} if the character is an ISO control character;
6564      *          {@code false} otherwise.
6565      *
6566      * @see     Character#isSpaceChar(char)
6567      * @see     Character#isWhitespace(char)
6568      * @since   1.1
6569      */
6570     public static boolean isISOControl(char ch) {
6571         return isISOControl((int)ch);
6572     }
6573 
6574     /**
6575      * Determines if the referenced character (Unicode code point) is an ISO control
6576      * character.  A character is considered to be an ISO control
6577      * character if its code is in the range {@code '\u005Cu0000'}
6578      * through {@code '\u005Cu001F'} or in the range
6579      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6580      *
6581      * @param   codePoint the character (Unicode code point) to be tested.
6582      * @return  {@code true} if the character is an ISO control character;
6583      *          {@code false} otherwise.
6584      * @see     Character#isSpaceChar(int)
6585      * @see     Character#isWhitespace(int)
6586      * @since   1.5
6587      */
6588     public static boolean isISOControl(int codePoint) {
6589         // Optimized form of:
6590         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
6591         //     (codePoint >= 0x7F && codePoint <= 0x9F);
6592         return codePoint <= 0x9F &&
6593             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
6594     }
6595 
6596     /**
6597      * Returns a value indicating a character's general category.
6598      *
6599      * <p><b>Note:</b> This method cannot handle <a
6600      * href="#supplementary"> supplementary characters</a>. To support
6601      * all Unicode characters, including supplementary characters, use
6602      * the {@link #getType(int)} method.
6603      *
6604      * @param   ch      the character to be tested.
6605      * @return  a value of type {@code int} representing the
6606      *          character's general category.
6607      * @see     Character#COMBINING_SPACING_MARK
6608      * @see     Character#CONNECTOR_PUNCTUATION
6609      * @see     Character#CONTROL
6610      * @see     Character#CURRENCY_SYMBOL
6611      * @see     Character#DASH_PUNCTUATION
6612      * @see     Character#DECIMAL_DIGIT_NUMBER
6613      * @see     Character#ENCLOSING_MARK
6614      * @see     Character#END_PUNCTUATION
6615      * @see     Character#FINAL_QUOTE_PUNCTUATION
6616      * @see     Character#FORMAT
6617      * @see     Character#INITIAL_QUOTE_PUNCTUATION
6618      * @see     Character#LETTER_NUMBER
6619      * @see     Character#LINE_SEPARATOR
6620      * @see     Character#LOWERCASE_LETTER
6621      * @see     Character#MATH_SYMBOL
6622      * @see     Character#MODIFIER_LETTER
6623      * @see     Character#MODIFIER_SYMBOL
6624      * @see     Character#NON_SPACING_MARK
6625      * @see     Character#OTHER_LETTER
6626      * @see     Character#OTHER_NUMBER
6627      * @see     Character#OTHER_PUNCTUATION
6628      * @see     Character#OTHER_SYMBOL
6629      * @see     Character#PARAGRAPH_SEPARATOR
6630      * @see     Character#PRIVATE_USE
6631      * @see     Character#SPACE_SEPARATOR
6632      * @see     Character#START_PUNCTUATION
6633      * @see     Character#SURROGATE
6634      * @see     Character#TITLECASE_LETTER
6635      * @see     Character#UNASSIGNED
6636      * @see     Character#UPPERCASE_LETTER
6637      * @since   1.1
6638      */
6639     public static int getType(char ch) {
6640         return getType((int)ch);
6641     }
6642 
6643     /**
6644      * Returns a value indicating a character's general category.
6645      *
6646      * @param   codePoint the character (Unicode code point) to be tested.
6647      * @return  a value of type {@code int} representing the
6648      *          character's general category.
6649      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
6650      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
6651      * @see     Character#CONTROL CONTROL
6652      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
6653      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
6654      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
6655      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
6656      * @see     Character#END_PUNCTUATION END_PUNCTUATION
6657      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
6658      * @see     Character#FORMAT FORMAT
6659      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
6660      * @see     Character#LETTER_NUMBER LETTER_NUMBER
6661      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
6662      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
6663      * @see     Character#MATH_SYMBOL MATH_SYMBOL
6664      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
6665      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
6666      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
6667      * @see     Character#OTHER_LETTER OTHER_LETTER
6668      * @see     Character#OTHER_NUMBER OTHER_NUMBER
6669      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
6670      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
6671      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
6672      * @see     Character#PRIVATE_USE PRIVATE_USE
6673      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
6674      * @see     Character#START_PUNCTUATION START_PUNCTUATION
6675      * @see     Character#SURROGATE SURROGATE
6676      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
6677      * @see     Character#UNASSIGNED UNASSIGNED
6678      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
6679      * @since   1.5
6680      */
6681     public static int getType(int codePoint) {
6682         return CharacterData.of(codePoint).getType(codePoint);
6683     }
6684 
6685     /**
6686      * Determines the character representation for a specific digit in
6687      * the specified radix. If the value of {@code radix} is not a
6688      * valid radix, or the value of {@code digit} is not a valid
6689      * digit in the specified radix, the null character
6690      * ({@code '\u005Cu0000'}) is returned.
6691      * <p>
6692      * The {@code radix} argument is valid if it is greater than or
6693      * equal to {@code MIN_RADIX} and less than or equal to
6694      * {@code MAX_RADIX}. The {@code digit} argument is valid if
6695      * {@code 0 <= digit < radix}.
6696      * <p>
6697      * If the digit is less than 10, then
6698      * {@code '0' + digit} is returned. Otherwise, the value
6699      * {@code 'a' + digit - 10} is returned.
6700      *
6701      * @param   digit   the number to convert to a character.
6702      * @param   radix   the radix.
6703      * @return  the {@code char} representation of the specified digit
6704      *          in the specified radix.
6705      * @see     Character#MIN_RADIX
6706      * @see     Character#MAX_RADIX
6707      * @see     Character#digit(char, int)
6708      */
6709     public static char forDigit(int digit, int radix) {
6710         if ((digit >= radix) || (digit < 0)) {
6711             return '\0';
6712         }
6713         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
6714             return '\0';
6715         }
6716         if (digit < 10) {
6717             return (char)('0' + digit);
6718         }
6719         return (char)('a' - 10 + digit);
6720     }
6721 
6722     /**
6723      * Returns the Unicode directionality property for the given
6724      * character.  Character directionality is used to calculate the
6725      * visual ordering of text. The directionality value of undefined
6726      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
6727      *
6728      * <p><b>Note:</b> This method cannot handle <a
6729      * href="#supplementary"> supplementary characters</a>. To support
6730      * all Unicode characters, including supplementary characters, use
6731      * the {@link #getDirectionality(int)} method.
6732      *
6733      * @param  ch {@code char} for which the directionality property
6734      *            is requested.
6735      * @return the directionality property of the {@code char} value.
6736      *
6737      * @see Character#DIRECTIONALITY_UNDEFINED
6738      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
6739      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
6740      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
6741      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
6742      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
6743      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
6744      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
6745      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
6746      * @see Character#DIRECTIONALITY_NONSPACING_MARK
6747      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
6748      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
6749      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
6750      * @see Character#DIRECTIONALITY_WHITESPACE
6751      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
6752      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
6753      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
6754      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
6755      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
6756      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
6757      * @since 1.4
6758      */
6759     public static byte getDirectionality(char ch) {
6760         return getDirectionality((int)ch);
6761     }
6762 
6763     /**
6764      * Returns the Unicode directionality property for the given
6765      * character (Unicode code point).  Character directionality is
6766      * used to calculate the visual ordering of text. The
6767      * directionality value of undefined character is {@link
6768      * #DIRECTIONALITY_UNDEFINED}.
6769      *
6770      * @param   codePoint the character (Unicode code point) for which
6771      *          the directionality property is requested.
6772      * @return the directionality property of the character.
6773      *
6774      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
6775      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
6776      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
6777      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
6778      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
6779      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
6780      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
6781      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
6782      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
6783      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
6784      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
6785      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
6786      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
6787      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
6788      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
6789      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
6790      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
6791      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
6792      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
6793      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
6794      * @since    1.5
6795      */
6796     public static byte getDirectionality(int codePoint) {
6797         return CharacterData.of(codePoint).getDirectionality(codePoint);
6798     }
6799 
6800     /**
6801      * Determines whether the character is mirrored according to the
6802      * Unicode specification.  Mirrored characters should have their
6803      * glyphs horizontally mirrored when displayed in text that is
6804      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
6805      * PARENTHESIS is semantically defined to be an <i>opening
6806      * parenthesis</i>.  This will appear as a "(" in text that is
6807      * left-to-right but as a ")" in text that is right-to-left.
6808      *
6809      * <p><b>Note:</b> This method cannot handle <a
6810      * href="#supplementary"> supplementary characters</a>. To support
6811      * all Unicode characters, including supplementary characters, use
6812      * the {@link #isMirrored(int)} method.
6813      *
6814      * @param  ch {@code char} for which the mirrored property is requested
6815      * @return {@code true} if the char is mirrored, {@code false}
6816      *         if the {@code char} is not mirrored or is not defined.
6817      * @since 1.4
6818      */
6819     public static boolean isMirrored(char ch) {
6820         return isMirrored((int)ch);
6821     }
6822 
6823     /**
6824      * Determines whether the specified character (Unicode code point)
6825      * is mirrored according to the Unicode specification.  Mirrored
6826      * characters should have their glyphs horizontally mirrored when
6827      * displayed in text that is right-to-left.  For example,
6828      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
6829      * defined to be an <i>opening parenthesis</i>.  This will appear
6830      * as a "(" in text that is left-to-right but as a ")" in text
6831      * that is right-to-left.
6832      *
6833      * @param   codePoint the character (Unicode code point) to be tested.
6834      * @return  {@code true} if the character is mirrored, {@code false}
6835      *          if the character is not mirrored or is not defined.
6836      * @since   1.5
6837      */
6838     public static boolean isMirrored(int codePoint) {
6839         return CharacterData.of(codePoint).isMirrored(codePoint);
6840     }
6841 
6842     /**
6843      * Compares two {@code Character} objects numerically.
6844      *
6845      * @param   anotherCharacter   the {@code Character} to be compared.
     *
6847      * @return  the value {@code 0} if the argument {@code Character}
6848      *          is equal to this {@code Character}; a value less than
6849      *          {@code 0} if this {@code Character} is numerically less
6850      *          than the {@code Character} argument; and a value greater than
6851      *          {@code 0} if this {@code Character} is numerically greater
6852      *          than the {@code Character} argument (unsigned comparison).
6853      *          Note that this is strictly a numerical comparison; it is not
6854      *          locale-dependent.
6855      * @since   1.2
6856      */
6857     public int compareTo(Character anotherCharacter) {
6858         return compare(this.value, anotherCharacter.value);
6859     }
6860 
6861     /**
6862      * Compares two {@code char} values numerically.
6863      * The value returned is identical to what would be returned by:
6864      * <pre>
6865      *    Character.valueOf(x).compareTo(Character.valueOf(y))
6866      * </pre>
6867      *
6868      * @param  x the first {@code char} to compare
6869      * @param  y the second {@code char} to compare
6870      * @return the value {@code 0} if {@code x == y};
6871      *         a value less than {@code 0} if {@code x < y}; and
6872      *         a value greater than {@code 0} if {@code x > y}
6873      * @since 1.7
6874      */
6875     public static int compare(char x, char y) {
6876         return x - y;
6877     }
6878 
6879     /**
6880      * Converts the character (Unicode code point) argument to uppercase using
6881      * information from the UnicodeData file.
6882      * <p>
6883      *
6884      * @param   codePoint   the character (Unicode code point) to be converted.
6885      * @return  either the uppercase equivalent of the character, if
6886      *          any, or an error flag ({@code Character.ERROR})
6887      *          that indicates that a 1:M {@code char} mapping exists.
6888      * @see     Character#isLowerCase(char)
6889      * @see     Character#isUpperCase(char)
6890      * @see     Character#toLowerCase(char)
6891      * @see     Character#toTitleCase(char)
6892      * @since 1.4
6893      */
6894     static int toUpperCaseEx(int codePoint) {
6895         assert isValidCodePoint(codePoint);
6896         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
6897     }
6898 
6899     /**
6900      * Converts the character (Unicode code point) argument to uppercase using case
6901      * mapping information from the SpecialCasing file in the Unicode
6902      * specification. If a character has no explicit uppercase
6903      * mapping, then the {@code char} itself is returned in the
6904      * {@code char[]}.
6905      *
6906      * @param   codePoint   the character (Unicode code point) to be converted.
6907      * @return a {@code char[]} with the uppercased character.
6908      * @since 1.4
6909      */
6910     static char[] toUpperCaseCharArray(int codePoint) {
6911         // As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
6912         assert isBmpCodePoint(codePoint);
6913         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
6914     }
6915 
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
6923 
6924     /**
6925      * Returns the value obtained by reversing the order of the bytes in the
6926      * specified <tt>char</tt> value.
6927      *
6928      * @return the value obtained by reversing (or, equivalently, swapping)
6929      *     the bytes in the specified <tt>char</tt> value.
6930      * @since 1.5
6931      */
6932     public static char reverseBytes(char ch) {
6933         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
6934     }
6935 
6936     /**
6937      * Returns the Unicode name of the specified character
6938      * {@code codePoint}, or null if the code point is
6939      * {@link #UNASSIGNED unassigned}.
6940      * <p>
6941      * Note: if the specified character is not assigned a name by
6942      * the <i>UnicodeData</i> file (part of the Unicode Character
6943      * Database maintained by the Unicode Consortium), the returned
6944      * name is the same as the result of expression
6945      *
6946      * <blockquote>{@code
6947      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
6948      *     + " "
6949      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
6950      *
6951      * }</blockquote>
6952      *
6953      * @param  codePoint the character (Unicode code point)
6954      *
6955      * @return the Unicode name of the specified character, or null if
6956      *         the code point is unassigned.
6957      *
6958      * @exception IllegalArgumentException if the specified
6959      *            {@code codePoint} is not a valid Unicode
6960      *            code point.
6961      *
6962      * @since 1.7
6963      */
6964     public static String getName(int codePoint) {
6965         if (!isValidCodePoint(codePoint)) {
6966             throw new IllegalArgumentException();
6967         }
6968         String name = CharacterName.get(codePoint);
6969         if (name != null)
6970             return name;
6971         if (getType(codePoint) == UNASSIGNED)
6972             return null;
6973         UnicodeBlock block = UnicodeBlock.of(codePoint);
6974         if (block != null)
6975             return block.toString().replace('_', ' ') + " "
6976                    + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
6977         // should never come here
6978         return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
6979     }
6980 }