1 /*
   2  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.lang;
  27 
  28 import java.util.Arrays;
  29 import java.util.Map;
  30 import java.util.HashMap;
  31 import java.util.Locale;
  32 
  33 /**
  34  * The {@code Character} class wraps a value of the primitive
  35  * type {@code char} in an object. An object of type
  36  * {@code Character} contains a single field whose type is
  37  * {@code char}.
  38  * <p>
  39  * In addition, this class provides several methods for determining
  40  * a character's category (lowercase letter, digit, etc.) and for converting
  41  * characters from uppercase to lowercase and vice versa.
  42  * <p>
  43  * Character information is based on the Unicode Standard, version 6.0.0.
  44  * <p>
  45  * The methods and data of class {@code Character} are defined by
  46  * the information in the <i>UnicodeData</i> file that is part of the
  47  * Unicode Character Database maintained by the Unicode
  48  * Consortium. This file specifies various properties including name
  49  * and general category for every defined Unicode code point or
  50  * character range.
  51  * <p>
  52  * The file and its description are available from the Unicode Consortium at:
  53  * <ul>
  54  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
  55  * </ul>
  56  *
  57  * <h4><a name="unicode">Unicode Character Representations</a></h4>
  58  *
  59  * <p>The {@code char} data type (and therefore the value that a
 * {@code Character} object encapsulates) is based on the
  61  * original Unicode specification, which defined characters as
  62  * fixed-width 16-bit entities. The Unicode Standard has since been
  63  * changed to allow for characters whose representation requires more
  64  * than 16 bits.  The range of legal <em>code point</em>s is now
  65  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
  66  * (Refer to the <a
  67  * href="http://www.unicode.org/reports/tr27/#notation"><i>
  68  * definition</i></a> of the U+<i>n</i> notation in the Unicode
  69  * Standard.)
  70  *
 * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
  72  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
  73  * <a name="supplementary">Characters</a> whose code points are greater
  74  * than U+FFFF are called <em>supplementary character</em>s.  The Java
  75  * platform uses the UTF-16 representation in {@code char} arrays and
  76  * in the {@code String} and {@code StringBuffer} classes. In
  77  * this representation, supplementary characters are represented as a pair
  78  * of {@code char} values, the first from the <em>high-surrogates</em>
  79  * range, (&#92;uD800-&#92;uDBFF), the second from the
  80  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
  81  *
  82  * <p>A {@code char} value, therefore, represents Basic
  83  * Multilingual Plane (BMP) code points, including the surrogate
  84  * code points, or code units of the UTF-16 encoding. An
  85  * {@code int} value represents all Unicode code points,
  86  * including supplementary code points. The lower (least significant)
  87  * 21 bits of {@code int} are used to represent Unicode code
  88  * points and the upper (most significant) 11 bits must be zero.
  89  * Unless otherwise specified, the behavior with respect to
  90  * supplementary characters and surrogate {@code char} values is
  91  * as follows:
  92  *
  93  * <ul>
  94  * <li>The methods that only accept a {@code char} value cannot support
  95  * supplementary characters. They treat {@code char} values from the
  96  * surrogate ranges as undefined characters. For example,
  97  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
  98  * this specific value if followed by any low-surrogate value in a string
  99  * would represent a letter.
 100  *
 101  * <li>The methods that accept an {@code int} value support all
 102  * Unicode characters, including supplementary characters. For
 103  * example, {@code Character.isLetter(0x2F81A)} returns
 104  * {@code true} because the code point value represents a letter
 105  * (a CJK ideograph).
 106  * </ul>
 107  *
 108  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
 109  * used for character values in the range between U+0000 and U+10FFFF,
 110  * and <em>Unicode code unit</em> is used for 16-bit
 111  * {@code char} values that are code units of the <em>UTF-16</em>
 112  * encoding. For more information on Unicode terminology, refer to the
 113  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
 114  *
 115  * @author  Lee Boynton
 116  * @author  Guy Steele
 117  * @author  Akira Tanaka
 118  * @author  Martin Buchholz
 119  * @author  Ulf Zibis
 120  * @since   1.0
 121  */
 122 public final
 123 class Character implements java.io.Serializable, Comparable<Character> {
    /**
     * The minimum radix available for conversion to and from strings.
     * The constant value of this field, {@code 2}, is the smallest value
     * permitted for the radix argument in radix-conversion methods such as
     * the {@code digit} method, the {@code forDigit} method, and the
     * {@code toString} method of class {@code Integer}.
     *
     * @see     Character#digit(char, int)
     * @see     Character#forDigit(int, int)
     * @see     Integer#toString(int, int)
     * @see     Integer#valueOf(String)
     * @see     #MAX_RADIX
     */
    public static final int MIN_RADIX = 2;

    /**
     * The maximum radix available for conversion to and from strings.
     * The constant value of this field, {@code 36}, is the largest value
     * permitted for the radix argument in radix-conversion methods such as
     * the {@code digit} method, the {@code forDigit} method, and the
     * {@code toString} method of class {@code Integer}.
     *
     * @see     Character#digit(char, int)
     * @see     Character#forDigit(int, int)
     * @see     Integer#toString(int, int)
     * @see     Integer#valueOf(String)
     * @see     #MIN_RADIX
     */
    public static final int MAX_RADIX = 36;
 151 
    /**
     * The smallest value of type {@code char}.
     * The constant value of this field is {@code '\u005Cu0000'}
     * (numeric value {@code 0}).
     *
     * @since   1.0.2
     */
    public static final char MIN_VALUE = '\u0000';

    /**
     * The largest value of type {@code char}.
     * The constant value of this field is {@code '\u005CuFFFF'}
     * (numeric value {@code 65535}).
     *
     * @since   1.0.2
     */
    public static final char MAX_VALUE = '\uFFFF';
 167 
    // The primitive class is looked up by its type name, "char".
    // NOTE(review): the "unchecked" suppression suggests that
    // Class.getPrimitiveClass returns a raw or less specific Class type
    // than Class<Character>; its declaration is not in this file, so
    // confirm against class Class.
    /**
     * The {@code Class} instance representing the primitive type
     * {@code char}.
     *
     * @since   1.1
     */
    @SuppressWarnings("unchecked")
    public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
 176 
 177     /*
 178      * Normative general types
 179      */
 180 
 181     /*
 182      * General character types
 183      */
 184 
    /**
     * General category "Cn" (unassigned) in the Unicode specification.
     * @since   1.1
     */
    public static final byte UNASSIGNED = 0;

    /**
     * General category "Lu" (uppercase letter) in the Unicode specification.
     * @since   1.1
     */
    public static final byte UPPERCASE_LETTER = 1;

    /**
     * General category "Ll" (lowercase letter) in the Unicode specification.
     * @since   1.1
     */
    public static final byte LOWERCASE_LETTER = 2;

    /**
     * General category "Lt" (titlecase letter) in the Unicode specification.
     * @since   1.1
     */
    public static final byte TITLECASE_LETTER = 3;

    /**
     * General category "Lm" (modifier letter) in the Unicode specification.
     * @since   1.1
     */
    public static final byte MODIFIER_LETTER = 4;

    /**
     * General category "Lo" (other letter) in the Unicode specification.
     * @since   1.1
     */
    public static final byte OTHER_LETTER = 5;

    /**
     * General category "Mn" (non-spacing mark) in the Unicode specification.
     * @since   1.1
     */
    public static final byte NON_SPACING_MARK = 6;

    /**
     * General category "Me" (enclosing mark) in the Unicode specification.
     * @since   1.1
     */
    public static final byte ENCLOSING_MARK = 7;

    /**
     * General category "Mc" (combining spacing mark) in the Unicode
     * specification.
     * @since   1.1
     */
    public static final byte COMBINING_SPACING_MARK = 8;

    /**
     * General category "Nd" (decimal digit number) in the Unicode
     * specification.
     * @since   1.1
     */
    public static final byte DECIMAL_DIGIT_NUMBER        = 9;

    /**
     * General category "Nl" (letter number) in the Unicode specification.
     * @since   1.1
     */
    public static final byte LETTER_NUMBER = 10;

    /**
     * General category "No" (other number) in the Unicode specification.
     * @since   1.1
     */
    public static final byte OTHER_NUMBER = 11;

    /**
     * General category "Zs" (space separator) in the Unicode specification.
     * @since   1.1
     */
    public static final byte SPACE_SEPARATOR = 12;

    /**
     * General category "Zl" (line separator) in the Unicode specification.
     * @since   1.1
     */
    public static final byte LINE_SEPARATOR = 13;

    /**
     * General category "Zp" (paragraph separator) in the Unicode
     * specification.
     * @since   1.1
     */
    public static final byte PARAGRAPH_SEPARATOR = 14;

    /**
     * General category "Cc" (control) in the Unicode specification.
     * @since   1.1
     */
    public static final byte CONTROL = 15;

    /**
     * General category "Cf" (format) in the Unicode specification.
     * @since   1.1
     */
    public static final byte FORMAT = 16;

    /**
     * General category "Co" (private use) in the Unicode specification.
     * @since   1.1
     */
    // The numbering skips 17: FORMAT is 16 and this constant is 18. No
    // category constant in this part of the file uses the value 17.
    public static final byte PRIVATE_USE = 18;

    /**
     * General category "Cs" (surrogate) in the Unicode specification.
     * @since   1.1
     */
    public static final byte SURROGATE = 19;

    /**
     * General category "Pd" (dash punctuation) in the Unicode specification.
     * @since   1.1
     */
    public static final byte DASH_PUNCTUATION = 20;

    /**
     * General category "Ps" (start punctuation) in the Unicode specification.
     * @since   1.1
     */
    public static final byte START_PUNCTUATION = 21;

    /**
     * General category "Pe" (end punctuation) in the Unicode specification.
     * @since   1.1
     */
    public static final byte END_PUNCTUATION = 22;

    /**
     * General category "Pc" (connector punctuation) in the Unicode
     * specification.
     * @since   1.1
     */
    public static final byte CONNECTOR_PUNCTUATION = 23;

    /**
     * General category "Po" (other punctuation) in the Unicode specification.
     * @since   1.1
     */
    public static final byte OTHER_PUNCTUATION = 24;

    /**
     * General category "Sm" (math symbol) in the Unicode specification.
     * @since   1.1
     */
    public static final byte MATH_SYMBOL = 25;

    /**
     * General category "Sc" (currency symbol) in the Unicode specification.
     * @since   1.1
     */
    public static final byte CURRENCY_SYMBOL = 26;

    /**
     * General category "Sk" (modifier symbol) in the Unicode specification.
     * @since   1.1
     */
    public static final byte MODIFIER_SYMBOL = 27;

    /**
     * General category "So" (other symbol) in the Unicode specification.
     * @since   1.1
     */
    public static final byte OTHER_SYMBOL = 28;

    /**
     * General category "Pi" (initial quote punctuation) in the Unicode
     * specification.
     * @since   1.4
     */
    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;

    /**
     * General category "Pf" (final quote punctuation) in the Unicode
     * specification.
     * @since   1.4
     */
    public static final byte FINAL_QUOTE_PUNCTUATION = 30;
 364 
    /**
     * Error flag. Use int (code point) to avoid confusion with U+FFFF.
     * <p>
     * The value {@code 0xFFFFFFFF} is {@code -1} as an {@code int}, which
     * lies outside the code point range {@link #MIN_CODE_POINT} to
     * {@link #MAX_CODE_POINT}, so it cannot be mistaken for a real code
     * point. The field is package-private and not part of the public API.
     */
    static final int ERROR = 0xFFFFFFFF;
 369 
 370 
    /**
     * Undefined bidirectional character type. Undefined {@code char}
     * values have undefined directionality in the Unicode specification.
     * This is the only directionality constant with a negative value.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_UNDEFINED = -1;

    /**
     * Strong bidirectional character type "L" (left-to-right) in the
     * Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;

    /**
     * Strong bidirectional character type "R" (right-to-left) in the
     * Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;

    /**
     * Strong bidirectional character type "AL" (right-to-left Arabic) in
     * the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;

    /**
     * Weak bidirectional character type "EN" (European number) in the
     * Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;

    /**
     * Weak bidirectional character type "ES" (European number separator)
     * in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;

    /**
     * Weak bidirectional character type "ET" (European number terminator)
     * in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;

    /**
     * Weak bidirectional character type "AN" (Arabic number) in the
     * Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;

    /**
     * Weak bidirectional character type "CS" (common number separator) in
     * the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;

    /**
     * Weak bidirectional character type "NSM" (nonspacing mark) in the
     * Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;

    /**
     * Weak bidirectional character type "BN" (boundary neutral) in the
     * Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;

    /**
     * Neutral bidirectional character type "B" (paragraph separator) in
     * the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;

    /**
     * Neutral bidirectional character type "S" (segment separator) in the
     * Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;

    /**
     * Neutral bidirectional character type "WS" (whitespace) in the
     * Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_WHITESPACE = 12;

    /**
     * Neutral bidirectional character type "ON" (other neutrals) in the
     * Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;

    /**
     * Strong bidirectional character type "LRE" (left-to-right embedding)
     * in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;

    /**
     * Strong bidirectional character type "LRO" (left-to-right override)
     * in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;

    /**
     * Strong bidirectional character type "RLE" (right-to-left embedding)
     * in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;

    /**
     * Strong bidirectional character type "RLO" (right-to-left override)
     * in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;

    /**
     * Weak bidirectional character type "PDF" (pop directional format) in
     * the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
 491 
    /**
     * The minimum value of a
     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
     * Unicode high-surrogate code unit</a>
     * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
     * A high-surrogate is also known as a <i>leading-surrogate</i>.
     * Together with {@link #MAX_HIGH_SURROGATE} this bounds the
     * high-surrogate range.
     *
     * @since 1.5
     */
    public static final char MIN_HIGH_SURROGATE = '\uD800';

    /**
     * The maximum value of a
     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
     * Unicode high-surrogate code unit</a>
     * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
     * A high-surrogate is also known as a <i>leading-surrogate</i>.
     * Together with {@link #MIN_HIGH_SURROGATE} this bounds the
     * high-surrogate range.
     *
     * @since 1.5
     */
    public static final char MAX_HIGH_SURROGATE = '\uDBFF';

    /**
     * The minimum value of a
     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
     * Unicode low-surrogate code unit</a>
     * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
     * Together with {@link #MAX_LOW_SURROGATE} this bounds the
     * low-surrogate range, which starts directly after the
     * high-surrogate range.
     *
     * @since 1.5
     */
    public static final char MIN_LOW_SURROGATE  = '\uDC00';

    /**
     * The maximum value of a
     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
     * Unicode low-surrogate code unit</a>
     * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
     * Together with {@link #MIN_LOW_SURROGATE} this bounds the
     * low-surrogate range.
     *
     * @since 1.5
     */
    public static final char MAX_LOW_SURROGATE  = '\uDFFF';

    /**
     * The minimum value of a Unicode surrogate code unit in the
     * UTF-16 encoding, constant {@code '\u005CuD800'}.
     * Defined as {@link #MIN_HIGH_SURROGATE}, because the high-surrogate
     * range lies below the low-surrogate range.
     *
     * @since 1.5
     */
    public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;

    /**
     * The maximum value of a Unicode surrogate code unit in the
     * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
     * Defined as {@link #MAX_LOW_SURROGATE}, because the low-surrogate
     * range lies above the high-surrogate range.
     *
     * @since 1.5
     */
    public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
 551 
 552     /**
 553      * The minimum value of a
 554      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
 555      * Unicode supplementary code point</a>, constant {@code U+10000}.
 556      *
 557      * @since 1.5
 558      */
 559     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
 560 
 561     /**
 562      * The minimum value of a
 563      * <a href="http://www.unicode.org/glossary/#code_point">
 564      * Unicode code point</a>, constant {@code U+0000}.
 565      *
 566      * @since 1.5
 567      */
 568     public static final int MIN_CODE_POINT = 0x000000;
 569 
 570     /**
 571      * The maximum value of a
 572      * <a href="http://www.unicode.org/glossary/#code_point">
 573      * Unicode code point</a>, constant {@code U+10FFFF}.
 574      *
 575      * @since 1.5
 576      */
 577     public static final int MAX_CODE_POINT = 0X10FFFF;
 578 
 579 
 580     /**
 581      * Instances of this class represent particular subsets of the Unicode
 582      * character set.  The only family of subsets defined in the
 583      * {@code Character} class is {@link Character.UnicodeBlock}.
 584      * Other portions of the Java API may define other subsets for their
 585      * own purposes.
 586      *
 587      * @since 1.2
 588      */
 589     public static class Subset  {
 590 
 591         private String name;
 592 
 593         /**
 594          * Constructs a new {@code Subset} instance.
 595          *
 596          * @param  name  The name of this subset
 597          * @exception NullPointerException if name is {@code null}
 598          */
 599         protected Subset(String name) {
 600             if (name == null) {
 601                 throw new NullPointerException("name");
 602             }
 603             this.name = name;
 604         }
 605 
 606         /**
 607          * Compares two {@code Subset} objects for equality.
 608          * This method returns {@code true} if and only if
 609          * {@code this} and the argument refer to the same
 610          * object; since this method is {@code final}, this
 611          * guarantee holds for all subclasses.
 612          */
 613         public final boolean equals(Object obj) {
 614             return (this == obj);
 615         }
 616 
 617         /**
 618          * Returns the standard hash code as defined by the
 619          * {@link Object#hashCode} method.  This method
 620          * is {@code final} in order to ensure that the
 621          * {@code equals} and {@code hashCode} methods will
 622          * be consistent in all subclasses.
 623          */
 624         public final int hashCode() {
 625             return super.hashCode();
 626         }
 627 
 628         /**
 629          * Returns the name of this subset.
 630          */
 631         public final String toString() {
 632             return name;
 633         }
 634     }
 635 
 636     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
 637     // for the latest specification of Unicode Blocks.
 638 
 639     /**
 640      * A family of character subsets representing the character blocks in the
 641      * Unicode specification. Character blocks generally define characters
 642      * used for a specific script or purpose. A character is contained by
 643      * at most one Unicode block.
 644      *
 645      * @since 1.2
 646      */
 647     public static final class UnicodeBlock extends Subset {
 648 
        // Registry from every registered name (identifier or alias) to its
        // block. The constructors below add entries as each block constant
        // is created, so this field has to be declared before those
        // constants: static initializers run in textual order.
        // NOTE(review): never reassigned in the visible part of the class,
        // so it could probably be declared final. Confirm against the rest
        // of the file.
        private static Map<String, UnicodeBlock> map = new HashMap<>(256);
 650 
        /**
         * Creates a UnicodeBlock with the given identifier name.
         * This name must be the same as the block identifier.
         * <p>
         * The new block is also recorded in {@code map} under
         * {@code idName}.
         *
         * @param idName the identifier name; it is passed on to the
         *        {@code Subset} constructor, which throws
         *        {@code NullPointerException} if it is {@code null}
         */
        private UnicodeBlock(String idName) {
            super(idName);
            // The instance is registered while it is still being
            // constructed, so map must already be initialized here.
            map.put(idName, this);
        }
 659 
        /**
         * Creates a UnicodeBlock with the given identifier name and
         * alias name. The block is recorded in {@code map} under both
         * names.
         *
         * @param idName the identifier name, handled by the single-argument
         *        constructor
         * @param alias  one additional name that maps to this block
         */
        private UnicodeBlock(String idName, String alias) {
            this(idName);
            map.put(alias, this);
        }
 668 
 669         /**
 670          * Creates a UnicodeBlock with the given identifier name and
 671          * alias names.
 672          */
 673         private UnicodeBlock(String idName, String... aliases) {
 674             this(idName);
 675             for (String alias : aliases)
 676                 map.put(alias, this);
 677         }
 678 
        // Argument pattern for the constants in this run: the first string
        // is the identifier and the remaining strings are aliases. In most
        // cases the identifier equals the field name and the aliases are
        // the block name written with spaces and with the spaces removed;
        // GREEK is the exception and is explained at its declaration.

        /**
         * Constant for the "Basic Latin" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock  BASIC_LATIN =
            new UnicodeBlock("BASIC_LATIN",
                             "BASIC LATIN",
                             "BASICLATIN");

        /**
         * Constant for the "Latin-1 Supplement" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock LATIN_1_SUPPLEMENT =
            new UnicodeBlock("LATIN_1_SUPPLEMENT",
                             "LATIN-1 SUPPLEMENT",
                             "LATIN-1SUPPLEMENT");

        /**
         * Constant for the "Latin Extended-A" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock LATIN_EXTENDED_A =
            new UnicodeBlock("LATIN_EXTENDED_A",
                             "LATIN EXTENDED-A",
                             "LATINEXTENDED-A");

        /**
         * Constant for the "Latin Extended-B" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock LATIN_EXTENDED_B =
            new UnicodeBlock("LATIN_EXTENDED_B",
                             "LATIN EXTENDED-B",
                             "LATINEXTENDED-B");

        /**
         * Constant for the "IPA Extensions" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock IPA_EXTENSIONS =
            new UnicodeBlock("IPA_EXTENSIONS",
                             "IPA EXTENSIONS",
                             "IPAEXTENSIONS");

        /**
         * Constant for the "Spacing Modifier Letters" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
            new UnicodeBlock("SPACING_MODIFIER_LETTERS",
                             "SPACING MODIFIER LETTERS",
                             "SPACINGMODIFIERLETTERS");

        /**
         * Constant for the "Combining Diacritical Marks" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
            new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
                             "COMBINING DIACRITICAL MARKS",
                             "COMBININGDIACRITICALMARKS");

        /**
         * Constant for the "Greek and Coptic" Unicode character block.
         * <p>
         * This block was previously known as the "Greek" block.
         *
         * @since 1.2
         */
        // The identifier keeps the earlier name "GREEK"; only the aliases
        // use the current block name "Greek and Coptic".
        public static final UnicodeBlock GREEK =
            new UnicodeBlock("GREEK",
                             "GREEK AND COPTIC",
                             "GREEKANDCOPTIC");
 753 
        // The blocks in this run have single-word names, so each one is
        // created with the single-argument constructor and is registered
        // under its identifier only; no aliases are passed.

        /**
         * Constant for the "Cyrillic" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock CYRILLIC =
            new UnicodeBlock("CYRILLIC");

        /**
         * Constant for the "Armenian" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock ARMENIAN =
            new UnicodeBlock("ARMENIAN");

        /**
         * Constant for the "Hebrew" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock HEBREW =
            new UnicodeBlock("HEBREW");

        /**
         * Constant for the "Arabic" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock ARABIC =
            new UnicodeBlock("ARABIC");

        /**
         * Constant for the "Devanagari" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock DEVANAGARI =
            new UnicodeBlock("DEVANAGARI");

        /**
         * Constant for the "Bengali" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock BENGALI =
            new UnicodeBlock("BENGALI");

        /**
         * Constant for the "Gurmukhi" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock GURMUKHI =
            new UnicodeBlock("GURMUKHI");

        /**
         * Constant for the "Gujarati" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock GUJARATI =
            new UnicodeBlock("GUJARATI");

        /**
         * Constant for the "Oriya" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock ORIYA =
            new UnicodeBlock("ORIYA");

        /**
         * Constant for the "Tamil" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock TAMIL =
            new UnicodeBlock("TAMIL");

        /**
         * Constant for the "Telugu" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock TELUGU =
            new UnicodeBlock("TELUGU");

        /**
         * Constant for the "Kannada" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock KANNADA =
            new UnicodeBlock("KANNADA");

        /**
         * Constant for the "Malayalam" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock MALAYALAM =
            new UnicodeBlock("MALAYALAM");

        /**
         * Constant for the "Thai" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock THAI =
            new UnicodeBlock("THAI");

        /**
         * Constant for the "Lao" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock LAO =
            new UnicodeBlock("LAO");

        /**
         * Constant for the "Tibetan" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock TIBETAN =
            new UnicodeBlock("TIBETAN");

        /**
         * Constant for the "Georgian" Unicode character block.
         * @since 1.2
         */
        public static final UnicodeBlock GEORGIAN =
            new UnicodeBlock("GEORGIAN");
 872 
 873         /**
 874          * Constant for the "Hangul Jamo" Unicode character block.
 875          * @since 1.2
 876          */
 877         public static final UnicodeBlock HANGUL_JAMO =
 878             new UnicodeBlock("HANGUL_JAMO",
 879                              "HANGUL JAMO",
 880                              "HANGULJAMO");
 881 
 882         /**
 883          * Constant for the "Latin Extended Additional" Unicode character block.
 884          * @since 1.2
 885          */
 886         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
 887             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
 888                              "LATIN EXTENDED ADDITIONAL",
 889                              "LATINEXTENDEDADDITIONAL");
 890 
 891         /**
 892          * Constant for the "Greek Extended" Unicode character block.
 893          * @since 1.2
 894          */
 895         public static final UnicodeBlock GREEK_EXTENDED =
 896             new UnicodeBlock("GREEK_EXTENDED",
 897                              "GREEK EXTENDED",
 898                              "GREEKEXTENDED");
 899 
 900         /**
 901          * Constant for the "General Punctuation" Unicode character block.
 902          * @since 1.2
 903          */
 904         public static final UnicodeBlock GENERAL_PUNCTUATION =
 905             new UnicodeBlock("GENERAL_PUNCTUATION",
 906                              "GENERAL PUNCTUATION",
 907                              "GENERALPUNCTUATION");
 908 
 909         /**
 910          * Constant for the "Superscripts and Subscripts" Unicode character
 911          * block.
 912          * @since 1.2
 913          */
 914         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
 915             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
 916                              "SUPERSCRIPTS AND SUBSCRIPTS",
 917                              "SUPERSCRIPTSANDSUBSCRIPTS");
 918 
 919         /**
 920          * Constant for the "Currency Symbols" Unicode character block.
 921          * @since 1.2
 922          */
 923         public static final UnicodeBlock CURRENCY_SYMBOLS =
 924             new UnicodeBlock("CURRENCY_SYMBOLS",
 925                              "CURRENCY SYMBOLS",
 926                              "CURRENCYSYMBOLS");
 927 
 928         /**
 929          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
 930          * character block.
 931          * <p>
 932          * This block was previously known as "Combining Marks for Symbols".
              * The identifier of this constant still reflects that earlier name;
              * both the current and the previous block names are passed to the
              * constructor, each in a spaced and a space-free spelling.
 933          * @since 1.2
 934          */
 935         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
 936             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",             // identifier
 937                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS", // current name
 938                              "COMBININGDIACRITICALMARKSFORSYMBOLS",     // current, no spaces
 939                              "COMBINING MARKS FOR SYMBOLS",             // previous name
 940                              "COMBININGMARKSFORSYMBOLS");               // previous, no spaces
 941 
 942         /**
 943          * Constant for the "Letterlike Symbols" Unicode character block.
 944          * @since 1.2
 945          */
 946         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
 947             new UnicodeBlock("LETTERLIKE_SYMBOLS",
 948                              "LETTERLIKE SYMBOLS",
 949                              "LETTERLIKESYMBOLS");
 950 
 951         /**
 952          * Constant for the "Number Forms" Unicode character block.
 953          * @since 1.2
 954          */
 955         public static final UnicodeBlock NUMBER_FORMS =
 956             new UnicodeBlock("NUMBER_FORMS",
 957                              "NUMBER FORMS",
 958                              "NUMBERFORMS");
 959 
 960         /**
 961          * Constant for the "Arrows" Unicode character block.
 962          * @since 1.2
 963          */
 964         public static final UnicodeBlock ARROWS =
 965             new UnicodeBlock("ARROWS");
 966 
 967         /**
 968          * Constant for the "Mathematical Operators" Unicode character block.
 969          * @since 1.2
 970          */
 971         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
 972             new UnicodeBlock("MATHEMATICAL_OPERATORS",
 973                              "MATHEMATICAL OPERATORS",
 974                              "MATHEMATICALOPERATORS");
 975 
 976         /**
 977          * Constant for the "Miscellaneous Technical" Unicode character block.
 978          * @since 1.2
 979          */
 980         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
 981             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
 982                              "MISCELLANEOUS TECHNICAL",
 983                              "MISCELLANEOUSTECHNICAL");
 984 
 985         /**
 986          * Constant for the "Control Pictures" Unicode character block.
 987          * @since 1.2
 988          */
 989         public static final UnicodeBlock CONTROL_PICTURES =
 990             new UnicodeBlock("CONTROL_PICTURES",
 991                              "CONTROL PICTURES",
 992                              "CONTROLPICTURES");
 993 
 994         /**
 995          * Constant for the "Optical Character Recognition" Unicode character block.
 996          * @since 1.2
 997          */
 998         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
 999             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1000                              "OPTICAL CHARACTER RECOGNITION",
1001                              "OPTICALCHARACTERRECOGNITION");
1002 
1003         /**
1004          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1005          * @since 1.2
1006          */
1007         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1008             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1009                              "ENCLOSED ALPHANUMERICS",
1010                              "ENCLOSEDALPHANUMERICS");
1011 
1012         /**
1013          * Constant for the "Box Drawing" Unicode character block.
1014          * @since 1.2
1015          */
1016         public static final UnicodeBlock BOX_DRAWING =
1017             new UnicodeBlock("BOX_DRAWING",
1018                              "BOX DRAWING",
1019                              "BOXDRAWING");
1020 
1021         /**
1022          * Constant for the "Block Elements" Unicode character block.
1023          * @since 1.2
1024          */
1025         public static final UnicodeBlock BLOCK_ELEMENTS =
1026             new UnicodeBlock("BLOCK_ELEMENTS",
1027                              "BLOCK ELEMENTS",
1028                              "BLOCKELEMENTS");
1029 
1030         /**
1031          * Constant for the "Geometric Shapes" Unicode character block.
1032          * @since 1.2
1033          */
1034         public static final UnicodeBlock GEOMETRIC_SHAPES =
1035             new UnicodeBlock("GEOMETRIC_SHAPES",
1036                              "GEOMETRIC SHAPES",
1037                              "GEOMETRICSHAPES");
1038 
1039         /**
1040          * Constant for the "Miscellaneous Symbols" Unicode character block.
1041          * @since 1.2
1042          */
1043         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1044             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1045                              "MISCELLANEOUS SYMBOLS",
1046                              "MISCELLANEOUSSYMBOLS");
1047 
1048         /**
1049          * Constant for the "Dingbats" Unicode character block.
1050          * @since 1.2
1051          */
1052         public static final UnicodeBlock DINGBATS =
1053             new UnicodeBlock("DINGBATS");
1054 
1055         /**
1056          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1057          * @since 1.2
1058          */
1059         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1060             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1061                              "CJK SYMBOLS AND PUNCTUATION",
1062                              "CJKSYMBOLSANDPUNCTUATION");
1063 
1064         /**
1065          * Constant for the "Hiragana" Unicode character block.
1066          * @since 1.2
1067          */
1068         public static final UnicodeBlock HIRAGANA =
1069             new UnicodeBlock("HIRAGANA");
1070 
1071         /**
1072          * Constant for the "Katakana" Unicode character block.
1073          * @since 1.2
1074          */
1075         public static final UnicodeBlock KATAKANA =
1076             new UnicodeBlock("KATAKANA");
1077 
1078         /**
1079          * Constant for the "Bopomofo" Unicode character block.
1080          * @since 1.2
1081          */
1082         public static final UnicodeBlock BOPOMOFO =
1083             new UnicodeBlock("BOPOMOFO");
1084 
1085         /**
1086          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1087          * @since 1.2
1088          */
1089         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1090             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1091                              "HANGUL COMPATIBILITY JAMO",
1092                              "HANGULCOMPATIBILITYJAMO");
1093 
1094         /**
1095          * Constant for the "Kanbun" Unicode character block.
1096          * @since 1.2
1097          */
1098         public static final UnicodeBlock KANBUN =
1099             new UnicodeBlock("KANBUN");
1100 
1101         /**
1102          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1103          * @since 1.2
1104          */
1105         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1106             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1107                              "ENCLOSED CJK LETTERS AND MONTHS",
1108                              "ENCLOSEDCJKLETTERSANDMONTHS");
1109 
1110         /**
1111          * Constant for the "CJK Compatibility" Unicode character block.
1112          * @since 1.2
1113          */
1114         public static final UnicodeBlock CJK_COMPATIBILITY =
1115             new UnicodeBlock("CJK_COMPATIBILITY",
1116                              "CJK COMPATIBILITY",
1117                              "CJKCOMPATIBILITY");
1118 
1119         /**
1120          * Constant for the "CJK Unified Ideographs" Unicode character block.
1121          * @since 1.2
1122          */
1123         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1124             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1125                              "CJK UNIFIED IDEOGRAPHS",
1126                              "CJKUNIFIEDIDEOGRAPHS");
1127 
1128         /**
1129          * Constant for the "Hangul Syllables" Unicode character block.
1130          * @since 1.2
1131          */
1132         public static final UnicodeBlock HANGUL_SYLLABLES =
1133             new UnicodeBlock("HANGUL_SYLLABLES",
1134                              "HANGUL SYLLABLES",
1135                              "HANGULSYLLABLES");
1136 
1137         /**
1138          * Constant for the "Private Use Area" Unicode character block.
1139          * @since 1.2
1140          */
1141         public static final UnicodeBlock PRIVATE_USE_AREA =
1142             new UnicodeBlock("PRIVATE_USE_AREA",
1143                              "PRIVATE USE AREA",
1144                              "PRIVATEUSEAREA");
1145 
1146         /**
1147          * Constant for the "CJK Compatibility Ideographs" Unicode character
1148          * block.
1149          * @since 1.2
1150          */
1151         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1152             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1153                              "CJK COMPATIBILITY IDEOGRAPHS",
1154                              "CJKCOMPATIBILITYIDEOGRAPHS");
1155 
1156         /**
1157          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1158          * @since 1.2
1159          */
1160         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1161             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1162                              "ALPHABETIC PRESENTATION FORMS",
1163                              "ALPHABETICPRESENTATIONFORMS");
1164 
1165         /**
1166          * Constant for the "Arabic Presentation Forms-A" Unicode character
1167          * block.
1168          * @since 1.2
1169          */
1170         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1171             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1172                              "ARABIC PRESENTATION FORMS-A",
1173                              "ARABICPRESENTATIONFORMS-A");
1174 
1175         /**
1176          * Constant for the "Combining Half Marks" Unicode character block.
1177          * @since 1.2
1178          */
1179         public static final UnicodeBlock COMBINING_HALF_MARKS =
1180             new UnicodeBlock("COMBINING_HALF_MARKS",
1181                              "COMBINING HALF MARKS",
1182                              "COMBININGHALFMARKS");
1183 
1184         /**
1185          * Constant for the "CJK Compatibility Forms" Unicode character block.
1186          * @since 1.2
1187          */
1188         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1189             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1190                              "CJK COMPATIBILITY FORMS",
1191                              "CJKCOMPATIBILITYFORMS");
1192 
1193         /**
1194          * Constant for the "Small Form Variants" Unicode character block.
1195          * @since 1.2
1196          */
1197         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1198             new UnicodeBlock("SMALL_FORM_VARIANTS",
1199                              "SMALL FORM VARIANTS",
1200                              "SMALLFORMVARIANTS");
1201 
1202         /**
1203          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1204          * @since 1.2
1205          */
1206         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1207             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1208                              "ARABIC PRESENTATION FORMS-B",
1209                              "ARABICPRESENTATIONFORMS-B");
1210 
1211         /**
1212          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1213          * block.
1214          * @since 1.2
1215          */
1216         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1217             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1218                              "HALFWIDTH AND FULLWIDTH FORMS",
1219                              "HALFWIDTHANDFULLWIDTHFORMS");
1220 
1221         /**
1222          * Constant for the "Specials" Unicode character block.
1223          * @since 1.2
1224          */
1225         public static final UnicodeBlock SPECIALS =
1226             new UnicodeBlock("SPECIALS");
1227 
1228         /**
              * Constant for the surrogates area, superseded by the three
              * surrogate block constants named in the deprecation note below.
              *
1229          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1230          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1231          *             {@link #LOW_SURROGATES}. These new constants match
1232          *             the block definitions of the Unicode Standard.
1233          *             The {@link #of(char)} and {@link #of(int)} methods
1234          *             return the new constants, not SURROGATES_AREA.
1235          */
1236         @Deprecated
1237         public static final UnicodeBlock SURROGATES_AREA =
1238             new UnicodeBlock("SURROGATES_AREA");
1239 
1240         /**
1241          * Constant for the "Syriac" Unicode character block.
1242          * @since 1.4
1243          */
1244         public static final UnicodeBlock SYRIAC =
1245             new UnicodeBlock("SYRIAC");
1246 
1247         /**
1248          * Constant for the "Thaana" Unicode character block.
1249          * @since 1.4
1250          */
1251         public static final UnicodeBlock THAANA =
1252             new UnicodeBlock("THAANA");
1253 
1254         /**
1255          * Constant for the "Sinhala" Unicode character block.
1256          * @since 1.4
1257          */
1258         public static final UnicodeBlock SINHALA =
1259             new UnicodeBlock("SINHALA");
1260 
1261         /**
1262          * Constant for the "Myanmar" Unicode character block.
1263          * @since 1.4
1264          */
1265         public static final UnicodeBlock MYANMAR =
1266             new UnicodeBlock("MYANMAR");
1267 
1268         /**
1269          * Constant for the "Ethiopic" Unicode character block.
1270          * @since 1.4
1271          */
1272         public static final UnicodeBlock ETHIOPIC =
1273             new UnicodeBlock("ETHIOPIC");
1274 
1275         /**
1276          * Constant for the "Cherokee" Unicode character block.
1277          * @since 1.4
1278          */
1279         public static final UnicodeBlock CHEROKEE =
1280             new UnicodeBlock("CHEROKEE");
1281 
1282         /**
1283          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1284          * @since 1.4
1285          */
1286         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1287             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1288                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1289                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
1290 
1291         /**
1292          * Constant for the "Ogham" Unicode character block.
1293          * @since 1.4
1294          */
1295         public static final UnicodeBlock OGHAM =
1296             new UnicodeBlock("OGHAM");
1297 
1298         /**
1299          * Constant for the "Runic" Unicode character block.
1300          * @since 1.4
1301          */
1302         public static final UnicodeBlock RUNIC =
1303             new UnicodeBlock("RUNIC");
1304 
1305         /**
1306          * Constant for the "Khmer" Unicode character block.
1307          * @since 1.4
1308          */
1309         public static final UnicodeBlock KHMER =
1310             new UnicodeBlock("KHMER");
1311 
1312         /**
1313          * Constant for the "Mongolian" Unicode character block.
1314          * @since 1.4
1315          */
1316         public static final UnicodeBlock MONGOLIAN =
1317             new UnicodeBlock("MONGOLIAN");
1318 
1319         /**
1320          * Constant for the "Braille Patterns" Unicode character block.
1321          * @since 1.4
1322          */
1323         public static final UnicodeBlock BRAILLE_PATTERNS =
1324             new UnicodeBlock("BRAILLE_PATTERNS",
1325                              "BRAILLE PATTERNS",
1326                              "BRAILLEPATTERNS");
1327 
1328         /**
1329          * Constant for the "CJK Radicals Supplement" Unicode character block.
1330          * @since 1.4
1331          */
1332         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1333             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1334                              "CJK RADICALS SUPPLEMENT",
1335                              "CJKRADICALSSUPPLEMENT");
1336 
1337         /**
1338          * Constant for the "Kangxi Radicals" Unicode character block.
1339          * @since 1.4
1340          */
1341         public static final UnicodeBlock KANGXI_RADICALS =
1342             new UnicodeBlock("KANGXI_RADICALS",
1343                              "KANGXI RADICALS",
1344                              "KANGXIRADICALS");
1345 
1346         /**
1347          * Constant for the "Ideographic Description Characters" Unicode character block.
1348          * @since 1.4
1349          */
1350         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1351             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1352                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1353                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1354 
1355         /**
1356          * Constant for the "Bopomofo Extended" Unicode character block.
1357          * @since 1.4
1358          */
1359         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1360             new UnicodeBlock("BOPOMOFO_EXTENDED",
1361                              "BOPOMOFO EXTENDED",
1362                              "BOPOMOFOEXTENDED");
1363 
1364         /**
1365          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1366          * @since 1.4
1367          */
1368         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1369             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1370                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1371                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1372 
1373         /**
1374          * Constant for the "Yi Syllables" Unicode character block.
1375          * @since 1.4
1376          */
1377         public static final UnicodeBlock YI_SYLLABLES =
1378             new UnicodeBlock("YI_SYLLABLES",
1379                              "YI SYLLABLES",
1380                              "YISYLLABLES");
1381 
1382         /**
1383          * Constant for the "Yi Radicals" Unicode character block.
1384          * @since 1.4
1385          */
1386         public static final UnicodeBlock YI_RADICALS =
1387             new UnicodeBlock("YI_RADICALS",
1388                              "YI RADICALS",
1389                              "YIRADICALS");
1390 
1391         /**
1392          * Constant for the "Cyrillic Supplementary" Unicode character block.
              * <p>
              * This block is also known as "Cyrillic Supplement"; both names are
              * passed to the constructor, each in a spaced and a space-free
              * spelling.
1393          * @since 1.5
1394          */
1395         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1396             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",   // identifier
1397                              "CYRILLIC SUPPLEMENTARY",
1398                              "CYRILLICSUPPLEMENTARY",
1399                              "CYRILLIC SUPPLEMENT",      // alternative name for the same block
1400                              "CYRILLICSUPPLEMENT");      // NOTE(review): confirm which name the targeted Unicode version treats as current
1401 
1402         /**
1403          * Constant for the "Tagalog" Unicode character block.
1404          * @since 1.5
1405          */
1406         public static final UnicodeBlock TAGALOG =
1407             new UnicodeBlock("TAGALOG");
1408 
1409         /**
1410          * Constant for the "Hanunoo" Unicode character block.
1411          * @since 1.5
1412          */
1413         public static final UnicodeBlock HANUNOO =
1414             new UnicodeBlock("HANUNOO");
1415 
1416         /**
1417          * Constant for the "Buhid" Unicode character block.
1418          * @since 1.5
1419          */
1420         public static final UnicodeBlock BUHID =
1421             new UnicodeBlock("BUHID");
1422 
1423         /**
1424          * Constant for the "Tagbanwa" Unicode character block.
1425          * @since 1.5
1426          */
1427         public static final UnicodeBlock TAGBANWA =
1428             new UnicodeBlock("TAGBANWA");
1429 
1430         /**
1431          * Constant for the "Limbu" Unicode character block.
1432          * @since 1.5
1433          */
1434         public static final UnicodeBlock LIMBU =
1435             new UnicodeBlock("LIMBU");
1436 
1437         /**
1438          * Constant for the "Tai Le" Unicode character block.
1439          * @since 1.5
1440          */
1441         public static final UnicodeBlock TAI_LE =
1442             new UnicodeBlock("TAI_LE",
1443                              "TAI LE",
1444                              "TAILE");
1445 
1446         /**
1447          * Constant for the "Khmer Symbols" Unicode character block.
1448          * @since 1.5
1449          */
1450         public static final UnicodeBlock KHMER_SYMBOLS =
1451             new UnicodeBlock("KHMER_SYMBOLS",
1452                              "KHMER SYMBOLS",
1453                              "KHMERSYMBOLS");
1454 
1455         /**
1456          * Constant for the "Phonetic Extensions" Unicode character block.
1457          * @since 1.5
1458          */
1459         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1460             new UnicodeBlock("PHONETIC_EXTENSIONS",
1461                              "PHONETIC EXTENSIONS",
1462                              "PHONETICEXTENSIONS");
1463 
1464         /**
1465          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1466          * @since 1.5
1467          */
1468         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1469             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1470                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1471                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1472 
1473         /**
1474          * Constant for the "Supplemental Arrows-A" Unicode character block.
1475          * @since 1.5
1476          */
1477         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1478             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1479                              "SUPPLEMENTAL ARROWS-A",
1480                              "SUPPLEMENTALARROWS-A");
1481 
1482         /**
1483          * Constant for the "Supplemental Arrows-B" Unicode character block.
1484          * @since 1.5
1485          */
1486         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1487             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1488                              "SUPPLEMENTAL ARROWS-B",
1489                              "SUPPLEMENTALARROWS-B");
1490 
1491         /**
1492          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1493          * character block.
1494          * @since 1.5
1495          */
1496         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1497             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1498                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1499                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1500 
1501         /**
1502          * Constant for the "Supplemental Mathematical Operators" Unicode
1503          * character block.
1504          * @since 1.5
1505          */
1506         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1507             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1508                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1509                              "SUPPLEMENTALMATHEMATICALOPERATORS");
1510 
1511         /**
1512          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1513          * block.
1514          * @since 1.5
1515          */
1516         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1517             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1518                              "MISCELLANEOUS SYMBOLS AND ARROWS",
1519                              "MISCELLANEOUSSYMBOLSANDARROWS");
1520 
1521         /**
1522          * Constant for the "Katakana Phonetic Extensions" Unicode character
1523          * block.
1524          * @since 1.5
1525          */
1526         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1527             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1528                              "KATAKANA PHONETIC EXTENSIONS",
1529                              "KATAKANAPHONETICEXTENSIONS");
1530 
1531         /**
1532          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1533          * @since 1.5
1534          */
1535         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1536             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1537                              "YIJING HEXAGRAM SYMBOLS",
1538                              "YIJINGHEXAGRAMSYMBOLS");
1539 
1540         /**
1541          * Constant for the "Variation Selectors" Unicode character block.
1542          * @since 1.5
1543          */
1544         public static final UnicodeBlock VARIATION_SELECTORS =
1545             new UnicodeBlock("VARIATION_SELECTORS",
1546                              "VARIATION SELECTORS",
1547                              "VARIATIONSELECTORS");
1548 
1549         /**
1550          * Constant for the "Linear B Syllabary" Unicode character block.
1551          * @since 1.5
1552          */
1553         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1554             new UnicodeBlock("LINEAR_B_SYLLABARY",
1555                              "LINEAR B SYLLABARY",
1556                              "LINEARBSYLLABARY");
1557 
1558         /**
1559          * Constant for the "Linear B Ideograms" Unicode character block.
1560          * @since 1.5
1561          */
1562         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1563             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1564                              "LINEAR B IDEOGRAMS",
1565                              "LINEARBIDEOGRAMS");
1566 
1567         /**
1568          * Constant for the "Aegean Numbers" Unicode character block.
1569          * @since 1.5
1570          */
1571         public static final UnicodeBlock AEGEAN_NUMBERS =
1572             new UnicodeBlock("AEGEAN_NUMBERS",
1573                              "AEGEAN NUMBERS",
1574                              "AEGEANNUMBERS");
1575 
1576         /**
1577          * Constant for the "Old Italic" Unicode character block.
1578          * @since 1.5
1579          */
1580         public static final UnicodeBlock OLD_ITALIC =
1581             new UnicodeBlock("OLD_ITALIC",
1582                              "OLD ITALIC",
1583                              "OLDITALIC");
1584 
1585         /**
1586          * Constant for the "Gothic" Unicode character block.
1587          * @since 1.5
1588          */
1589         public static final UnicodeBlock GOTHIC =
1590             new UnicodeBlock("GOTHIC");
1591 
1592         /**
1593          * Constant for the "Ugaritic" Unicode character block.
1594          * @since 1.5
1595          */
1596         public static final UnicodeBlock UGARITIC =
1597             new UnicodeBlock("UGARITIC");
1598 
1599         /**
1600          * Constant for the "Deseret" Unicode character block.
1601          * @since 1.5
1602          */
1603         public static final UnicodeBlock DESERET =
1604             new UnicodeBlock("DESERET");
1605 
1606         /**
1607          * Constant for the "Shavian" Unicode character block.
1608          * @since 1.5
1609          */
1610         public static final UnicodeBlock SHAVIAN =
1611             new UnicodeBlock("SHAVIAN");
1612 
1613         /**
1614          * Constant for the "Osmanya" Unicode character block.
1615          * @since 1.5
1616          */
1617         public static final UnicodeBlock OSMANYA =
1618             new UnicodeBlock("OSMANYA");
1619 
1620         /**
1621          * Constant for the "Cypriot Syllabary" Unicode character block.
1622          * @since 1.5
1623          */
1624         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1625             new UnicodeBlock("CYPRIOT_SYLLABARY",
1626                              "CYPRIOT SYLLABARY",
1627                              "CYPRIOTSYLLABARY");
1628 
1629         /**
1630          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1631          * @since 1.5
1632          */
1633         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1634             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1635                              "BYZANTINE MUSICAL SYMBOLS",
1636                              "BYZANTINEMUSICALSYMBOLS");
1637 
1638         /**
1639          * Constant for the "Musical Symbols" Unicode character block.
1640          * @since 1.5
1641          */
1642         public static final UnicodeBlock MUSICAL_SYMBOLS =
1643             new UnicodeBlock("MUSICAL_SYMBOLS",
1644                              "MUSICAL SYMBOLS",
1645                              "MUSICALSYMBOLS");
1646 
1647         /**
1648          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1649          * @since 1.5
1650          */
1651         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1652             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1653                              "TAI XUAN JING SYMBOLS",
1654                              "TAIXUANJINGSYMBOLS");
1655 
1656         /**
1657          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1658          * character block.
1659          * @since 1.5
1660          */
1661         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1662             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1663                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1664                              "MATHEMATICALALPHANUMERICSYMBOLS");
1665 
1666         /**
1667          * Constant for the "CJK Unified Ideographs Extension B" Unicode
1668          * character block.
1669          * @since 1.5
1670          */
1671         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1672             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1673                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1674                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1675 
1676         /**
1677          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1678          * @since 1.5
1679          */
1680         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1681             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1682                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1683                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1684 
1685         /**
1686          * Constant for the "Tags" Unicode character block.
1687          * @since 1.5
1688          */
1689         public static final UnicodeBlock TAGS =
1690             new UnicodeBlock("TAGS");
1691 
1692         /**
1693          * Constant for the "Variation Selectors Supplement" Unicode character
1694          * block.
1695          * @since 1.5
1696          */
1697         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1698             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1699                              "VARIATION SELECTORS SUPPLEMENT",
1700                              "VARIATIONSELECTORSSUPPLEMENT");
1701 
1702         /**
1703          * Constant for the "Supplementary Private Use Area-A" Unicode character
1704          * block.
1705          * @since 1.5
1706          */
1707         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1708             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1709                              "SUPPLEMENTARY PRIVATE USE AREA-A",
1710                              "SUPPLEMENTARYPRIVATEUSEAREA-A");
1711 
1712         /**
1713          * Constant for the "Supplementary Private Use Area-B" Unicode character
1714          * block.
1715          * @since 1.5
1716          */
1717         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1718             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1719                              "SUPPLEMENTARY PRIVATE USE AREA-B",
1720                              "SUPPLEMENTARYPRIVATEUSEAREA-B");
1721 
1722         /**
1723          * Constant for the "High Surrogates" Unicode character block.
1724          * This block represents code point values in the high surrogate
1725          * range: U+D800 through U+DB7F.
1726          *
1727          * @since 1.5
1728          */
1729         public static final UnicodeBlock HIGH_SURROGATES =
1730             new UnicodeBlock("HIGH_SURROGATES",
1731                              "HIGH SURROGATES",
1732                              "HIGHSURROGATES");
1733 
1734         /**
1735          * Constant for the "High Private Use Surrogates" Unicode character
1736          * block.
1737          * This block represents code point values in the private use high
1738          * surrogate range: U+DB80 through U+DBFF.
1739          *
1740          * @since 1.5
1741          */
1742         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1743             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1744                              "HIGH PRIVATE USE SURROGATES",
1745                              "HIGHPRIVATEUSESURROGATES");
1746 
1747         /**
1748          * Constant for the "Low Surrogates" Unicode character block.
1749          * This block represents code point values in the low surrogate
1750          * range: U+DC00 through U+DFFF.
1751          *
1752          * @since 1.5
1753          */
1754         public static final UnicodeBlock LOW_SURROGATES =
1755             new UnicodeBlock("LOW_SURROGATES",
1756                              "LOW SURROGATES",
1757                              "LOWSURROGATES");
1758 
1759         /**
1760          * Constant for the "Arabic Supplement" Unicode character block.
1761          * @since 1.7
1762          */
1763         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1764             new UnicodeBlock("ARABIC_SUPPLEMENT",
1765                              "ARABIC SUPPLEMENT",
1766                              "ARABICSUPPLEMENT");
1767 
1768         /**
1769          * Constant for the "NKo" Unicode character block.
1770          * @since 1.7
1771          */
1772         public static final UnicodeBlock NKO =
1773             new UnicodeBlock("NKO");
1774 
1775         /**
1776          * Constant for the "Samaritan" Unicode character block.
1777          * @since 1.7
1778          */
1779         public static final UnicodeBlock SAMARITAN =
1780             new UnicodeBlock("SAMARITAN");
1781 
1782         /**
1783          * Constant for the "Mandaic" Unicode character block.
1784          * @since 1.7
1785          */
1786         public static final UnicodeBlock MANDAIC =
1787             new UnicodeBlock("MANDAIC");
1788 
1789         /**
1790          * Constant for the "Ethiopic Supplement" Unicode character block.
1791          * @since 1.7
1792          */
1793         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1794             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1795                              "ETHIOPIC SUPPLEMENT",
1796                              "ETHIOPICSUPPLEMENT");
1797 
1798         /**
1799          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1800          * Unicode character block.
1801          * @since 1.7
1802          */
1803         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1804             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1805                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1806                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1807 
1808         /**
1809          * Constant for the "New Tai Lue" Unicode character block.
1810          * @since 1.7
1811          */
1812         public static final UnicodeBlock NEW_TAI_LUE =
1813             new UnicodeBlock("NEW_TAI_LUE",
1814                              "NEW TAI LUE",
1815                              "NEWTAILUE");
1816 
1817         /**
1818          * Constant for the "Buginese" Unicode character block.
1819          * @since 1.7
1820          */
1821         public static final UnicodeBlock BUGINESE =
1822             new UnicodeBlock("BUGINESE");
1823 
1824         /**
1825          * Constant for the "Tai Tham" Unicode character block.
1826          * @since 1.7
1827          */
1828         public static final UnicodeBlock TAI_THAM =
1829             new UnicodeBlock("TAI_THAM",
1830                              "TAI THAM",
1831                              "TAITHAM");
1832 
1833         /**
1834          * Constant for the "Balinese" Unicode character block.
1835          * @since 1.7
1836          */
1837         public static final UnicodeBlock BALINESE =
1838             new UnicodeBlock("BALINESE");
1839 
1840         /**
1841          * Constant for the "Sundanese" Unicode character block.
1842          * @since 1.7
1843          */
1844         public static final UnicodeBlock SUNDANESE =
1845             new UnicodeBlock("SUNDANESE");
1846 
1847         /**
1848          * Constant for the "Batak" Unicode character block.
1849          * @since 1.7
1850          */
1851         public static final UnicodeBlock BATAK =
1852             new UnicodeBlock("BATAK");
1853 
1854         /**
1855          * Constant for the "Lepcha" Unicode character block.
1856          * @since 1.7
1857          */
1858         public static final UnicodeBlock LEPCHA =
1859             new UnicodeBlock("LEPCHA");
1860 
1861         /**
1862          * Constant for the "Ol Chiki" Unicode character block.
1863          * @since 1.7
1864          */
1865         public static final UnicodeBlock OL_CHIKI =
1866             new UnicodeBlock("OL_CHIKI",
1867                              "OL CHIKI",
1868                              "OLCHIKI");
1869 
1870         /**
1871          * Constant for the "Vedic Extensions" Unicode character block.
1872          * @since 1.7
1873          */
1874         public static final UnicodeBlock VEDIC_EXTENSIONS =
1875             new UnicodeBlock("VEDIC_EXTENSIONS",
1876                              "VEDIC EXTENSIONS",
1877                              "VEDICEXTENSIONS");
1878 
1879         /**
1880          * Constant for the "Phonetic Extensions Supplement" Unicode character
1881          * block.
1882          * @since 1.7
1883          */
1884         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1885             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1886                              "PHONETIC EXTENSIONS SUPPLEMENT",
1887                              "PHONETICEXTENSIONSSUPPLEMENT");
1888 
1889         /**
1890          * Constant for the "Combining Diacritical Marks Supplement" Unicode
1891          * character block.
1892          * @since 1.7
1893          */
1894         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1895             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1896                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1897                              "COMBININGDIACRITICALMARKSSUPPLEMENT");
1898 
1899         /**
1900          * Constant for the "Glagolitic" Unicode character block.
1901          * @since 1.7
1902          */
1903         public static final UnicodeBlock GLAGOLITIC =
1904             new UnicodeBlock("GLAGOLITIC");
1905 
1906         /**
1907          * Constant for the "Latin Extended-C" Unicode character block.
1908          * @since 1.7
1909          */
1910         public static final UnicodeBlock LATIN_EXTENDED_C =
1911             new UnicodeBlock("LATIN_EXTENDED_C",
1912                              "LATIN EXTENDED-C",
1913                              "LATINEXTENDED-C");
1914 
1915         /**
1916          * Constant for the "Coptic" Unicode character block.
1917          * @since 1.7
1918          */
1919         public static final UnicodeBlock COPTIC =
1920             new UnicodeBlock("COPTIC");
1921 
1922         /**
1923          * Constant for the "Georgian Supplement" Unicode character block.
1924          * @since 1.7
1925          */
1926         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1927             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1928                              "GEORGIAN SUPPLEMENT",
1929                              "GEORGIANSUPPLEMENT");
1930 
1931         /**
1932          * Constant for the "Tifinagh" Unicode character block.
1933          * @since 1.7
1934          */
1935         public static final UnicodeBlock TIFINAGH =
1936             new UnicodeBlock("TIFINAGH");
1937 
1938         /**
1939          * Constant for the "Ethiopic Extended" Unicode character block.
1940          * @since 1.7
1941          */
1942         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1943             new UnicodeBlock("ETHIOPIC_EXTENDED",
1944                              "ETHIOPIC EXTENDED",
1945                              "ETHIOPICEXTENDED");
1946 
1947         /**
1948          * Constant for the "Cyrillic Extended-A" Unicode character block.
1949          * @since 1.7
1950          */
1951         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1952             new UnicodeBlock("CYRILLIC_EXTENDED_A",
1953                              "CYRILLIC EXTENDED-A",
1954                              "CYRILLICEXTENDED-A");
1955 
1956         /**
1957          * Constant for the "Supplemental Punctuation" Unicode character block.
1958          * @since 1.7
1959          */
1960         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1961             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1962                              "SUPPLEMENTAL PUNCTUATION",
1963                              "SUPPLEMENTALPUNCTUATION");
1964 
1965         /**
1966          * Constant for the "CJK Strokes" Unicode character block.
1967          * @since 1.7
1968          */
1969         public static final UnicodeBlock CJK_STROKES =
1970             new UnicodeBlock("CJK_STROKES",
1971                              "CJK STROKES",
1972                              "CJKSTROKES");
1973 
1974         /**
1975          * Constant for the "Lisu" Unicode character block.
1976          * @since 1.7
1977          */
1978         public static final UnicodeBlock LISU =
1979             new UnicodeBlock("LISU");
1980 
1981         /**
1982          * Constant for the "Vai" Unicode character block.
1983          * @since 1.7
1984          */
1985         public static final UnicodeBlock VAI =
1986             new UnicodeBlock("VAI");
1987 
1988         /**
1989          * Constant for the "Cyrillic Extended-B" Unicode character block.
1990          * @since 1.7
1991          */
1992         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
1993             new UnicodeBlock("CYRILLIC_EXTENDED_B",
1994                              "CYRILLIC EXTENDED-B",
1995                              "CYRILLICEXTENDED-B");
1996 
1997         /**
1998          * Constant for the "Bamum" Unicode character block.
1999          * @since 1.7
2000          */
2001         public static final UnicodeBlock BAMUM =
2002             new UnicodeBlock("BAMUM");
2003 
2004         /**
2005          * Constant for the "Modifier Tone Letters" Unicode character block.
2006          * @since 1.7
2007          */
2008         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2009             new UnicodeBlock("MODIFIER_TONE_LETTERS",
2010                              "MODIFIER TONE LETTERS",
2011                              "MODIFIERTONELETTERS");
2012 
2013         /**
2014          * Constant for the "Latin Extended-D" Unicode character block.
2015          * @since 1.7
2016          */
2017         public static final UnicodeBlock LATIN_EXTENDED_D =
2018             new UnicodeBlock("LATIN_EXTENDED_D",
2019                              "LATIN EXTENDED-D",
2020                              "LATINEXTENDED-D");
2021 
2022         /**
2023          * Constant for the "Syloti Nagri" Unicode character block.
2024          * @since 1.7
2025          */
2026         public static final UnicodeBlock SYLOTI_NAGRI =
2027             new UnicodeBlock("SYLOTI_NAGRI",
2028                              "SYLOTI NAGRI",
2029                              "SYLOTINAGRI");
2030 
2031         /**
2032          * Constant for the "Common Indic Number Forms" Unicode character block.
2033          * @since 1.7
2034          */
2035         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2036             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2037                              "COMMON INDIC NUMBER FORMS",
2038                              "COMMONINDICNUMBERFORMS");
2039 
2040         /**
2041          * Constant for the "Phags-pa" Unicode character block.
2042          * @since 1.7
2043          */
2044         public static final UnicodeBlock PHAGS_PA =
2045             new UnicodeBlock("PHAGS_PA",
2046                              "PHAGS-PA"); // block name has no spaces, so no separate space-stripped spelling is listed
2047 
2048         /**
2049          * Constant for the "Saurashtra" Unicode character block.
2050          * @since 1.7
2051          */
2052         public static final UnicodeBlock SAURASHTRA =
2053             new UnicodeBlock("SAURASHTRA");
2054 
2055         /**
2056          * Constant for the "Devanagari Extended" Unicode character block.
2057          * @since 1.7
2058          */
2059         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2060             new UnicodeBlock("DEVANAGARI_EXTENDED",
2061                              "DEVANAGARI EXTENDED",
2062                              "DEVANAGARIEXTENDED");
2063 
2064         /**
2065          * Constant for the "Kayah Li" Unicode character block.
2066          * @since 1.7
2067          */
2068         public static final UnicodeBlock KAYAH_LI =
2069             new UnicodeBlock("KAYAH_LI",
2070                              "KAYAH LI",
2071                              "KAYAHLI");
2072 
2073         /**
2074          * Constant for the "Rejang" Unicode character block.
2075          * @since 1.7
2076          */
2077         public static final UnicodeBlock REJANG =
2078             new UnicodeBlock("REJANG");
2079 
2080         /**
2081          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2082          * @since 1.7
2083          */
2084         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2085             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2086                              "HANGUL JAMO EXTENDED-A",
2087                              "HANGULJAMOEXTENDED-A");
2088 
2089         /**
2090          * Constant for the "Javanese" Unicode character block.
2091          * @since 1.7
2092          */
2093         public static final UnicodeBlock JAVANESE =
2094             new UnicodeBlock("JAVANESE");
2095 
2096         /**
2097          * Constant for the "Cham" Unicode character block.
2098          * @since 1.7
2099          */
2100         public static final UnicodeBlock CHAM =
2101             new UnicodeBlock("CHAM");
2102 
2103         /**
2104          * Constant for the "Myanmar Extended-A" Unicode character block.
2105          * @since 1.7
2106          */
2107         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2108             new UnicodeBlock("MYANMAR_EXTENDED_A",
2109                              "MYANMAR EXTENDED-A",
2110                              "MYANMAREXTENDED-A");
2111 
2112         /**
2113          * Constant for the "Tai Viet" Unicode character block.
2114          * @since 1.7
2115          */
2116         public static final UnicodeBlock TAI_VIET =
2117             new UnicodeBlock("TAI_VIET",
2118                              "TAI VIET",
2119                              "TAIVIET");
2120 
2121         /**
2122          * Constant for the "Ethiopic Extended-A" Unicode character block.
2123          * @since 1.7
2124          */
2125         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2126             new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2127                              "ETHIOPIC EXTENDED-A",
2128                              "ETHIOPICEXTENDED-A");
2129 
2130         /**
2131          * Constant for the "Meetei Mayek" Unicode character block.
2132          * @since 1.7
2133          */
2134         public static final UnicodeBlock MEETEI_MAYEK =
2135             new UnicodeBlock("MEETEI_MAYEK",
2136                              "MEETEI MAYEK",
2137                              "MEETEIMAYEK");
2138 
2139         /**
2140          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2141          * @since 1.7
2142          */
2143         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2144             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2145                              "HANGUL JAMO EXTENDED-B",
2146                              "HANGULJAMOEXTENDED-B");
2147 
2148         /**
2149          * Constant for the "Vertical Forms" Unicode character block.
2150          * @since 1.7
2151          */
2152         public static final UnicodeBlock VERTICAL_FORMS =
2153             new UnicodeBlock("VERTICAL_FORMS",
2154                              "VERTICAL FORMS",
2155                              "VERTICALFORMS");
2156 
2157         /**
2158          * Constant for the "Ancient Greek Numbers" Unicode character block.
2159          * @since 1.7
2160          */
2161         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2162             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2163                              "ANCIENT GREEK NUMBERS",
2164                              "ANCIENTGREEKNUMBERS");
2165 
2166         /**
2167          * Constant for the "Ancient Symbols" Unicode character block.
2168          * @since 1.7
2169          */
2170         public static final UnicodeBlock ANCIENT_SYMBOLS =
2171             new UnicodeBlock("ANCIENT_SYMBOLS",
2172                              "ANCIENT SYMBOLS",
2173                              "ANCIENTSYMBOLS");
2174 
2175         /**
2176          * Constant for the "Phaistos Disc" Unicode character block.
2177          * @since 1.7
2178          */
2179         public static final UnicodeBlock PHAISTOS_DISC =
2180             new UnicodeBlock("PHAISTOS_DISC",
2181                              "PHAISTOS DISC",
2182                              "PHAISTOSDISC");
2183 
2184         /**
2185          * Constant for the "Lycian" Unicode character block.
2186          * @since 1.7
2187          */
2188         public static final UnicodeBlock LYCIAN =
2189             new UnicodeBlock("LYCIAN");
2190 
2191         /**
2192          * Constant for the "Carian" Unicode character block.
2193          * @since 1.7
2194          */
2195         public static final UnicodeBlock CARIAN =
2196             new UnicodeBlock("CARIAN");
2197 
2198         /**
2199          * Constant for the "Old Persian" Unicode character block.
2200          * @since 1.7
2201          */
2202         public static final UnicodeBlock OLD_PERSIAN =
2203             new UnicodeBlock("OLD_PERSIAN",
2204                              "OLD PERSIAN",
2205                              "OLDPERSIAN");
2206 
2207         /**
2208          * Constant for the "Imperial Aramaic" Unicode character block.
2209          * @since 1.7
2210          */
2211         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2212             new UnicodeBlock("IMPERIAL_ARAMAIC",
2213                              "IMPERIAL ARAMAIC",
2214                              "IMPERIALARAMAIC");
2215 
2216         /**
2217          * Constant for the "Phoenician" Unicode character block.
2218          * @since 1.7
2219          */
2220         public static final UnicodeBlock PHOENICIAN =
2221             new UnicodeBlock("PHOENICIAN");
2222 
2223         /**
2224          * Constant for the "Lydian" Unicode character block.
2225          * @since 1.7
2226          */
2227         public static final UnicodeBlock LYDIAN =
2228             new UnicodeBlock("LYDIAN");
2229 
2230         /**
2231          * Constant for the "Kharoshthi" Unicode character block.
2232          * @since 1.7
2233          */
2234         public static final UnicodeBlock KHAROSHTHI =
2235             new UnicodeBlock("KHAROSHTHI");
2236 
2237         /**
2238          * Constant for the "Old South Arabian" Unicode character block.
2239          * @since 1.7
2240          */
2241         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2242             new UnicodeBlock("OLD_SOUTH_ARABIAN",
2243                              "OLD SOUTH ARABIAN",
2244                              "OLDSOUTHARABIAN");
2245 
2246         /**
2247          * Constant for the "Avestan" Unicode character block.
2248          * @since 1.7
2249          */
2250         public static final UnicodeBlock AVESTAN =
2251             new UnicodeBlock("AVESTAN");
2252 
2253         /**
2254          * Constant for the "Inscriptional Parthian" Unicode character block.
2255          * @since 1.7
2256          */
2257         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2258             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2259                              "INSCRIPTIONAL PARTHIAN",
2260                              "INSCRIPTIONALPARTHIAN");
2261 
2262         /**
2263          * Constant for the "Inscriptional Pahlavi" Unicode character block.
2264          * @since 1.7
2265          */
2266         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2267             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2268                              "INSCRIPTIONAL PAHLAVI",
2269                              "INSCRIPTIONALPAHLAVI");
2270 
2271         /**
2272          * Constant for the "Old Turkic" Unicode character block.
2273          * @since 1.7
2274          */
2275         public static final UnicodeBlock OLD_TURKIC =
2276             new UnicodeBlock("OLD_TURKIC",
2277                              "OLD TURKIC",
2278                              "OLDTURKIC");
2279 
2280         /**
2281          * Constant for the "Rumi Numeral Symbols" Unicode character block.
2282          * @since 1.7
2283          */
2284         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2285             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2286                              "RUMI NUMERAL SYMBOLS",
2287                              "RUMINUMERALSYMBOLS");
2288 
2289         /**
2290          * Constant for the "Brahmi" Unicode character block.
2291          * @since 1.7
2292          */
2293         public static final UnicodeBlock BRAHMI =
2294             new UnicodeBlock("BRAHMI");
2295 
2296         /**
2297          * Constant for the "Kaithi" Unicode character block.
2298          * @since 1.7
2299          */
2300         public static final UnicodeBlock KAITHI =
2301             new UnicodeBlock("KAITHI");
2302 
2303         /**
2304          * Constant for the "Cuneiform" Unicode character block.
2305          * @since 1.7
2306          */
2307         public static final UnicodeBlock CUNEIFORM =
2308             new UnicodeBlock("CUNEIFORM");
2309 
2310         /**
2311          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2312          * character block.
2313          * @since 1.7
2314          */
2315         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2316             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2317                              "CUNEIFORM NUMBERS AND PUNCTUATION",
2318                              "CUNEIFORMNUMBERSANDPUNCTUATION");
2319 
2320         /**
2321          * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2322          * @since 1.7
2323          */
2324         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2325             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2326                              "EGYPTIAN HIEROGLYPHS",
2327                              "EGYPTIANHIEROGLYPHS");
2328 
2329         /**
2330          * Constant for the "Bamum Supplement" Unicode character block.
2331          * @since 1.7
2332          */
2333         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2334             new UnicodeBlock("BAMUM_SUPPLEMENT",
2335                              "BAMUM SUPPLEMENT",
2336                              "BAMUMSUPPLEMENT");
2337 
2338         /**
2339          * Constant for the "Kana Supplement" Unicode character block.
2340          * @since 1.7
2341          */
2342         public static final UnicodeBlock KANA_SUPPLEMENT =
2343             new UnicodeBlock("KANA_SUPPLEMENT",
2344                              "KANA SUPPLEMENT",
2345                              "KANASUPPLEMENT");
2346 
2347         /**
2348          * Constant for the "Ancient Greek Musical Notation" Unicode character
2349          * block.
2350          * @since 1.7
2351          */
2352         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2353             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2354                              "ANCIENT GREEK MUSICAL NOTATION",
2355                              "ANCIENTGREEKMUSICALNOTATION");
2356 
2357         /**
2358          * Constant for the "Counting Rod Numerals" Unicode character block.
2359          * @since 1.7
2360          */
2361         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2362             new UnicodeBlock("COUNTING_ROD_NUMERALS",
2363                              "COUNTING ROD NUMERALS",
2364                              "COUNTINGRODNUMERALS");
2365 
2366         /**
2367          * Constant for the "Mahjong Tiles" Unicode character block.
2368          * @since 1.7
2369          */
2370         public static final UnicodeBlock MAHJONG_TILES =
2371             new UnicodeBlock("MAHJONG_TILES",
2372                              "MAHJONG TILES",
2373                              "MAHJONGTILES");
2374 
2375         /**
2376          * Constant for the "Domino Tiles" Unicode character block.
2377          * @since 1.7
2378          */
2379         public static final UnicodeBlock DOMINO_TILES =
2380             new UnicodeBlock("DOMINO_TILES",
2381                              "DOMINO TILES",
2382                              "DOMINOTILES");
2383 
2384         /**
2385          * Constant for the "Playing Cards" Unicode character block.
2386          * @since 1.7
2387          */
2388         public static final UnicodeBlock PLAYING_CARDS =
2389             new UnicodeBlock("PLAYING_CARDS",
2390                              "PLAYING CARDS",
2391                              "PLAYINGCARDS");
2392 
2393         /**
2394          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2395          * block.
2396          * @since 1.7
2397          */
2398         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2399             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2400                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2401                              "ENCLOSEDALPHANUMERICSUPPLEMENT");
2402 
2403         /**
2404          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2405          * block.
2406          * @since 1.7
2407          */
2408         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2409             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2410                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2411                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2412 
2413         /**
2414          * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2415          * character block.
2416          * @since 1.7
2417          */
2418         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2419             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2420                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2421                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2422 
2423         /**
2424          * Constant for the "Emoticons" Unicode character block.
2425          * @since 1.7
2426          */
2427         public static final UnicodeBlock EMOTICONS =
2428             new UnicodeBlock("EMOTICONS");
2429 
2430         /**
2431          * Constant for the "Transport And Map Symbols" Unicode character block,
2432          * which spans code points U+1F680 through U+1F6FF.
2433          * @since 1.7
2434          */
2435         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2436             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2437                 "TRANSPORT AND MAP SYMBOLS", "TRANSPORTANDMAPSYMBOLS");
2438 
2439         /**
2440          * Constant for the "Alchemical Symbols" Unicode character block, which
2441          * spans code points U+1F700 through U+1F77F.
2442          * @since 1.7
2443          */
2444         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2445             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2446                 "ALCHEMICAL SYMBOLS", "ALCHEMICALSYMBOLS");
2447 
2448         /**
2449          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2450          * character block, which spans code points U+2A700 through U+2B73F.
2451          * @since 1.7
2452          */
2453         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2454             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2455                 "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2456                 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2457 
2458         /**
2459          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2460          * character block, which spans code points U+2B740 through U+2B81F.
2461          * @since 1.7
2462          */
2463         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2464             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2465                 "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2466                 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2467 
2468         private static final int blockStarts[] = {
2469             0x0000,   // 0000..007F; Basic Latin
2470             0x0080,   // 0080..00FF; Latin-1 Supplement
2471             0x0100,   // 0100..017F; Latin Extended-A
2472             0x0180,   // 0180..024F; Latin Extended-B
2473             0x0250,   // 0250..02AF; IPA Extensions
2474             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2475             0x0300,   // 0300..036F; Combining Diacritical Marks
2476             0x0370,   // 0370..03FF; Greek and Coptic
2477             0x0400,   // 0400..04FF; Cyrillic
2478             0x0500,   // 0500..052F; Cyrillic Supplement
2479             0x0530,   // 0530..058F; Armenian
2480             0x0590,   // 0590..05FF; Hebrew
2481             0x0600,   // 0600..06FF; Arabic
2482             0x0700,   // 0700..074F; Syriac
2483             0x0750,   // 0750..077F; Arabic Supplement
2484             0x0780,   // 0780..07BF; Thaana
2485             0x07C0,   // 07C0..07FF; NKo
2486             0x0800,   // 0800..083F; Samaritan
2487             0x0840,   // 0840..085F; Mandaic
2488             0x0860,   //             unassigned
2489             0x0900,   // 0900..097F; Devanagari
2490             0x0980,   // 0980..09FF; Bengali
2491             0x0A00,   // 0A00..0A7F; Gurmukhi
2492             0x0A80,   // 0A80..0AFF; Gujarati
2493             0x0B00,   // 0B00..0B7F; Oriya
2494             0x0B80,   // 0B80..0BFF; Tamil
2495             0x0C00,   // 0C00..0C7F; Telugu
2496             0x0C80,   // 0C80..0CFF; Kannada
2497             0x0D00,   // 0D00..0D7F; Malayalam
2498             0x0D80,   // 0D80..0DFF; Sinhala
2499             0x0E00,   // 0E00..0E7F; Thai
2500             0x0E80,   // 0E80..0EFF; Lao
2501             0x0F00,   // 0F00..0FFF; Tibetan
2502             0x1000,   // 1000..109F; Myanmar
2503             0x10A0,   // 10A0..10FF; Georgian
2504             0x1100,   // 1100..11FF; Hangul Jamo
2505             0x1200,   // 1200..137F; Ethiopic
2506             0x1380,   // 1380..139F; Ethiopic Supplement
2507             0x13A0,   // 13A0..13FF; Cherokee
2508             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2509             0x1680,   // 1680..169F; Ogham
2510             0x16A0,   // 16A0..16FF; Runic
2511             0x1700,   // 1700..171F; Tagalog
2512             0x1720,   // 1720..173F; Hanunoo
2513             0x1740,   // 1740..175F; Buhid
2514             0x1760,   // 1760..177F; Tagbanwa
2515             0x1780,   // 1780..17FF; Khmer
2516             0x1800,   // 1800..18AF; Mongolian
2517             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2518             0x1900,   // 1900..194F; Limbu
2519             0x1950,   // 1950..197F; Tai Le
2520             0x1980,   // 1980..19DF; New Tai Lue
2521             0x19E0,   // 19E0..19FF; Khmer Symbols
2522             0x1A00,   // 1A00..1A1F; Buginese
2523             0x1A20,   // 1A20..1AAF; Tai Tham
2524             0x1AB0,   //             unassigned
2525             0x1B00,   // 1B00..1B7F; Balinese
2526             0x1B80,   // 1B80..1BBF; Sundanese
2527             0x1BC0,   // 1BC0..1BFF; Batak
2528             0x1C00,   // 1C00..1C4F; Lepcha
2529             0x1C50,   // 1C50..1C7F; Ol Chiki
2530             0x1C80,   //             unassigned
2531             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
2532             0x1D00,   // 1D00..1D7F; Phonetic Extensions
2533             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2534             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2535             0x1E00,   // 1E00..1EFF; Latin Extended Additional
2536             0x1F00,   // 1F00..1FFF; Greek Extended
2537             0x2000,   // 2000..206F; General Punctuation
2538             0x2070,   // 2070..209F; Superscripts and Subscripts
2539             0x20A0,   // 20A0..20CF; Currency Symbols
2540             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2541             0x2100,   // 2100..214F; Letterlike Symbols
2542             0x2150,   // 2150..218F; Number Forms
2543             0x2190,   // 2190..21FF; Arrows
2544             0x2200,   // 2200..22FF; Mathematical Operators
2545             0x2300,   // 2300..23FF; Miscellaneous Technical
2546             0x2400,   // 2400..243F; Control Pictures
2547             0x2440,   // 2440..245F; Optical Character Recognition
2548             0x2460,   // 2460..24FF; Enclosed Alphanumerics
2549             0x2500,   // 2500..257F; Box Drawing
2550             0x2580,   // 2580..259F; Block Elements
2551             0x25A0,   // 25A0..25FF; Geometric Shapes
2552             0x2600,   // 2600..26FF; Miscellaneous Symbols
2553             0x2700,   // 2700..27BF; Dingbats
2554             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2555             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2556             0x2800,   // 2800..28FF; Braille Patterns
2557             0x2900,   // 2900..297F; Supplemental Arrows-B
2558             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2559             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2560             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2561             0x2C00,   // 2C00..2C5F; Glagolitic
2562             0x2C60,   // 2C60..2C7F; Latin Extended-C
2563             0x2C80,   // 2C80..2CFF; Coptic
2564             0x2D00,   // 2D00..2D2F; Georgian Supplement
2565             0x2D30,   // 2D30..2D7F; Tifinagh
2566             0x2D80,   // 2D80..2DDF; Ethiopic Extended
2567             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2568             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2569             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2570             0x2F00,   // 2F00..2FDF; Kangxi Radicals
2571             0x2FE0,   //             unassigned
2572             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2573             0x3000,   // 3000..303F; CJK Symbols and Punctuation
2574             0x3040,   // 3040..309F; Hiragana
2575             0x30A0,   // 30A0..30FF; Katakana
2576             0x3100,   // 3100..312F; Bopomofo
2577             0x3130,   // 3130..318F; Hangul Compatibility Jamo
2578             0x3190,   // 3190..319F; Kanbun
2579             0x31A0,   // 31A0..31BF; Bopomofo Extended
2580             0x31C0,   // 31C0..31EF; CJK Strokes
2581             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2582             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2583             0x3300,   // 3300..33FF; CJK Compatibility
2584             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2585             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2586             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2587             0xA000,   // A000..A48F; Yi Syllables
2588             0xA490,   // A490..A4CF; Yi Radicals
2589             0xA4D0,   // A4D0..A4FF; Lisu
2590             0xA500,   // A500..A63F; Vai
2591             0xA640,   // A640..A69F; Cyrillic Extended-B
2592             0xA6A0,   // A6A0..A6FF; Bamum
2593             0xA700,   // A700..A71F; Modifier Tone Letters
2594             0xA720,   // A720..A7FF; Latin Extended-D
2595             0xA800,   // A800..A82F; Syloti Nagri
2596             0xA830,   // A830..A83F; Common Indic Number Forms
2597             0xA840,   // A840..A87F; Phags-pa
2598             0xA880,   // A880..A8DF; Saurashtra
2599             0xA8E0,   // A8E0..A8FF; Devanagari Extended
2600             0xA900,   // A900..A92F; Kayah Li
2601             0xA930,   // A930..A95F; Rejang
2602             0xA960,   // A960..A97F; Hangul Jamo Extended-A
2603             0xA980,   // A980..A9DF; Javanese
2604             0xA9E0,   //             unassigned
2605             0xAA00,   // AA00..AA5F; Cham
2606             0xAA60,   // AA60..AA7F; Myanmar Extended-A
2607             0xAA80,   // AA80..AADF; Tai Viet
2608             0xAAE0,   //             unassigned
2609             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
2610             0xAB30,   //             unassigned
2611             0xABC0,   // ABC0..ABFF; Meetei Mayek
2612             0xAC00,   // AC00..D7AF; Hangul Syllables
2613             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
2614             0xD800,   // D800..DB7F; High Surrogates
2615             0xDB80,   // DB80..DBFF; High Private Use Surrogates
2616             0xDC00,   // DC00..DFFF; Low Surrogates
2617             0xE000,   // E000..F8FF; Private Use Area
2618             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2619             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2620             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2621             0xFE00,   // FE00..FE0F; Variation Selectors
2622             0xFE10,   // FE10..FE1F; Vertical Forms
2623             0xFE20,   // FE20..FE2F; Combining Half Marks
2624             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2625             0xFE50,   // FE50..FE6F; Small Form Variants
2626             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2627             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2628             0xFFF0,   // FFF0..FFFF; Specials
2629             0x10000,  // 10000..1007F; Linear B Syllabary
2630             0x10080,  // 10080..100FF; Linear B Ideograms
2631             0x10100,  // 10100..1013F; Aegean Numbers
2632             0x10140,  // 10140..1018F; Ancient Greek Numbers
2633             0x10190,  // 10190..101CF; Ancient Symbols
2634             0x101D0,  // 101D0..101FF; Phaistos Disc
2635             0x10200,  //               unassigned
2636             0x10280,  // 10280..1029F; Lycian
2637             0x102A0,  // 102A0..102DF; Carian
2638             0x102E0,  //               unassigned
2639             0x10300,  // 10300..1032F; Old Italic
2640             0x10330,  // 10330..1034F; Gothic
2641             0x10350,  //               unassigned
2642             0x10380,  // 10380..1039F; Ugaritic
2643             0x103A0,  // 103A0..103DF; Old Persian
2644             0x103E0,  //               unassigned
2645             0x10400,  // 10400..1044F; Deseret
2646             0x10450,  // 10450..1047F; Shavian
2647             0x10480,  // 10480..104AF; Osmanya
2648             0x104B0,  //               unassigned
2649             0x10800,  // 10800..1083F; Cypriot Syllabary
2650             0x10840,  // 10840..1085F; Imperial Aramaic
2651             0x10860,  //               unassigned
2652             0x10900,  // 10900..1091F; Phoenician
2653             0x10920,  // 10920..1093F; Lydian
2654             0x10940,  //               unassigned
2655             0x10A00,  // 10A00..10A5F; Kharoshthi
2656             0x10A60,  // 10A60..10A7F; Old South Arabian
2657             0x10A80,  //               unassigned
2658             0x10B00,  // 10B00..10B3F; Avestan
2659             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
2660             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
2661             0x10B80,  //               unassigned
2662             0x10C00,  // 10C00..10C4F; Old Turkic
2663             0x10C50,  //               unassigned
2664             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
2665             0x10E80,  //               unassigned
2666             0x11000,  // 11000..1107F; Brahmi
2667             0x11080,  // 11080..110CF; Kaithi
2668             0x110D0,  //               unassigned
2669             0x12000,  // 12000..123FF; Cuneiform
2670             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2671             0x12480,  //               unassigned
2672             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
2673             0x13430,  //               unassigned
2674             0x16800,  // 16800..16A3F; Bamum Supplement
2675             0x16A40,  //               unassigned
2676             0x1B000,  // 1B000..1B0FF; Kana Supplement
2677             0x1B100,  //               unassigned
2678             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2679             0x1D100,  // 1D100..1D1FF; Musical Symbols
2680             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2681             0x1D250,  //               unassigned
2682             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2683             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2684             0x1D380,  //               unassigned
2685             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2686             0x1D800,  //               unassigned
2687             0x1F000,  // 1F000..1F02F; Mahjong Tiles
2688             0x1F030,  // 1F030..1F09F; Domino Tiles
2689             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
2690             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2691             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
2692             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2693             0x1F600,  // 1F600..1F64F; Emoticons
2694             0x1F650,  //               unassigned
2695             0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
2696             0x1F700,  // 1F700..1F77F; Alchemical Symbols
2697             0x1F780,  //               unassigned
2698             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2699             0x2A6E0,  //               unassigned
2700             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
2701             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
2702             0x2B820,  //               unassigned
2703             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2704             0x2FA20,  //               unassigned
2705             0xE0000,  // E0000..E007F; Tags
2706             0xE0080,  //               unassigned
2707             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2708             0xE01F0,  //               unassigned
2709             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2710             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
2711         };
2712 
2713         private static final UnicodeBlock[] blocks = { // blocks[i] begins at blockStarts[i]; null marks an unassigned range
2714             BASIC_LATIN,
2715             LATIN_1_SUPPLEMENT,
2716             LATIN_EXTENDED_A,
2717             LATIN_EXTENDED_B,
2718             IPA_EXTENSIONS,
2719             SPACING_MODIFIER_LETTERS,
2720             COMBINING_DIACRITICAL_MARKS,
2721             GREEK,
2722             CYRILLIC,
2723             CYRILLIC_SUPPLEMENTARY,
2724             ARMENIAN,
2725             HEBREW,
2726             ARABIC,
2727             SYRIAC,
2728             ARABIC_SUPPLEMENT,
2729             THAANA,
2730             NKO,
2731             SAMARITAN,
2732             MANDAIC,
2733             null,   // unassigned range starting at 0x0860
2734             DEVANAGARI,
2735             BENGALI,
2736             GURMUKHI,
2737             GUJARATI,
2738             ORIYA,
2739             TAMIL,
2740             TELUGU,
2741             KANNADA,
2742             MALAYALAM,
2743             SINHALA,
2744             THAI,
2745             LAO,
2746             TIBETAN,
2747             MYANMAR,
2748             GEORGIAN,
2749             HANGUL_JAMO,
2750             ETHIOPIC,
2751             ETHIOPIC_SUPPLEMENT,
2752             CHEROKEE,
2753             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2754             OGHAM,
2755             RUNIC,
2756             TAGALOG,
2757             HANUNOO,
2758             BUHID,
2759             TAGBANWA,
2760             KHMER,
2761             MONGOLIAN,
2762             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
2763             LIMBU,
2764             TAI_LE,
2765             NEW_TAI_LUE,
2766             KHMER_SYMBOLS,
2767             BUGINESE,
2768             TAI_THAM,
2769             null,   // unassigned range starting at 0x1AB0
2770             BALINESE,
2771             SUNDANESE,
2772             BATAK,
2773             LEPCHA,
2774             OL_CHIKI,
2775             null,   // unassigned range starting at 0x1C80
2776             VEDIC_EXTENSIONS,
2777             PHONETIC_EXTENSIONS,
2778             PHONETIC_EXTENSIONS_SUPPLEMENT,
2779             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2780             LATIN_EXTENDED_ADDITIONAL,
2781             GREEK_EXTENDED,
2782             GENERAL_PUNCTUATION,
2783             SUPERSCRIPTS_AND_SUBSCRIPTS,
2784             CURRENCY_SYMBOLS,
2785             COMBINING_MARKS_FOR_SYMBOLS,
2786             LETTERLIKE_SYMBOLS,
2787             NUMBER_FORMS,
2788             ARROWS,
2789             MATHEMATICAL_OPERATORS,
2790             MISCELLANEOUS_TECHNICAL,
2791             CONTROL_PICTURES,
2792             OPTICAL_CHARACTER_RECOGNITION,
2793             ENCLOSED_ALPHANUMERICS,
2794             BOX_DRAWING,
2795             BLOCK_ELEMENTS,
2796             GEOMETRIC_SHAPES,
2797             MISCELLANEOUS_SYMBOLS,
2798             DINGBATS,
2799             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
2800             SUPPLEMENTAL_ARROWS_A,
2801             BRAILLE_PATTERNS,
2802             SUPPLEMENTAL_ARROWS_B,
2803             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
2804             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
2805             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
2806             GLAGOLITIC,
2807             LATIN_EXTENDED_C,
2808             COPTIC,
2809             GEORGIAN_SUPPLEMENT,
2810             TIFINAGH,
2811             ETHIOPIC_EXTENDED,
2812             CYRILLIC_EXTENDED_A,
2813             SUPPLEMENTAL_PUNCTUATION,
2814             CJK_RADICALS_SUPPLEMENT,
2815             KANGXI_RADICALS,
2816             null,   // unassigned range starting at 0x2FE0
2817             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
2818             CJK_SYMBOLS_AND_PUNCTUATION,
2819             HIRAGANA,
2820             KATAKANA,
2821             BOPOMOFO,
2822             HANGUL_COMPATIBILITY_JAMO,
2823             KANBUN,
2824             BOPOMOFO_EXTENDED,
2825             CJK_STROKES,
2826             KATAKANA_PHONETIC_EXTENSIONS,
2827             ENCLOSED_CJK_LETTERS_AND_MONTHS,
2828             CJK_COMPATIBILITY,
2829             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2830             YIJING_HEXAGRAM_SYMBOLS,
2831             CJK_UNIFIED_IDEOGRAPHS,
2832             YI_SYLLABLES,
2833             YI_RADICALS,
2834             LISU,
2835             VAI,
2836             CYRILLIC_EXTENDED_B,
2837             BAMUM,
2838             MODIFIER_TONE_LETTERS,
2839             LATIN_EXTENDED_D,
2840             SYLOTI_NAGRI,
2841             COMMON_INDIC_NUMBER_FORMS,
2842             PHAGS_PA,
2843             SAURASHTRA,
2844             DEVANAGARI_EXTENDED,
2845             KAYAH_LI,
2846             REJANG,
2847             HANGUL_JAMO_EXTENDED_A,
2848             JAVANESE,
2849             null,   // unassigned range starting at 0xA9E0
2850             CHAM,
2851             MYANMAR_EXTENDED_A,
2852             TAI_VIET,
2853             null,   // unassigned range starting at 0xAAE0
2854             ETHIOPIC_EXTENDED_A,
2855             null,   // unassigned range starting at 0xAB30
2856             MEETEI_MAYEK,
2857             HANGUL_SYLLABLES,
2858             HANGUL_JAMO_EXTENDED_B,
2859             HIGH_SURROGATES,
2860             HIGH_PRIVATE_USE_SURROGATES,
2861             LOW_SURROGATES,
2862             PRIVATE_USE_AREA,
2863             CJK_COMPATIBILITY_IDEOGRAPHS,
2864             ALPHABETIC_PRESENTATION_FORMS,
2865             ARABIC_PRESENTATION_FORMS_A,
2866             VARIATION_SELECTORS,
2867             VERTICAL_FORMS,
2868             COMBINING_HALF_MARKS,
2869             CJK_COMPATIBILITY_FORMS,
2870             SMALL_FORM_VARIANTS,
2871             ARABIC_PRESENTATION_FORMS_B,
2872             HALFWIDTH_AND_FULLWIDTH_FORMS,
2873             SPECIALS,
2874             LINEAR_B_SYLLABARY,
2875             LINEAR_B_IDEOGRAMS,
2876             AEGEAN_NUMBERS,
2877             ANCIENT_GREEK_NUMBERS,
2878             ANCIENT_SYMBOLS,
2879             PHAISTOS_DISC,
2880             null,   // unassigned range starting at 0x10200
2881             LYCIAN,
2882             CARIAN,
2883             null,   // unassigned range starting at 0x102E0
2884             OLD_ITALIC,
2885             GOTHIC,
2886             null,   // unassigned range starting at 0x10350
2887             UGARITIC,
2888             OLD_PERSIAN,
2889             null,   // unassigned range starting at 0x103E0
2890             DESERET,
2891             SHAVIAN,
2892             OSMANYA,
2893             null,   // unassigned range starting at 0x104B0
2894             CYPRIOT_SYLLABARY,
2895             IMPERIAL_ARAMAIC,
2896             null,   // unassigned range starting at 0x10860
2897             PHOENICIAN,
2898             LYDIAN,
2899             null,   // unassigned range starting at 0x10940
2900             KHAROSHTHI,
2901             OLD_SOUTH_ARABIAN,
2902             null,   // unassigned range starting at 0x10A80
2903             AVESTAN,
2904             INSCRIPTIONAL_PARTHIAN,
2905             INSCRIPTIONAL_PAHLAVI,
2906             null,   // unassigned range starting at 0x10B80
2907             OLD_TURKIC,
2908             null,   // unassigned range starting at 0x10C50
2909             RUMI_NUMERAL_SYMBOLS,
2910             null,   // unassigned range starting at 0x10E80
2911             BRAHMI,
2912             KAITHI,
2913             null,   // unassigned range starting at 0x110D0
2914             CUNEIFORM,
2915             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
2916             null,   // unassigned range starting at 0x12480
2917             EGYPTIAN_HIEROGLYPHS,
2918             null,   // unassigned range starting at 0x13430
2919             BAMUM_SUPPLEMENT,
2920             null,   // unassigned range starting at 0x16A40
2921             KANA_SUPPLEMENT,
2922             null,   // unassigned range starting at 0x1B100
2923             BYZANTINE_MUSICAL_SYMBOLS,
2924             MUSICAL_SYMBOLS,
2925             ANCIENT_GREEK_MUSICAL_NOTATION,
2926             null,   // unassigned range starting at 0x1D250
2927             TAI_XUAN_JING_SYMBOLS,
2928             COUNTING_ROD_NUMERALS,
2929             null,   // unassigned range starting at 0x1D380
2930             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
2931             null,   // unassigned range starting at 0x1D800
2932             MAHJONG_TILES,
2933             DOMINO_TILES,
2934             PLAYING_CARDS,
2935             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
2936             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
2937             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
2938             EMOTICONS,
2939             null,   // unassigned range starting at 0x1F650
2940             TRANSPORT_AND_MAP_SYMBOLS,
2941             ALCHEMICAL_SYMBOLS,
2942             null,   // unassigned range starting at 0x1F780
2943             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
2944             null,   // unassigned range starting at 0x2A6E0
2945             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
2946             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
2947             null,   // unassigned range starting at 0x2B820
2948             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
2949             null,   // unassigned range starting at 0x2FA20
2950             TAGS,
2951             null,   // unassigned range starting at 0xE0080
2952             VARIATION_SELECTORS_SUPPLEMENT,
2953             null,   // unassigned range starting at 0xE01F0
2954             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
2955             SUPPLEMENTARY_PRIVATE_USE_AREA_B
2956         };
2957 
2958 
2959         /**
2960          * Looks up the Unicode block that contains the given {@code char}
2961          * value. The value is widened to an {@code int} and the lookup is
2962          * delegated to {@link #of(int)}.
2963          *
2964          * <p><b>Note:</b> A single {@code char} cannot represent a
2965          * <a href="Character.html#supplementary">supplementary character</a>.
2966          * Use {@link #of(int)} to cover every Unicode code point.
2967          *
2968          * @param   c  The character in question
2969          * @return  The {@code UnicodeBlock} instance of which {@code c} is a
2970          *          member, or {@code null} if {@code c} is not a member of
2971          *          any Unicode block
2972          */
2973         public static UnicodeBlock of(char c) {
2974             // Widening a char to int is lossless, so this selects of(int).
2975             int codePoint = c;
2976             return of(codePoint);
2977         }
2978 
2979         /**
2980          * Returns the object representing the Unicode block
2981          * containing the given character (Unicode code point), or
2982          * {@code null} if the character is not a member of a
2983          * defined block.
2984          *
2985          * @param   codePoint the character (Unicode code point) in question.
2986          * @return  The {@code UnicodeBlock} instance representing the
2987          *          Unicode block of which this character is a member, or
2988          *          {@code null} if the character is not a member of any
2989          *          Unicode block
2990          * @exception IllegalArgumentException if the specified
2991          * {@code codePoint} is an invalid Unicode code point.
2992          * @see Character#isValidCodePoint(int)
2993          * @since   1.5
2994          */
2995         public static UnicodeBlock of(int codePoint) {
2996             if (!isValidCodePoint(codePoint)) {
2997                 throw new IllegalArgumentException();
2998             }
2999 
3000             int top, bottom, current;
3001             bottom = 0;
3002             top = blockStarts.length;
3003             current = top/2;
3004 
3005             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3006             while (top - bottom > 1) {
3007                 if (codePoint >= blockStarts[current]) {
3008                     bottom = current;
3009                 } else {
3010                     top = current;
3011                 }
3012                 current = (top + bottom) / 2;
3013             }
3014             return blocks[current];
3015         }
3016 
3017         /**
3018          * Returns the UnicodeBlock with the given name. Block
3019          * names are determined by The Unicode Standard. The file
3020          * Blocks-&lt;version&gt;.txt defines blocks for a particular
3021          * version of the standard. The {@link Character} class specifies
3022          * the version of the standard that it supports.
3023          * <p>
3024          * This method accepts block names in the following forms:
3025          * <ol>
3026          * <li> Canonical block names as defined by the Unicode Standard.
3027          * For example, the standard defines a "Basic Latin" block. Therefore, this
3028          * method accepts "Basic Latin" as a valid block name. The documentation of
3029          * each UnicodeBlock provides the canonical name.
3030          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3031          * is a valid block name for the "Basic Latin" block.
3032          * <li>The text representation of each constant UnicodeBlock identifier.
3033          * For example, this method will return the {@link #BASIC_LATIN} block if
3034          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3035          * hyphens in the canonical name with underscores.
3036          * </ol>
3037          * Finally, character case is ignored for all of the valid block name forms.
3038          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3039          * The en_US locale's case mapping rules are used to provide case-insensitive
3040          * string comparisons for block name validation.
3041          * <p>
3042          * If the Unicode Standard changes block names, both the previous and
3043          * current names will be accepted.
3044          *
3045          * @param blockName A {@code UnicodeBlock} name.
3046          * @return The {@code UnicodeBlock} instance identified
3047          *         by {@code blockName}
3048          * @throws IllegalArgumentException if {@code blockName} is an
3049          *         invalid name; the message includes the rejected name
3050          * @throws NullPointerException if {@code blockName} is null
3051          * @since 1.5
3052          */
3053         public static final UnicodeBlock forName(String blockName) {
3054             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3055             if (block == null) {
3056                 throw new IllegalArgumentException("Not a valid block name: " + blockName);
3057             }
3058             return block;
3059         }
3060     }
3061 
3062 
3063     /**
3064      * A family of character subsets representing the character scripts
3065      * defined in the <a href="http://www.unicode.org/reports/tr24/">
3066      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3067      * character is assigned to a single Unicode script, either a specific
3068      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3069      * one of the following three special values,
3070      * {@link Character.UnicodeScript#INHERITED Inherited},
3071      * {@link Character.UnicodeScript#COMMON Common} or
3072      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3073      *
3074      * @since 1.7
3075      */
3076     public static enum UnicodeScript {
3077         /**
3078          * Unicode script "Common".
3079          */
3080         COMMON,
3081 
3082         /**
3083          * Unicode script "Latin".
3084          */
3085         LATIN,
3086 
3087         /**
3088          * Unicode script "Greek".
3089          */
3090         GREEK,
3091 
3092         /**
3093          * Unicode script "Cyrillic".
3094          */
3095         CYRILLIC,
3096 
3097         /**
3098          * Unicode script "Armenian".
3099          */
3100         ARMENIAN,
3101 
3102         /**
3103          * Unicode script "Hebrew".
3104          */
3105         HEBREW,
3106 
3107         /**
3108          * Unicode script "Arabic".
3109          */
3110         ARABIC,
3111 
3112         /**
3113          * Unicode script "Syriac".
3114          */
3115         SYRIAC,
3116 
3117         /**
3118          * Unicode script "Thaana".
3119          */
3120         THAANA,
3121 
3122         /**
3123          * Unicode script "Devanagari".
3124          */
3125         DEVANAGARI,
3126 
3127         /**
3128          * Unicode script "Bengali".
3129          */
3130         BENGALI,
3131 
3132         /**
3133          * Unicode script "Gurmukhi".
3134          */
3135         GURMUKHI,
3136 
3137         /**
3138          * Unicode script "Gujarati".
3139          */
3140         GUJARATI,
3141 
3142         /**
3143          * Unicode script "Oriya".
3144          */
3145         ORIYA,
3146 
3147         /**
3148          * Unicode script "Tamil".
3149          */
3150         TAMIL,
3151 
3152         /**
3153          * Unicode script "Telugu".
3154          */
3155         TELUGU,
3156 
3157         /**
3158          * Unicode script "Kannada".
3159          */
3160         KANNADA,
3161 
3162         /**
3163          * Unicode script "Malayalam".
3164          */
3165         MALAYALAM,
3166 
3167         /**
3168          * Unicode script "Sinhala".
3169          */
3170         SINHALA,
3171 
3172         /**
3173          * Unicode script "Thai".
3174          */
3175         THAI,
3176 
3177         /**
3178          * Unicode script "Lao".
3179          */
3180         LAO,
3181 
3182         /**
3183          * Unicode script "Tibetan".
3184          */
3185         TIBETAN,
3186 
3187         /**
3188          * Unicode script "Myanmar".
3189          */
3190         MYANMAR,
3191 
3192         /**
3193          * Unicode script "Georgian".
3194          */
3195         GEORGIAN,
3196 
3197         /**
3198          * Unicode script "Hangul".
3199          */
3200         HANGUL,
3201 
3202         /**
3203          * Unicode script "Ethiopic".
3204          */
3205         ETHIOPIC,
3206 
3207         /**
3208          * Unicode script "Cherokee".
3209          */
3210         CHEROKEE,
3211 
3212         /**
3213          * Unicode script "Canadian_Aboriginal".
3214          */
3215         CANADIAN_ABORIGINAL,
3216 
3217         /**
3218          * Unicode script "Ogham".
3219          */
3220         OGHAM,
3221 
3222         /**
3223          * Unicode script "Runic".
3224          */
3225         RUNIC,
3226 
3227         /**
3228          * Unicode script "Khmer".
3229          */
3230         KHMER,
3231 
3232         /**
3233          * Unicode script "Mongolian".
3234          */
3235         MONGOLIAN,
3236 
3237         /**
3238          * Unicode script "Hiragana".
3239          */
3240         HIRAGANA,
3241 
3242         /**
3243          * Unicode script "Katakana".
3244          */
3245         KATAKANA,
3246 
3247         /**
3248          * Unicode script "Bopomofo".
3249          */
3250         BOPOMOFO,
3251 
3252         /**
3253          * Unicode script "Han".
3254          */
3255         HAN,
3256 
3257         /**
3258          * Unicode script "Yi".
3259          */
3260         YI,
3261 
3262         /**
3263          * Unicode script "Old_Italic".
3264          */
3265         OLD_ITALIC,
3266 
3267         /**
3268          * Unicode script "Gothic".
3269          */
3270         GOTHIC,
3271 
3272         /**
3273          * Unicode script "Deseret".
3274          */
3275         DESERET,
3276 
3277         /**
3278          * Unicode script "Inherited".
3279          */
3280         INHERITED,
3281 
3282         /**
3283          * Unicode script "Tagalog".
3284          */
3285         TAGALOG,
3286 
3287         /**
3288          * Unicode script "Hanunoo".
3289          */
3290         HANUNOO,
3291 
3292         /**
3293          * Unicode script "Buhid".
3294          */
3295         BUHID,
3296 
3297         /**
3298          * Unicode script "Tagbanwa".
3299          */
3300         TAGBANWA,
3301 
3302         /**
3303          * Unicode script "Limbu".
3304          */
3305         LIMBU,
3306 
3307         /**
3308          * Unicode script "Tai_Le".
3309          */
3310         TAI_LE,
3311 
3312         /**
3313          * Unicode script "Linear_B".
3314          */
3315         LINEAR_B,
3316 
3317         /**
3318          * Unicode script "Ugaritic".
3319          */
3320         UGARITIC,
3321 
3322         /**
3323          * Unicode script "Shavian".
3324          */
3325         SHAVIAN,
3326 
3327         /**
3328          * Unicode script "Osmanya".
3329          */
3330         OSMANYA,
3331 
3332         /**
3333          * Unicode script "Cypriot".
3334          */
3335         CYPRIOT,
3336 
3337         /**
3338          * Unicode script "Braille".
3339          */
3340         BRAILLE,
3341 
3342         /**
3343          * Unicode script "Buginese".
3344          */
3345         BUGINESE,
3346 
3347         /**
3348          * Unicode script "Coptic".
3349          */
3350         COPTIC,
3351 
3352         /**
3353          * Unicode script "New_Tai_Lue".
3354          */
3355         NEW_TAI_LUE,
3356 
3357         /**
3358          * Unicode script "Glagolitic".
3359          */
3360         GLAGOLITIC,
3361 
3362         /**
3363          * Unicode script "Tifinagh".
3364          */
3365         TIFINAGH,
3366 
3367         /**
3368          * Unicode script "Syloti_Nagri".
3369          */
3370         SYLOTI_NAGRI,
3371 
3372         /**
3373          * Unicode script "Old_Persian".
3374          */
3375         OLD_PERSIAN,
3376 
3377         /**
3378          * Unicode script "Kharoshthi".
3379          */
3380         KHAROSHTHI,
3381 
3382         /**
3383          * Unicode script "Balinese".
3384          */
3385         BALINESE,
3386 
3387         /**
3388          * Unicode script "Cuneiform".
3389          */
3390         CUNEIFORM,
3391 
3392         /**
3393          * Unicode script "Phoenician".
3394          */
3395         PHOENICIAN,
3396 
3397         /**
3398          * Unicode script "Phags_Pa".
3399          */
3400         PHAGS_PA,
3401 
3402         /**
3403          * Unicode script "Nko".
3404          */
3405         NKO,
3406 
3407         /**
3408          * Unicode script "Sundanese".
3409          */
3410         SUNDANESE,
3411 
3412         /**
3413          * Unicode script "Batak".
3414          */
3415         BATAK,
3416 
3417         /**
3418          * Unicode script "Lepcha".
3419          */
3420         LEPCHA,
3421 
3422         /**
3423          * Unicode script "Ol_Chiki".
3424          */
3425         OL_CHIKI,
3426 
3427         /**
3428          * Unicode script "Vai".
3429          */
3430         VAI,
3431 
3432         /**
3433          * Unicode script "Saurashtra".
3434          */
3435         SAURASHTRA,
3436 
3437         /**
3438          * Unicode script "Kayah_Li".
3439          */
3440         KAYAH_LI,
3441 
3442         /**
3443          * Unicode script "Rejang".
3444          */
3445         REJANG,
3446 
3447         /**
3448          * Unicode script "Lycian".
3449          */
3450         LYCIAN,
3451 
3452         /**
3453          * Unicode script "Carian".
3454          */
3455         CARIAN,
3456 
3457         /**
3458          * Unicode script "Lydian".
3459          */
3460         LYDIAN,
3461 
3462         /**
3463          * Unicode script "Cham".
3464          */
3465         CHAM,
3466 
3467         /**
3468          * Unicode script "Tai_Tham".
3469          */
3470         TAI_THAM,
3471 
3472         /**
3473          * Unicode script "Tai_Viet".
3474          */
3475         TAI_VIET,
3476 
3477         /**
3478          * Unicode script "Avestan".
3479          */
3480         AVESTAN,
3481 
3482         /**
3483          * Unicode script "Egyptian_Hieroglyphs".
3484          */
3485         EGYPTIAN_HIEROGLYPHS,
3486 
3487         /**
3488          * Unicode script "Samaritan".
3489          */
3490         SAMARITAN,
3491 
3492         /**
3493          * Unicode script "Mandaic".
3494          */
3495         MANDAIC,
3496 
3497         /**
3498          * Unicode script "Lisu".
3499          */
3500         LISU,
3501 
3502         /**
3503          * Unicode script "Bamum".
3504          */
3505         BAMUM,
3506 
3507         /**
3508          * Unicode script "Javanese".
3509          */
3510         JAVANESE,
3511 
3512         /**
3513          * Unicode script "Meetei_Mayek".
3514          */
3515         MEETEI_MAYEK,
3516 
3517         /**
3518          * Unicode script "Imperial_Aramaic".
3519          */
3520         IMPERIAL_ARAMAIC,
3521 
3522         /**
3523          * Unicode script "Old_South_Arabian".
3524          */
3525         OLD_SOUTH_ARABIAN,
3526 
3527         /**
3528          * Unicode script "Inscriptional_Parthian".
3529          */
3530         INSCRIPTIONAL_PARTHIAN,
3531 
3532         /**
3533          * Unicode script "Inscriptional_Pahlavi".
3534          */
3535         INSCRIPTIONAL_PAHLAVI,
3536 
3537         /**
3538          * Unicode script "Old_Turkic".
3539          */
3540         OLD_TURKIC,
3541 
3542         /**
3543          * Unicode script "Brahmi".
3544          */
3545         BRAHMI,
3546 
3547         /**
3548          * Unicode script "Kaithi".
3549          */
3550         KAITHI,
3551 
3552         /**
3553          * Unicode script "Unknown".
3554          */
3555         UNKNOWN;
3556 
3557         private static final int[] scriptStarts = {
                 // Table of range start code points, listed in ascending order.
                 // The trailing comment on each entry gives the code point range
                 // (hex, inclusive) and the script that range belongs to; the
                 // script constant itself is stored at the same index in the
                 // parallel scripts[] array declared right after this one.
                 //
                 // NOTE(review): the code that consults this table is not in this
                 // part of the file; presumably a code point is resolved to the
                 // entry with the greatest start value <= the code point. If so,
                 // the effective end of every range is "next start - 1", and the
                 // two places where a comment's end value stops short of the next
                 // start (flagged inline below) still resolve to the preceding
                 // entry's script -- confirm against the lookup.
3558             0x0000,   // 0000..0040; COMMON
3559             0x0041,   // 0041..005A; LATIN
3560             0x005B,   // 005B..0060; COMMON
3561             0x0061,   // 0061..007A; LATIN
3562             0x007B,   // 007B..00A9; COMMON
3563             0x00AA,   // 00AA..00AA; LATIN
3564             0x00AB,   // 00AB..00B9; COMMON
3565             0x00BA,   // 00BA..00BA; LATIN
3566             0x00BB,   // 00BB..00BF; COMMON
3567             0x00C0,   // 00C0..00D6; LATIN
3568             0x00D7,   // 00D7..00D7; COMMON
3569             0x00D8,   // 00D8..00F6; LATIN
3570             0x00F7,   // 00F7..00F7; COMMON
3571             0x00F8,   // 00F8..02B8; LATIN
3572             0x02B9,   // 02B9..02DF; COMMON
3573             0x02E0,   // 02E0..02E4; LATIN
3574             0x02E5,   // 02E5..02E9; COMMON
3575             0x02EA,   // 02EA..02EB; BOPOMOFO
3576             0x02EC,   // 02EC..02FF; COMMON
3577             0x0300,   // 0300..036F; INHERITED
3578             0x0370,   // 0370..0373; GREEK
3579             0x0374,   // 0374..0374; COMMON
3580             0x0375,   // 0375..037D; GREEK
3581             0x037E,   // 037E..0383; COMMON
3582             0x0384,   // 0384..0384; GREEK
3583             0x0385,   // 0385..0385; COMMON
3584             0x0386,   // 0386..0386; GREEK
3585             0x0387,   // 0387..0387; COMMON
3586             0x0388,   // 0388..03E1; GREEK
3587             0x03E2,   // 03E2..03EF; COPTIC
3588             0x03F0,   // 03F0..03FF; GREEK
3589             0x0400,   // 0400..0484; CYRILLIC
3590             0x0485,   // 0485..0486; INHERITED
3591             0x0487,   // 0487..0530; CYRILLIC
3592             0x0531,   // 0531..0588; ARMENIAN
3593             0x0589,   // 0589..0589; COMMON
3594             0x058A,   // 058A..0590; ARMENIAN
3595             0x0591,   // 0591..05FF; HEBREW
3596             0x0600,   // 0600..060B; ARABIC
3597             0x060C,   // 060C..060C; COMMON
3598             0x060D,   // 060D..061A; ARABIC
3599             0x061B,   // 061B..061D; COMMON
3600             0x061E,   // 061E..061E; ARABIC
3601             0x061F,   // 061F..061F; COMMON
3602             0x0620,   // 0620..063F; ARABIC
3603             0x0640,   // 0640..0640; COMMON
3604             0x0641,   // 0641..064A; ARABIC
3605             0x064B,   // 064B..0655; INHERITED
3606             0x0656,   // 0656..065E; ARABIC
3607             0x065F,   // 065F..065F; INHERITED
3608             0x0660,   // 0660..0669; COMMON
3609             0x066A,   // 066A..066F; ARABIC
3610             0x0670,   // 0670..0670; INHERITED
3611             0x0671,   // 0671..06DC; ARABIC
3612             0x06DD,   // 06DD..06DD; COMMON
3613             0x06DE,   // 06DE..06FF; ARABIC
3614             0x0700,   // 0700..074F; SYRIAC
3615             0x0750,   // 0750..077F; ARABIC
3616             0x0780,   // 0780..07BF; THAANA
3617             0x07C0,   // 07C0..07FF; NKO
3618             0x0800,   // 0800..083F; SAMARITAN
3619             0x0840,   // 0840..08FF; MANDAIC
3620             0x0900,   // 0900..0950; DEVANAGARI
3621             0x0951,   // 0951..0952; INHERITED
3622             0x0953,   // 0953..0963; DEVANAGARI
3623             0x0964,   // 0964..0965; COMMON
3624             0x0966,   // 0966..096F; DEVANAGARI
3625             0x0970,   // 0970..0970; COMMON
3626             0x0971,   // 0971..0980; DEVANAGARI
3627             0x0981,   // 0981..0A00; BENGALI
3628             0x0A01,   // 0A01..0A80; GURMUKHI
3629             0x0A81,   // 0A81..0B00; GUJARATI
3630             0x0B01,   // 0B01..0B81; ORIYA
3631             0x0B82,   // 0B82..0C00; TAMIL
3632             0x0C01,   // 0C01..0C81; TELUGU
3633             0x0C82,   // 0C82..0CF0; KANNADA  (NOTE(review): 0CF1..0D01 is not named in any range comment)
3634             0x0D02,   // 0D02..0D81; MALAYALAM
3635             0x0D82,   // 0D82..0E00; SINHALA
3636             0x0E01,   // 0E01..0E3E; THAI
3637             0x0E3F,   // 0E3F..0E3F; COMMON
3638             0x0E40,   // 0E40..0E80; THAI
3639             0x0E81,   // 0E81..0EFF; LAO
3640             0x0F00,   // 0F00..0FD4; TIBETAN
3641             0x0FD5,   // 0FD5..0FD8; COMMON
3642             0x0FD9,   // 0FD9..0FFF; TIBETAN
3643             0x1000,   // 1000..109F; MYANMAR
3644             0x10A0,   // 10A0..10FA; GEORGIAN
3645             0x10FB,   // 10FB..10FB; COMMON
3646             0x10FC,   // 10FC..10FF; GEORGIAN
3647             0x1100,   // 1100..11FF; HANGUL
3648             0x1200,   // 1200..139F; ETHIOPIC
3649             0x13A0,   // 13A0..13FF; CHEROKEE
3650             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
3651             0x1680,   // 1680..169F; OGHAM
3652             0x16A0,   // 16A0..16EA; RUNIC
3653             0x16EB,   // 16EB..16ED; COMMON
3654             0x16EE,   // 16EE..16FF; RUNIC
3655             0x1700,   // 1700..171F; TAGALOG
3656             0x1720,   // 1720..1734; HANUNOO
3657             0x1735,   // 1735..173F; COMMON
3658             0x1740,   // 1740..175F; BUHID
3659             0x1760,   // 1760..177F; TAGBANWA
3660             0x1780,   // 1780..17FF; KHMER
3661             0x1800,   // 1800..1801; MONGOLIAN
3662             0x1802,   // 1802..1803; COMMON
3663             0x1804,   // 1804..1804; MONGOLIAN
3664             0x1805,   // 1805..1805; COMMON
3665             0x1806,   // 1806..18AF; MONGOLIAN
3666             0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
3667             0x1900,   // 1900..194F; LIMBU
3668             0x1950,   // 1950..197F; TAI_LE
3669             0x1980,   // 1980..19DF; NEW_TAI_LUE
3670             0x19E0,   // 19E0..19FF; KHMER
3671             0x1A00,   // 1A00..1A1F; BUGINESE
3672             0x1A20,   // 1A20..1AFF; TAI_THAM
3673             0x1B00,   // 1B00..1B7F; BALINESE
3674             0x1B80,   // 1B80..1BBF; SUNDANESE
3675             0x1BC0,   // 1BC0..1BFF; BATAK
3676             0x1C00,   // 1C00..1C4F; LEPCHA
3677             0x1C50,   // 1C50..1CCF; OL_CHIKI
3678             0x1CD0,   // 1CD0..1CD2; INHERITED
3679             0x1CD3,   // 1CD3..1CD3; COMMON
3680             0x1CD4,   // 1CD4..1CE0; INHERITED
3681             0x1CE1,   // 1CE1..1CE1; COMMON
3682             0x1CE2,   // 1CE2..1CE8; INHERITED
3683             0x1CE9,   // 1CE9..1CEC; COMMON
3684             0x1CED,   // 1CED..1CED; INHERITED
3685             0x1CEE,   // 1CEE..1CFF; COMMON
3686             0x1D00,   // 1D00..1D25; LATIN
3687             0x1D26,   // 1D26..1D2A; GREEK
3688             0x1D2B,   // 1D2B..1D2B; CYRILLIC
3689             0x1D2C,   // 1D2C..1D5C; LATIN
3690             0x1D5D,   // 1D5D..1D61; GREEK
3691             0x1D62,   // 1D62..1D65; LATIN
3692             0x1D66,   // 1D66..1D6A; GREEK
3693             0x1D6B,   // 1D6B..1D77; LATIN
3694             0x1D78,   // 1D78..1D78; CYRILLIC
3695             0x1D79,   // 1D79..1DBE; LATIN
3696             0x1DBF,   // 1DBF..1DBF; GREEK
3697             0x1DC0,   // 1DC0..1DFF; INHERITED
3698             0x1E00,   // 1E00..1EFF; LATIN
3699             0x1F00,   // 1F00..1FFF; GREEK
3700             0x2000,   // 2000..200B; COMMON
3701             0x200C,   // 200C..200D; INHERITED
3702             0x200E,   // 200E..2070; COMMON
3703             0x2071,   // 2071..2073; LATIN
3704             0x2074,   // 2074..207E; COMMON
3705             0x207F,   // 207F..207F; LATIN
3706             0x2080,   // 2080..208F; COMMON
3707             0x2090,   // 2090..209F; LATIN
3708             0x20A0,   // 20A0..20CF; COMMON
3709             0x20D0,   // 20D0..20FF; INHERITED
3710             0x2100,   // 2100..2125; COMMON
3711             0x2126,   // 2126..2126; GREEK
3712             0x2127,   // 2127..2129; COMMON
3713             0x212A,   // 212A..212B; LATIN
3714             0x212C,   // 212C..2131; COMMON
3715             0x2132,   // 2132..2132; LATIN
3716             0x2133,   // 2133..214D; COMMON
3717             0x214E,   // 214E..214E; LATIN
3718             0x214F,   // 214F..215F; COMMON
3719             0x2160,   // 2160..2188; LATIN
3720             0x2189,   // 2189..27FF; COMMON
3721             0x2800,   // 2800..28FF; BRAILLE
3722             0x2900,   // 2900..2BFF; COMMON
3723             0x2C00,   // 2C00..2C5F; GLAGOLITIC
3724             0x2C60,   // 2C60..2C7F; LATIN
3725             0x2C80,   // 2C80..2CFF; COPTIC
3726             0x2D00,   // 2D00..2D2F; GEORGIAN
3727             0x2D30,   // 2D30..2D7F; TIFINAGH
3728             0x2D80,   // 2D80..2DDF; ETHIOPIC
3729             0x2DE0,   // 2DE0..2DFF; CYRILLIC
3730             0x2E00,   // 2E00..2E7F; COMMON
3731             0x2E80,   // 2E80..2FEF; HAN
3732             0x2FF0,   // 2FF0..3004; COMMON
3733             0x3005,   // 3005..3005; HAN
3734             0x3006,   // 3006..3006; COMMON
3735             0x3007,   // 3007..3007; HAN
3736             0x3008,   // 3008..3020; COMMON
3737             0x3021,   // 3021..3029; HAN
3738             0x302A,   // 302A..302D; INHERITED
3739             0x302E,   // 302E..302F; HANGUL
3740             0x3030,   // 3030..3037; COMMON
3741             0x3038,   // 3038..303B; HAN
3742             0x303C,   // 303C..3040; COMMON
3743             0x3041,   // 3041..3098; HIRAGANA
3744             0x3099,   // 3099..309A; INHERITED
3745             0x309B,   // 309B..309C; COMMON
3746             0x309D,   // 309D..309F; HIRAGANA
3747             0x30A0,   // 30A0..30A0; COMMON
3748             0x30A1,   // 30A1..30FA; KATAKANA
3749             0x30FB,   // 30FB..30FC; COMMON
3750             0x30FD,   // 30FD..3104; KATAKANA
3751             0x3105,   // 3105..3130; BOPOMOFO
3752             0x3131,   // 3131..318F; HANGUL
3753             0x3190,   // 3190..319F; COMMON
3754             0x31A0,   // 31A0..31BF; BOPOMOFO
3755             0x31C0,   // 31C0..31EF; COMMON
3756             0x31F0,   // 31F0..31FF; KATAKANA
3757             0x3200,   // 3200..321F; HANGUL
3758             0x3220,   // 3220..325F; COMMON
3759             0x3260,   // 3260..327E; HANGUL
3760             0x327F,   // 327F..32CF; COMMON
3761             0x32D0,   // 32D0..3357; KATAKANA
3762             0x3358,   // 3358..33FF; COMMON
3763             0x3400,   // 3400..4DBF; HAN
3764             0x4DC0,   // 4DC0..4DFF; COMMON
3765             0x4E00,   // 4E00..9FFF; HAN
3766             0xA000,   // A000..A4CF; YI
3767             0xA4D0,   // A4D0..A4FF; LISU
3768             0xA500,   // A500..A63F; VAI
3769             0xA640,   // A640..A69F; CYRILLIC
3770             0xA6A0,   // A6A0..A6FF; BAMUM
3771             0xA700,   // A700..A721; COMMON
3772             0xA722,   // A722..A787; LATIN
3773             0xA788,   // A788..A78A; COMMON
3774             0xA78B,   // A78B..A7FF; LATIN
3775             0xA800,   // A800..A82F; SYLOTI_NAGRI
3776             0xA830,   // A830..A83F; COMMON
3777             0xA840,   // A840..A87F; PHAGS_PA
3778             0xA880,   // A880..A8DF; SAURASHTRA
3779             0xA8E0,   // A8E0..A8FF; DEVANAGARI
3780             0xA900,   // A900..A92F; KAYAH_LI
3781             0xA930,   // A930..A95F; REJANG
3782             0xA960,   // A960..A97F; HANGUL
3783             0xA980,   // A980..A9FF; JAVANESE
3784             0xAA00,   // AA00..AA5F; CHAM
3785             0xAA60,   // AA60..AA7F; MYANMAR
3786             0xAA80,   // AA80..AB00; TAI_VIET
3787             0xAB01,   // AB01..ABBF; ETHIOPIC
3788             0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
3789             0xAC00,   // AC00..D7FB; HANGUL
3790             0xD7FC,   // D7FC..F8FF; UNKNOWN
3791             0xF900,   // F900..FAFF; HAN
3792             0xFB00,   // FB00..FB12; LATIN
3793             0xFB13,   // FB13..FB1C; ARMENIAN
3794             0xFB1D,   // FB1D..FB4F; HEBREW
3795             0xFB50,   // FB50..FD3D; ARABIC
3796             0xFD3E,   // FD3E..FD4F; COMMON
3797             0xFD50,   // FD50..FDFC; ARABIC
3798             0xFDFD,   // FDFD..FDFF; COMMON
3799             0xFE00,   // FE00..FE0F; INHERITED
3800             0xFE10,   // FE10..FE1F; COMMON
3801             0xFE20,   // FE20..FE2F; INHERITED
3802             0xFE30,   // FE30..FE6F; COMMON
3803             0xFE70,   // FE70..FEFE; ARABIC
3804             0xFEFF,   // FEFF..FF20; COMMON
3805             0xFF21,   // FF21..FF3A; LATIN
3806             0xFF3B,   // FF3B..FF40; COMMON
3807             0xFF41,   // FF41..FF5A; LATIN
3808             0xFF5B,   // FF5B..FF65; COMMON
3809             0xFF66,   // FF66..FF6F; KATAKANA
3810             0xFF70,   // FF70..FF70; COMMON
3811             0xFF71,   // FF71..FF9D; KATAKANA
3812             0xFF9E,   // FF9E..FF9F; COMMON
3813             0xFFA0,   // FFA0..FFDF; HANGUL
3814             0xFFE0,   // FFE0..FFFF; COMMON
3815             0x10000,  // 10000..100FF; LINEAR_B
3816             0x10100,  // 10100..1013F; COMMON
3817             0x10140,  // 10140..1018F; GREEK
3818             0x10190,  // 10190..101FC; COMMON
3819             0x101FD,  // 101FD..1027F; INHERITED
3820             0x10280,  // 10280..1029F; LYCIAN
3821             0x102A0,  // 102A0..102FF; CARIAN
3822             0x10300,  // 10300..1032F; OLD_ITALIC
3823             0x10330,  // 10330..1037F; GOTHIC
3824             0x10380,  // 10380..1039F; UGARITIC
3825             0x103A0,  // 103A0..103FF; OLD_PERSIAN
3826             0x10400,  // 10400..1044F; DESERET
3827             0x10450,  // 10450..1047F; SHAVIAN
3828             0x10480,  // 10480..107FF; OSMANYA
3829             0x10800,  // 10800..1083F; CYPRIOT
3830             0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
3831             0x10900,  // 10900..1091F; PHOENICIAN
3832             0x10920,  // 10920..109FF; LYDIAN
3833             0x10A00,  // 10A00..10A5F; KHAROSHTHI
3834             0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
3835             0x10B00,  // 10B00..10B3F; AVESTAN
3836             0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
3837             0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
3838             0x10C00,  // 10C00..10E5F; OLD_TURKIC
3839             0x10E60,  // 10E60..10FFF; ARABIC
3840             0x11000,  // 11000..1107F; BRAHMI
3841             0x11080,  // 11080..11FFF; KAITHI
3842             0x12000,  // 12000..12FFF; CUNEIFORM
3843             0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
3844             0x16800,  // 16800..16A38; BAMUM  (NOTE(review): 16A39..1AFFF is not named in any range comment)
3845             0x1B000,  // 1B000..1B000; KATAKANA
3846             0x1B001,  // 1B001..1CFFF; HIRAGANA
3847             0x1D000,  // 1D000..1D166; COMMON
3848             0x1D167,  // 1D167..1D169; INHERITED
3849             0x1D16A,  // 1D16A..1D17A; COMMON
3850             0x1D17B,  // 1D17B..1D182; INHERITED
3851             0x1D183,  // 1D183..1D184; COMMON
3852             0x1D185,  // 1D185..1D18B; INHERITED
3853             0x1D18C,  // 1D18C..1D1A9; COMMON
3854             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
3855             0x1D1AE,  // 1D1AE..1D1FF; COMMON
3856             0x1D200,  // 1D200..1D2FF; GREEK
3857             0x1D300,  // 1D300..1F1FF; COMMON
3858             0x1F200,  // 1F200..1F200; HIRAGANA
3859             0x1F201,  // 1F201..1FFFF; COMMON
3860             0x20000,  // 20000..E0000; HAN
3861             0xE0001,  // E0001..E00FF; COMMON
3862             0xE0100,  // E0100..E01EF; INHERITED
3863             0xE01F0   // E01F0..10FFFF; UNKNOWN
3864 
3865         };
3866 
3867         private static final UnicodeScript[] scripts = {
                 // Script of each range, in the same order as scriptStarts[]:
                 // the entry at index i here is the script named in the trailing
                 // comment of entry i of scriptStarts[]. The two tables have the
                 // same number of entries and are related only by position, so an
                 // entry added to or removed from one needs the matching change
                 // at the same index in the other.
3868             COMMON,
3869             LATIN,
3870             COMMON,
3871             LATIN,
3872             COMMON,
3873             LATIN,
3874             COMMON,
3875             LATIN,
3876             COMMON,
3877             LATIN,
3878             COMMON,
3879             LATIN,
3880             COMMON,
3881             LATIN,
3882             COMMON,
3883             LATIN,
3884             COMMON,
3885             BOPOMOFO,
3886             COMMON,
3887             INHERITED,
3888             GREEK,
3889             COMMON,
3890             GREEK,
3891             COMMON,
3892             GREEK,
3893             COMMON,
3894             GREEK,
3895             COMMON,
3896             GREEK,
3897             COPTIC,
3898             GREEK,
3899             CYRILLIC,
3900             INHERITED,
3901             CYRILLIC,
3902             ARMENIAN,
3903             COMMON,
3904             ARMENIAN,
3905             HEBREW,
3906             ARABIC,
3907             COMMON,
3908             ARABIC,
3909             COMMON,
3910             ARABIC,
3911             COMMON,
3912             ARABIC,
3913             COMMON,
3914             ARABIC,
3915             INHERITED,
3916             ARABIC,
3917             INHERITED,
3918             COMMON,
3919             ARABIC,
3920             INHERITED,
3921             ARABIC,
3922             COMMON,
3923             ARABIC,
3924             SYRIAC,
3925             ARABIC,
3926             THAANA,
3927             NKO,
3928             SAMARITAN,
3929             MANDAIC,
3930             DEVANAGARI,
3931             INHERITED,
3932             DEVANAGARI,
3933             COMMON,
3934             DEVANAGARI,
3935             COMMON,
3936             DEVANAGARI,
3937             BENGALI,
3938             GURMUKHI,
3939             GUJARATI,
3940             ORIYA,
3941             TAMIL,
3942             TELUGU,
3943             KANNADA,
3944             MALAYALAM,
3945             SINHALA,
3946             THAI,
3947             COMMON,
3948             THAI,
3949             LAO,
3950             TIBETAN,
3951             COMMON,
3952             TIBETAN,
3953             MYANMAR,
3954             GEORGIAN,
3955             COMMON,
3956             GEORGIAN,
3957             HANGUL,
3958             ETHIOPIC,
3959             CHEROKEE,
3960             CANADIAN_ABORIGINAL,
3961             OGHAM,
3962             RUNIC,
3963             COMMON,
3964             RUNIC,
3965             TAGALOG,
3966             HANUNOO,
3967             COMMON,
3968             BUHID,
3969             TAGBANWA,
3970             KHMER,
3971             MONGOLIAN,
3972             COMMON,
3973             MONGOLIAN,
3974             COMMON,
3975             MONGOLIAN,
3976             CANADIAN_ABORIGINAL,
3977             LIMBU,
3978             TAI_LE,
3979             NEW_TAI_LUE,
3980             KHMER,
3981             BUGINESE,
3982             TAI_THAM,
3983             BALINESE,
3984             SUNDANESE,
3985             BATAK,
3986             LEPCHA,
3987             OL_CHIKI,
3988             INHERITED,
3989             COMMON,
3990             INHERITED,
3991             COMMON,
3992             INHERITED,
3993             COMMON,
3994             INHERITED,
3995             COMMON,
3996             LATIN,
3997             GREEK,
3998             CYRILLIC,
3999             LATIN,
4000             GREEK,
4001             LATIN,
4002             GREEK,
4003             LATIN,
4004             CYRILLIC,
4005             LATIN,
4006             GREEK,
4007             INHERITED,
4008             LATIN,
4009             GREEK,
4010             COMMON,
4011             INHERITED,
4012             COMMON,
4013             LATIN,
4014             COMMON,
4015             LATIN,
4016             COMMON,
4017             LATIN,
4018             COMMON,
4019             INHERITED,
4020             COMMON,
4021             GREEK,
4022             COMMON,
4023             LATIN,
4024             COMMON,
4025             LATIN,
4026             COMMON,
4027             LATIN,
4028             COMMON,
4029             LATIN,
4030             COMMON,
4031             BRAILLE,
4032             COMMON,
4033             GLAGOLITIC,
4034             LATIN,
4035             COPTIC,
4036             GEORGIAN,
4037             TIFINAGH,
4038             ETHIOPIC,
4039             CYRILLIC,
4040             COMMON,
4041             HAN,
4042             COMMON,
4043             HAN,
4044             COMMON,
4045             HAN,
4046             COMMON,
4047             HAN,
4048             INHERITED,
4049             HANGUL,
4050             COMMON,
4051             HAN,
4052             COMMON,
4053             HIRAGANA,
4054             INHERITED,
4055             COMMON,
4056             HIRAGANA,
4057             COMMON,
4058             KATAKANA,
4059             COMMON,
4060             KATAKANA,
4061             BOPOMOFO,
4062             HANGUL,
4063             COMMON,
4064             BOPOMOFO,
4065             COMMON,
4066             KATAKANA,
4067             HANGUL,
4068             COMMON,
4069             HANGUL,
4070             COMMON,
4071             KATAKANA,
4072             COMMON,
4073             HAN,
4074             COMMON,
4075             HAN,
4076             YI,
4077             LISU,
4078             VAI,
4079             CYRILLIC,
4080             BAMUM,
4081             COMMON,
4082             LATIN,
4083             COMMON,
4084             LATIN,
4085             SYLOTI_NAGRI,
4086             COMMON,
4087             PHAGS_PA,
4088             SAURASHTRA,
4089             DEVANAGARI,
4090             KAYAH_LI,
4091             REJANG,
4092             HANGUL,
4093             JAVANESE,
4094             CHAM,
4095             MYANMAR,
4096             TAI_VIET,
4097             ETHIOPIC,
4098             MEETEI_MAYEK,
4099             HANGUL,
4100             UNKNOWN,
4101             HAN,
4102             LATIN,
4103             ARMENIAN,
4104             HEBREW,
4105             ARABIC,
4106             COMMON,
4107             ARABIC,
4108             COMMON,
4109             INHERITED,
4110             COMMON,
4111             INHERITED,
4112             COMMON,
4113             ARABIC,
4114             COMMON,
4115             LATIN,
4116             COMMON,
4117             LATIN,
4118             COMMON,
4119             KATAKANA,
4120             COMMON,
4121             KATAKANA,
4122             COMMON,
4123             HANGUL,
4124             COMMON,
4125             LINEAR_B,
4126             COMMON,
4127             GREEK,
4128             COMMON,
4129             INHERITED,
4130             LYCIAN,
4131             CARIAN,
4132             OLD_ITALIC,
4133             GOTHIC,
4134             UGARITIC,
4135             OLD_PERSIAN,
4136             DESERET,
4137             SHAVIAN,
4138             OSMANYA,
4139             CYPRIOT,
4140             IMPERIAL_ARAMAIC,
4141             PHOENICIAN,
4142             LYDIAN,
4143             KHAROSHTHI,
4144             OLD_SOUTH_ARABIAN,
4145             AVESTAN,
4146             INSCRIPTIONAL_PARTHIAN,
4147             INSCRIPTIONAL_PAHLAVI,
4148             OLD_TURKIC,
4149             ARABIC,
4150             BRAHMI,
4151             KAITHI,
4152             CUNEIFORM,
4153             EGYPTIAN_HIEROGLYPHS,
4154             BAMUM,
4155             KATAKANA,
4156             HIRAGANA,
4157             COMMON,
4158             INHERITED,
4159             COMMON,
4160             INHERITED,
4161             COMMON,
4162             INHERITED,
4163             COMMON,
4164             INHERITED,
4165             COMMON,
4166             GREEK,
4167             COMMON,
4168             HIRAGANA,
4169             COMMON,
4170             HAN,
4171             COMMON,
4172             INHERITED,
4173             UNKNOWN
4174         };
4175 
4176         private static HashMap<String, Character.UnicodeScript> aliases;  // short upper-case code (e.g. "ARAB") -> script constant; created and filled by the static block below
4177         static {
4178             aliases = new HashMap<>(128);
4179             aliases.put("ARAB", ARABIC);
4180             aliases.put("ARMI", IMPERIAL_ARAMAIC);
4181             aliases.put("ARMN", ARMENIAN);
4182             aliases.put("AVST", AVESTAN);
4183             aliases.put("BALI", BALINESE);
4184             aliases.put("BAMU", BAMUM);
4185             aliases.put("BATK", BATAK);
4186             aliases.put("BENG", BENGALI);
4187             aliases.put("BOPO", BOPOMOFO);
4188             aliases.put("BRAI", BRAILLE);
4189             aliases.put("BRAH", BRAHMI);
4190             aliases.put("BUGI", BUGINESE);
4191             aliases.put("BUHD", BUHID);
4192             aliases.put("CANS", CANADIAN_ABORIGINAL);
4193             aliases.put("CARI", CARIAN);
4194             aliases.put("CHAM", CHAM);
4195             aliases.put("CHER", CHEROKEE);
4196             aliases.put("COPT", COPTIC);
4197             aliases.put("CPRT", CYPRIOT);
4198             aliases.put("CYRL", CYRILLIC);
4199             aliases.put("DEVA", DEVANAGARI);
4200             aliases.put("DSRT", DESERET);
4201             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4202             aliases.put("ETHI", ETHIOPIC);
4203             aliases.put("GEOR", GEORGIAN);
4204             aliases.put("GLAG", GLAGOLITIC);
4205             aliases.put("GOTH", GOTHIC);
4206             aliases.put("GREK", GREEK);
4207             aliases.put("GUJR", GUJARATI);
4208             aliases.put("GURU", GURMUKHI);
4209             aliases.put("HANG", HANGUL);
4210             aliases.put("HANI", HAN);
4211             aliases.put("HANO", HANUNOO);
4212             aliases.put("HEBR", HEBREW);
4213             aliases.put("HIRA", HIRAGANA);
4214             // it appears we don't have the KATAKANA_OR_HIRAGANA
4215             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4216             aliases.put("ITAL", OLD_ITALIC);
4217             aliases.put("JAVA", JAVANESE);
4218             aliases.put("KALI", KAYAH_LI);
4219             aliases.put("KANA", KATAKANA);
4220             aliases.put("KHAR", KHAROSHTHI);
4221             aliases.put("KHMR", KHMER);
4222             aliases.put("KNDA", KANNADA);
4223             aliases.put("KTHI", KAITHI);
4224             aliases.put("LANA", TAI_THAM);
4225             aliases.put("LAOO", LAO);
4226             aliases.put("LATN", LATIN);
4227             aliases.put("LEPC", LEPCHA);
4228             aliases.put("LIMB", LIMBU);
4229             aliases.put("LINB", LINEAR_B);
4230             aliases.put("LISU", LISU);
4231             aliases.put("LYCI", LYCIAN);
4232             aliases.put("LYDI", LYDIAN);
4233             aliases.put("MAND", MANDAIC);
4234             aliases.put("MLYM", MALAYALAM);
4235             aliases.put("MONG", MONGOLIAN);
4236             aliases.put("MTEI", MEETEI_MAYEK);
4237             aliases.put("MYMR", MYANMAR);
4238             aliases.put("NKOO", NKO);
4239             aliases.put("OGAM", OGHAM);
4240             aliases.put("OLCK", OL_CHIKI);
4241             aliases.put("ORKH", OLD_TURKIC);
4242             aliases.put("ORYA", ORIYA);
4243             aliases.put("OSMA", OSMANYA);
4244             aliases.put("PHAG", PHAGS_PA);
4245             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4246             aliases.put("PHNX", PHOENICIAN);
4247             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4248             aliases.put("RJNG", REJANG);
4249             aliases.put("RUNR", RUNIC);
4250             aliases.put("SAMR", SAMARITAN);
4251             aliases.put("SARB", OLD_SOUTH_ARABIAN);
4252             aliases.put("SAUR", SAURASHTRA);
4253             aliases.put("SHAW", SHAVIAN);
4254             aliases.put("SINH", SINHALA);
4255             aliases.put("SUND", SUNDANESE);
4256             aliases.put("SYLO", SYLOTI_NAGRI);
4257             aliases.put("SYRC", SYRIAC);
4258             aliases.put("TAGB", TAGBANWA);
4259             aliases.put("TALE", TAI_LE);
4260             aliases.put("TALU", NEW_TAI_LUE);
4261             aliases.put("TAML", TAMIL);
4262             aliases.put("TAVT", TAI_VIET);
4263             aliases.put("TELU", TELUGU);
4264             aliases.put("TFNG", TIFINAGH);
4265             aliases.put("TGLG", TAGALOG);
4266             aliases.put("THAA", THAANA);
4267             aliases.put("THAI", THAI);
4268             aliases.put("TIBT", TIBETAN);
4269             aliases.put("UGAR", UGARITIC);
4270             aliases.put("VAII", VAI);
4271             aliases.put("XPEO", OLD_PERSIAN);
4272             aliases.put("XSUX", CUNEIFORM);
4273             aliases.put("YIII", YI);
4274             aliases.put("ZINH", INHERITED);
4275             aliases.put("ZYYY", COMMON);
4276             aliases.put("ZZZZ", UNKNOWN);
4277         }
4278 
4279         /**
         * Returns the enum constant representing the Unicode script to which
         * the given character (Unicode code point) is assigned.
         *
         * @param   codePoint the character (Unicode code point) in question.
         * @return  The {@code UnicodeScript} constant representing the
         *          Unicode script to which this character is assigned.
4286          *
4287          * @exception IllegalArgumentException if the specified
4288          * {@code codePoint} is an invalid Unicode code point.
4289          * @see Character#isValidCodePoint(int)
4290          *
4291          */
4292         public static UnicodeScript of(int codePoint) {
4293             if (!isValidCodePoint(codePoint))
4294                 throw new IllegalArgumentException();
4295             int type = getType(codePoint);
4296             // leave SURROGATE and PRIVATE_USE for table lookup
4297             if (type == UNASSIGNED)
4298                 return UNKNOWN;
4299             int index = Arrays.binarySearch(scriptStarts, codePoint);
4300             if (index < 0)
4301                 index = -index - 2;
4302             return scripts[index];
4303         }
4304 
4305         /**
4306          * Returns the UnicodeScript constant with the given Unicode script
4307          * name or the script name alias. Script names and their aliases are
4308          * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4309          * and PropertyValueAliases&lt;version&gt;.txt define script names
4310          * and the script name aliases for a particular version of the
4311          * standard. The {@link Character} class specifies the version of
4312          * the standard that it supports.
4313          * <p>
4314          * Character case is ignored for all of the valid script names.
4315          * The en_US locale's case mapping rules are used to provide
4316          * case-insensitive string comparisons for script name validation.
         *
4319          * @param scriptName A {@code UnicodeScript} name.
4320          * @return The {@code UnicodeScript} constant identified
4321          *         by {@code scriptName}
4322          * @throws IllegalArgumentException if {@code scriptName} is an
4323          *         invalid name
4324          * @throws NullPointerException if {@code scriptName} is null
4325          */
4326         public static final UnicodeScript forName(String scriptName) {
4327             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4328                                  //.replace(' ', '_'));
4329             UnicodeScript sc = aliases.get(scriptName);
4330             if (sc != null)
4331                 return sc;
4332             return valueOf(scriptName);
4333         }
4334     }
4335 
4336     /**
4337      * The value of the {@code Character}.
4338      *
4339      * @serial
4340      */
    private final char value; // final: assigned once by the constructor

    /** use serialVersionUID from JDK 1.0.2 for interoperability */
    private static final long serialVersionUID = 3786198910865385080L; // fixed literal; must not be regenerated
4345 
4346     /**
4347      * Constructs a newly allocated {@code Character} object that
4348      * represents the specified {@code char} value.
4349      *
4350      * @param  value   the value to be represented by the
4351      *                  {@code Character} object.
4352      */
    public Character(char value) {
        // Store the wrapped primitive; the field is final, so it cannot
        // change after construction.
        this.value = value;
    }
4356 
4357     private static class CharacterCache {
4358         private CharacterCache(){}
4359 
4360         static final Character cache[] = new Character[127 + 1];
4361 
4362         static {
4363             for (int i = 0; i < cache.length; i++)
4364                 cache[i] = new Character((char)i);
4365         }
4366     }
4367 
4368     /**
4369      * Returns a <tt>Character</tt> instance representing the specified
4370      * <tt>char</tt> value.
4371      * If a new <tt>Character</tt> instance is not required, this method
4372      * should generally be used in preference to the constructor
4373      * {@link #Character(char)}, as this method is likely to yield
4374      * significantly better space and time performance by caching
4375      * frequently requested values.
4376      *
4377      * This method will always cache values in the range {@code
4378      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
4379      * cache other values outside of this range.
4380      *
4381      * @param  c a char value.
4382      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
4383      * @since  1.5
4384      */
4385     public static Character valueOf(char c) {
4386         if (c <= 127) { // must cache
4387             return CharacterCache.cache[(int)c];
4388         }
4389         return new Character(c);
4390     }
4391 
4392     /**
4393      * Returns the value of this {@code Character} object.
4394      * @return  the primitive {@code char} value represented by
4395      *          this object.
4396      */
    public char charValue() {
        // Plain accessor: returns the wrapped primitive unchanged.
        return value;
    }
4400 
4401     /**
4402      * Returns a hash code for this {@code Character}; equal to the result
4403      * of invoking {@code charValue()}.
4404      *
4405      * @return a hash code value for this {@code Character}
4406      */
4407     public int hashCode() {
4408         return (int)value;
4409     }
4410 
4411     /**
4412      * Returns a hash code for a {@code char} value; compatible with
4413      * {@code Character.hashCode()}.
     *
     * @since 1.8
     *
     * @param value the {@code char} for which to return a hash code.
     * @return a hash code value for a {@code char} value.
4418      */
4419     public static int hashCode(char value) {
4420         return (int)value;
4421     }
4422 
4423     /**
4424      * Compares this object against the specified object.
4425      * The result is {@code true} if and only if the argument is not
4426      * {@code null} and is a {@code Character} object that
4427      * represents the same {@code char} value as this object.
4428      *
4429      * @param   obj   the object to compare with.
4430      * @return  {@code true} if the objects are the same;
4431      *          {@code false} otherwise.
4432      */
4433     public boolean equals(Object obj) {
4434         if (obj instanceof Character) {
4435             return value == ((Character)obj).charValue();
4436         }
4437         return false;
4438     }
4439 
4440     /**
4441      * Returns a {@code String} object representing this
4442      * {@code Character}'s value.  The result is a string of
4443      * length 1 whose sole component is the primitive
4444      * {@code char} value represented by this
4445      * {@code Character} object.
4446      *
4447      * @return  a string representation of this object.
4448      */
4449     public String toString() {
4450         char buf[] = {value};
4451         return String.valueOf(buf);
4452     }
4453 
4454     /**
4455      * Returns a {@code String} object representing the
4456      * specified {@code char}.  The result is a string of length
4457      * 1 consisting solely of the specified {@code char}.
4458      *
4459      * @param c the {@code char} to be converted
4460      * @return the string representation of the specified {@code char}
4461      * @since 1.4
4462      */
4463     public static String toString(char c) {
4464         return String.valueOf(c);
4465     }
4466 
4467     /**
4468      * Determines whether the specified code point is a valid
4469      * <a href="http://www.unicode.org/glossary/#code_point">
4470      * Unicode code point value</a>.
4471      *
4472      * @param  codePoint the Unicode code point to be tested
4473      * @return {@code true} if the specified code point value is between
4474      *         {@link #MIN_CODE_POINT} and
4475      *         {@link #MAX_CODE_POINT} inclusive;
4476      *         {@code false} otherwise.
4477      * @since  1.5
4478      */
4479     public static boolean isValidCodePoint(int codePoint) {
4480         // Optimized form of:
4481         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4482         int plane = codePoint >>> 16;
4483         return plane < ((MAX_CODE_POINT + 1) >>> 16);
4484     }
4485 
4486     /**
4487      * Determines whether the specified character (Unicode code point)
4488      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4489      * Such code points can be represented using a single {@code char}.
4490      *
4491      * @param  codePoint the character (Unicode code point) to be tested
4492      * @return {@code true} if the specified code point is between
4493      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4494      *         {@code false} otherwise.
4495      * @since  1.7
4496      */
4497     public static boolean isBmpCodePoint(int codePoint) {
4498         return codePoint >>> 16 == 0;
4499         // Optimized form of:
4500         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4501         // We consistently use logical shift (>>>) to facilitate
4502         // additional runtime optimizations.
4503     }
4504 
4505     /**
4506      * Determines whether the specified character (Unicode code point)
4507      * is in the <a href="#supplementary">supplementary character</a> range.
4508      *
4509      * @param  codePoint the character (Unicode code point) to be tested
4510      * @return {@code true} if the specified code point is between
4511      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4512      *         {@link #MAX_CODE_POINT} inclusive;
4513      *         {@code false} otherwise.
4514      * @since  1.5
4515      */
4516     public static boolean isSupplementaryCodePoint(int codePoint) {
4517         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4518             && codePoint <  MAX_CODE_POINT + 1;
4519     }
4520 
4521     /**
4522      * Determines if the given {@code char} value is a
4523      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4524      * Unicode high-surrogate code unit</a>
4525      * (also known as <i>leading-surrogate code unit</i>).
4526      *
4527      * <p>Such values do not represent characters by themselves,
4528      * but are used in the representation of
4529      * <a href="#supplementary">supplementary characters</a>
4530      * in the UTF-16 encoding.
4531      *
4532      * @param  ch the {@code char} value to be tested.
4533      * @return {@code true} if the {@code char} value is between
4534      *         {@link #MIN_HIGH_SURROGATE} and
4535      *         {@link #MAX_HIGH_SURROGATE} inclusive;
4536      *         {@code false} otherwise.
4537      * @see    Character#isLowSurrogate(char)
4538      * @see    Character.UnicodeBlock#of(int)
4539      * @since  1.5
4540      */
4541     public static boolean isHighSurrogate(char ch) {
4542         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4543         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4544     }
4545 
4546     /**
4547      * Determines if the given {@code char} value is a
4548      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4549      * Unicode low-surrogate code unit</a>
4550      * (also known as <i>trailing-surrogate code unit</i>).
4551      *
4552      * <p>Such values do not represent characters by themselves,
4553      * but are used in the representation of
4554      * <a href="#supplementary">supplementary characters</a>
4555      * in the UTF-16 encoding.
4556      *
4557      * @param  ch the {@code char} value to be tested.
4558      * @return {@code true} if the {@code char} value is between
4559      *         {@link #MIN_LOW_SURROGATE} and
4560      *         {@link #MAX_LOW_SURROGATE} inclusive;
4561      *         {@code false} otherwise.
4562      * @see    Character#isHighSurrogate(char)
4563      * @since  1.5
4564      */
4565     public static boolean isLowSurrogate(char ch) {
4566         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4567     }
4568 
4569     /**
4570      * Determines if the given {@code char} value is a Unicode
4571      * <i>surrogate code unit</i>.
4572      *
4573      * <p>Such values do not represent characters by themselves,
4574      * but are used in the representation of
4575      * <a href="#supplementary">supplementary characters</a>
4576      * in the UTF-16 encoding.
4577      *
4578      * <p>A char value is a surrogate code unit if and only if it is either
4579      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4580      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4581      *
4582      * @param  ch the {@code char} value to be tested.
4583      * @return {@code true} if the {@code char} value is between
4584      *         {@link #MIN_SURROGATE} and
4585      *         {@link #MAX_SURROGATE} inclusive;
4586      *         {@code false} otherwise.
4587      * @since  1.7
4588      */
4589     public static boolean isSurrogate(char ch) {
4590         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4591     }
4592 
4593     /**
4594      * Determines whether the specified pair of {@code char}
4595      * values is a valid
4596      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4597      * Unicode surrogate pair</a>.
     *
     * <p>This method is equivalent to the expression:
     * <blockquote><pre>
     * isHighSurrogate(high) &amp;&amp; isLowSurrogate(low)
4602      * </pre></blockquote>
4603      *
4604      * @param  high the high-surrogate code value to be tested
4605      * @param  low the low-surrogate code value to be tested
4606      * @return {@code true} if the specified high and
4607      * low-surrogate code values represent a valid surrogate pair;
4608      * {@code false} otherwise.
4609      * @since  1.5
4610      */
4611     public static boolean isSurrogatePair(char high, char low) {
4612         return isHighSurrogate(high) && isLowSurrogate(low);
4613     }
4614 
4615     /**
4616      * Determines the number of {@code char} values needed to
4617      * represent the specified character (Unicode code point). If the
4618      * specified character is equal to or greater than 0x10000, then
4619      * the method returns 2. Otherwise, the method returns 1.
4620      *
4621      * <p>This method doesn't validate the specified character to be a
4622      * valid Unicode code point. The caller must validate the
4623      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
4624      * if necessary.
4625      *
4626      * @param   codePoint the character (Unicode code point) to be tested.
4627      * @return  2 if the character is a valid supplementary character; 1 otherwise.
4628      * @see     Character#isSupplementaryCodePoint(int)
4629      * @since   1.5
4630      */
4631     public static int charCount(int codePoint) {
4632         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4633     }
4634 
4635     /**
4636      * Converts the specified surrogate pair to its supplementary code
4637      * point value. This method does not validate the specified
4638      * surrogate pair. The caller must validate it using {@link
4639      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4640      *
4641      * @param  high the high-surrogate code unit
4642      * @param  low the low-surrogate code unit
4643      * @return the supplementary code point composed from the
4644      *         specified surrogate pair.
4645      * @since  1.5
4646      */
4647     public static int toCodePoint(char high, char low) {
4648         // Optimized form of:
4649         // return ((high - MIN_HIGH_SURROGATE) << 10)
4650         //         + (low - MIN_LOW_SURROGATE)
4651         //         + MIN_SUPPLEMENTARY_CODE_POINT;
4652         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4653                                        - (MIN_HIGH_SURROGATE << 10)
4654                                        - MIN_LOW_SURROGATE);
4655     }
4656 
4657     /**
4658      * Returns the code point at the given index of the
4659      * {@code CharSequence}. If the {@code char} value at
4660      * the given index in the {@code CharSequence} is in the
4661      * high-surrogate range, the following index is less than the
4662      * length of the {@code CharSequence}, and the
4663      * {@code char} value at the following index is in the
4664      * low-surrogate range, then the supplementary code point
4665      * corresponding to this surrogate pair is returned. Otherwise,
4666      * the {@code char} value at the given index is returned.
4667      *
4668      * @param seq a sequence of {@code char} values (Unicode code
4669      * units)
4670      * @param index the index to the {@code char} values (Unicode
4671      * code units) in {@code seq} to be converted
4672      * @return the Unicode code point at the given index
4673      * @exception NullPointerException if {@code seq} is null.
4674      * @exception IndexOutOfBoundsException if the value
4675      * {@code index} is negative or not less than
4676      * {@link CharSequence#length() seq.length()}.
4677      * @since  1.5
4678      */
4679     public static int codePointAt(CharSequence seq, int index) {
4680         char c1 = seq.charAt(index++);
4681         if (isHighSurrogate(c1)) {
4682             if (index < seq.length()) {
4683                 char c2 = seq.charAt(index);
4684                 if (isLowSurrogate(c2)) {
4685                     return toCodePoint(c1, c2);
4686                 }
4687             }
4688         }
4689         return c1;
4690     }
4691 
4692     /**
4693      * Returns the code point at the given index of the
4694      * {@code char} array. If the {@code char} value at
4695      * the given index in the {@code char} array is in the
4696      * high-surrogate range, the following index is less than the
4697      * length of the {@code char} array, and the
4698      * {@code char} value at the following index is in the
4699      * low-surrogate range, then the supplementary code point
4700      * corresponding to this surrogate pair is returned. Otherwise,
4701      * the {@code char} value at the given index is returned.
4702      *
4703      * @param a the {@code char} array
4704      * @param index the index to the {@code char} values (Unicode
4705      * code units) in the {@code char} array to be converted
4706      * @return the Unicode code point at the given index
4707      * @exception NullPointerException if {@code a} is null.
4708      * @exception IndexOutOfBoundsException if the value
4709      * {@code index} is negative or not less than
4710      * the length of the {@code char} array.
4711      * @since  1.5
4712      */
4713     public static int codePointAt(char[] a, int index) {
4714         return codePointAtImpl(a, index, a.length);
4715     }
4716 
4717     /**
4718      * Returns the code point at the given index of the
4719      * {@code char} array, where only array elements with
4720      * {@code index} less than {@code limit} can be used. If
4721      * the {@code char} value at the given index in the
4722      * {@code char} array is in the high-surrogate range, the
4723      * following index is less than the {@code limit}, and the
4724      * {@code char} value at the following index is in the
4725      * low-surrogate range, then the supplementary code point
4726      * corresponding to this surrogate pair is returned. Otherwise,
4727      * the {@code char} value at the given index is returned.
4728      *
4729      * @param a the {@code char} array
4730      * @param index the index to the {@code char} values (Unicode
4731      * code units) in the {@code char} array to be converted
4732      * @param limit the index after the last array element that
4733      * can be used in the {@code char} array
4734      * @return the Unicode code point at the given index
4735      * @exception NullPointerException if {@code a} is null.
4736      * @exception IndexOutOfBoundsException if the {@code index}
4737      * argument is negative or not less than the {@code limit}
4738      * argument, or if the {@code limit} argument is negative or
4739      * greater than the length of the {@code char} array.
4740      * @since  1.5
4741      */
4742     public static int codePointAt(char[] a, int index, int limit) {
4743         if (index >= limit || limit < 0 || limit > a.length) {
4744             throw new IndexOutOfBoundsException();
4745         }
4746         return codePointAtImpl(a, index, limit);
4747     }
4748 
    // throws ArrayIndexOutOfBoundsException if index out of bounds
4750     static int codePointAtImpl(char[] a, int index, int limit) {
4751         char c1 = a[index++];
4752         if (isHighSurrogate(c1)) {
4753             if (index < limit) {
4754                 char c2 = a[index];
4755                 if (isLowSurrogate(c2)) {
4756                     return toCodePoint(c1, c2);
4757                 }
4758             }
4759         }
4760         return c1;
4761     }
4762 
4763     /**
4764      * Returns the code point preceding the given index of the
4765      * {@code CharSequence}. If the {@code char} value at
4766      * {@code (index - 1)} in the {@code CharSequence} is in
4767      * the low-surrogate range, {@code (index - 2)} is not
4768      * negative, and the {@code char} value at {@code (index - 2)}
4769      * in the {@code CharSequence} is in the
4770      * high-surrogate range, then the supplementary code point
4771      * corresponding to this surrogate pair is returned. Otherwise,
4772      * the {@code char} value at {@code (index - 1)} is
4773      * returned.
4774      *
4775      * @param seq the {@code CharSequence} instance
4776      * @param index the index following the code point that should be returned
4777      * @return the Unicode code point value before the given index.
4778      * @exception NullPointerException if {@code seq} is null.
4779      * @exception IndexOutOfBoundsException if the {@code index}
4780      * argument is less than 1 or greater than {@link
4781      * CharSequence#length() seq.length()}.
4782      * @since  1.5
4783      */
4784     public static int codePointBefore(CharSequence seq, int index) {
4785         char c2 = seq.charAt(--index);
4786         if (isLowSurrogate(c2)) {
4787             if (index > 0) {
4788                 char c1 = seq.charAt(--index);
4789                 if (isHighSurrogate(c1)) {
4790                     return toCodePoint(c1, c2);
4791                 }
4792             }
4793         }
4794         return c2;
4795     }
4796 
4797     /**
4798      * Returns the code point preceding the given index of the
4799      * {@code char} array. If the {@code char} value at
4800      * {@code (index - 1)} in the {@code char} array is in
4801      * the low-surrogate range, {@code (index - 2)} is not
4802      * negative, and the {@code char} value at {@code (index - 2)}
4803      * in the {@code char} array is in the
4804      * high-surrogate range, then the supplementary code point
4805      * corresponding to this surrogate pair is returned. Otherwise,
4806      * the {@code char} value at {@code (index - 1)} is
4807      * returned.
4808      *
4809      * @param a the {@code char} array
4810      * @param index the index following the code point that should be returned
4811      * @return the Unicode code point value before the given index.
4812      * @exception NullPointerException if {@code a} is null.
4813      * @exception IndexOutOfBoundsException if the {@code index}
4814      * argument is less than 1 or greater than the length of the
4815      * {@code char} array
4816      * @since  1.5
4817      */
4818     public static int codePointBefore(char[] a, int index) {
4819         return codePointBeforeImpl(a, index, 0);
4820     }
4821 
4822     /**
4823      * Returns the code point preceding the given index of the
4824      * {@code char} array, where only array elements with
4825      * {@code index} greater than or equal to {@code start}
4826      * can be used. If the {@code char} value at {@code (index - 1)}
4827      * in the {@code char} array is in the
4828      * low-surrogate range, {@code (index - 2)} is not less than
4829      * {@code start}, and the {@code char} value at
4830      * {@code (index - 2)} in the {@code char} array is in
4831      * the high-surrogate range, then the supplementary code point
4832      * corresponding to this surrogate pair is returned. Otherwise,
4833      * the {@code char} value at {@code (index - 1)} is
4834      * returned.
4835      *
4836      * @param a the {@code char} array
4837      * @param index the index following the code point that should be returned
4838      * @param start the index of the first array element in the
4839      * {@code char} array
4840      * @return the Unicode code point value before the given index.
4841      * @exception NullPointerException if {@code a} is null.
4842      * @exception IndexOutOfBoundsException if the {@code index}
4843      * argument is not greater than the {@code start} argument or
4844      * is greater than the length of the {@code char} array, or
4845      * if the {@code start} argument is negative or not less than
4846      * the length of the {@code char} array.
4847      * @since  1.5
4848      */
4849     public static int codePointBefore(char[] a, int index, int start) {
4850         if (index <= start || start < 0 || start >= a.length) {
4851             throw new IndexOutOfBoundsException();
4852         }
4853         return codePointBeforeImpl(a, index, start);
4854     }
4855 
    // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
4857     static int codePointBeforeImpl(char[] a, int index, int start) {
4858         char c2 = a[--index];
4859         if (isLowSurrogate(c2)) {
4860             if (index > start) {
4861                 char c1 = a[--index];
4862                 if (isHighSurrogate(c1)) {
4863                     return toCodePoint(c1, c2);
4864                 }
4865             }
4866         }
4867         return c2;
4868     }
4869 
4870     /**
4871      * Returns the leading surrogate (a
4872      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4873      * high surrogate code unit</a>) of the
4874      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4875      * surrogate pair</a>
4876      * representing the specified supplementary character (Unicode
4877      * code point) in the UTF-16 encoding.  If the specified character
4878      * is not a
4879      * <a href="Character.html#supplementary">supplementary character</a>,
4880      * an unspecified {@code char} is returned.
4881      *
4882      * <p>If
4883      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
4884      * is {@code true}, then
4885      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
4886      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
4887      * are also always {@code true}.
4888      *
4889      * @param   codePoint a supplementary character (Unicode code point)
4890      * @return  the leading surrogate code unit used to represent the
4891      *          character in the UTF-16 encoding
4892      * @since   1.7
4893      */
4894     public static char highSurrogate(int codePoint) {
4895         return (char) ((codePoint >>> 10)
4896             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
4897     }
4898 
4899     /**
4900      * Returns the trailing surrogate (a
4901      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4902      * low surrogate code unit</a>) of the
4903      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4904      * surrogate pair</a>
4905      * representing the specified supplementary character (Unicode
4906      * code point) in the UTF-16 encoding.  If the specified character
4907      * is not a
4908      * <a href="Character.html#supplementary">supplementary character</a>,
4909      * an unspecified {@code char} is returned.
4910      *
4911      * <p>If
4912      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
4913      * is {@code true}, then
4914      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
4915      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
4916      * are also always {@code true}.
4917      *
4918      * @param   codePoint a supplementary character (Unicode code point)
4919      * @return  the trailing surrogate code unit used to represent the
4920      *          character in the UTF-16 encoding
4921      * @since   1.7
4922      */
4923     public static char lowSurrogate(int codePoint) {
4924         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
4925     }
4926 
4927     /**
4928      * Converts the specified character (Unicode code point) to its
4929      * UTF-16 representation. If the specified code point is a BMP
4930      * (Basic Multilingual Plane or Plane 0) value, the same value is
4931      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
4932      * specified code point is a supplementary character, its
4933      * surrogate values are stored in {@code dst[dstIndex]}
4934      * (high-surrogate) and {@code dst[dstIndex+1]}
4935      * (low-surrogate), and 2 is returned.
4936      *
4937      * @param  codePoint the character (Unicode code point) to be converted.
4938      * @param  dst an array of {@code char} in which the
4939      * {@code codePoint}'s UTF-16 value is stored.
4940      * @param dstIndex the start index into the {@code dst}
4941      * array where the converted value is stored.
4942      * @return 1 if the code point is a BMP code point, 2 if the
4943      * code point is a supplementary code point.
4944      * @exception IllegalArgumentException if the specified
4945      * {@code codePoint} is not a valid Unicode code point.
4946      * @exception NullPointerException if the specified {@code dst} is null.
4947      * @exception IndexOutOfBoundsException if {@code dstIndex}
4948      * is negative or not less than {@code dst.length}, or if
4949      * {@code dst} at {@code dstIndex} doesn't have enough
4950      * array element(s) to store the resulting {@code char}
4951      * value(s). (If {@code dstIndex} is equal to
4952      * {@code dst.length-1} and the specified
4953      * {@code codePoint} is a supplementary character, the
4954      * high-surrogate value is not stored in
4955      * {@code dst[dstIndex]}.)
4956      * @since  1.5
4957      */
4958     public static int toChars(int codePoint, char[] dst, int dstIndex) {
4959         if (isBmpCodePoint(codePoint)) {
4960             dst[dstIndex] = (char) codePoint;
4961             return 1;
4962         } else if (isValidCodePoint(codePoint)) {
4963             toSurrogates(codePoint, dst, dstIndex);
4964             return 2;
4965         } else {
4966             throw new IllegalArgumentException();
4967         }
4968     }
4969 
4970     /**
4971      * Converts the specified character (Unicode code point) to its
4972      * UTF-16 representation stored in a {@code char} array. If
4973      * the specified code point is a BMP (Basic Multilingual Plane or
4974      * Plane 0) value, the resulting {@code char} array has
4975      * the same value as {@code codePoint}. If the specified code
4976      * point is a supplementary code point, the resulting
4977      * {@code char} array has the corresponding surrogate pair.
4978      *
4979      * @param  codePoint a Unicode code point
4980      * @return a {@code char} array having
4981      *         {@code codePoint}'s UTF-16 representation.
4982      * @exception IllegalArgumentException if the specified
4983      * {@code codePoint} is not a valid Unicode code point.
4984      * @since  1.5
4985      */
4986     public static char[] toChars(int codePoint) {
4987         if (isBmpCodePoint(codePoint)) {
4988             return new char[] { (char) codePoint };
4989         } else if (isValidCodePoint(codePoint)) {
4990             char[] result = new char[2];
4991             toSurrogates(codePoint, result, 0);
4992             return result;
4993         } else {
4994             throw new IllegalArgumentException();
4995         }
4996     }
4997 
4998     static void toSurrogates(int codePoint, char[] dst, int index) {
4999         // We write elements "backwards" to guarantee all-or-nothing
5000         dst[index+1] = lowSurrogate(codePoint);
5001         dst[index] = highSurrogate(codePoint);
5002     }
5003 
5004     /**
5005      * Returns the number of Unicode code points in the text range of
5006      * the specified char sequence. The text range begins at the
5007      * specified {@code beginIndex} and extends to the
5008      * {@code char} at index {@code endIndex - 1}. Thus the
5009      * length (in {@code char}s) of the text range is
5010      * {@code endIndex-beginIndex}. Unpaired surrogates within
5011      * the text range count as one code point each.
5012      *
5013      * @param seq the char sequence
5014      * @param beginIndex the index to the first {@code char} of
5015      * the text range.
5016      * @param endIndex the index after the last {@code char} of
5017      * the text range.
5018      * @return the number of Unicode code points in the specified text
5019      * range
5020      * @exception NullPointerException if {@code seq} is null.
5021      * @exception IndexOutOfBoundsException if the
5022      * {@code beginIndex} is negative, or {@code endIndex}
5023      * is larger than the length of the given sequence, or
5024      * {@code beginIndex} is larger than {@code endIndex}.
5025      * @since  1.5
5026      */
5027     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5028         int length = seq.length();
5029         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5030             throw new IndexOutOfBoundsException();
5031         }
5032         int n = endIndex - beginIndex;
5033         for (int i = beginIndex; i < endIndex; ) {
5034             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5035                 isLowSurrogate(seq.charAt(i))) {
5036                 n--;
5037                 i++;
5038             }
5039         }
5040         return n;
5041     }
5042 
5043     /**
5044      * Returns the number of Unicode code points in a subarray of the
5045      * {@code char} array argument. The {@code offset}
5046      * argument is the index of the first {@code char} of the
5047      * subarray and the {@code count} argument specifies the
5048      * length of the subarray in {@code char}s. Unpaired
5049      * surrogates within the subarray count as one code point each.
5050      *
5051      * @param a the {@code char} array
5052      * @param offset the index of the first {@code char} in the
5053      * given {@code char} array
5054      * @param count the length of the subarray in {@code char}s
5055      * @return the number of Unicode code points in the specified subarray
5056      * @exception NullPointerException if {@code a} is null.
5057      * @exception IndexOutOfBoundsException if {@code offset} or
5058      * {@code count} is negative, or if {@code offset +
5059      * count} is larger than the length of the given array.
5060      * @since  1.5
5061      */
5062     public static int codePointCount(char[] a, int offset, int count) {
5063         if (count > a.length - offset || offset < 0 || count < 0) {
5064             throw new IndexOutOfBoundsException();
5065         }
5066         return codePointCountImpl(a, offset, count);
5067     }
5068 
5069     static int codePointCountImpl(char[] a, int offset, int count) {
5070         int endIndex = offset + count;
5071         int n = count;
5072         for (int i = offset; i < endIndex; ) {
5073             if (isHighSurrogate(a[i++]) && i < endIndex &&
5074                 isLowSurrogate(a[i])) {
5075                 n--;
5076                 i++;
5077             }
5078         }
5079         return n;
5080     }
5081 
5082     /**
5083      * Returns the index within the given char sequence that is offset
5084      * from the given {@code index} by {@code codePointOffset}
5085      * code points. Unpaired surrogates within the text range given by
5086      * {@code index} and {@code codePointOffset} count as
5087      * one code point each.
5088      *
5089      * @param seq the char sequence
5090      * @param index the index to be offset
5091      * @param codePointOffset the offset in code points
5092      * @return the index within the char sequence
5093      * @exception NullPointerException if {@code seq} is null.
5094      * @exception IndexOutOfBoundsException if {@code index}
5095      *   is negative or larger than the length of the char sequence,
5096      *   or if {@code codePointOffset} is positive and the
5097      *   subsequence starting with {@code index} has fewer than
5098      *   {@code codePointOffset} code points, or if
5099      *   {@code codePointOffset} is negative and the subsequence
5100      *   before {@code index} has fewer than the absolute value
5101      *   of {@code codePointOffset} code points.
5102      * @since 1.5
5103      */
5104     public static int offsetByCodePoints(CharSequence seq, int index,
5105                                          int codePointOffset) {
5106         int length = seq.length();
5107         if (index < 0 || index > length) {
5108             throw new IndexOutOfBoundsException();
5109         }
5110 
5111         int x = index;
5112         if (codePointOffset >= 0) {
5113             int i;
5114             for (i = 0; x < length && i < codePointOffset; i++) {
5115                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5116                     isLowSurrogate(seq.charAt(x))) {
5117                     x++;
5118                 }
5119             }
5120             if (i < codePointOffset) {
5121                 throw new IndexOutOfBoundsException();
5122             }
5123         } else {
5124             int i;
5125             for (i = codePointOffset; x > 0 && i < 0; i++) {
5126                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5127                     isHighSurrogate(seq.charAt(x-1))) {
5128                     x--;
5129                 }
5130             }
5131             if (i < 0) {
5132                 throw new IndexOutOfBoundsException();
5133             }
5134         }
5135         return x;
5136     }
5137 
5138     /**
5139      * Returns the index within the given {@code char} subarray
5140      * that is offset from the given {@code index} by
5141      * {@code codePointOffset} code points. The
5142      * {@code start} and {@code count} arguments specify a
5143      * subarray of the {@code char} array. Unpaired surrogates
5144      * within the text range given by {@code index} and
5145      * {@code codePointOffset} count as one code point each.
5146      *
5147      * @param a the {@code char} array
5148      * @param start the index of the first {@code char} of the
5149      * subarray
5150      * @param count the length of the subarray in {@code char}s
5151      * @param index the index to be offset
5152      * @param codePointOffset the offset in code points
5153      * @return the index within the subarray
5154      * @exception NullPointerException if {@code a} is null.
5155      * @exception IndexOutOfBoundsException
5156      *   if {@code start} or {@code count} is negative,
5157      *   or if {@code start + count} is larger than the length of
5158      *   the given array,
5159      *   or if {@code index} is less than {@code start} or
5160      *   larger than {@code start + count},
5161      *   or if {@code codePointOffset} is positive and the text range
5162      *   starting with {@code index} and ending with {@code start + count - 1}
5163      *   has fewer than {@code codePointOffset} code
5164      *   points,
5165      *   or if {@code codePointOffset} is negative and the text range
5166      *   starting with {@code start} and ending with {@code index - 1}
5167      *   has fewer than the absolute value of
5168      *   {@code codePointOffset} code points.
5169      * @since 1.5
5170      */
5171     public static int offsetByCodePoints(char[] a, int start, int count,
5172                                          int index, int codePointOffset) {
5173         if (count > a.length-start || start < 0 || count < 0
5174             || index < start || index > start+count) {
5175             throw new IndexOutOfBoundsException();
5176         }
5177         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5178     }
5179 
5180     static int offsetByCodePointsImpl(char[]a, int start, int count,
5181                                       int index, int codePointOffset) {
5182         int x = index;
5183         if (codePointOffset >= 0) {
5184             int limit = start + count;
5185             int i;
5186             for (i = 0; x < limit && i < codePointOffset; i++) {
5187                 if (isHighSurrogate(a[x++]) && x < limit &&
5188                     isLowSurrogate(a[x])) {
5189                     x++;
5190                 }
5191             }
5192             if (i < codePointOffset) {
5193                 throw new IndexOutOfBoundsException();
5194             }
5195         } else {
5196             int i;
5197             for (i = codePointOffset; x > start && i < 0; i++) {
5198                 if (isLowSurrogate(a[--x]) && x > start &&
5199                     isHighSurrogate(a[x-1])) {
5200                     x--;
5201                 }
5202             }
5203             if (i < 0) {
5204                 throw new IndexOutOfBoundsException();
5205             }
5206         }
5207         return x;
5208     }
5209 
5210     /**
5211      * Determines if the specified character is a lowercase character.
5212      * <p>
5213      * A character is lowercase if its general category type, provided
5214      * by {@code Character.getType(ch)}, is
5215      * {@code LOWERCASE_LETTER}, or it has contributory property
5216      * Other_Lowercase as defined by the Unicode Standard.
5217      * <p>
5218      * The following are examples of lowercase characters:
5219      * <p><blockquote><pre>
5220      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5221      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5222      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5223      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5224      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5225      * </pre></blockquote>
5226      * <p> Many other Unicode characters are lowercase too.
5227      *
5228      * <p><b>Note:</b> This method cannot handle <a
5229      * href="#supplementary"> supplementary characters</a>. To support
5230      * all Unicode characters, including supplementary characters, use
5231      * the {@link #isLowerCase(int)} method.
5232      *
5233      * @param   ch   the character to be tested.
5234      * @return  {@code true} if the character is lowercase;
5235      *          {@code false} otherwise.
5236      * @see     Character#isLowerCase(char)
5237      * @see     Character#isTitleCase(char)
5238      * @see     Character#toLowerCase(char)
5239      * @see     Character#getType(char)
5240      */
5241     public static boolean isLowerCase(char ch) {
5242         return isLowerCase((int)ch);
5243     }
5244 
5245     /**
5246      * Determines if the specified character (Unicode code point) is a
5247      * lowercase character.
5248      * <p>
5249      * A character is lowercase if its general category type, provided
5250      * by {@link Character#getType getType(codePoint)}, is
5251      * {@code LOWERCASE_LETTER}, or it has contributory property
5252      * Other_Lowercase as defined by the Unicode Standard.
5253      * <p>
5254      * The following are examples of lowercase characters:
5255      * <p><blockquote><pre>
5256      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5257      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5258      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5259      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5260      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5261      * </pre></blockquote>
5262      * <p> Many other Unicode characters are lowercase too.
5263      *
5264      * @param   codePoint the character (Unicode code point) to be tested.
5265      * @return  {@code true} if the character is lowercase;
5266      *          {@code false} otherwise.
5267      * @see     Character#isLowerCase(int)
5268      * @see     Character#isTitleCase(int)
5269      * @see     Character#toLowerCase(int)
5270      * @see     Character#getType(int)
5271      * @since   1.5
5272      */
5273     public static boolean isLowerCase(int codePoint) {
5274         return getType(codePoint) == Character.LOWERCASE_LETTER ||
5275                CharacterData.of(codePoint).isOtherLowercase(codePoint);
5276     }
5277 
5278     /**
5279      * Determines if the specified character is an uppercase character.
5280      * <p>
5281      * A character is uppercase if its general category type, provided by
5282      * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
5283      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5284      * <p>
5285      * The following are examples of uppercase characters:
5286      * <p><blockquote><pre>
5287      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5288      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5289      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5290      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5291      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5292      * </pre></blockquote>
5293      * <p> Many other Unicode characters are uppercase too.
5294      *
5295      * <p><b>Note:</b> This method cannot handle <a
5296      * href="#supplementary"> supplementary characters</a>. To support
5297      * all Unicode characters, including supplementary characters, use
5298      * the {@link #isUpperCase(int)} method.
5299      *
5300      * @param   ch   the character to be tested.
5301      * @return  {@code true} if the character is uppercase;
5302      *          {@code false} otherwise.
5303      * @see     Character#isLowerCase(char)
5304      * @see     Character#isTitleCase(char)
5305      * @see     Character#toUpperCase(char)
5306      * @see     Character#getType(char)
5307      * @since   1.0
5308      */
5309     public static boolean isUpperCase(char ch) {
5310         return isUpperCase((int)ch);
5311     }
5312 
5313     /**
5314      * Determines if the specified character (Unicode code point) is an uppercase character.
5315      * <p>
5316      * A character is uppercase if its general category type, provided by
5317      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5318      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5319      * <p>
5320      * The following are examples of uppercase characters:
5321      * <p><blockquote><pre>
5322      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5323      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5324      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5325      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5326      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5327      * </pre></blockquote>
5328      * <p> Many other Unicode characters are uppercase too.
5329      *
5330      * @param   codePoint the character (Unicode code point) to be tested.
5331      * @return  {@code true} if the character is uppercase;
5332      *          {@code false} otherwise.
5333      * @see     Character#isLowerCase(int)
5334      * @see     Character#isTitleCase(int)
5335      * @see     Character#toUpperCase(int)
5336      * @see     Character#getType(int)
5337      * @since   1.5
5338      */
5339     public static boolean isUpperCase(int codePoint) {
5340         return getType(codePoint) == Character.UPPERCASE_LETTER ||
5341                CharacterData.of(codePoint).isOtherUppercase(codePoint);
5342     }
5343 
5344     /**
5345      * Determines if the specified character is a titlecase character.
5346      * <p>
5347      * A character is a titlecase character if its general
5348      * category type, provided by {@code Character.getType(ch)},
5349      * is {@code TITLECASE_LETTER}.
5350      * <p>
5351      * Some characters look like pairs of Latin letters. For example, there
5352      * is an uppercase letter that looks like "LJ" and has a corresponding
5353      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5354      * is the appropriate form to use when rendering a word in lowercase
5355      * with initial capitals, as for a book title.
5356      * <p>
5357      * These are some of the Unicode characters for which this method returns
5358      * {@code true}:
5359      * <ul>
5360      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5361      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5362      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5363      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5364      * </ul>
5365      * <p> Many other Unicode characters are titlecase too.
5366      *
5367      * <p><b>Note:</b> This method cannot handle <a
5368      * href="#supplementary"> supplementary characters</a>. To support
5369      * all Unicode characters, including supplementary characters, use
5370      * the {@link #isTitleCase(int)} method.
5371      *
5372      * @param   ch   the character to be tested.
5373      * @return  {@code true} if the character is titlecase;
5374      *          {@code false} otherwise.
5375      * @see     Character#isLowerCase(char)
5376      * @see     Character#isUpperCase(char)
5377      * @see     Character#toTitleCase(char)
5378      * @see     Character#getType(char)
5379      * @since   1.0.2
5380      */
5381     public static boolean isTitleCase(char ch) {
5382         return isTitleCase((int)ch);
5383     }
5384 
5385     /**
5386      * Determines if the specified character (Unicode code point) is a titlecase character.
5387      * <p>
5388      * A character is a titlecase character if its general
5389      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5390      * is {@code TITLECASE_LETTER}.
5391      * <p>
5392      * Some characters look like pairs of Latin letters. For example, there
5393      * is an uppercase letter that looks like "LJ" and has a corresponding
5394      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5395      * is the appropriate form to use when rendering a word in lowercase
5396      * with initial capitals, as for a book title.
5397      * <p>
5398      * These are some of the Unicode characters for which this method returns
5399      * {@code true}:
5400      * <ul>
5401      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5402      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5403      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5404      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5405      * </ul>
5406      * <p> Many other Unicode characters are titlecase too.
5407      *
5408      * @param   codePoint the character (Unicode code point) to be tested.
5409      * @return  {@code true} if the character is titlecase;
5410      *          {@code false} otherwise.
5411      * @see     Character#isLowerCase(int)
5412      * @see     Character#isUpperCase(int)
5413      * @see     Character#toTitleCase(int)
5414      * @see     Character#getType(int)
5415      * @since   1.5
5416      */
5417     public static boolean isTitleCase(int codePoint) {
5418         return getType(codePoint) == Character.TITLECASE_LETTER;
5419     }
5420 
5421     /**
5422      * Determines if the specified character is a digit.
5423      * <p>
5424      * A character is a digit if its general category type, provided
5425      * by {@code Character.getType(ch)}, is
5426      * {@code DECIMAL_DIGIT_NUMBER}.
5427      * <p>
5428      * Some Unicode character ranges that contain digits:
5429      * <ul>
5430      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5431      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5432      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5433      *     Arabic-Indic digits
5434      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5435      *     Extended Arabic-Indic digits
5436      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5437      *     Devanagari digits
5438      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5439      *     Fullwidth digits
5440      * </ul>
5441      *
5442      * Many other character ranges contain digits as well.
5443      *
5444      * <p><b>Note:</b> This method cannot handle <a
5445      * href="#supplementary"> supplementary characters</a>. To support
5446      * all Unicode characters, including supplementary characters, use
5447      * the {@link #isDigit(int)} method.
5448      *
5449      * @param   ch   the character to be tested.
5450      * @return  {@code true} if the character is a digit;
5451      *          {@code false} otherwise.
5452      * @see     Character#digit(char, int)
5453      * @see     Character#forDigit(int, int)
5454      * @see     Character#getType(char)
5455      */
5456     public static boolean isDigit(char ch) {
5457         return isDigit((int)ch);
5458     }
5459 
5460     /**
5461      * Determines if the specified character (Unicode code point) is a digit.
5462      * <p>
5463      * A character is a digit if its general category type, provided
5464      * by {@link Character#getType(int) getType(codePoint)}, is
5465      * {@code DECIMAL_DIGIT_NUMBER}.
5466      * <p>
5467      * Some Unicode character ranges that contain digits:
5468      * <ul>
5469      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5470      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5471      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5472      *     Arabic-Indic digits
5473      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5474      *     Extended Arabic-Indic digits
5475      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5476      *     Devanagari digits
5477      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5478      *     Fullwidth digits
5479      * </ul>
5480      *
5481      * Many other character ranges contain digits as well.
5482      *
5483      * @param   codePoint the character (Unicode code point) to be tested.
5484      * @return  {@code true} if the character is a digit;
5485      *          {@code false} otherwise.
5486      * @see     Character#forDigit(int, int)
5487      * @see     Character#getType(int)
5488      * @since   1.5
5489      */
5490     public static boolean isDigit(int codePoint) {
5491         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
5492     }
5493 
5494     /**
5495      * Determines if a character is defined in Unicode.
5496      * <p>
5497      * A character is defined if at least one of the following is true:
5498      * <ul>
5499      * <li>It has an entry in the UnicodeData file.
5500      * <li>It has a value in a range defined by the UnicodeData file.
5501      * </ul>
5502      *
5503      * <p><b>Note:</b> This method cannot handle <a
5504      * href="#supplementary"> supplementary characters</a>. To support
5505      * all Unicode characters, including supplementary characters, use
5506      * the {@link #isDefined(int)} method.
5507      *
5508      * @param   ch   the character to be tested
5509      * @return  {@code true} if the character has a defined meaning
5510      *          in Unicode; {@code false} otherwise.
5511      * @see     Character#isDigit(char)
5512      * @see     Character#isLetter(char)
5513      * @see     Character#isLetterOrDigit(char)
5514      * @see     Character#isLowerCase(char)
5515      * @see     Character#isTitleCase(char)
5516      * @see     Character#isUpperCase(char)
5517      * @since   1.0.2
5518      */
5519     public static boolean isDefined(char ch) {
5520         return isDefined((int)ch);
5521     }
5522 
5523     /**
5524      * Determines if a character (Unicode code point) is defined in Unicode.
5525      * <p>
5526      * A character is defined if at least one of the following is true:
5527      * <ul>
5528      * <li>It has an entry in the UnicodeData file.
5529      * <li>It has a value in a range defined by the UnicodeData file.
5530      * </ul>
5531      *
5532      * @param   codePoint the character (Unicode code point) to be tested.
5533      * @return  {@code true} if the character has a defined meaning
5534      *          in Unicode; {@code false} otherwise.
5535      * @see     Character#isDigit(int)
5536      * @see     Character#isLetter(int)
5537      * @see     Character#isLetterOrDigit(int)
5538      * @see     Character#isLowerCase(int)
5539      * @see     Character#isTitleCase(int)
5540      * @see     Character#isUpperCase(int)
5541      * @since   1.5
5542      */
5543     public static boolean isDefined(int codePoint) {
5544         return getType(codePoint) != Character.UNASSIGNED;
5545     }
5546 
5547     /**
5548      * Determines if the specified character is a letter.
5549      * <p>
5550      * A character is considered to be a letter if its general
5551      * category type, provided by {@code Character.getType(ch)},
5552      * is any of the following:
5553      * <ul>
5554      * <li> {@code UPPERCASE_LETTER}
5555      * <li> {@code LOWERCASE_LETTER}
5556      * <li> {@code TITLECASE_LETTER}
5557      * <li> {@code MODIFIER_LETTER}
5558      * <li> {@code OTHER_LETTER}
5559      * </ul>
5560      *
5561      * Not all letters have case. Many characters are
5562      * letters but are neither uppercase nor lowercase nor titlecase.
5563      *
5564      * <p><b>Note:</b> This method cannot handle <a
5565      * href="#supplementary"> supplementary characters</a>. To support
5566      * all Unicode characters, including supplementary characters, use
5567      * the {@link #isLetter(int)} method.
5568      *
5569      * @param   ch   the character to be tested.
5570      * @return  {@code true} if the character is a letter;
5571      *          {@code false} otherwise.
5572      * @see     Character#isDigit(char)
5573      * @see     Character#isJavaIdentifierStart(char)
5574      * @see     Character#isJavaLetter(char)
5575      * @see     Character#isJavaLetterOrDigit(char)
5576      * @see     Character#isLetterOrDigit(char)
5577      * @see     Character#isLowerCase(char)
5578      * @see     Character#isTitleCase(char)
5579      * @see     Character#isUnicodeIdentifierStart(char)
5580      * @see     Character#isUpperCase(char)
5581      */
5582     public static boolean isLetter(char ch) {
5583         return isLetter((int)ch);
5584     }
5585 
5586     /**
5587      * Determines if the specified character (Unicode code point) is a letter.
5588      * <p>
5589      * A character is considered to be a letter if its general
5590      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5591      * is any of the following:
5592      * <ul>
5593      * <li> {@code UPPERCASE_LETTER}
5594      * <li> {@code LOWERCASE_LETTER}
5595      * <li> {@code TITLECASE_LETTER}
5596      * <li> {@code MODIFIER_LETTER}
5597      * <li> {@code OTHER_LETTER}
5598      * </ul>
5599      *
5600      * Not all letters have case. Many characters are
5601      * letters but are neither uppercase nor lowercase nor titlecase.
5602      *
5603      * @param   codePoint the character (Unicode code point) to be tested.
5604      * @return  {@code true} if the character is a letter;
5605      *          {@code false} otherwise.
5606      * @see     Character#isDigit(int)
5607      * @see     Character#isJavaIdentifierStart(int)
5608      * @see     Character#isLetterOrDigit(int)
5609      * @see     Character#isLowerCase(int)
5610      * @see     Character#isTitleCase(int)
5611      * @see     Character#isUnicodeIdentifierStart(int)
5612      * @see     Character#isUpperCase(int)
5613      * @since   1.5
5614      */
5615     public static boolean isLetter(int codePoint) {
5616         return ((((1 << Character.UPPERCASE_LETTER) |
5617             (1 << Character.LOWERCASE_LETTER) |
5618             (1 << Character.TITLECASE_LETTER) |
5619             (1 << Character.MODIFIER_LETTER) |
5620             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
5621             != 0;
5622     }
5623 
5624     /**
5625      * Determines if the specified character is a letter or digit.
5626      * <p>
5627      * A character is considered to be a letter or digit if either
5628      * {@code Character.isLetter(char ch)} or
5629      * {@code Character.isDigit(char ch)} returns
5630      * {@code true} for the character.
5631      *
5632      * <p><b>Note:</b> This method cannot handle <a
5633      * href="#supplementary"> supplementary characters</a>. To support
5634      * all Unicode characters, including supplementary characters, use
5635      * the {@link #isLetterOrDigit(int)} method.
5636      *
5637      * @param   ch   the character to be tested.
5638      * @return  {@code true} if the character is a letter or digit;
5639      *          {@code false} otherwise.
5640      * @see     Character#isDigit(char)
5641      * @see     Character#isJavaIdentifierPart(char)
5642      * @see     Character#isJavaLetter(char)
5643      * @see     Character#isJavaLetterOrDigit(char)
5644      * @see     Character#isLetter(char)
5645      * @see     Character#isUnicodeIdentifierPart(char)
5646      * @since   1.0.2
5647      */
5648     public static boolean isLetterOrDigit(char ch) {
5649         return isLetterOrDigit((int)ch);
5650     }
5651 
5652     /**
5653      * Determines if the specified character (Unicode code point) is a letter or digit.
5654      * <p>
5655      * A character is considered to be a letter or digit if either
5656      * {@link #isLetter(int) isLetter(codePoint)} or
5657      * {@link #isDigit(int) isDigit(codePoint)} returns
5658      * {@code true} for the character.
5659      *
5660      * @param   codePoint the character (Unicode code point) to be tested.
5661      * @return  {@code true} if the character is a letter or digit;
5662      *          {@code false} otherwise.
5663      * @see     Character#isDigit(int)
5664      * @see     Character#isJavaIdentifierPart(int)
5665      * @see     Character#isLetter(int)
5666      * @see     Character#isUnicodeIdentifierPart(int)
5667      * @since   1.5
5668      */
5669     public static boolean isLetterOrDigit(int codePoint) {
5670         return ((((1 << Character.UPPERCASE_LETTER) |
5671             (1 << Character.LOWERCASE_LETTER) |
5672             (1 << Character.TITLECASE_LETTER) |
5673             (1 << Character.MODIFIER_LETTER) |
5674             (1 << Character.OTHER_LETTER) |
5675             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
5676             != 0;
5677     }
5678 
5679     /**
5680      * Determines if the specified character is permissible as the first
5681      * character in a Java identifier.
5682      * <p>
5683      * A character may start a Java identifier if and only if
5684      * one of the following is true:
5685      * <ul>
5686      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5687      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5688      * <li> {@code ch} is a currency symbol (such as {@code '$'})
5689      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5690      * </ul>
5691      *
5692      * @param   ch the character to be tested.
5693      * @return  {@code true} if the character may start a Java
5694      *          identifier; {@code false} otherwise.
5695      * @see     Character#isJavaLetterOrDigit(char)
5696      * @see     Character#isJavaIdentifierStart(char)
5697      * @see     Character#isJavaIdentifierPart(char)
5698      * @see     Character#isLetter(char)
5699      * @see     Character#isLetterOrDigit(char)
5700      * @see     Character#isUnicodeIdentifierStart(char)
     * @since   1.0.2
5702      * @deprecated Replaced by isJavaIdentifierStart(char).
5703      */
5704     @Deprecated
5705     public static boolean isJavaLetter(char ch) {
5706         return isJavaIdentifierStart(ch);
5707     }
5708 
5709     /**
5710      * Determines if the specified character may be part of a Java
5711      * identifier as other than the first character.
5712      * <p>
5713      * A character may be part of a Java identifier if and only if any
5714      * of the following are true:
5715      * <ul>
5716      * <li>  it is a letter
5717      * <li>  it is a currency symbol (such as {@code '$'})
5718      * <li>  it is a connecting punctuation character (such as {@code '_'})
5719      * <li>  it is a digit
5720      * <li>  it is a numeric letter (such as a Roman numeral character)
5721      * <li>  it is a combining mark
5722      * <li>  it is a non-spacing mark
5723      * <li> {@code isIdentifierIgnorable} returns
5724      * {@code true} for the character.
5725      * </ul>
5726      *
5727      * @param   ch the character to be tested.
5728      * @return  {@code true} if the character may be part of a
5729      *          Java identifier; {@code false} otherwise.
5730      * @see     Character#isJavaLetter(char)
5731      * @see     Character#isJavaIdentifierStart(char)
5732      * @see     Character#isJavaIdentifierPart(char)
5733      * @see     Character#isLetter(char)
5734      * @see     Character#isLetterOrDigit(char)
5735      * @see     Character#isUnicodeIdentifierPart(char)
5736      * @see     Character#isIdentifierIgnorable(char)
     * @since   1.0.2
5738      * @deprecated Replaced by isJavaIdentifierPart(char).
5739      */
5740     @Deprecated
5741     public static boolean isJavaLetterOrDigit(char ch) {
5742         return isJavaIdentifierPart(ch);
5743     }
5744 
5745     /**
     * Determines if the specified character (Unicode code point) is alphabetic.
5747      * <p>
5748      * A character is considered to be alphabetic if its general category type,
5749      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
5750      * the following:
5751      * <ul>
5752      * <li> <code>UPPERCASE_LETTER</code>
5753      * <li> <code>LOWERCASE_LETTER</code>
5754      * <li> <code>TITLECASE_LETTER</code>
5755      * <li> <code>MODIFIER_LETTER</code>
5756      * <li> <code>OTHER_LETTER</code>
5757      * <li> <code>LETTER_NUMBER</code>
5758      * </ul>
5759      * or it has contributory property Other_Alphabetic as defined by the
5760      * Unicode Standard.
5761      *
5762      * @param   codePoint the character (Unicode code point) to be tested.
     * @return  <code>true</code> if the character is a Unicode alphabetic
     *          character, <code>false</code> otherwise.
5765      * @since   1.7
5766      */
5767     public static boolean isAlphabetic(int codePoint) {
5768         return (((((1 << Character.UPPERCASE_LETTER) |
5769             (1 << Character.LOWERCASE_LETTER) |
5770             (1 << Character.TITLECASE_LETTER) |
5771             (1 << Character.MODIFIER_LETTER) |
5772             (1 << Character.OTHER_LETTER) |
5773             (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
5774             CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
5775     }
5776 
5777     /**
5778      * Determines if the specified character (Unicode code point) is a CJKV
5779      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
5780      * the Unicode Standard.
5781      *
5782      * @param   codePoint the character (Unicode code point) to be tested.
5783      * @return  <code>true</code> if the character is a Unicode ideograph
5784      *          character, <code>false</code> otherwise.
5785      * @since   1.7
5786      */
5787     public static boolean isIdeographic(int codePoint) {
5788         return CharacterData.of(codePoint).isIdeographic(codePoint);
5789     }
5790 
5791     /**
5792      * Determines if the specified character is
5793      * permissible as the first character in a Java identifier.
5794      * <p>
5795      * A character may start a Java identifier if and only if
5796      * one of the following conditions is true:
5797      * <ul>
5798      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5799      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5800      * <li> {@code ch} is a currency symbol (such as {@code '$'})
5801      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5802      * </ul>
5803      *
5804      * <p><b>Note:</b> This method cannot handle <a
5805      * href="#supplementary"> supplementary characters</a>. To support
5806      * all Unicode characters, including supplementary characters, use
5807      * the {@link #isJavaIdentifierStart(int)} method.
5808      *
5809      * @param   ch the character to be tested.
5810      * @return  {@code true} if the character may start a Java identifier;
5811      *          {@code false} otherwise.
5812      * @see     Character#isJavaIdentifierPart(char)
5813      * @see     Character#isLetter(char)
5814      * @see     Character#isUnicodeIdentifierStart(char)
5815      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5816      * @since   1.1
5817      */
5818     public static boolean isJavaIdentifierStart(char ch) {
5819         return isJavaIdentifierStart((int)ch);
5820     }
5821 
5822     /**
5823      * Determines if the character (Unicode code point) is
5824      * permissible as the first character in a Java identifier.
5825      * <p>
5826      * A character may start a Java identifier if and only if
5827      * one of the following conditions is true:
5828      * <ul>
5829      * <li> {@link #isLetter(int) isLetter(codePoint)}
5830      *      returns {@code true}
5831      * <li> {@link #getType(int) getType(codePoint)}
5832      *      returns {@code LETTER_NUMBER}
5833      * <li> the referenced character is a currency symbol (such as {@code '$'})
5834      * <li> the referenced character is a connecting punctuation character
5835      *      (such as {@code '_'}).
5836      * </ul>
5837      *
5838      * @param   codePoint the character (Unicode code point) to be tested.
5839      * @return  {@code true} if the character may start a Java identifier;
5840      *          {@code false} otherwise.
5841      * @see     Character#isJavaIdentifierPart(int)
5842      * @see     Character#isLetter(int)
5843      * @see     Character#isUnicodeIdentifierStart(int)
5844      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5845      * @since   1.5
5846      */
5847     public static boolean isJavaIdentifierStart(int codePoint) {
5848         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
5849     }
5850 
5851     /**
5852      * Determines if the specified character may be part of a Java
5853      * identifier as other than the first character.
5854      * <p>
5855      * A character may be part of a Java identifier if any of the following
5856      * are true:
5857      * <ul>
5858      * <li>  it is a letter
5859      * <li>  it is a currency symbol (such as {@code '$'})
5860      * <li>  it is a connecting punctuation character (such as {@code '_'})
5861      * <li>  it is a digit
5862      * <li>  it is a numeric letter (such as a Roman numeral character)
5863      * <li>  it is a combining mark
5864      * <li>  it is a non-spacing mark
5865      * <li> {@code isIdentifierIgnorable} returns
5866      * {@code true} for the character
5867      * </ul>
5868      *
5869      * <p><b>Note:</b> This method cannot handle <a
5870      * href="#supplementary"> supplementary characters</a>. To support
5871      * all Unicode characters, including supplementary characters, use
5872      * the {@link #isJavaIdentifierPart(int)} method.
5873      *
5874      * @param   ch      the character to be tested.
5875      * @return {@code true} if the character may be part of a
5876      *          Java identifier; {@code false} otherwise.
5877      * @see     Character#isIdentifierIgnorable(char)
5878      * @see     Character#isJavaIdentifierStart(char)
5879      * @see     Character#isLetterOrDigit(char)
5880      * @see     Character#isUnicodeIdentifierPart(char)
5881      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5882      * @since   1.1
5883      */
5884     public static boolean isJavaIdentifierPart(char ch) {
5885         return isJavaIdentifierPart((int)ch);
5886     }
5887 
5888     /**
5889      * Determines if the character (Unicode code point) may be part of a Java
5890      * identifier as other than the first character.
5891      * <p>
5892      * A character may be part of a Java identifier if any of the following
5893      * are true:
5894      * <ul>
5895      * <li>  it is a letter
5896      * <li>  it is a currency symbol (such as {@code '$'})
5897      * <li>  it is a connecting punctuation character (such as {@code '_'})
5898      * <li>  it is a digit
5899      * <li>  it is a numeric letter (such as a Roman numeral character)
5900      * <li>  it is a combining mark
5901      * <li>  it is a non-spacing mark
5902      * <li> {@link #isIdentifierIgnorable(int)
5903      * isIdentifierIgnorable(codePoint)} returns {@code true} for
5904      * the character
5905      * </ul>
5906      *
5907      * @param   codePoint the character (Unicode code point) to be tested.
5908      * @return {@code true} if the character may be part of a
5909      *          Java identifier; {@code false} otherwise.
5910      * @see     Character#isIdentifierIgnorable(int)
5911      * @see     Character#isJavaIdentifierStart(int)
5912      * @see     Character#isLetterOrDigit(int)
5913      * @see     Character#isUnicodeIdentifierPart(int)
5914      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5915      * @since   1.5
5916      */
5917     public static boolean isJavaIdentifierPart(int codePoint) {
5918         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
5919     }
5920 
5921     /**
5922      * Determines if the specified character is permissible as the
5923      * first character in a Unicode identifier.
5924      * <p>
5925      * A character may start a Unicode identifier if and only if
5926      * one of the following conditions is true:
5927      * <ul>
5928      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5929      * <li> {@link #getType(char) getType(ch)} returns
5930      *      {@code LETTER_NUMBER}.
5931      * </ul>
5932      *
5933      * <p><b>Note:</b> This method cannot handle <a
5934      * href="#supplementary"> supplementary characters</a>. To support
5935      * all Unicode characters, including supplementary characters, use
5936      * the {@link #isUnicodeIdentifierStart(int)} method.
5937      *
5938      * @param   ch      the character to be tested.
5939      * @return  {@code true} if the character may start a Unicode
5940      *          identifier; {@code false} otherwise.
5941      * @see     Character#isJavaIdentifierStart(char)
5942      * @see     Character#isLetter(char)
5943      * @see     Character#isUnicodeIdentifierPart(char)
5944      * @since   1.1
5945      */
5946     public static boolean isUnicodeIdentifierStart(char ch) {
5947         return isUnicodeIdentifierStart((int)ch);
5948     }
5949 
5950     /**
5951      * Determines if the specified character (Unicode code point) is permissible as the
5952      * first character in a Unicode identifier.
5953      * <p>
5954      * A character may start a Unicode identifier if and only if
5955      * one of the following conditions is true:
5956      * <ul>
5957      * <li> {@link #isLetter(int) isLetter(codePoint)}
5958      *      returns {@code true}
5959      * <li> {@link #getType(int) getType(codePoint)}
5960      *      returns {@code LETTER_NUMBER}.
5961      * </ul>
5962      * @param   codePoint the character (Unicode code point) to be tested.
5963      * @return  {@code true} if the character may start a Unicode
5964      *          identifier; {@code false} otherwise.
5965      * @see     Character#isJavaIdentifierStart(int)
5966      * @see     Character#isLetter(int)
5967      * @see     Character#isUnicodeIdentifierPart(int)
5968      * @since   1.5
5969      */
5970     public static boolean isUnicodeIdentifierStart(int codePoint) {
5971         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
5972     }
5973 
5974     /**
5975      * Determines if the specified character may be part of a Unicode
5976      * identifier as other than the first character.
5977      * <p>
5978      * A character may be part of a Unicode identifier if and only if
5979      * one of the following statements is true:
5980      * <ul>
5981      * <li>  it is a letter
5982      * <li>  it is a connecting punctuation character (such as {@code '_'})
5983      * <li>  it is a digit
5984      * <li>  it is a numeric letter (such as a Roman numeral character)
5985      * <li>  it is a combining mark
5986      * <li>  it is a non-spacing mark
5987      * <li> {@code isIdentifierIgnorable} returns
5988      * {@code true} for this character.
5989      * </ul>
5990      *
5991      * <p><b>Note:</b> This method cannot handle <a
5992      * href="#supplementary"> supplementary characters</a>. To support
5993      * all Unicode characters, including supplementary characters, use
5994      * the {@link #isUnicodeIdentifierPart(int)} method.
5995      *
5996      * @param   ch      the character to be tested.
5997      * @return  {@code true} if the character may be part of a
5998      *          Unicode identifier; {@code false} otherwise.
5999      * @see     Character#isIdentifierIgnorable(char)
6000      * @see     Character#isJavaIdentifierPart(char)
6001      * @see     Character#isLetterOrDigit(char)
6002      * @see     Character#isUnicodeIdentifierStart(char)
6003      * @since   1.1
6004      */
6005     public static boolean isUnicodeIdentifierPart(char ch) {
6006         return isUnicodeIdentifierPart((int)ch);
6007     }
6008 
6009     /**
6010      * Determines if the specified character (Unicode code point) may be part of a Unicode
6011      * identifier as other than the first character.
6012      * <p>
6013      * A character may be part of a Unicode identifier if and only if
6014      * one of the following statements is true:
6015      * <ul>
6016      * <li>  it is a letter
6017      * <li>  it is a connecting punctuation character (such as {@code '_'})
6018      * <li>  it is a digit
6019      * <li>  it is a numeric letter (such as a Roman numeral character)
6020      * <li>  it is a combining mark
6021      * <li>  it is a non-spacing mark
6022      * <li> {@code isIdentifierIgnorable} returns
6023      * {@code true} for this character.
6024      * </ul>
6025      * @param   codePoint the character (Unicode code point) to be tested.
6026      * @return  {@code true} if the character may be part of a
6027      *          Unicode identifier; {@code false} otherwise.
6028      * @see     Character#isIdentifierIgnorable(int)
6029      * @see     Character#isJavaIdentifierPart(int)
6030      * @see     Character#isLetterOrDigit(int)
6031      * @see     Character#isUnicodeIdentifierStart(int)
6032      * @since   1.5
6033      */
6034     public static boolean isUnicodeIdentifierPart(int codePoint) {
6035         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
6036     }
6037 
6038     /**
6039      * Determines if the specified character should be regarded as
6040      * an ignorable character in a Java identifier or a Unicode identifier.
6041      * <p>
6042      * The following Unicode characters are ignorable in a Java identifier
6043      * or a Unicode identifier:
6044      * <ul>
6045      * <li>ISO control characters that are not whitespace
6046      * <ul>
6047      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6048      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6049      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6050      * </ul>
6051      *
6052      * <li>all characters that have the {@code FORMAT} general
6053      * category value
6054      * </ul>
6055      *
6056      * <p><b>Note:</b> This method cannot handle <a
6057      * href="#supplementary"> supplementary characters</a>. To support
6058      * all Unicode characters, including supplementary characters, use
6059      * the {@link #isIdentifierIgnorable(int)} method.
6060      *
6061      * @param   ch      the character to be tested.
6062      * @return  {@code true} if the character is an ignorable control
6063      *          character that may be part of a Java or Unicode identifier;
6064      *           {@code false} otherwise.
6065      * @see     Character#isJavaIdentifierPart(char)
6066      * @see     Character#isUnicodeIdentifierPart(char)
6067      * @since   1.1
6068      */
6069     public static boolean isIdentifierIgnorable(char ch) {
6070         return isIdentifierIgnorable((int)ch);
6071     }
6072 
6073     /**
6074      * Determines if the specified character (Unicode code point) should be regarded as
6075      * an ignorable character in a Java identifier or a Unicode identifier.
6076      * <p>
6077      * The following Unicode characters are ignorable in a Java identifier
6078      * or a Unicode identifier:
6079      * <ul>
6080      * <li>ISO control characters that are not whitespace
6081      * <ul>
6082      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6083      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6084      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6085      * </ul>
6086      *
6087      * <li>all characters that have the {@code FORMAT} general
6088      * category value
6089      * </ul>
6090      *
6091      * @param   codePoint the character (Unicode code point) to be tested.
6092      * @return  {@code true} if the character is an ignorable control
6093      *          character that may be part of a Java or Unicode identifier;
6094      *          {@code false} otherwise.
6095      * @see     Character#isJavaIdentifierPart(int)
6096      * @see     Character#isUnicodeIdentifierPart(int)
6097      * @since   1.5
6098      */
6099     public static boolean isIdentifierIgnorable(int codePoint) {
6100         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
6101     }
6102 
6103     /**
6104      * Converts the character argument to lowercase using case
6105      * mapping information from the UnicodeData file.
6106      * <p>
6107      * Note that
6108      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
6109      * does not always return {@code true} for some ranges of
6110      * characters, particularly those that are symbols or ideographs.
6111      *
6112      * <p>In general, {@link String#toLowerCase()} should be used to map
6113      * characters to lowercase. {@code String} case mapping methods
6114      * have several benefits over {@code Character} case mapping methods.
6115      * {@code String} case mapping methods can perform locale-sensitive
6116      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6117      * the {@code Character} case mapping methods cannot.
6118      *
6119      * <p><b>Note:</b> This method cannot handle <a
6120      * href="#supplementary"> supplementary characters</a>. To support
6121      * all Unicode characters, including supplementary characters, use
6122      * the {@link #toLowerCase(int)} method.
6123      *
6124      * @param   ch   the character to be converted.
6125      * @return  the lowercase equivalent of the character, if any;
6126      *          otherwise, the character itself.
6127      * @see     Character#isLowerCase(char)
6128      * @see     String#toLowerCase()
6129      */
6130     public static char toLowerCase(char ch) {
6131         return (char)toLowerCase((int)ch);
6132     }
6133 
6134     /**
6135      * Converts the character (Unicode code point) argument to
6136      * lowercase using case mapping information from the UnicodeData
6137      * file.
6138      *
6139      * <p> Note that
6140      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
6141      * does not always return {@code true} for some ranges of
6142      * characters, particularly those that are symbols or ideographs.
6143      *
6144      * <p>In general, {@link String#toLowerCase()} should be used to map
6145      * characters to lowercase. {@code String} case mapping methods
6146      * have several benefits over {@code Character} case mapping methods.
6147      * {@code String} case mapping methods can perform locale-sensitive
6148      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6149      * the {@code Character} case mapping methods cannot.
6150      *
6151      * @param   codePoint   the character (Unicode code point) to be converted.
6152      * @return  the lowercase equivalent of the character (Unicode code
6153      *          point), if any; otherwise, the character itself.
6154      * @see     Character#isLowerCase(int)
6155      * @see     String#toLowerCase()
6156      *
6157      * @since   1.5
6158      */
6159     public static int toLowerCase(int codePoint) {
6160         return CharacterData.of(codePoint).toLowerCase(codePoint);
6161     }
6162 
6163     /**
6164      * Converts the character argument to uppercase using case mapping
6165      * information from the UnicodeData file.
6166      * <p>
6167      * Note that
6168      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
6169      * does not always return {@code true} for some ranges of
6170      * characters, particularly those that are symbols or ideographs.
6171      *
6172      * <p>In general, {@link String#toUpperCase()} should be used to map
6173      * characters to uppercase. {@code String} case mapping methods
6174      * have several benefits over {@code Character} case mapping methods.
6175      * {@code String} case mapping methods can perform locale-sensitive
6176      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6177      * the {@code Character} case mapping methods cannot.
6178      *
6179      * <p><b>Note:</b> This method cannot handle <a
6180      * href="#supplementary"> supplementary characters</a>. To support
6181      * all Unicode characters, including supplementary characters, use
6182      * the {@link #toUpperCase(int)} method.
6183      *
6184      * @param   ch   the character to be converted.
6185      * @return  the uppercase equivalent of the character, if any;
6186      *          otherwise, the character itself.
6187      * @see     Character#isUpperCase(char)
6188      * @see     String#toUpperCase()
6189      */
6190     public static char toUpperCase(char ch) {
6191         return (char)toUpperCase((int)ch);
6192     }
6193 
6194     /**
6195      * Converts the character (Unicode code point) argument to
6196      * uppercase using case mapping information from the UnicodeData
6197      * file.
6198      *
6199      * <p>Note that
6200      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
6201      * does not always return {@code true} for some ranges of
6202      * characters, particularly those that are symbols or ideographs.
6203      *
6204      * <p>In general, {@link String#toUpperCase()} should be used to map
6205      * characters to uppercase. {@code String} case mapping methods
6206      * have several benefits over {@code Character} case mapping methods.
6207      * {@code String} case mapping methods can perform locale-sensitive
6208      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6209      * the {@code Character} case mapping methods cannot.
6210      *
6211      * @param   codePoint   the character (Unicode code point) to be converted.
6212      * @return  the uppercase equivalent of the character, if any;
6213      *          otherwise, the character itself.
6214      * @see     Character#isUpperCase(int)
6215      * @see     String#toUpperCase()
6216      *
6217      * @since   1.5
6218      */
6219     public static int toUpperCase(int codePoint) {
6220         return CharacterData.of(codePoint).toUpperCase(codePoint);
6221     }
6222 
6223     /**
6224      * Converts the character argument to titlecase using case mapping
6225      * information from the UnicodeData file. If a character has no
6226      * explicit titlecase mapping and is not itself a titlecase char
6227      * according to UnicodeData, then the uppercase mapping is
6228      * returned as an equivalent titlecase mapping. If the
6229      * {@code char} argument is already a titlecase
6230      * {@code char}, the same {@code char} value will be
6231      * returned.
6232      * <p>
6233      * Note that
6234      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
6235      * does not always return {@code true} for some ranges of
6236      * characters.
6237      *
6238      * <p><b>Note:</b> This method cannot handle <a
6239      * href="#supplementary"> supplementary characters</a>. To support
6240      * all Unicode characters, including supplementary characters, use
6241      * the {@link #toTitleCase(int)} method.
6242      *
6243      * @param   ch   the character to be converted.
6244      * @return  the titlecase equivalent of the character, if any;
6245      *          otherwise, the character itself.
6246      * @see     Character#isTitleCase(char)
6247      * @see     Character#toLowerCase(char)
6248      * @see     Character#toUpperCase(char)
6249      * @since   1.0.2
6250      */
6251     public static char toTitleCase(char ch) {
6252         return (char)toTitleCase((int)ch);
6253     }
6254 
6255     /**
6256      * Converts the character (Unicode code point) argument to titlecase using case mapping
6257      * information from the UnicodeData file. If a character has no
6258      * explicit titlecase mapping and is not itself a titlecase char
6259      * according to UnicodeData, then the uppercase mapping is
6260      * returned as an equivalent titlecase mapping. If the
6261      * character argument is already a titlecase
6262      * character, the same character value will be
6263      * returned.
6264      *
6265      * <p>Note that
6266      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
6267      * does not always return {@code true} for some ranges of
6268      * characters.
6269      *
6270      * @param   codePoint   the character (Unicode code point) to be converted.
6271      * @return  the titlecase equivalent of the character, if any;
6272      *          otherwise, the character itself.
6273      * @see     Character#isTitleCase(int)
6274      * @see     Character#toLowerCase(int)
6275      * @see     Character#toUpperCase(int)
6276      * @since   1.5
6277      */
6278     public static int toTitleCase(int codePoint) {
6279         return CharacterData.of(codePoint).toTitleCase(codePoint);
6280     }
6281 
6282     /**
6283      * Returns the numeric value of the character {@code ch} in the
6284      * specified radix.
6285      * <p>
6286      * If the radix is not in the range {@code MIN_RADIX} &le;
6287      * {@code radix} &le; {@code MAX_RADIX} or if the
6288      * value of {@code ch} is not a valid digit in the specified
6289      * radix, {@code -1} is returned. A character is a valid digit
6290      * if at least one of the following is true:
6291      * <ul>
6292      * <li>The method {@code isDigit} is {@code true} of the character
6293      *     and the Unicode decimal digit value of the character (or its
6294      *     single-character decomposition) is less than the specified radix.
6295      *     In this case the decimal digit value is returned.
6296      * <li>The character is one of the uppercase Latin letters
6297      *     {@code 'A'} through {@code 'Z'} and its code is less than
6298      *     {@code radix + 'A' - 10}.
6299      *     In this case, {@code ch - 'A' + 10}
6300      *     is returned.
6301      * <li>The character is one of the lowercase Latin letters
6302      *     {@code 'a'} through {@code 'z'} and its code is less than
6303      *     {@code radix + 'a' - 10}.
6304      *     In this case, {@code ch - 'a' + 10}
6305      *     is returned.
6306      * <li>The character is one of the fullwidth uppercase Latin letters A
6307      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6308      *     and its code is less than
6309      *     {@code radix + '\u005CuFF21' - 10}.
6310      *     In this case, {@code ch - '\u005CuFF21' + 10}
6311      *     is returned.
6312      * <li>The character is one of the fullwidth lowercase Latin letters a
6313      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6314      *     and its code is less than
6315      *     {@code radix + '\u005CuFF41' - 10}.
6316      *     In this case, {@code ch - '\u005CuFF41' + 10}
6317      *     is returned.
6318      * </ul>
6319      *
6320      * <p><b>Note:</b> This method cannot handle <a
6321      * href="#supplementary"> supplementary characters</a>. To support
6322      * all Unicode characters, including supplementary characters, use
6323      * the {@link #digit(int, int)} method.
6324      *
6325      * @param   ch      the character to be converted.
6326      * @param   radix   the radix.
6327      * @return  the numeric value represented by the character in the
6328      *          specified radix.
6329      * @see     Character#forDigit(int, int)
6330      * @see     Character#isDigit(char)
6331      */
6332     public static int digit(char ch, int radix) {
6333         return digit((int)ch, radix);
6334     }
6335 
6336     /**
6337      * Returns the numeric value of the specified character (Unicode
6338      * code point) in the specified radix.
6339      *
6340      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
6341      * {@code radix} &le; {@code MAX_RADIX} or if the
6342      * character is not a valid digit in the specified
6343      * radix, {@code -1} is returned. A character is a valid digit
6344      * if at least one of the following is true:
6345      * <ul>
6346      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
6347      *     and the Unicode decimal digit value of the character (or its
6348      *     single-character decomposition) is less than the specified radix.
6349      *     In this case the decimal digit value is returned.
6350      * <li>The character is one of the uppercase Latin letters
6351      *     {@code 'A'} through {@code 'Z'} and its code is less than
6352      *     {@code radix + 'A' - 10}.
6353      *     In this case, {@code codePoint - 'A' + 10}
6354      *     is returned.
6355      * <li>The character is one of the lowercase Latin letters
6356      *     {@code 'a'} through {@code 'z'} and its code is less than
6357      *     {@code radix + 'a' - 10}.
6358      *     In this case, {@code codePoint - 'a' + 10}
6359      *     is returned.
6360      * <li>The character is one of the fullwidth uppercase Latin letters A
6361      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6362      *     and its code is less than
6363      *     {@code radix + '\u005CuFF21' - 10}.
6364      *     In this case,
6365      *     {@code codePoint - '\u005CuFF21' + 10}
6366      *     is returned.
6367      * <li>The character is one of the fullwidth lowercase Latin letters a
6368      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6369      *     and its code is less than
     *     {@code radix + '\u005CuFF41' - 10}.
6371      *     In this case,
6372      *     {@code codePoint - '\u005CuFF41' + 10}
6373      *     is returned.
6374      * </ul>
6375      *
6376      * @param   codePoint the character (Unicode code point) to be converted.
6377      * @param   radix   the radix.
6378      * @return  the numeric value represented by the character in the
6379      *          specified radix.
6380      * @see     Character#forDigit(int, int)
6381      * @see     Character#isDigit(int)
6382      * @since   1.5
6383      */
6384     public static int digit(int codePoint, int radix) {
6385         return CharacterData.of(codePoint).digit(codePoint, radix);
6386     }
6387 
6388     /**
6389      * Returns the {@code int} value that the specified Unicode
6390      * character represents. For example, the character
     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
     * an {@code int} with a value of 50.
6393      * <p>
6394      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6395      * {@code '\u005Cu005A'}), lowercase
6396      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6397      * full width variant ({@code '\u005CuFF21'} through
6398      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6399      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6400      * through 35. This is independent of the Unicode specification,
6401      * which does not assign numeric values to these {@code char}
6402      * values.
6403      * <p>
6404      * If the character does not have a numeric value, then -1 is returned.
6405      * If the character has a numeric value that cannot be represented as a
6406      * nonnegative integer (for example, a fractional value), then -2
6407      * is returned.
6408      *
6409      * <p><b>Note:</b> This method cannot handle <a
6410      * href="#supplementary"> supplementary characters</a>. To support
6411      * all Unicode characters, including supplementary characters, use
6412      * the {@link #getNumericValue(int)} method.
6413      *
6414      * @param   ch      the character to be converted.
6415      * @return  the numeric value of the character, as a nonnegative {@code int}
6416      *           value; -2 if the character has a numeric value that is not a
6417      *          nonnegative integer; -1 if the character has no numeric value.
6418      * @see     Character#forDigit(int, int)
6419      * @see     Character#isDigit(char)
6420      * @since   1.1
6421      */
6422     public static int getNumericValue(char ch) {
6423         return getNumericValue((int)ch);
6424     }
6425 
6426     /**
6427      * Returns the {@code int} value that the specified
6428      * character (Unicode code point) represents. For example, the character
6429      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6430      * an {@code int} with a value of 50.
6431      * <p>
6432      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6433      * {@code '\u005Cu005A'}), lowercase
6434      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6435      * full width variant ({@code '\u005CuFF21'} through
6436      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6437      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6438      * through 35. This is independent of the Unicode specification,
6439      * which does not assign numeric values to these {@code char}
6440      * values.
6441      * <p>
6442      * If the character does not have a numeric value, then -1 is returned.
6443      * If the character has a numeric value that cannot be represented as a
6444      * nonnegative integer (for example, a fractional value), then -2
6445      * is returned.
6446      *
6447      * @param   codePoint the character (Unicode code point) to be converted.
6448      * @return  the numeric value of the character, as a nonnegative {@code int}
6449      *          value; -2 if the character has a numeric value that is not a
6450      *          nonnegative integer; -1 if the character has no numeric value.
6451      * @see     Character#forDigit(int, int)
6452      * @see     Character#isDigit(int)
6453      * @since   1.5
6454      */
6455     public static int getNumericValue(int codePoint) {
6456         return CharacterData.of(codePoint).getNumericValue(codePoint);
6457     }
6458 
6459     /**
6460      * Determines if the specified character is ISO-LATIN-1 white space.
6461      * This method returns {@code true} for the following five
6462      * characters only:
6463      * <table>
6464      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
6465      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
6466      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
6467      *     <td>{@code NEW LINE}</td></tr>
6468      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
6469      *     <td>{@code FORM FEED}</td></tr>
6470      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
6471      *     <td>{@code CARRIAGE RETURN}</td></tr>
     * <tr><td>{@code ' '}</td>             <td>{@code U+0020}</td>
6473      *     <td>{@code SPACE}</td></tr>
6474      * </table>
6475      *
6476      * @param      ch   the character to be tested.
6477      * @return     {@code true} if the character is ISO-LATIN-1 white
6478      *             space; {@code false} otherwise.
6479      * @see        Character#isSpaceChar(char)
6480      * @see        Character#isWhitespace(char)
6481      * @deprecated Replaced by isWhitespace(char).
6482      */
6483     @Deprecated
6484     public static boolean isSpace(char ch) {
6485         return (ch <= 0x0020) &&
6486             (((((1L << 0x0009) |
6487             (1L << 0x000A) |
6488             (1L << 0x000C) |
6489             (1L << 0x000D) |
6490             (1L << 0x0020)) >> ch) & 1L) != 0);
6491     }
6492 
6493 
6494     /**
6495      * Determines if the specified character is a Unicode space character.
6496      * A character is considered to be a space character if and only if
6497      * it is specified to be a space character by the Unicode Standard. This
6498      * method returns true if the character's general category type is any of
6499      * the following:
6500      * <ul>
6501      * <li> {@code SPACE_SEPARATOR}
6502      * <li> {@code LINE_SEPARATOR}
6503      * <li> {@code PARAGRAPH_SEPARATOR}
6504      * </ul>
6505      *
6506      * <p><b>Note:</b> This method cannot handle <a
6507      * href="#supplementary"> supplementary characters</a>. To support
6508      * all Unicode characters, including supplementary characters, use
6509      * the {@link #isSpaceChar(int)} method.
6510      *
6511      * @param   ch      the character to be tested.
6512      * @return  {@code true} if the character is a space character;
6513      *          {@code false} otherwise.
6514      * @see     Character#isWhitespace(char)
6515      * @since   1.1
6516      */
6517     public static boolean isSpaceChar(char ch) {
6518         return isSpaceChar((int)ch);
6519     }
6520 
6521     /**
6522      * Determines if the specified character (Unicode code point) is a
6523      * Unicode space character.  A character is considered to be a
6524      * space character if and only if it is specified to be a space
6525      * character by the Unicode Standard. This method returns true if
6526      * the character's general category type is any of the following:
6527      *
6528      * <ul>
6529      * <li> {@link #SPACE_SEPARATOR}
6530      * <li> {@link #LINE_SEPARATOR}
6531      * <li> {@link #PARAGRAPH_SEPARATOR}
6532      * </ul>
6533      *
6534      * @param   codePoint the character (Unicode code point) to be tested.
6535      * @return  {@code true} if the character is a space character;
6536      *          {@code false} otherwise.
6537      * @see     Character#isWhitespace(int)
6538      * @since   1.5
6539      */
6540     public static boolean isSpaceChar(int codePoint) {
6541         return ((((1 << Character.SPACE_SEPARATOR) |
6542                   (1 << Character.LINE_SEPARATOR) |
6543                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
6544             != 0;
6545     }
6546 
6547     /**
6548      * Determines if the specified character is white space according to Java.
6549      * A character is a Java whitespace character if and only if it satisfies
6550      * one of the following criteria:
6551      * <ul>
6552      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
6553      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
6554      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6555      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6556      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6557      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6558      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6559      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6560      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6561      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6562      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6563      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6564      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6565      * </ul>
6566      *
6567      * <p><b>Note:</b> This method cannot handle <a
6568      * href="#supplementary"> supplementary characters</a>. To support
6569      * all Unicode characters, including supplementary characters, use
6570      * the {@link #isWhitespace(int)} method.
6571      *
6572      * @param   ch the character to be tested.
6573      * @return  {@code true} if the character is a Java whitespace
6574      *          character; {@code false} otherwise.
6575      * @see     Character#isSpaceChar(char)
6576      * @since   1.1
6577      */
6578     public static boolean isWhitespace(char ch) {
6579         return isWhitespace((int)ch);
6580     }
6581 
6582     /**
6583      * Determines if the specified character (Unicode code point) is
6584      * white space according to Java.  A character is a Java
6585      * whitespace character if and only if it satisfies one of the
6586      * following criteria:
6587      * <ul>
6588      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6589      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6590      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6591      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6592      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6593      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6594      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6595      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6596      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6597      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6598      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6599      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6600      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6601      * </ul>
     *
6604      * @param   codePoint the character (Unicode code point) to be tested.
6605      * @return  {@code true} if the character is a Java whitespace
6606      *          character; {@code false} otherwise.
6607      * @see     Character#isSpaceChar(int)
6608      * @since   1.5
6609      */
6610     public static boolean isWhitespace(int codePoint) {
6611         return CharacterData.of(codePoint).isWhitespace(codePoint);
6612     }
6613 
6614     /**
6615      * Determines if the specified character is an ISO control
6616      * character.  A character is considered to be an ISO control
6617      * character if its code is in the range {@code '\u005Cu0000'}
6618      * through {@code '\u005Cu001F'} or in the range
6619      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6620      *
6621      * <p><b>Note:</b> This method cannot handle <a
6622      * href="#supplementary"> supplementary characters</a>. To support
6623      * all Unicode characters, including supplementary characters, use
6624      * the {@link #isISOControl(int)} method.
6625      *
6626      * @param   ch      the character to be tested.
6627      * @return  {@code true} if the character is an ISO control character;
6628      *          {@code false} otherwise.
6629      *
6630      * @see     Character#isSpaceChar(char)
6631      * @see     Character#isWhitespace(char)
6632      * @since   1.1
6633      */
6634     public static boolean isISOControl(char ch) {
6635         return isISOControl((int)ch);
6636     }
6637 
6638     /**
     * Determines if the specified character (Unicode code point) is an ISO control
6640      * character.  A character is considered to be an ISO control
6641      * character if its code is in the range {@code '\u005Cu0000'}
6642      * through {@code '\u005Cu001F'} or in the range
6643      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6644      *
6645      * @param   codePoint the character (Unicode code point) to be tested.
6646      * @return  {@code true} if the character is an ISO control character;
6647      *          {@code false} otherwise.
6648      * @see     Character#isSpaceChar(int)
6649      * @see     Character#isWhitespace(int)
6650      * @since   1.5
6651      */
6652     public static boolean isISOControl(int codePoint) {
6653         // Optimized form of:
6654         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
6655         //     (codePoint >= 0x7F && codePoint <= 0x9F);
6656         return codePoint <= 0x9F &&
6657             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
6658     }
6659 
6660     /**
6661      * Returns a value indicating a character's general category.
6662      *
6663      * <p><b>Note:</b> This method cannot handle <a
6664      * href="#supplementary"> supplementary characters</a>. To support
6665      * all Unicode characters, including supplementary characters, use
6666      * the {@link #getType(int)} method.
6667      *
6668      * @param   ch      the character to be tested.
6669      * @return  a value of type {@code int} representing the
6670      *          character's general category.
6671      * @see     Character#COMBINING_SPACING_MARK
6672      * @see     Character#CONNECTOR_PUNCTUATION
6673      * @see     Character#CONTROL
6674      * @see     Character#CURRENCY_SYMBOL
6675      * @see     Character#DASH_PUNCTUATION
6676      * @see     Character#DECIMAL_DIGIT_NUMBER
6677      * @see     Character#ENCLOSING_MARK
6678      * @see     Character#END_PUNCTUATION
6679      * @see     Character#FINAL_QUOTE_PUNCTUATION
6680      * @see     Character#FORMAT
6681      * @see     Character#INITIAL_QUOTE_PUNCTUATION
6682      * @see     Character#LETTER_NUMBER
6683      * @see     Character#LINE_SEPARATOR
6684      * @see     Character#LOWERCASE_LETTER
6685      * @see     Character#MATH_SYMBOL
6686      * @see     Character#MODIFIER_LETTER
6687      * @see     Character#MODIFIER_SYMBOL
6688      * @see     Character#NON_SPACING_MARK
6689      * @see     Character#OTHER_LETTER
6690      * @see     Character#OTHER_NUMBER
6691      * @see     Character#OTHER_PUNCTUATION
6692      * @see     Character#OTHER_SYMBOL
6693      * @see     Character#PARAGRAPH_SEPARATOR
6694      * @see     Character#PRIVATE_USE
6695      * @see     Character#SPACE_SEPARATOR
6696      * @see     Character#START_PUNCTUATION
6697      * @see     Character#SURROGATE
6698      * @see     Character#TITLECASE_LETTER
6699      * @see     Character#UNASSIGNED
6700      * @see     Character#UPPERCASE_LETTER
6701      * @since   1.1
6702      */
6703     public static int getType(char ch) {
6704         return getType((int)ch);
6705     }
6706 
6707     /**
6708      * Returns a value indicating a character's general category.
6709      *
6710      * @param   codePoint the character (Unicode code point) to be tested.
6711      * @return  a value of type {@code int} representing the
6712      *          character's general category.
6713      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
6714      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
6715      * @see     Character#CONTROL CONTROL
6716      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
6717      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
6718      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
6719      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
6720      * @see     Character#END_PUNCTUATION END_PUNCTUATION
6721      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
6722      * @see     Character#FORMAT FORMAT
6723      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
6724      * @see     Character#LETTER_NUMBER LETTER_NUMBER
6725      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
6726      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
6727      * @see     Character#MATH_SYMBOL MATH_SYMBOL
6728      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
6729      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
6730      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
6731      * @see     Character#OTHER_LETTER OTHER_LETTER
6732      * @see     Character#OTHER_NUMBER OTHER_NUMBER
6733      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
6734      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
6735      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
6736      * @see     Character#PRIVATE_USE PRIVATE_USE
6737      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
6738      * @see     Character#START_PUNCTUATION START_PUNCTUATION
6739      * @see     Character#SURROGATE SURROGATE
6740      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
6741      * @see     Character#UNASSIGNED UNASSIGNED
6742      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
6743      * @since   1.5
6744      */
6745     public static int getType(int codePoint) {
6746         return CharacterData.of(codePoint).getType(codePoint);
6747     }
6748 
6749     /**
6750      * Determines the character representation for a specific digit in
6751      * the specified radix. If the value of {@code radix} is not a
6752      * valid radix, or the value of {@code digit} is not a valid
6753      * digit in the specified radix, the null character
6754      * ({@code '\u005Cu0000'}) is returned.
6755      * <p>
6756      * The {@code radix} argument is valid if it is greater than or
6757      * equal to {@code MIN_RADIX} and less than or equal to
6758      * {@code MAX_RADIX}. The {@code digit} argument is valid if
6759      * {@code 0 <= digit < radix}.
6760      * <p>
6761      * If the digit is less than 10, then
6762      * {@code '0' + digit} is returned. Otherwise, the value
6763      * {@code 'a' + digit - 10} is returned.
6764      *
6765      * @param   digit   the number to convert to a character.
6766      * @param   radix   the radix.
6767      * @return  the {@code char} representation of the specified digit
6768      *          in the specified radix.
6769      * @see     Character#MIN_RADIX
6770      * @see     Character#MAX_RADIX
6771      * @see     Character#digit(char, int)
6772      */
6773     public static char forDigit(int digit, int radix) {
6774         if ((digit >= radix) || (digit < 0)) {
6775             return '\0';
6776         }
6777         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
6778             return '\0';
6779         }
6780         if (digit < 10) {
6781             return (char)('0' + digit);
6782         }
6783         return (char)('a' - 10 + digit);
6784     }
6785 
6786     /**
6787      * Returns the Unicode directionality property for the given
6788      * character.  Character directionality is used to calculate the
6789      * visual ordering of text. The directionality value of undefined
6790      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
6791      *
6792      * <p><b>Note:</b> This method cannot handle <a
6793      * href="#supplementary"> supplementary characters</a>. To support
6794      * all Unicode characters, including supplementary characters, use
6795      * the {@link #getDirectionality(int)} method.
6796      *
6797      * @param  ch {@code char} for which the directionality property
6798      *            is requested.
6799      * @return the directionality property of the {@code char} value.
6800      *
6801      * @see Character#DIRECTIONALITY_UNDEFINED
6802      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
6803      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
6804      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
6805      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
6806      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
6807      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
6808      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
6809      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
6810      * @see Character#DIRECTIONALITY_NONSPACING_MARK
6811      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
6812      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
6813      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
6814      * @see Character#DIRECTIONALITY_WHITESPACE
6815      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
6816      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
6817      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
6818      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
6819      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
6820      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
6821      * @since 1.4
6822      */
6823     public static byte getDirectionality(char ch) {
6824         return getDirectionality((int)ch);
6825     }
6826 
6827     /**
6828      * Returns the Unicode directionality property for the given
6829      * character (Unicode code point).  Character directionality is
6830      * used to calculate the visual ordering of text. The
     * directionality value of an undefined character is {@link
     * #DIRECTIONALITY_UNDEFINED}.
6833      *
6834      * @param   codePoint the character (Unicode code point) for which
6835      *          the directionality property is requested.
6836      * @return the directionality property of the character.
6837      *
6838      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
6839      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
6840      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
6841      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
6842      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
6843      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
6844      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
6845      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
6846      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
6847      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
6848      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
6849      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
6850      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
6851      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
6852      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
6853      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
6854      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
6855      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
6856      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
6857      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
6858      * @since    1.5
6859      */
6860     public static byte getDirectionality(int codePoint) {
6861         return CharacterData.of(codePoint).getDirectionality(codePoint);
6862     }
6863 
6864     /**
6865      * Determines whether the character is mirrored according to the
6866      * Unicode specification.  Mirrored characters should have their
6867      * glyphs horizontally mirrored when displayed in text that is
6868      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
6869      * PARENTHESIS is semantically defined to be an <i>opening
6870      * parenthesis</i>.  This will appear as a "(" in text that is
6871      * left-to-right but as a ")" in text that is right-to-left.
6872      *
6873      * <p><b>Note:</b> This method cannot handle <a
6874      * href="#supplementary"> supplementary characters</a>. To support
6875      * all Unicode characters, including supplementary characters, use
6876      * the {@link #isMirrored(int)} method.
6877      *
6878      * @param  ch {@code char} for which the mirrored property is requested
6879      * @return {@code true} if the char is mirrored, {@code false}
6880      *         if the {@code char} is not mirrored or is not defined.
6881      * @since 1.4
6882      */
6883     public static boolean isMirrored(char ch) {
6884         return isMirrored((int)ch);
6885     }
6886 
6887     /**
6888      * Determines whether the specified character (Unicode code point)
6889      * is mirrored according to the Unicode specification.  Mirrored
6890      * characters should have their glyphs horizontally mirrored when
6891      * displayed in text that is right-to-left.  For example,
6892      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
6893      * defined to be an <i>opening parenthesis</i>.  This will appear
6894      * as a "(" in text that is left-to-right but as a ")" in text
6895      * that is right-to-left.
6896      *
6897      * @param   codePoint the character (Unicode code point) to be tested.
6898      * @return  {@code true} if the character is mirrored, {@code false}
6899      *          if the character is not mirrored or is not defined.
6900      * @since   1.5
6901      */
6902     public static boolean isMirrored(int codePoint) {
6903         return CharacterData.of(codePoint).isMirrored(codePoint);
6904     }
6905 
6906     /**
6907      * Compares two {@code Character} objects numerically.
6908      *
6909      * @param   anotherCharacter   the {@code Character} to be compared.
6910 
6911      * @return  the value {@code 0} if the argument {@code Character}
6912      *          is equal to this {@code Character}; a value less than
6913      *          {@code 0} if this {@code Character} is numerically less
6914      *          than the {@code Character} argument; and a value greater than
6915      *          {@code 0} if this {@code Character} is numerically greater
6916      *          than the {@code Character} argument (unsigned comparison).
6917      *          Note that this is strictly a numerical comparison; it is not
6918      *          locale-dependent.
6919      * @since   1.2
6920      */
6921     public int compareTo(Character anotherCharacter) {
6922         return compare(this.value, anotherCharacter.value);
6923     }
6924 
6925     /**
6926      * Compares two {@code char} values numerically.
6927      * The value returned is identical to what would be returned by:
6928      * <pre>
6929      *    Character.valueOf(x).compareTo(Character.valueOf(y))
6930      * </pre>
6931      *
6932      * @param  x the first {@code char} to compare
6933      * @param  y the second {@code char} to compare
6934      * @return the value {@code 0} if {@code x == y};
6935      *         a value less than {@code 0} if {@code x < y}; and
6936      *         a value greater than {@code 0} if {@code x > y}
6937      * @since 1.7
6938      */
6939     public static int compare(char x, char y) {
6940         return x - y;
6941     }
6942 
6943     /**
6944      * Converts the character (Unicode code point) argument to uppercase using
6945      * information from the UnicodeData file.
6946      * <p>
6947      *
6948      * @param   codePoint   the character (Unicode code point) to be converted.
6949      * @return  either the uppercase equivalent of the character, if
6950      *          any, or an error flag ({@code Character.ERROR})
6951      *          that indicates that a 1:M {@code char} mapping exists.
6952      * @see     Character#isLowerCase(char)
6953      * @see     Character#isUpperCase(char)
6954      * @see     Character#toLowerCase(char)
6955      * @see     Character#toTitleCase(char)
6956      * @since 1.4
6957      */
6958     static int toUpperCaseEx(int codePoint) {
6959         assert isValidCodePoint(codePoint);
6960         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
6961     }
6962 
6963     /**
6964      * Converts the character (Unicode code point) argument to uppercase using case
6965      * mapping information from the SpecialCasing file in the Unicode
6966      * specification. If a character has no explicit uppercase
6967      * mapping, then the {@code char} itself is returned in the
6968      * {@code char[]}.
6969      *
6970      * @param   codePoint   the character (Unicode code point) to be converted.
6971      * @return a {@code char[]} with the uppercased character.
6972      * @since 1.4
6973      */
6974     static char[] toUpperCaseCharArray(int codePoint) {
6975         // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
6976         assert isBmpCodePoint(codePoint);
6977         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
6978     }
6979 
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
6987 
    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form.  The value is derived from {@link #SIZE} divided by
     * {@link Byte#SIZE}.
     *
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
6995 
6996     /**
6997      * Returns the value obtained by reversing the order of the bytes in the
6998      * specified <tt>char</tt> value.
6999      *
7000      * @return the value obtained by reversing (or, equivalently, swapping)
7001      *     the bytes in the specified <tt>char</tt> value.
7002      * @since 1.5
7003      */
7004     public static char reverseBytes(char ch) {
7005         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
7006     }
7007 
7008     /**
7009      * Returns the Unicode name of the specified character
7010      * {@code codePoint}, or null if the code point is
7011      * {@link #UNASSIGNED unassigned}.
7012      * <p>
7013      * Note: if the specified character is not assigned a name by
7014      * the <i>UnicodeData</i> file (part of the Unicode Character
7015      * Database maintained by the Unicode Consortium), the returned
7016      * name is the same as the result of expression.
7017      *
7018      * <blockquote>{@code
7019      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
7020      *     + " "
7021      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7022      *
7023      * }</blockquote>
7024      *
7025      * @param  codePoint the character (Unicode code point)
7026      *
7027      * @return the Unicode name of the specified character, or null if
7028      *         the code point is unassigned.
7029      *
7030      * @exception IllegalArgumentException if the specified
7031      *            {@code codePoint} is not a valid Unicode
7032      *            code point.
7033      *
7034      * @since 1.7
7035      */
7036     public static String getName(int codePoint) {
7037         if (!isValidCodePoint(codePoint)) {
7038             throw new IllegalArgumentException();
7039         }
7040         String name = CharacterName.get(codePoint);
7041         if (name != null)
7042             return name;
7043         if (getType(codePoint) == UNASSIGNED)
7044             return null;
7045         UnicodeBlock block = UnicodeBlock.of(codePoint);
7046         if (block != null)
7047             return block.toString().replace('_', ' ') + " "
7048                    + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7049         // should never come here
7050         return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7051     }
7052 }